Actual source code: mpinit.c

petsc-3.4.5 2014-06-29

  3: #include <petscsys.h>        /*I  "petscsys.h"   I*/

  5: static MPI_Comm saved_PETSC_COMM_WORLD = 0;
  6: MPI_Comm        PETSC_COMM_LOCAL_WORLD = 0;           /* comm for a single node (local set of processes) */
  7: PetscBool       PetscHMPIWorker        = PETSC_FALSE; /* this is a regular process, nonworker process */
  8: void            * PetscHMPICtx         = 0;

 10: extern PetscErrorCode  PetscHMPIHandle(MPI_Comm);

 12: #if defined(PETSC_HAVE_MPI_COMM_SPAWN)
 15: /*@C
 16:    PetscHMPISpawn - Initialize additional processes to be used as "worker" processes. This is not generally
 17:      called by users. Instead use -hmpi_spawn_size <n> to indicate that n-1 new MPI processes should be
 18:      spawned for each current process.

 20:    Not Collective (could make collective on MPI_COMM_WORLD, generate one huge comm and then split it up)

 22:    Input Parameter:
 23: .  nodesize - size of each compute node that will share processors

 25:    Options Database:
 26: .   -hmpi_spawn_size nodesize

 28:    Notes: This is only supported on systems with an MPI 2 implementation that includes the MPI_Comm_spawn() routine.

 30: $    Comparison of two approaches for HMPI usage (MPI started with N processes)
 31: $
 32: $    -hmpi_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
 33: $                                           and n-1 worker processes (used by PETSc) for each application node.
 34: $                           You MUST launch MPI so that only ONE MPI process is created for each hardware node.
 35: $
 36: $    -hmpi_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
 37: $                            (used by PETSc)
 38: $                           You MUST launch MPI so that n MPI processes are created for each hardware node.
 39: $
 40: $    petscmpiexec -n 2 ./ex1 -hmpi_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
 41: $    petscmpiexec -n 6 ./ex1 -hmpi_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
 42: $       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
 43: $
 44: $      These are intended to be used in conjunction with USER HMPI code. The user will have 1 process per
 45: $   computer (hardware) node (where the computer node has p CPUs); the user's code will use threads to fully
 46: $   utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
 47: $   PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time, so the p CPUs
 48: $   are always working on p tasks, never more than p.
 49: $
 50: $    See PCHMPI for a PETSc preconditioner that can use this functionality
 51: $

 53:    For both PetscHMPISpawn() and PetscHMPIMerge(), PETSC_COMM_WORLD consists of one process per "node", while PETSC_COMM_LOCAL_WORLD
 54:    consists of all the processes in a "node."

 56:    In both cases the user's code runs ONLY on PETSC_COMM_WORLD (which is newly created by this routine).

 58:    Level: developer

 60:    Concepts: HMPI

 62: .seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscHMPIFinalize(), PetscInitialize(), PetscHMPIMerge(), PetscHMPIRun()

 64: @*/
 65: PetscErrorCode  PetscHMPISpawn(PetscMPIInt nodesize)
 66: {

 68:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HMPI functionality is currently broken");
 69: #if defined(broken_functionality_commented_out)
 71:   PetscMPIInt    size;
 72:   MPI_Comm       parent,children;

 75:   MPI_Comm_get_parent(&parent);
 76:   if (parent == MPI_COMM_NULL) {  /* the original processes started by user */
 77:     char programname[PETSC_MAX_PATH_LEN];
 78:     char **argv;

 80:     PetscGetProgramName(programname,PETSC_MAX_PATH_LEN);
 81:     PetscGetArguments(&argv);
 82:     MPI_Comm_spawn(programname,argv,nodesize-1,MPI_INFO_NULL,0,PETSC_COMM_SELF,&children,MPI_ERRCODES_IGNORE);
 83:     PetscFreeArguments(argv);
 84:     MPI_Intercomm_merge(children,0,&PETSC_COMM_LOCAL_WORLD);

 86:     MPI_Comm_size(PETSC_COMM_WORLD,&size);
 87:     PetscInfo2(0,"PETSc HMPI successfully spawned: number of nodes = %d node size = %d\n",size,nodesize);

 89:     saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;
 90:   } else { /* worker nodes that get spawned */
 91:     MPI_Intercomm_merge(parent,1,&PETSC_COMM_LOCAL_WORLD);
 92:     PetscHMPIHandle(PETSC_COMM_LOCAL_WORLD);
 93:     PetscHMPIWorker = PETSC_TRUE; /* so that PetscHMPIFinalize() will not attempt a broadcast from this process */
 94:     PetscEnd();  /* cannot continue into user code */
 95:   }
 96:   return(0);
 97: #endif
 98: }
 99: #endif
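
/*
   A minimal sketch, in plain MPI, of the spawn-and-merge pattern that the disabled body above follows.
   The helper name hmpi_spawn_sketch is illustrative only and is not part of PETSc; the merged
   intracommunicator it returns plays the role of PETSC_COMM_LOCAL_WORLD.
*/
#if defined(PETSC_HAVE_MPI_COMM_SPAWN)
static PetscErrorCode hmpi_spawn_sketch(const char *program,PetscMPIInt nodesize,MPI_Comm *local)
{
  MPI_Comm parent,children;

  MPI_Comm_get_parent(&parent);
  if (parent == MPI_COMM_NULL) {
    /* an original process: spawn nodesize-1 copies of the executable and merge the resulting intercommunicator */
    MPI_Comm_spawn((char*)program,MPI_ARGV_NULL,nodesize-1,MPI_INFO_NULL,0,MPI_COMM_SELF,&children,MPI_ERRCODES_IGNORE);
    MPI_Intercomm_merge(children,0,local);    /* high = 0: the spawning process gets the low ranks */
  } else {
    /* a spawned worker: merge with the parent and then wait for commands (see PetscHMPIHandle() below) */
    MPI_Intercomm_merge(parent,1,local);      /* high = 1: workers get the high ranks */
  }
  return 0;
}
#endif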

103: /*@C
104:    PetscHMPIMerge - Initializes PETSc and MPI to work with HMPI. This is not usually called
105:       by the user. Instead use -hmpi_merge_size <n> to indicate the desired node size of the
106:       merged communicator.

108:    Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set

110:    Input Parameters:
111: +  nodesize - size of each compute node that will share processors
112: .  func - optional function to call on the master nodes
113: -  ctx - context passed to function on master nodes

115:    Options Database:
116: .   -hmpi_merge_size <n>

118:    Level: developer

120: $    Comparison of two approaches for HMPI usage (MPI started with N processes)
121: $
122: $    -hmpi_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
123: $                                           and n-1 worker processes (used by PETSc) for each application node.
124: $                           You MUST launch MPI so that only ONE MPI process is created for each hardware node.
125: $
126: $    -hmpi_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
127: $                            (used by PETSc)
128: $                           You MUST launch MPI so that n MPI processes are created for each hardware node.
129: $
130: $    petscmpiexec -n 2 ./ex1 -hmpi_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
131: $    petscmpiexec -n 6 ./ex1 -hmpi_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
132: $       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
133: $
134: $      These are intended to be used in conjunction with USER HMPI code. The user will have 1 process per
135: $   computer (hardware) node (where the computer node has p CPUs); the user's code will use threads to fully
136: $   utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
137: $   PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time, so the p CPUs
138: $   are always working on p tasks, never more than p.
139: $
140: $    See PCHMPI for a PETSc preconditioner that can use this functionality
141: $

143:    For both PetscHMPISpawn() and PetscHMPIMerge(), PETSC_COMM_WORLD consists of one process per "node", while PETSC_COMM_LOCAL_WORLD
144:    consists of all the processes in a "node."

146:    In both cases the user's code runs ONLY on PETSC_COMM_WORLD (which is newly created by this routine).

148:    Concepts: HMPI

150: .seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscHMPIFinalize(), PetscInitialize(), PetscHMPISpawn(), PetscHMPIRun()

152: @*/
153: PetscErrorCode  PetscHMPIMerge(PetscMPIInt nodesize,PetscErrorCode (*func)(void*),void *ctx)
154: {

156:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HMPI functionality is currently broken");
157: #if defined(broken_functionality_commented_out)
159:   PetscMPIInt    size,rank,*ranks,i;
160:   MPI_Group      group,newgroup;

163:   saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;

165:   MPI_Comm_size(saved_PETSC_COMM_WORLD,&size);
166:   if (size % nodesize) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Total number of processes %d is not divisible by number of processes per node %d",size,nodesize);
167:   MPI_Comm_rank(saved_PETSC_COMM_WORLD,&rank);


170:   /* create two communicators
171:       *) one that contains the first process from each node: 0,nodesize,2*nodesize,...
172:       *) one that contains all processes in a node:  (0,1,2,...,nodesize-1), (nodesize,nodesize+1,...,2*nodesize-1), ...
173:   */
174:   MPI_Comm_group(saved_PETSC_COMM_WORLD,&group);
175:   PetscMalloc((size/nodesize)*sizeof(PetscMPIInt),&ranks);
176:   for (i=0; i<(size/nodesize); i++) ranks[i] = i*nodesize;
177:   MPI_Group_incl(group,size/nodesize,ranks,&newgroup);
178:   PetscFree(ranks);
179:   MPI_Comm_create(saved_PETSC_COMM_WORLD,newgroup,&PETSC_COMM_WORLD);
180:   if (rank % nodesize) PETSC_COMM_WORLD = 0; /* mark invalid processes for easy debugging */
181:   MPI_Group_free(&group);
182:   MPI_Group_free(&newgroup);

184:   MPI_Comm_split(saved_PETSC_COMM_WORLD,rank/nodesize,rank % nodesize,&PETSC_COMM_LOCAL_WORLD);

186:   PetscInfo2(0,"PETSc HMPI successfully started: number of nodes = %d node size = %d\n",size/nodesize,nodesize);
187:   PetscInfo1(0,"PETSc HMPI process %sactive\n",(rank % nodesize) ? "in" : "");

189:   PetscHMPICtx = ctx;
190:   /*
191:      All processes not involved in the user application code wait here
192:   */
193:   if (!PETSC_COMM_WORLD) {
194:     PetscHMPIHandle(PETSC_COMM_LOCAL_WORLD);
195:     PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;
196:     PetscHMPIWorker  = PETSC_TRUE; /* so that PetscHMPIFinalize() will not attempt a broadcast from this process */
197:     PetscInfo(0,"PETSc HMPI inactive process becoming active");
198:   } else if (func) {
199:     (*func)(ctx);
200:   }
201:   return(0);
202: #endif
203: }
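
/*
   A minimal sketch, in plain MPI, of the two communicators the disabled body above builds from a
   world whose size is a multiple of nodesize: one containing the first process of every node (the
   role of the new PETSC_COMM_WORLD) and one per node containing all of that node's processes (the
   role of PETSC_COMM_LOCAL_WORLD). The helper name hmpi_split_sketch is illustrative only and is
   not part of PETSc.
*/
static PetscErrorCode hmpi_split_sketch(MPI_Comm world,PetscMPIInt nodesize,MPI_Comm *masters,MPI_Comm *node)
{
  PetscMPIInt size,rank,i,*ranks;
  MPI_Group   group,newgroup;

  MPI_Comm_size(world,&size);
  MPI_Comm_rank(world,&rank);

  /* communicator of the first process of each node: ranks 0, nodesize, 2*nodesize, ... */
  MPI_Comm_group(world,&group);
  PetscMalloc((size/nodesize)*sizeof(PetscMPIInt),&ranks);
  for (i=0; i<size/nodesize; i++) ranks[i] = i*nodesize;
  MPI_Group_incl(group,size/nodesize,ranks,&newgroup);
  PetscFree(ranks);
  MPI_Comm_create(world,newgroup,masters);          /* MPI_COMM_NULL on every non-master rank */
  MPI_Group_free(&group);
  MPI_Group_free(&newgroup);

  /* communicator of all processes sharing a node: color = node index, key = rank within the node */
  MPI_Comm_split(world,rank/nodesize,rank%nodesize,node);
  return 0;
}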

207: /*@C
208:    PetscHMPIFinalize - Finalizes PETSc and MPI to work with HMPI. Called by PetscFinalize(); it should not
209:        be called by the user.

211:    Collective on the entire system

213:    Level: developer

215: .seealso: PetscFinalize(), PetscGetArgs(), PetscHMPIMerge(), PetscHMPIRun()

217: @*/
218: PetscErrorCode  PetscHMPIFinalize(void)
219: {
220:   PetscErrorCode ierr = 0;
221:   PetscInt       command = 3;

224:   if (!PetscHMPIWorker && PETSC_COMM_LOCAL_WORLD) {
225:     MPI_Bcast(&command,1,MPIU_INT,0,PETSC_COMM_LOCAL_WORLD); /* broadcast to my worker group to end program */

227:     PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;

229:     PetscInfo(0,"PETSc HMPI active process ending PetscHMPIMerge()");
230:   }
231:   PetscFunctionReturn(ierr);
232: }

234: static PetscInt numberobjects = 0;
235: static void     *objects[100];

239: /*@C
240:    PetscHMPIHandle - Receives commands from the master node and processes them

242:    Collective on MPI_Comm

244:    Input Parameter:
245: .   comm - Must be PETSC_COMM_LOCAL_WORLD

247:    Level: developer

249:    Notes: This is usually handled automatically; you likely do not need to call it directly.

251:    Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

253: .seealso: PetscHMPIMerge(), PetscHMPIRun(), PetscHMPIMalloc()

255: @*/
256: PetscErrorCode  PetscHMPIHandle(MPI_Comm comm)
257: {
259:   PetscInt       command       = 0; /* dummy value so MPI-Uni doesn't think it is not set*/
260:   PetscBool      exitwhileloop = PETSC_FALSE;

263:   while (!exitwhileloop) {
264:     MPI_Bcast(&command,1,MPIU_INT,0,comm);
265:     switch (command) {
266:     case 0: { /* allocate some memory on this worker process */
267:       size_t n = 0;   /* dummy value so MPI-Uni doesn't think it is not set*/
268:       void   *ptr;
269:       MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);
270:       /* cannot use PetscNew() because it requires a struct argument */
271:       PetscMalloc(n,&ptr);
272:       PetscMemzero(ptr,n);

274:       objects[numberobjects++] = ptr;
275:       break;
276:     }
277:     case 1: {  /* free some memory on this worker process */
278:       PetscInt i;
279:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
280:       PetscFree(objects[i]);
281:       break;
282:     }
283:     case 2: {  /* run a function on this worker process */
284:       PetscInt       i;
285:       PetscErrorCode (*f)(MPI_Comm,void*);
286:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
287:       MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
288:       (*f)(comm,objects[i]);
289:       break;
290:     }
291:     case 4: {  /* run a function on this worker process with provided context */
292:       PetscInt       i;
293:       PetscErrorCode (*f)(MPI_Comm,void*,void*);
294:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
295:       MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
296:       (*f)(comm,PetscHMPICtx,objects[i]);
297:       break;
298:     }
299:     case 3: {
300:       exitwhileloop = PETSC_TRUE;
301:       break;
302:     }
303:     default:
304:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unknown HMPI command %D",command);
305:     }
306:   }
307:   return(0);
308: }
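
/*
   Each case above is driven by a matching sequence of MPI_Bcast() calls issued by rank 0 of
   PETSC_COMM_LOCAL_WORLD; see PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRun() and
   PetscHMPIRunCtx() below and PetscHMPIFinalize() above. A sketch of the pairing for command 0
   (allocate), with rank 0 acting as master:

      rank 0 (in PetscHMPIMalloc())               ranks > 0 (in the loop above, case 0)
      -----------------------------               -------------------------------------
      command = 0;
      MPI_Bcast(&command,1,MPIU_INT,0,comm);      MPI_Bcast(&command,1,MPIU_INT,0,comm);
      MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);         MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);
      PetscMalloc(n,ptr); PetscMemzero(*ptr,n);   PetscMalloc(n,&ptr); PetscMemzero(ptr,n);
      objects[numberobjects++] = *ptr;            objects[numberobjects++] = ptr;

   Command 3, broadcast by PetscHMPIFinalize(), simply ends the loop.
*/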

312: /*@C
313:    PetscHMPIMalloc - Creates a "C struct" on all processes of an HMPI communicator

315:    Collective on MPI_Comm

317:    Input Parameters:
318: +   comm - Must be PETSC_COMM_LOCAL_WORLD
319: -   n  - amount of memory requested

321:    Level: developer

323:    Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

325: .seealso: PetscHMPIMerge(), PetscHMPIRun(), PetscHMPIFree()

327: @*/
328: PetscErrorCode  PetscHMPIMalloc(MPI_Comm comm,size_t n,void **ptr)
329: {
331:   PetscInt       command = 0;

334:   if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

336:   MPI_Bcast(&command,1,MPIU_INT,0,comm);
337:   MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);

339:   /* cannot use PetscNew() because it requires a struct argument */
340:   PetscMalloc(n,ptr);
341:   PetscMemzero(*ptr,n);

343:   objects[numberobjects++] = *ptr;
344:   return(0);
345: }

349: /*@C
350:    PetscHMPIFree - Frees a "C struct" on all processes of an HMPI communicator

352:    Collective on MPI_Comm

354:    Input Parameters:
355: +   comm - Must be PETSC_COMM_LOCAL_WORLD
356: -   ptr - pointer to data to be freed, must have been obtained with PetscHMPIMalloc()

358:    Level: developer

360:   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

362: .seealso: PetscHMPIMerge(), PetscHMPIMalloc()

364: @*/
365: PetscErrorCode  PetscHMPIFree(MPI_Comm comm,void *ptr)
366: {
368:   PetscInt       command = 1,i;

371:   if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

373:   MPI_Bcast(&command,1,MPIU_INT,0,comm);
374:   for (i=0; i<numberobjects; i++) {
375:     if (objects[i] == ptr) {
376:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
377:       PetscFree(objects[i]);
378:       return(0);
379:     }
380:   }
381:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
382:   PetscFunctionReturn(ierr);
383: }

387: /*@C
388:    PetscHMPIRun - runs a function on all the processes of a node

390:    Collective on MPI_Comm

392:    Input Parameters:
393: +   comm - communicator to run function on, must be PETSC_COMM_LOCAL_WORLD
394: .   f - function to run
395: -   ptr - pointer to data to pass to function; must be obtained with PetscHMPIMalloc()

397:    Level: developer

399:    Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

401: .seealso: PetscHMPIMerge(), PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRunCtx()

403: @*/
404: PetscErrorCode  PetscHMPIRun(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void*),void *ptr)
405: {
407:   PetscInt       command = 2,i;

410:   if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

412:   MPI_Bcast(&command,1,MPIU_INT,0,comm);
413:   for (i=0; i<numberobjects; i++) {
414:     if (objects[i] == ptr) {
415:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
416:       MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
417:       (*f)(comm,ptr);
418:       return(0);
419:     }
420:   }
421:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
422:   PetscFunctionReturn(ierr);
423: }
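
/*
   A minimal usage sketch for the process that runs the application code (the master of a node),
   tying PetscHMPIMalloc(), PetscHMPIRun() and PetscHMPIFree() together. The type and function
   names below are illustrative only and are not part of PETSc; in this release the routines
   return PETSC_ERR_SUP, so the sketch shows intended usage only.
*/
typedef struct { PetscInt n; } hmpi_sketch_struct;

static PetscErrorCode hmpi_sketch_work(MPI_Comm comm,void *ptr)
{
  hmpi_sketch_struct *s = (hmpi_sketch_struct*)ptr;
  s->n = 42;      /* executes on every process of the node, each with its own zeroed copy */
  return 0;
}

static PetscErrorCode hmpi_sketch_driver(void)
{
  hmpi_sketch_struct *s;

  PetscHMPIMalloc(PETSC_COMM_LOCAL_WORLD,sizeof(hmpi_sketch_struct),(void**)&s); /* allocate and zero on all node processes */
  PetscHMPIRun(PETSC_COMM_LOCAL_WORLD,hmpi_sketch_work,s);                       /* run hmpi_sketch_work() on all node processes */
  PetscHMPIFree(PETSC_COMM_LOCAL_WORLD,s);                                       /* free on all node processes */
  return 0;
}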

427: /*@C
428:    PetscHMPIRunCtx - runs a function on all the processes of a node

430:    Collective on MPI_Comm

432:    Input Parameters:
433: +   comm - communicator to run function on, must be PETSC_COMM_LOCAL_WORLD
434: .   f - function to run
435: -   ptr - pointer to data to pass to function; must be obtained with PetscHMPIMalloc()

437:    Notes: This is like PetscHMPIRun() except that it also passes the context provided to PetscHMPIMerge()
438:    Level: developer

440:    Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

442: .seealso: PetscHMPIMerge(), PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRun()

444: @*/
445: PetscErrorCode  PetscHMPIRunCtx(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void*,void*),void *ptr)
446: {
448:   PetscInt       command = 4,i;

451:   if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

453:   MPI_Bcast(&command,1,MPIU_INT,0,comm);
454:   for (i=0; i<numberobjects; i++) {
455:     if (objects[i] == ptr) {
456:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
457:       MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
458:       (*f)(comm,PetscHMPICtx,ptr);
459:       return(0);
460:     }
461:   }
462:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
463:   PetscFunctionReturn(ierr);
464: }
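
/*
   A function passed to PetscHMPIRunCtx() additionally receives, as its second argument, the ctx
   pointer supplied to PetscHMPIMerge(). A minimal sketch of such a function (illustrative only,
   not part of PETSc):

      static PetscErrorCode hmpi_sketch_work_ctx(MPI_Comm comm,void *mergectx,void *ptr)
      {
        ... use mergectx (the PetscHMPIMerge() context) and ptr (obtained with PetscHMPIMalloc()) ...
        return 0;
      }

      PetscHMPIRunCtx(PETSC_COMM_LOCAL_WORLD,hmpi_sketch_work_ctx,ptr);
*/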