Actual source code: sf.c

petsc-3.3-p7 2013-05-11
  1: #include <petsc-private/sfimpl.h>
  2: #include <petscctable.h>

  4: #if defined(PETSC_USE_DEBUG)
  5: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
  6:     if (PetscUnlikely(!(sf)->graphset))                              \
  7:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  8:   } while (0)
  9: #else
 10: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 11: #endif

 13: const char *const PetscSFSynchronizationTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFSynchronizationType","PETSCSF_SYNCHRONIZATION_",0};

 15: #if !defined(PETSC_HAVE_MPI_WIN_CREATE)
 16: #define MPI_WIN_NULL ((MPI_Win)0)
 17: #define MPI_REPLACE 0
 18: #define MPI_Win_create(base,size,disp_unit,info,comm,win) 1;SETERRQ(comm,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 19: #define MPI_Win_free(win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 20: #define MPI_Win_start(group,assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 21: #define MPI_Win_post(group,assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 22: #define MPI_Win_complete(win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 23: #define MPI_Win_wait(win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 24: #define MPI_Win_lock(lock_type,rank,assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 25: #define MPI_Win_unlock(rank,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 26: #define MPI_Win_fence(assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 27: #define MPI_Get(origin_addr,origin_count,origin_datatype,target_rank,target_displ,target_count,target_datatype,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 28: #define MPI_Accumulate(origin_addr,origin_count,origin_datatype,target_rank,target_displ,target_count,target_datatype,op,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 29: /* Independent of MPI_Win, but also not in MPI-1 */
 30: #define MPI_Type_get_envelope(datatype,num_ints,num_addrs,num_dtypes,combiner) (*(num_ints)=0,*(num_addrs)=0,*(num_dtypes)=0,*(combiner)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 31: #define MPI_Type_get_contents(datatype,num_ints,num_addrs,num_dtypes,ints,addrs,dtypes) (*(ints)=0,*(addrs)=0,*(dtypes)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 32: #define MPI_Type_dup(datatype,newtype) (*(newtype)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 33: #define MPI_Type_create_indexed_block(count,blocklength,displs,oldtype,newtype) (*(newtype)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 34: #define MPI_Type_get_true_extent(type,lb,bytes) (*(lb)=0,*(bytes)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 35: #ifndef PETSC_HAVE_MPIUNI
 36: #define MPI_Type_get_extent(type,lb,bytes) (*(lb)=0,*(bytes)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
 37: #endif
 38: #define MPI_COMBINER_DUP   0
 39: #define MPI_MODE_NOPUT     0
 40: #define MPI_MODE_NOPRECEDE 0
 41: #define MPI_MODE_NOSUCCEED 0
 42: #define MPI_MODE_NOSTORE   0
 43: #endif

 47: /*@C
 48:    PetscSFCreate - create a star forest communication context

 50:    Not Collective

 52:    Input Arguments:
 53: .  comm - communicator on which the star forest will operate

 55:    Output Arguments:
 56: .  sf - new star forest context

 58:    Level: intermediate

 60: .seealso: PetscSFSetGraph(), PetscSFDestroy()
 61: @*/
 62: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 63: {
 65:   PetscSF        b;

 69: #ifndef PETSC_USE_DYNAMIC_LIBRARIES
 70:   PetscSFInitializePackage(PETSC_NULL);
 71: #endif

 73:   PetscHeaderCreate(b,_p_PetscSF,struct _PetscSFOps,PETSCSF_CLASSID,-1,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
 74:   b->nroots    = -1;
 75:   b->nleaves   = -1;
 76:   b->nranks    = -1;
 77:   b->sync      = PETSCSF_SYNCHRONIZATION_FENCE;
 78:   b->rankorder = PETSC_TRUE;
 79:   b->ingroup   = MPI_GROUP_NULL;
 80:   b->outgroup  = MPI_GROUP_NULL;
 81:   b->graphset  = PETSC_FALSE;
 82:   *sf = b;
 83:   return(0);
 84: }
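/*
   Editorial usage sketch (not part of sf.c): the basic lifecycle of a PetscSF object. The
   communicator chosen here is an assumption for illustration; a graph would be attached with
   PetscSFSetGraph(), shown further below.

     PetscSF        sf;
     PetscErrorCode ierr;

     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/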

 88: /*@C
 89:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

 91:    Collective

 93:    Input Arguments:
 94: .  sf - star forest

 96:    Level: advanced

 98: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
 99: @*/
100: PetscErrorCode PetscSFReset(PetscSF sf)
101: {
103:   PetscSFDataLink link,next;
104:   PetscSFWinLink  wlink,wnext;
105:   PetscInt i;

109:   sf->mine = PETSC_NULL;
110:   PetscFree(sf->mine_alloc);
111:   sf->remote = PETSC_NULL;
112:   PetscFree(sf->remote_alloc);
113:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
114:   PetscFree(sf->degree);
115:   for (link=sf->link; link; link=next) {
116:     next = link->next;
117:     MPI_Type_free(&link->unit);
118:     for (i=0; i<sf->nranks; i++) {
119:       MPI_Type_free(&link->mine[i]);
120:       MPI_Type_free(&link->remote[i]);
121:     }
122:     PetscFree2(link->mine,link->remote);
123:     PetscFree(link);
124:   }
125:   sf->link = PETSC_NULL;
126:   for (wlink=sf->wins; wlink; wlink=wnext) {
127:     wnext = wlink->next;
128:     if (wlink->inuse) SETERRQ1(((PetscObject)sf)->comm,PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
129:     MPI_Win_free(&wlink->win);
130:     PetscFree(wlink);
131:   }
132:   sf->wins = PETSC_NULL;
133:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
134:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
135:   PetscSFDestroy(&sf->multi);
136:   sf->graphset = PETSC_FALSE;
137:   return(0);
138: }

142: /*@C
143:    PetscSFDestroy - destroy star forest

145:    Collective

147:    Input Arguments:
148: .  sf - address of star forest

150:    Level: intermediate

152: .seealso: PetscSFCreate(), PetscSFReset()
153: @*/
154: PetscErrorCode PetscSFDestroy(PetscSF *sf)
155: {

159:   if (!*sf) return(0);
161:   if (--((PetscObject)(*sf))->refct > 0) {*sf = 0; return(0);}
162:   PetscSFReset(*sf);
163:   PetscHeaderDestroy(sf);
164:   return(0);
165: }

169: /*@C
170:    PetscSFSetFromOptions - set PetscSF options using the options database

172:    Logically Collective

174:    Input Arguments:
175: .  sf - star forest

177:    Options Database Keys:
178: .  -sf_synchronization - synchronization type used by PetscSF

180:    Level: intermediate

182: .keywords: PetscSF, set, from, options, database

184: .seealso: PetscSFSetSynchronizationType()
185: @*/
186: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
187: {

192:   PetscObjectOptionsBegin((PetscObject)sf);
193:   PetscOptionsEnum("-sf_synchronization","synchronization type to use for PetscSF communication","PetscSFSetSynchronizationType",PetscSFSynchronizationTypes,(PetscEnum)sf->sync,(PetscEnum*)&sf->sync,PETSC_NULL);
194:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,PETSC_NULL);
195:   PetscOptionsEnd();
196:   return(0);
197: }
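/*
   Editorial note: the two options registered above can be given on the command line. For an
   application binary named (hypothetically) ./myapp,

     ./myapp -sf_synchronization active -sf_rank_order false

   selects PETSCSF_SYNCHRONIZATION_ACTIVE and disables rank-ordered multi-points.
*/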

201: /*@C
202:    PetscSFSetSynchronizationType - set synchronization type for PetscSF communication

204:    Logically Collective

206:    Input Arguments:
207: +  sf - star forest for communication
208: -  sync - synchronization type

210:    Options Database Key:
211: .  -sf_synchronization <sync> - sets the synchronization type

213:    Level: intermediate

215: .seealso: PetscSFSetFromOptions()
216: @*/
217: PetscErrorCode PetscSFSetSynchronizationType(PetscSF sf,PetscSFSynchronizationType sync)
218: {

223:   sf->sync = sync;
224:   return(0);
225: }

229: /*@C
230:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

232:    Logically Collective

234:    Input Arguments:
235: +  sf - star forest
236: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

238:    Level: advanced

240: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
241: @*/
242: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
243: {

248:   if (sf->multi) SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
249:   sf->rankorder = flg;
250:   return(0);
251: }

255: /*@C
256:    PetscSFSetGraph - Set a parallel star forest

258:    Collective

260:    Input Arguments:
261: +  sf - star forest
262: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
263: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
264: .  ilocal - locations of leaves in leafdata buffers, pass PETSC_NULL for contiguous storage
265: .  localmode - copy mode for ilocal
266: .  iremote - remote locations of root vertices for each leaf on the current process
267: -  remotemode - copy mode for iremote

269:    Level: intermediate

271: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
272: @*/
273: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
274: {
276:   PetscTable table;
277:   PetscTablePosition pos;
278:   PetscMPIInt size;
279:   PetscInt i,*rcount,*ranks;

285:   if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots);
286:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
287:   PetscSFReset(sf);
288:   sf->nroots = nroots;
289:   sf->nleaves = nleaves;
290:   if (ilocal) {
291:     switch (localmode) {
292:     case PETSC_COPY_VALUES:
293:       PetscMalloc(nleaves*sizeof(*sf->mine),&sf->mine_alloc);
294:       sf->mine = sf->mine_alloc;
295:       PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));
296:       break;
297:     case PETSC_OWN_POINTER:
298:       sf->mine_alloc = (PetscInt*)ilocal;
299:       sf->mine = sf->mine_alloc;
300:       break;
301:     case PETSC_USE_POINTER:
302:       sf->mine = (PetscInt*)ilocal;
303:       break;
304:     default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
305:     }
306:   }
307:   switch (remotemode) {
308:   case PETSC_COPY_VALUES:
309:     PetscMalloc(nleaves*sizeof(*sf->remote),&sf->remote_alloc);
310:     sf->remote = sf->remote_alloc;
311:     PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));
312:     break;
313:   case PETSC_OWN_POINTER:
314:     sf->remote_alloc = (PetscSFNode*)iremote;
315:     sf->remote = sf->remote_alloc;
316:     break;
317:   case PETSC_USE_POINTER:
318:     sf->remote = (PetscSFNode*)iremote;
319:     break;
320:   default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
321:   }

323:   MPI_Comm_size(((PetscObject)sf)->comm,&size);
324:   PetscTableCreate(10,size,&table);
325:   for (i=0; i<nleaves; i++) {
326:     /* Log 1-based rank */
327:     PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);
328:   }
329:   PetscTableGetCount(table,&sf->nranks);
330:   PetscMalloc4(sf->nranks,PetscInt,&sf->ranks,sf->nranks+1,PetscInt,&sf->roffset,nleaves,PetscMPIInt,&sf->rmine,nleaves,PetscMPIInt,&sf->rremote);
331:   PetscMalloc2(sf->nranks,PetscInt,&rcount,sf->nranks,PetscInt,&ranks);
332:   PetscTableGetHeadPosition(table,&pos);
333:   for (i=0; i<sf->nranks; i++) {
334:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
335:     ranks[i]--;             /* Convert back to 0-based */
336:   }
337:   PetscTableDestroy(&table);
338:   PetscSortIntWithArray(sf->nranks,ranks,rcount);
339:   sf->roffset[0] = 0;
340:   for (i=0; i<sf->nranks; i++) {
341:     sf->ranks[i] = PetscMPIIntCast(ranks[i]);
342:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
343:     rcount[i] = 0;
344:   }
345:   for (i=0; i<nleaves; i++) {
346:     PetscInt lo,hi,irank;
347:     /* Search for index of iremote[i].rank in sf->ranks */
348:     lo = 0; hi = sf->nranks;
349:     while (hi - lo > 1) {
350:       PetscInt mid = lo + (hi - lo)/2;
351:       if (iremote[i].rank < sf->ranks[mid]) hi = mid;
352:       else                                  lo = mid;
353:     }
354:     if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo;
355:     else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank);
356:     sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i;
357:     sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index;
358:     rcount[irank]++;
359:   }
360:   PetscFree2(rcount,ranks);
361: #if !defined(PETSC_USE_64BIT_INDICES)
362:   if (nroots == PETSC_DETERMINE) {
363:     /* Jed, if you have a better way to do this, put it in */
364:     PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0;

366:     /* All-to-all to determine the number of leaf indices coming from each process (this could also be done using a Scan and asynchronous messages) */
367:     PetscMalloc4(size,PetscInt,&numRankLeaves,size+1,PetscInt,&leafOff,size,PetscInt,&numRankRoots,size+1,PetscInt,&rootOff);
368:     PetscMemzero(numRankLeaves, size * sizeof(PetscInt));
369:     for(i = 0; i < nleaves; ++i) {
370:       ++numRankLeaves[iremote[i].rank];
371:     }
372:     MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, ((PetscObject) sf)->comm);
373:     /* Could set nroots to this maximum */
374:     for(i = 0; i < size; ++i) {
375:       maxRoots += numRankRoots[i];
376:     }
377:     /* Gather all indices */
378:     PetscMalloc2(nleaves,PetscInt,&leafIndices,maxRoots,PetscInt,&rootIndices);
379:     leafOff[0] = 0;
380:     for(i = 0; i < size; ++i) {
381:       leafOff[i+1] = leafOff[i] + numRankLeaves[i];
382:     }
383:     for(i = 0; i < nleaves; ++i) {
384:       leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index;
385:     }
386:     leafOff[0] = 0;
387:     for(i = 0; i < size; ++i) {
388:       leafOff[i+1] = leafOff[i] + numRankLeaves[i];
389:     }
390:     rootOff[0] = 0;
391:     for(i = 0; i < size; ++i) {
392:       rootOff[i+1] = rootOff[i] + numRankRoots[i];
393:     }
394:     MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, ((PetscObject) sf)->comm);
395:     /* Sort and reduce */
396:     PetscSortRemoveDupsInt(&maxRoots, rootIndices);
397:     PetscFree2(leafIndices,rootIndices);
398:     PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);
399:     sf->nroots = maxRoots;
400:   }
401: #endif

403:   sf->graphset = PETSC_TRUE;
404:   return(0);
405: }
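/*
   Editorial sketch (not part of sf.c): a complete, minimal program in which every process owns
   one root and attaches one leaf to the root owned by the next process, using contiguous leaf
   storage (ilocal = PETSC_NULL). All names here are local to the example.

     #include <petscsf.h>

     int main(int argc,char **argv)
     {
       PetscErrorCode ierr;
       PetscMPIInt    rank,size;
       PetscSF        sf;
       PetscSFNode    remote;

       PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);
       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
       remote.rank  = (rank+1)%size;
       remote.index = 0;
       ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
       ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
       ierr = PetscSFSetGraph(sf,1,1,PETSC_NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
       ierr = PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
       PetscFinalize();
       return 0;
     }
*/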

409: /*@C
410:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

412:    Collective

414:    Input Arguments:
415: .  sf - star forest to invert

417:    Output Arguments:
418: .  isf - inverse of sf

420:    Level: advanced

422:    Notes:
423:    All roots must have degree 1.

425:    The local space may be a permutation, but cannot be sparse.

427: .seealso: PetscSFSetGraph()
428: @*/
429: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
430: {
432:   PetscMPIInt rank;
433:   PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
434:   const PetscInt *ilocal;
435:   PetscSFNode *roots,*leaves;

438:   MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
439:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,PETSC_NULL);
440:   for (i=0,maxlocal=0; i<nleaves; i++) maxlocal = PetscMax(maxlocal,(ilocal?ilocal[i]:i)+1);
441:   PetscMalloc2(nroots,PetscSFNode,&roots,nleaves,PetscSFNode,&leaves);
442:   for (i=0; i<nleaves; i++) {
443:     leaves[i].rank = rank;
444:     leaves[i].index = i;
445:   }
446:   for (i=0;i <nroots; i++) {
447:     roots[i].rank = -1;
448:     roots[i].index = -1;
449:   }
450:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
451:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);

453:   /* Check whether our leaves are sparse */
454:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
455:   if (count == nroots) newilocal = PETSC_NULL;
456:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
457:     PetscMalloc(count*sizeof(PetscInt),&newilocal);
458:     for (i=0,count=0; i<nroots; i++) {
459:       if (roots[i].rank >= 0) {
460:         newilocal[count] = i;
461:         roots[count].rank  = roots[i].rank;
462:         roots[count].index = roots[i].index;
463:         count++;
464:       }
465:     }
466:   }

468:   PetscSFCreate(((PetscObject)sf)->comm,isf);
469:   PetscSFSetSynchronizationType(*isf,sf->sync);
470:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
471:   PetscFree2(roots,leaves);
472:   return(0);
473: }
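/*
   Editorial sketch: inverting a star forest in which every root is referenced by exactly one
   leaf, as required by the note above. Assumes sf satisfies that condition and that ierr is a
   declared PetscErrorCode; the roots of isf correspond to the leaves of sf and vice versa.

     PetscSF isf;

     ierr = PetscSFCreateInverseSF(sf,&isf);CHKERRQ(ierr);
     ierr = PetscSFView(isf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&isf);CHKERRQ(ierr);
*/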

477: /*@C
478:    PetscSFGetGraph - Get the graph specifying a parallel star forest

480:    Collective

482:    Input Argument:
483: .  sf - star forest

484:    Output Arguments:
485: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
486: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
487: .  ilocal - locations of leaves in leafdata buffers
488: -  iremote - remote locations of root vertices for each leaf on the current process

489:    Level: intermediate

491: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
492: @*/
493: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
494: {

498:   if (nroots) *nroots = sf->nroots;
499:   if (nleaves) *nleaves = sf->nleaves;
500:   if (ilocal) *ilocal = sf->mine;
501:   if (iremote) *iremote = sf->remote;
502:   return(0);
503: }
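/*
   Editorial sketch: retrieving and printing the local part of the graph. Assumes sf and a
   declared PetscErrorCode ierr; any output argument that is not needed may be passed as
   PETSC_NULL.

     PetscInt          i,nroots,nleaves;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) {
       ierr = PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (%D,%D)\n",ilocal ? ilocal[i] : i,iremote[i].rank,iremote[i].index);CHKERRQ(ierr);
     }
*/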

507: /*@C
508:    PetscSFView - view a star forest

510:    Collective

512:    Input Arguments:
513: +  sf - star forest
514: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

516:    Level: beginner

518: .seealso: PetscSFCreate(), PetscSFSetGraph()
519: @*/
520: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
521: {
523:   PetscBool iascii;

527:   if (!viewer) {PetscViewerASCIIGetStdout(((PetscObject)sf)->comm,&viewer);}
530:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
531:   if (iascii) {
532:     PetscMPIInt rank;
533:     PetscInt i,j;
534:     PetscBool verbose;
535:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer,"Star Forest Object");
536:     PetscViewerASCIIPushTab(viewer);
537:     PetscViewerASCIIPrintf(viewer,"synchronization=%s sort=%s\n",PetscSFSynchronizationTypes[sf->sync],sf->rankorder?"rank-order":"unordered");
538:     MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
539:     PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
540:     PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
541:     for (i=0; i<sf->nleaves; i++) {
542:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine?sf->mine[i]:i,sf->remote[i].rank,sf->remote[i].index);
543:     }
544:     PetscViewerFlush(viewer);
545:     verbose = PETSC_FALSE;
546:     PetscOptionsGetBool(((PetscObject)sf)->prefix,"-sf_view_verbose",&verbose,PETSC_NULL);
547:     if (verbose) {
548:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
549:       for (i=0; i<sf->nranks; i++) {
550:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
551:         for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
552:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
553:         }
554:       }
555:     }
556:     PetscViewerFlush(viewer);
557:     PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
558:     PetscViewerASCIIPopTab(viewer);
559:   }
560:   return(0);
561: }
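/*
   Editorial sketch: viewing a star forest on the communicator's standard output viewer.
   Passing a PETSC_NULL viewer selects that viewer automatically (as handled above), and the
   -sf_view_verbose option queried above additionally prints the per-rank edge lists.

     ierr = PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/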

565: static PetscErrorCode MPIU_Type_unwrap(MPI_Datatype a,MPI_Datatype *atype)
566: {
567:   PetscMPIInt nints,naddrs,ntypes,combiner;

571:   MPI_Type_get_envelope(a,&nints,&naddrs,&ntypes,&combiner);
572:   if (combiner == MPI_COMBINER_DUP) {
573:     PetscMPIInt ints[1];
574:     MPI_Aint addrs[1];
575:     MPI_Datatype types[1];
576:     if (nints != 0 || naddrs != 0 || ntypes != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Unexpected returns from MPI_Type_get_envelope()");
577:     MPI_Type_get_contents(a,0,0,1,ints,addrs,types);
578:     *atype = types[0];
579:   } else {
580:     *atype = a;
581:   }
582:   return(0);
583: }

587: static PetscErrorCode MPIU_Type_compare(MPI_Datatype a,MPI_Datatype b,PetscBool *match)
588: {
590:   MPI_Datatype atype,btype;
591:   PetscMPIInt aintcount,aaddrcount,atypecount,acombiner;
592:   PetscMPIInt bintcount,baddrcount,btypecount,bcombiner;

595:   MPIU_Type_unwrap(a,&atype);
596:   MPIU_Type_unwrap(b,&btype);
597:   *match = PETSC_FALSE;
598:   if (atype == btype) {
599:     *match = PETSC_TRUE;
600:     return(0);
601:   }
602:   MPI_Type_get_envelope(atype,&aintcount,&aaddrcount,&atypecount,&acombiner);
603:   MPI_Type_get_envelope(btype,&bintcount,&baddrcount,&btypecount,&bcombiner);
604:   if (acombiner == bcombiner && aintcount == bintcount && aaddrcount == baddrcount && atypecount == btypecount && (aintcount > 0 || aaddrcount > 0 || atypecount > 0)) {
605:     PetscMPIInt  *aints,*bints;
606:     MPI_Aint     *aaddrs,*baddrs;
607:     MPI_Datatype *atypes,*btypes;
608:     PetscBool    same;
609:     PetscMalloc6(aintcount,PetscMPIInt,&aints,bintcount,PetscMPIInt,&bints,aaddrcount,MPI_Aint,&aaddrs,baddrcount,MPI_Aint,&baddrs,atypecount,MPI_Datatype,&atypes,btypecount,MPI_Datatype,&btypes);
610:     MPI_Type_get_contents(atype,aintcount,aaddrcount,atypecount,aints,aaddrs,atypes);
611:     MPI_Type_get_contents(btype,bintcount,baddrcount,btypecount,bints,baddrs,btypes);
612:     PetscMemcmp(aints,bints,aintcount*sizeof(aints[0]),&same);
613:     if (same) {
614:       PetscMemcmp(aaddrs,baddrs,aaddrcount*sizeof(aaddrs[0]),&same);
615:       if (same) {
616:         /* This comparison should be recursive */
617:         PetscMemcmp(atypes,btypes,atypecount*sizeof(atypes[0]),&same);
618:       }
619:     }
620:     if (same) *match = PETSC_TRUE;
621:     return(0);
622:   }
623:   return(0);
624: }

628: /*@C
629:    PetscSFGetDataTypes - gets composite local and remote data types for each rank

631:    Not Collective

633:    Input Arguments:
634: +  sf - star forest
635: -  unit - data type for each node

637:    Output Arguments:
638: +  localtypes - types describing part of local leaf buffer referencing each remote rank
639: -  remotetypes - types describing part of remote root buffer referenced for each remote rank

641:    Level: developer

643: .seealso: PetscSFSetGraph(), PetscSFView()
644: @*/
645: PetscErrorCode PetscSFGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
646: {
648:   PetscSFDataLink link;
649:   PetscInt i,nranks;
650:   const PetscInt *roffset;
651:   const PetscMPIInt *ranks,*rmine,*rremote;

654:   /* Look for types in cache */
655:   for (link=sf->link; link; link=link->next) {
656:     PetscBool match;
657:     MPIU_Type_compare(unit,link->unit,&match);
658:     if (match) {
659:       *localtypes = link->mine;
660:       *remotetypes = link->remote;
661:       return(0);
662:     }
663:   }

665:   /* Create new composite types for each send rank */
666:   PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
667:   PetscMalloc(sizeof(*link),&link);
668:   MPI_Type_dup(unit,&link->unit);
669:   PetscMalloc2(nranks,MPI_Datatype,&link->mine,nranks,MPI_Datatype,&link->remote);
670:   for (i=0; i<nranks; i++) {
671:     PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
672:     MPI_Type_create_indexed_block(rcount,1,sf->rmine+sf->roffset[i],link->unit,&link->mine[i]);
673:     MPI_Type_create_indexed_block(rcount,1,sf->rremote+sf->roffset[i],link->unit,&link->remote[i]);
674:     MPI_Type_commit(&link->mine[i]);
675:     MPI_Type_commit(&link->remote[i]);
676:   }
677:   link->next = sf->link;
678:   sf->link = link;

680:   *localtypes = link->mine;
681:   *remotetypes = link->remote;
682:   return(0);
683: }

687: /*@C
688:    PetscSFGetRanks - Get ranks and number of vertices referenced by leaves on this process

690:    Not Collective

692:    Input Arguments:
693: .  sf - star forest

695:    Output Arguments:
696: +  nranks - number of ranks referenced by local part
697: .  ranks - array of ranks
698: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
699: .  rmine - concatenated array holding local indices referencing each remote rank
700: -  rremote - concatenated array holding remote indices referenced for each remote rank

702:    Level: developer

704: .seealso: PetscSFSetGraph(), PetscSFGetDataTypes()
705: @*/
706: PetscErrorCode PetscSFGetRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscMPIInt **rmine,const PetscMPIInt **rremote)
707: {

711:   if (nranks)  *nranks  = sf->nranks;
712:   if (ranks)   *ranks   = sf->ranks;
713:   if (roffset) *roffset = sf->roffset;
714:   if (rmine)   *rmine   = sf->rmine;
715:   if (rremote) *rremote = sf->rremote;
716:   return(0);
717: }
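/*
   Editorial sketch: walking the per-rank connectivity. Assumes sf with its graph set and a
   declared PetscErrorCode ierr; unneeded output arguments may be PETSC_NULL.

     PetscInt          i,j,nranks;
     const PetscMPIInt *ranks,*rmine,*rremote;
     const PetscInt    *roffset;

     ierr = PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
     for (i=0; i<nranks; i++) {
       for (j=roffset[i]; j<roffset[i+1]; j++) {
         ierr = PetscPrintf(PETSC_COMM_SELF,"local leaf %d references root %d on rank %d\n",(int)rmine[j],(int)rremote[j],(int)ranks[i]);CHKERRQ(ierr);
       }
     }
*/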

721: /*@C
722:    PetscSFGetWindow - Get a window for use with a given data type

724:    Collective on PetscSF

726:    Input Arguments:
727: +  sf - star forest
728: .  unit - data type
729: .  array - array to be sent
730: .  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
731: .  fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_SYNCHRONIZATION_FENCE
732: .  postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_SYNCHRONIZATION_ACTIVE
733: -  startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_SYNCHRONIZATION_ACTIVE

735:    Output Arguments:
736: .  win - window

738:    Level: developer

740:    Developer Notes:
741:    This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
742:    reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
743:    whichever one is available, by copying the array into it if necessary.

745: .seealso: PetscSFGetRanks(), PetscSFGetDataTypes()
746: @*/
747: PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
748: {
750:   MPI_Aint lb,lb_true,bytes,bytes_true;
751:   PetscSFWinLink link;

754:   MPI_Type_get_extent(unit,&lb,&bytes);
755:   MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
756:   if (lb != 0 || lb_true != 0) SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
757:   if (bytes != bytes_true) SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
758:   PetscMalloc(sizeof(*link),&link);
759:   link->bytes = bytes;
760:   link->addr  = array;
761:   MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,((PetscObject)sf)->comm,&link->win);
762:   link->epoch = epoch;
763:   link->next = sf->wins;
764:   link->inuse = PETSC_TRUE;
765:   sf->wins = link;
766:   *win = link->win;

768:   if (epoch) {
769:     switch (sf->sync) {
770:     case PETSCSF_SYNCHRONIZATION_FENCE:
771:       MPI_Win_fence(fenceassert,*win);
772:       break;
773:     case PETSCSF_SYNCHRONIZATION_LOCK: /* Handled outside */
774:       break;
775:     case PETSCSF_SYNCHRONIZATION_ACTIVE: {
776:       MPI_Group ingroup,outgroup;
777:       PetscSFGetGroups(sf,&ingroup,&outgroup);
778:       MPI_Win_post(ingroup,postassert,*win);
779:       MPI_Win_start(outgroup,startassert,*win);
780:     } break;
781:     default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_PLIB,"Unknown synchronization type");
782:     }
783:   }
784:   return(0);
785: }

789: /*@C
790:    PetscSFFindWindow - Finds a window that is already in use

792:    Not Collective

794:    Input Arguments:
795: +  sf - star forest
796: .  unit - data type
797: -  array - array with which the window is associated

799:    Output Arguments:
800: .  win - window

802:    Level: developer

804: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
805: @*/
806: PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win)
807: {
808:   PetscSFWinLink link;

811:   for (link=sf->wins; link; link=link->next) {
812:     if (array == link->addr) {
813:       *win = link->win;
814:       return(0);
815:     }
816:   }
817:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
818:   return(0);
819: }

823: /*@C
824:    PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()

826:    Collective

828:    Input Arguments:
829: +  sf - star forest
830: .  unit - data type
831: .  array - array associated with window
832: .  epoch - close an epoch, must match argument to PetscSFGetWindow()
833: .  fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_SYNCHRONIZATION_FENCE
834: -  win - window

835:    Level: developer

837: .seealso: PetscSFFindWindow()
838: @*/
839: PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
840: {
842:   PetscSFWinLink *p,link;

845:   for (p=&sf->wins; *p; p=&(*p)->next) {
846:     link = *p;
847:     if (*win == link->win) {
848:       if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
849:       if (epoch != link->epoch) {
850:         if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
851:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
852:       }
853:       *p = link->next;
854:       goto found;
855:     }
856:   }
857:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

859:   found:
860:   if (epoch) {
861:     switch (sf->sync) {
862:     case PETSCSF_SYNCHRONIZATION_FENCE:
863:       MPI_Win_fence(fenceassert,*win);
864:       break;
865:     case PETSCSF_SYNCHRONIZATION_LOCK:
866:       break;                    /* handled outside */
867:     case PETSCSF_SYNCHRONIZATION_ACTIVE: {
868:       MPI_Win_complete(*win);
869:       MPI_Win_wait(*win);
870:     } break;
871:     default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_PLIB,"Unknown synchronization type");
872:     }
873:   }

875:   MPI_Win_free(&link->win);
876:   PetscFree(link);
877:   *win = MPI_WIN_NULL;
878:   return(0);
879: }

883: /*@C
884:    PetscSFGetGroups - gets incoming and outgoing process groups

886:    Collective

888:    Input Argument:
889: .  sf - star forest

891:    Output Arguments:
892: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
893: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

895:    Level: developer

897: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
898: @*/
899: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
900: {
902:   MPI_Group group;

905:   if (sf->ingroup == MPI_GROUP_NULL) {
906:     PetscInt    i;
907:     const PetscInt *indegree;
908:     PetscMPIInt rank,*outranks,*inranks;
909:     PetscSFNode *remote;
910:     PetscSF     bgcount;

912:     /* Compute the number of incoming ranks */
913:     PetscMalloc(sf->nranks*sizeof(PetscSFNode),&remote);
914:     for (i=0; i<sf->nranks; i++) {
915:       remote[i].rank = sf->ranks[i];
916:       remote[i].index = 0;
917:     }
918:     PetscSFCreate(((PetscObject)sf)->comm,&bgcount);
919:     PetscSFSetSynchronizationType(bgcount,PETSCSF_SYNCHRONIZATION_LOCK); /* or FENCE, ACTIVE here would cause recursion */
920:     PetscSFSetGraph(bgcount,1,sf->nranks,PETSC_NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
921:     PetscSFComputeDegreeBegin(bgcount,&indegree);
922:     PetscSFComputeDegreeEnd(bgcount,&indegree);

924:     /* Enumerate the incoming ranks */
925:     PetscMalloc2(indegree[0],PetscMPIInt,&inranks,sf->nranks,PetscMPIInt,&outranks);
926:     MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
927:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
928:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
929:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
930:     MPI_Comm_group(((PetscObject)sf)->comm,&group);
931:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
932:     MPI_Group_free(&group);
933:     PetscFree2(inranks,outranks);
934:     PetscSFDestroy(&bgcount);
935:   }
936:   *incoming = sf->ingroup;

938:   if (sf->outgroup == MPI_GROUP_NULL) {
939:     MPI_Comm_group(((PetscObject)sf)->comm,&group);
940:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
941:     MPI_Group_free(&group);
942:   }
943:   *outgoing = sf->outgroup;
944:   return(0);
945: }

949: /*@C
950:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

952:    Collective

954:    Input Argument:
955: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

957:    Output Arguments:
958: .  multi - star forest with split roots, such that each root has degree exactly 1

960:    Level: developer

962:    Notes:

964:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
965:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
966:    edge, it is a candidate for future optimization that might involve its removal.

968: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
969: @*/
970: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
971: {

977:   if (sf->nroots < 0) {
978:     PetscSFCreate(((PetscObject)sf)->comm,&sf->multi);
979:     *multi = sf->multi;
980:     return(0);
981:   }
982:   if (!sf->multi) {
983:     const PetscInt *indegree;
984:     PetscInt i,*inoffset,*outones,*outoffset;
985:     PetscSFNode *remote;
986:     PetscSFComputeDegreeBegin(sf,&indegree);
987:     PetscSFComputeDegreeEnd(sf,&indegree);
988:     PetscMalloc3(sf->nroots+1,PetscInt,&inoffset,sf->nleaves,PetscInt,&outones,sf->nleaves,PetscInt,&outoffset);
989:     inoffset[0] = 0;
990: #if 1
991:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = PetscMax(i+1, inoffset[i] + indegree[i]);
992: #else
993:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
994: #endif
995:     for (i=0; i<sf->nleaves; i++) outones[i] = 1;
996:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);
997:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);
998:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
999: #if 0
1000: #if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
1001:     for (i=0; i<sf->nroots; i++) {
1002:       if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1003:     }
1004: #endif
1005: #endif
1006:     PetscMalloc(sf->nleaves*sizeof(*remote),&remote);
1007:     for (i=0; i<sf->nleaves; i++) {
1008:       remote[i].rank = sf->remote[i].rank;
1009:       remote[i].index = outoffset[i];
1010:     }
1011:     PetscSFCreate(((PetscObject)sf)->comm,&sf->multi);
1012:     PetscSFSetSynchronizationType(sf->multi,sf->sync);
1013:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,PETSC_NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1014:     if (sf->rankorder) {        /* Sort the ranks */
1015:       PetscMPIInt rank;
1016:       PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1017:       PetscSFNode *newremote;
1018:       MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
1019:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1020:       PetscMalloc5(sf->multi->nroots,PetscInt,&inranks,sf->multi->nroots,PetscInt,&newoffset,sf->nleaves,PetscInt,&outranks,sf->nleaves,PetscInt,&newoutoffset,maxdegree,PetscInt,&tmpoffset);
1021:       for (i=0; i<sf->nleaves; i++) outranks[i] = rank;
1022:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1023:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1024:       /* Sort the incoming ranks at each vertex, build the inverse map */
1025:       for (i=0; i<sf->nroots; i++) {
1026:         PetscInt j;
1027:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1028:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1029:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1030:       }
1031:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1032:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1033:       PetscMalloc(sf->nleaves*sizeof(*newremote),&newremote);
1034:       for (i=0; i<sf->nleaves; i++) {
1035:         newremote[i].rank = sf->remote[i].rank;
1036:         newremote[i].index = newoutoffset[i];
1037:       }
1038:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,PETSC_NULL,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1039:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1040:     }
1041:     PetscFree3(inoffset,outones,outoffset);
1042:   }
1043:   *multi = sf->multi;
1044:   return(0);
1045: }

1049: /*@C
1050:    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

1052:    Collective

1054:    Input Arguments:
1055: +  sf - original star forest
1056: .  nroots - number of roots to select on this process
1057: -  selected - selected roots on this process

1059:    Output Arguments:
1060: .  newsf - new star forest

1062:    Level: advanced

1064:    Note:
1065:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1066:    be done by calling PetscSFGetGraph().

1068: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1069: @*/
1070: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nroots,const PetscInt *selected,PetscSF *newsf)
1071: {
1073:   PetscInt i,nleaves,*ilocal,*rootdata,*leafdata;
1074:   PetscSFNode *iremote;

1080:   PetscMalloc2(sf->nroots,PetscInt,&rootdata,sf->nleaves,PetscInt,&leafdata);
1081:   PetscMemzero(rootdata,sf->nroots*sizeof(PetscInt));
1082:   PetscMemzero(leafdata,sf->nleaves*sizeof(PetscInt));
1083:   for (i=0; i<nroots; i++) rootdata[selected[i]] = 1;
1084:   PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
1085:   PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);

1087:   for (i=0,nleaves=0; i<sf->nleaves; i++) nleaves += leafdata[i];
1088:   PetscMalloc(nleaves*sizeof(PetscInt),&ilocal);
1089:   PetscMalloc(nleaves*sizeof(PetscSFNode),&iremote);
1090:   for (i=0,nleaves=0; i<sf->nleaves; i++) {
1091:     if (leafdata[i]) {
1092:       ilocal[nleaves]        = sf->mine ? sf->mine[i] : i;
1093:       iremote[nleaves].rank  = sf->remote[i].rank;
1094:       iremote[nleaves].index = sf->remote[i].index;
1095:       nleaves++;
1096:     }
1097:   }
1098:   PetscSFCreate(((PetscObject)sf)->comm,newsf);
1099:   PetscSFSetSynchronizationType(*newsf,sf->sync);
1100:   PetscSFSetGraph(*newsf,sf->nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
1101:   PetscFree2(rootdata,leafdata);
1102:   return(0);
1103: }
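/*
   Editorial sketch: restricting communication to a subset of the local roots. The two selected
   root indices are assumptions for illustration; as the note above says, PetscSFGetGraph() on
   the result tells which leaves still participate.

     PetscSF        esf;
     PetscInt       nleaves,selected[2];
     const PetscInt *ilocal;

     selected[0] = 0;
     selected[1] = 2;
     ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
     ierr = PetscSFGetGraph(esf,PETSC_NULL,&nleaves,&ilocal,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
*/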

1107: /* PETSc's MPIU_* reduction ops may be user-defined (e.g. for complex or extended-precision types) so that they work inside collectives (MPI_Allreduce), but MPI_Accumulate only accepts built-in MPI_Ops, which act elementwise, so translate back to the built-ins where possible */
1108: static PetscErrorCode PetscSFOpTranslate(MPI_Op *op)
1109: {

1112:   if (*op == MPIU_SUM) *op = MPI_SUM;
1113:   else if (*op == MPIU_MAX) *op = MPI_MAX;
1114:   else if (*op == MPIU_MIN) *op = MPI_MIN;
1115:   return(0);
1116: }

1120: /*@C
1121:    PetscSFBcastBegin - begin pointwise broadcast to be concluded with call to PetscSFBcastEnd()

1123:    Collective on PetscSF

1125:    Input Arguments:
1126: +  sf - star forest on which to communicate
1127: .  unit - data type associated with each node
1128: -  rootdata - buffer to broadcast

1130:    Output Arguments:
1131: .  leafdata - buffer to update with values from each leaf's respective root

1133:    Level: intermediate

1135: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFView(), PetscSFBcastEnd(), PetscSFReduceBegin()
1136: @*/
1137: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1138: {
1139:   PetscErrorCode     ierr;
1140:   PetscInt           i,nranks;
1141:   const PetscMPIInt  *ranks;
1142:   const MPI_Datatype *mine,*remote;
1143:   MPI_Win            win;

1147:   PetscSFCheckGraphSet(sf,1);
1148:   PetscSFGetRanks(sf,&nranks,&ranks,PETSC_NULL,PETSC_NULL,PETSC_NULL);
1149:   PetscSFGetDataTypes(sf,unit,&mine,&remote);
1150:   PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);
1151:   for (i=0; i<nranks; i++) {
1152:     if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
1153:     MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);
1154:     if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_unlock(ranks[i],win);}
1155:   }
1156:   return(0);
1157: }

1161: /*@C
1162:    PetscSFBcastEnd - end a broadcast operation started with PetscSFBcastBegin()

1164:    Collective

1166:    Input Arguments:
1167: +  sf - star forest
1168: .  unit - data type
1169: -  rootdata - buffer to broadcast

1171:    Output Arguments:
1172: .  leafdata - buffer to update with values from each leaf's respective root

1174:    Level: intermediate

1176: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1177: @*/
1178: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1179: {
1181:   MPI_Win win;

1185:   PetscSFCheckGraphSet(sf,1);
1186:   PetscSFFindWindow(sf,unit,rootdata,&win);
1187:   PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);
1188:   return(0);
1189: }
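/*
   Editorial sketch: broadcasting one PetscInt per root to all leaves. Assumes sf uses
   contiguous leaf storage (ilocal was PETSC_NULL in PetscSFSetGraph()) and that ierr is a
   declared PetscErrorCode.

     PetscInt i,nroots,nleaves,*rootdata,*leafdata;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(nroots*sizeof(PetscInt),&rootdata);CHKERRQ(ierr);
     ierr = PetscMalloc(nleaves*sizeof(PetscInt),&leafdata);CHKERRQ(ierr);
     for (i=0; i<nroots; i++) rootdata[i] = 100 + i;
     ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscFree(rootdata);CHKERRQ(ierr);
     ierr = PetscFree(leafdata);CHKERRQ(ierr);

   After the End call, leafdata[i] holds the value of the root referenced by leaf i.
*/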

1193: /*@C
1194:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1196:    Collective

1198:    Input Arguments:
1199: +  sf - star forest
1200: .  unit - data type
1201: .  leafdata - values to reduce
1202: -  op - reduction operation

1204:    Output Arguments:
1205: .  rootdata - result of reduction of values from all leaves of each root

1207:    Level: intermediate

1209: .seealso: PetscSFBcastBegin()
1210: @*/
1211: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1212: {
1213:   PetscErrorCode     ierr;
1214:   PetscInt           i,nranks;
1215:   const PetscMPIInt  *ranks;
1216:   const MPI_Datatype *mine,*remote;
1217:   MPI_Win            win;

1221:   PetscSFCheckGraphSet(sf,1);
1222:   PetscSFGetRanks(sf,&nranks,&ranks,PETSC_NULL,PETSC_NULL,PETSC_NULL);
1223:   PetscSFGetDataTypes(sf,unit,&mine,&remote);
1224:   PetscSFOpTranslate(&op);
1225:   PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&win);
1226:   for (i=0; i<nranks; i++) {
1227:     if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
1228:     MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
1229:     if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_unlock(ranks[i],win);}
1230:   }
1231:   return(0);
1232: }

1236: /*@C
1237:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1239:    Collective

1241:    Input Arguments:
1242: +  sf - star forest
1243: .  unit - data type
1244: .  leafdata - values to reduce
1245: -  op - reduction operation

1247:    Output Arguments:
1248: .  rootdata - result of reduction of values from all leaves of each root

1250:    Level: intermediate

1252: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1253: @*/
1254: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1255: {
1257:   MPI_Win win;

1261:   PetscSFCheckGraphSet(sf,1);
1262:   if (!sf->wins) {return(0);}
1263:   PetscSFFindWindow(sf,unit,rootdata,&win);
1264:   MPI_Win_fence(MPI_MODE_NOSUCCEED,win);
1265:   PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSUCCEED,&win);
1266:   return(0);
1267: }
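/*
   Editorial sketch: summing one PetscInt from every leaf into its root, which leaves each
   root's degree in rootdata (the same reduction PetscSFComputeDegreeBegin() performs). Same
   assumptions as the broadcast sketch above; rootdata must be initialized because the
   reduction accumulates into its existing contents.

     PetscInt i,nroots,nleaves,*rootdata,*leafdata;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(nroots*sizeof(PetscInt),&rootdata);CHKERRQ(ierr);
     ierr = PetscMalloc(nleaves*sizeof(PetscInt),&leafdata);CHKERRQ(ierr);
     for (i=0; i<nroots; i++) rootdata[i] = 0;
     for (i=0; i<nleaves; i++) leafdata[i] = 1;
     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
     ierr = PetscFree(rootdata);CHKERRQ(ierr);
     ierr = PetscFree(leafdata);CHKERRQ(ierr);
*/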

1271: /*@C
1272:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1274:    Collective

1276:    Input Arguments:
1277: .  sf - star forest

1279:    Output Arguments:
1280: .  degree - degree of each root vertex

1282:    Level: advanced

1284: .seealso: PetscSFGatherBegin()
1285: @*/
1286: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1287: {

1292:   PetscSFCheckGraphSet(sf,1);
1294:   if (!sf->degree) {
1295:     PetscInt i;
1296:     PetscMalloc(sf->nroots*sizeof(PetscInt),&sf->degree);
1297:     PetscMalloc(sf->nleaves*sizeof(PetscInt),&sf->degreetmp);
1298:     for (i=0; i<sf->nroots; i++) sf->degree[i] = 0;
1299:     for (i=0; i<sf->nleaves; i++) sf->degreetmp[i] = 1;
1300:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);
1301:   }
1302:   *degree = PETSC_NULL;
1303:   return(0);
1304: }

1308: /*@C
1309:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1311:    Collective

1313:    Input Arguments:
1314: .  sf - star forest

1316:    Output Arguments:
1317: .  degree - degree of each root vertex

1319:    Level: developer

1321: .seealso: PetscSFComputeDegreeBegin()
1322: @*/
1323: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1324: {

1329:   PetscSFCheckGraphSet(sf,1);
1330:   if (!sf->degreeknown) {
1331:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);
1332:     PetscFree(sf->degreetmp);
1333:     sf->degreeknown = PETSC_TRUE;
1334:   }
1335:   *degree = sf->degree;
1336:   return(0);
1337: }
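/*
   Editorial sketch: computing the degree of each local root. The returned array is owned by
   the PetscSF (it is stored in sf->degree above) and must not be freed by the caller; after
   the End call, degree[i] is the number of leaves referencing local root i.

     const PetscInt *degree;

     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
*/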

1341: /*@C
1342:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1344:    Collective

1346:    Input Arguments:
1347: +  sf - star forest
1348: .  unit - data type
1349: .  leafdata - leaf values to use in reduction
1350: -  op - operation to use for reduction

1352:    Output Arguments:
1353: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1354: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1356:    Level: advanced

1358:    Note:
1359:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1360:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1361:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1362:    integers.

1364: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1365: @*/
1366: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1367: {
1369:   PetscInt           i,nranks;
1370:   const PetscMPIInt  *ranks;
1371:   const MPI_Datatype *mine,*remote;
1372:   MPI_Win            win;

1376:   PetscSFCheckGraphSet(sf,1);
1377:   PetscSFGetRanks(sf,&nranks,&ranks,PETSC_NULL,PETSC_NULL,PETSC_NULL);
1378:   PetscSFGetDataTypes(sf,unit,&mine,&remote);
1379:   PetscSFOpTranslate(&op);
1380:   PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);
1381:   for (i=0; i<sf->nranks; i++) {
1382:     MPI_Win_lock(MPI_LOCK_EXCLUSIVE,sf->ranks[i],0,win);
1383:     MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);
1384:     MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
1385:     MPI_Win_unlock(ranks[i],win);
1386:   }
1387:   return(0);
1388: }

1392: /*@C
1393:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1395:    Collective

1397:    Input Arguments:
1398: +  sf - star forest
1399: .  unit - data type
1400: .  leafdata - leaf values to use in reduction
1401: -  op - operation to use for reduction

1403:    Output Arguments:
1404: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1405: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1407:    Level: advanced

1409: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1410: @*/
1411: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1412: {
1414:   MPI_Win        win;

1418:   PetscSFCheckGraphSet(sf,1);
1419:   PetscSFFindWindow(sf,unit,rootdata,&win);
1420:   /* Nothing to do currently because MPI_LOCK_EXCLUSIVE is used in PetscSFFetchAndOpBegin(), rendering this implementation synchronous. */
1421:   PetscSFRestoreWindow(sf,unit,rootdata,PETSC_FALSE,0,&win);
1422:   return(0);
1423: }
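/*
   Editorial sketch: giving each leaf a unique slot at its root by fetching the current counter
   value and atomically adding one, the same pattern PetscSFGetMultiSF() uses above. Assumes
   contiguous leaf storage and a declared PetscErrorCode ierr. Afterwards slot[i] is the offset
   assigned to leaf i at its root and rootcount[i] is that root's degree.

     PetscInt i,nroots,nleaves,*rootcount,*ones,*slot;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc3(nroots,PetscInt,&rootcount,nleaves,PetscInt,&ones,nleaves,PetscInt,&slot);CHKERRQ(ierr);
     for (i=0; i<nroots; i++) rootcount[i] = 0;
     for (i=0; i<nleaves; i++) ones[i] = 1;
     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,ones,slot,MPIU_SUM);CHKERRQ(ierr);
     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,ones,slot,MPIU_SUM);CHKERRQ(ierr);
     ierr = PetscFree3(rootcount,ones,slot);CHKERRQ(ierr);
*/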

1427: /*@C
1428:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1430:    Collective

1432:    Input Arguments:
1433: +  sf - star forest
1434: .  unit - data type
1435: -  leafdata - leaf data to gather to roots

1437:    Output Argument:
1438: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1440:    Level: intermediate

1442: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1443: @*/
1444: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1445: {
1447:   PetscSF        multi;

1450:   PetscSFGetMultiSF(sf,&multi);
1451:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1452:   return(0);
1453: }

1457: /*@C
1458:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1460:    Collective

1462:    Input Arguments:
1463: +  sf - star forest
1464: .  unit - data type
1465: -  leafdata - leaf data to gather to roots

1467:    Output Argument:
1468: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1470:    Level: intermediate

1472: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1473: @*/
1474: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1475: {
1477:   PetscSF        multi;

1481:   PetscSFCheckGraphSet(sf,1);
1482:   PetscSFGetMultiSF(sf,&multi);
1483:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1484:   return(0);
1485: }
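/*
   Editorial sketch: gathering one PetscInt from every leaf into a multi-root buffer. Assumes
   contiguous leaf storage, a declared PetscErrorCode ierr, and that every local root has at
   least one leaf attached, so the buffer length equals the sum of the local root degrees as
   documented above.

     PetscInt       i,nroots,nleaves,nmulti,*leafdata,*multirootdata;
     const PetscInt *degree;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     for (i=0,nmulti=0; i<nroots; i++) nmulti += degree[i];
     ierr = PetscMalloc2(nleaves,PetscInt,&leafdata,nmulti,PetscInt,&multirootdata);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) leafdata[i] = i;
     ierr = PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
     ierr = PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
     ierr = PetscFree2(leafdata,multirootdata);CHKERRQ(ierr);
*/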

1489: /*@C
1490:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1492:    Collective

1494:    Input Arguments:
1495: +  sf - star forest
1496: .  unit - data type
1497: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1499:    Output Argument:
1500: .  leafdata - leaf data to be updated with personal data from each respective root

1502:    Level: intermediate

1504: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1505: @*/
1506: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1507: {
1509:   PetscSF        multi;

1513:   PetscSFCheckGraphSet(sf,1);
1514:   PetscSFGetMultiSF(sf,&multi);
1515:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1516:   return(0);
1517: }

1521: /*@C
1522:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1524:    Collective

1526:    Input Arguments:
1527: +  sf - star forest
1528: .  unit - data type
1529: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1531:    Output Argument:
1532: .  leafdata - leaf data to be updated with personal data from each respective root

1534:    Level: intermediate

1536: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1537: @*/
1538: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1539: {
1541:   PetscSF        multi;

1545:   PetscSFCheckGraphSet(sf,1);
1546:   PetscSFGetMultiSF(sf,&multi);
1547:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1548:   return(0);
1549: }
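/*
   Editorial sketch: the inverse of the gather above, scattering one unit from each multi-root
   slot back to its corresponding leaf, reusing the leafdata and multirootdata buffers from the
   gather sketch (called before they are freed) under the same assumptions.

     ierr = PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata);CHKERRQ(ierr);
*/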