Actual source code: sf.c

petsc-master 2020-01-21
  1:  #include <petsc/private/sfimpl.h>
  2:  #include <petsc/private/hashseti.h>
  3:  #include <petscctable.h>

  5: #if defined(PETSC_USE_DEBUG)
  6: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
  7:     if (PetscUnlikely(!(sf)->graphset))                              \
  8:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  9:   } while (0)
 10: #else
 11: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 12: #endif

 14: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

 16: /*@
 17:    PetscSFCreate - create a star forest communication context

 19:    Collective

 21:    Input Arguments:
 22: .  comm - communicator on which the star forest will operate

 24:    Output Arguments:
 25: .  sf - new star forest context

 27:    Options Database Keys:
 28: +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
 29: .  -sf_type window    -Use MPI-3 one-sided window for communication
 30: -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

 32:    Level: intermediate

 34:    Notes:
 35:    When the communication graph is known to match one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv,
 36:    or MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 37:    SFs are optimized and have better performance than general SFs.

 39: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 40: @*/
 41: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 42: {
 44:   PetscSF        b;

 48:   PetscSFInitializePackage();

 50:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 52:   b->nroots    = -1;
 53:   b->nleaves   = -1;
 54:   b->minleaf   = PETSC_MAX_INT;
 55:   b->maxleaf   = PETSC_MIN_INT;
 56:   b->nranks    = -1;
 57:   b->rankorder = PETSC_TRUE;
 58:   b->ingroup   = MPI_GROUP_NULL;
 59:   b->outgroup  = MPI_GROUP_NULL;
 60:   b->graphset  = PETSC_FALSE;

 62:   *sf = b;
 63:   return(0);
 64: }
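
/*
   A minimal usage sketch of the life cycle documented above: create an SF, describe a trivial
   graph in which each of two leaves references a root on the same process, set it up, and
   destroy it. The sizes and variable names are illustrative assumptions; error checking is
   elided as elsewhere in this listing.

     PetscSF     sf;
     PetscSFNode remote[2];
     PetscMPIInt rank;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     remote[0].rank = rank; remote[0].index = 0;
     remote[1].rank = rank; remote[1].index = 1;
     PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,remote,PETSC_COPY_VALUES);
     PetscSFSetFromOptions(sf);
     PetscSFSetUp(sf);
     PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
     PetscSFDestroy(&sf);
*/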

 66: /*@
 67:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

 69:    Collective

 71:    Input Arguments:
 72: .  sf - star forest

 74:    Level: advanced

 76: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
 77: @*/
 78: PetscErrorCode PetscSFReset(PetscSF sf)
 79: {

 84:   if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
 85:   sf->nroots   = -1;
 86:   sf->nleaves  = -1;
 87:   sf->minleaf  = PETSC_MAX_INT;
 88:   sf->maxleaf  = PETSC_MIN_INT;
 89:   sf->mine     = NULL;
 90:   sf->remote   = NULL;
 91:   sf->graphset = PETSC_FALSE;
 92:   PetscFree(sf->mine_alloc);
 93:   PetscFree(sf->remote_alloc);
 94:   sf->nranks = -1;
 95:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
 96: #if defined(PETSC_HAVE_CUDA)
 97:   if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
 98: #endif
 99:   sf->degreeknown = PETSC_FALSE;
100:   PetscFree(sf->degree);
101:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
102:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
103:   PetscSFDestroy(&sf->multi);
104:   PetscLayoutDestroy(&sf->map);
105:   sf->setupcalled = PETSC_FALSE;
106:   return(0);
107: }

109: /*@C
110:    PetscSFSetType - Set the PetscSF communication implementation

112:    Collective on PetscSF

114:    Input Parameters:
115: +  sf - the PetscSF context
116: -  type - a known method

118:    Options Database Key:
119: .  -sf_type <type> - Sets the method; use -help for a list
120:    of available methods (for instance, window, basic, neighbor)

122:    Notes:
123:    See "include/petscsf.h" for available methods (for instance)
124: +    PETSCSFWINDOW - MPI-2/3 one-sided
125: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

127:   Level: intermediate

129: .seealso: PetscSFType, PetscSFCreate()
130: @*/
131: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
132: {
133:   PetscErrorCode ierr,(*r)(PetscSF);
134:   PetscBool      match;


140:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
141:   if (match) return(0);

143:   PetscFunctionListFind(PetscSFList,type,&r);
144:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
145:   /* Destroy the previous PetscSF implementation context */
146:   if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
147:   PetscMemzero(sf->ops,sizeof(*sf->ops));
148:   PetscObjectChangeTypeName((PetscObject)sf,type);
149:   (*r)(sf);
150:   return(0);
151: }

153: /*@C
154:   PetscSFGetType - Get the PetscSF communication implementation

156:   Not Collective

158:   Input Parameter:
159: . sf  - the PetscSF context

161:   Output Parameter:
162: . type - the PetscSF type name

164:   Level: intermediate

166: .seealso: PetscSFSetType(), PetscSFCreate()
167: @*/
168: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
169: {
173:   *type = ((PetscObject)sf)->type_name;
174:   return(0);
175: }

177: /*@
178:    PetscSFDestroy - destroy star forest

180:    Collective

182:    Input Arguments:
183: .  sf - address of star forest

185:    Level: intermediate

187: .seealso: PetscSFCreate(), PetscSFReset()
188: @*/
189: PetscErrorCode PetscSFDestroy(PetscSF *sf)
190: {

194:   if (!*sf) return(0);
196:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
197:   PetscSFReset(*sf);
198:   if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
199:   PetscHeaderDestroy(sf);
200:   return(0);
201: }

203: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
204: {
205: #if defined(PETSC_USE_DEBUG)
206:   PetscInt           i, nleaves;
207:   PetscMPIInt        size;
208:   const PetscInt    *ilocal;
209:   const PetscSFNode *iremote;
210:   PetscErrorCode     ierr;

213:   if (!sf->graphset) return(0);
214:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
215:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
216:   for (i = 0; i < nleaves; i++) {
217:     const PetscInt rank = iremote[i].rank;
218:     const PetscInt remote = iremote[i].index;
219:     const PetscInt leaf = ilocal ? ilocal[i] : i;
220:     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
221:     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
222:     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
223:   }
224:   return(0);
225: #else
227:   return(0);
228: #endif
229: }

231: /*@
232:    PetscSFSetUp - set up communication structures

234:    Collective

236:    Input Arguments:
237: .  sf - star forest communication object

239:    Level: beginner

241: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
242: @*/
243: PetscErrorCode PetscSFSetUp(PetscSF sf)
244: {

249:   PetscSFCheckGraphSet(sf,1);
250:   if (sf->setupcalled) return(0);
251:   PetscSFCheckGraphValid_Private(sf);
252:   if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
253:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
254:   if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
255:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
256:   sf->setupcalled = PETSC_TRUE;
257:   return(0);
258: }

260: /*@
261:    PetscSFSetFromOptions - set PetscSF options using the options database

263:    Logically Collective

265:    Input Arguments:
266: .  sf - star forest

268:    Options Database Keys:
269: +  -sf_type              - implementation type, see PetscSFSetType()
270: .  -sf_rank_order        - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
271: -  -sf_use_pinned_buffer - use pinned (non-pageable) host memory for send/recv buffers when communicating GPU data without GPU-aware MPI.
272:                            Only available for the basic and neighbor SF types.

274:    Level: intermediate

276: .seealso: PetscSFWindowSetSyncType()
277: @*/
278: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
279: {
280:   PetscSFType    deft;
281:   char           type[256];
283:   PetscBool      flg;

287:   PetscObjectOptionsBegin((PetscObject)sf);
288:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
289:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
290:   PetscSFSetType(sf,flg ? type : deft);
291:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
292:   if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
293:   PetscOptionsEnd();
294:   return(0);
295: }

297: /*@
298:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

300:    Logically Collective

302:    Input Arguments:
303: +  sf - star forest
304: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

306:    Level: advanced

308: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
309: @*/
310: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
311: {

316:   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
317:   sf->rankorder = flg;
318:   return(0);
319: }

321: /*@
322:    PetscSFSetGraph - Set a parallel star forest

324:    Collective

326:    Input Arguments:
327: +  sf - star forest
328: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
329: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
330: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
331: during setup in debug mode)
332: .  localmode - copy mode for ilocal
333: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
334: during setup in debug mode)
335: -  remotemode - copy mode for iremote

337:    Level: intermediate

339:    Notes:
340:     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

342:    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
343:    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated because it is
344:    not needed.

346:    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
347:    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

349: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
350: @*/
351: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
352: {

359:   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
360:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

362:   PetscSFReset(sf);
363:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

365:   sf->nroots  = nroots;
366:   sf->nleaves = nleaves;

368:   if (nleaves && ilocal) {
369:     PetscInt i;
370:     PetscInt minleaf = PETSC_MAX_INT;
371:     PetscInt maxleaf = PETSC_MIN_INT;
372:     int      contiguous = 1;
373:     for (i=0; i<nleaves; i++) {
374:       minleaf = PetscMin(minleaf,ilocal[i]);
375:       maxleaf = PetscMax(maxleaf,ilocal[i]);
376:       contiguous &= (ilocal[i] == i);
377:     }
378:     sf->minleaf = minleaf;
379:     sf->maxleaf = maxleaf;
380:     if (contiguous) {
381:       if (localmode == PETSC_OWN_POINTER) {
382:         PetscFree(ilocal);
383:       }
384:       ilocal = NULL;
385:     }
386:   } else {
387:     sf->minleaf = 0;
388:     sf->maxleaf = nleaves - 1;
389:   }

391:   if (ilocal) {
392:     switch (localmode) {
393:     case PETSC_COPY_VALUES:
394:       PetscMalloc1(nleaves,&sf->mine_alloc);
395:       PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
396:       sf->mine = sf->mine_alloc;
397:       break;
398:     case PETSC_OWN_POINTER:
399:       sf->mine_alloc = (PetscInt*)ilocal;
400:       sf->mine       = sf->mine_alloc;
401:       break;
402:     case PETSC_USE_POINTER:
403:       sf->mine_alloc = NULL;
404:       sf->mine       = (PetscInt*)ilocal;
405:       break;
406:     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
407:     }
408:   }

410:   switch (remotemode) {
411:   case PETSC_COPY_VALUES:
412:     PetscMalloc1(nleaves,&sf->remote_alloc);
413:     PetscArraycpy(sf->remote_alloc,iremote,nleaves);
414:     sf->remote = sf->remote_alloc;
415:     break;
416:   case PETSC_OWN_POINTER:
417:     sf->remote_alloc = (PetscSFNode*)iremote;
418:     sf->remote       = sf->remote_alloc;
419:     break;
420:   case PETSC_USE_POINTER:
421:     sf->remote_alloc = NULL;
422:     sf->remote       = (PetscSFNode*)iremote;
423:     break;
424:   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
425:   }

427:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
428:   sf->graphset = PETSC_TRUE;
429:   return(0);
430: }
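
/*
   A sketch of PetscSFSetGraph() for a less trivial graph: each process owns one root and has one
   leaf referencing the root on the next process, forming a ring. ilocal is NULL because the leaf
   storage is contiguous. The layout is an illustrative assumption; error checking is elided as
   elsewhere in this listing.

     PetscSF     sf;
     PetscSFNode remote;
     PetscMPIInt rank,size;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     remote.rank  = (rank+1)%size;
     remote.index = 0;
     PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
     PetscSFSetUp(sf);
     PetscSFDestroy(&sf);
*/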

432: /*@
433:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

435:   Collective

437:   Input Parameters:
438: + sf      - The PetscSF
439: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
440: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

442:   Notes:
443:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map, and let
444:   n and N be the local and global sizes of x, respectively.

446:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to sequential
447:   vectors y on all ranks.

449:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a sequential
450:   vector y on rank 0.

452:   In the above cases, entries of x are roots and entries of y are leaves.

454:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
455:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
456:   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. Note that this does
457:   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
458:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

460:   In this case, roots and leaves are symmetric.

462:   Level: intermediate
463:  @*/
464: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
465: {
466:   MPI_Comm       comm;
467:   PetscInt       n,N,res[2];
468:   PetscMPIInt    rank,size;
469:   PetscSFType    type;

473:   PetscObjectGetComm((PetscObject)sf, &comm);
474:   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
475:   MPI_Comm_rank(comm,&rank);
476:   MPI_Comm_size(comm,&size);

478:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
479:     type = PETSCSFALLTOALL;
480:     PetscLayoutCreate(comm,&sf->map);
481:     PetscLayoutSetLocalSize(sf->map,size);
482:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
483:     PetscLayoutSetUp(sf->map);
484:   } else {
485:     PetscLayoutGetLocalSize(map,&n);
486:     PetscLayoutGetSize(map,&N);
487:     res[0] = n;
488:     res[1] = -n;
489:     /* Check whether n is the same on all ranks so that we can optimize for that case */
490:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
491:     if (res[0] == -res[1]) { /* same n */
492:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
493:     } else {
494:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
495:     }
496:     PetscLayoutReference(map,&sf->map);
497:   }
498:   PetscSFSetType(sf,type);

500:   sf->pattern = pattern;
501:   sf->mine    = NULL; /* Contiguous */

503:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
504:      Also set other easy stuff.
505:    */
506:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
507:     sf->nleaves      = N;
508:     sf->nroots       = n;
509:     sf->nranks       = size;
510:     sf->minleaf      = 0;
511:     sf->maxleaf      = N - 1;
512:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
513:     sf->nleaves      = rank ? 0 : N;
514:     sf->nroots       = n;
515:     sf->nranks       = rank ? 0 : size;
516:     sf->minleaf      = 0;
517:     sf->maxleaf      = rank ? -1 : N - 1;
518:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
519:     sf->nleaves      = size;
520:     sf->nroots       = size;
521:     sf->nranks       = size;
522:     sf->minleaf      = 0;
523:     sf->maxleaf      = size - 1;
524:   }
525:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
526:   sf->graphset = PETSC_TRUE;
527:   return(0);
528: }
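
/*
   A sketch of PetscSFSetGraphWithPattern() with PETSCSF_PATTERN_ALLGATHER: broadcasting root data
   on the resulting SF gathers every process's roots onto every process, as described in the notes
   above. The local size n is an illustrative assumption; error checking is elided as elsewhere in
   this listing.

     PetscSF     sf;
     PetscLayout map;
     PetscInt    n = 2;

     PetscLayoutCreate(PETSC_COMM_WORLD,&map);
     PetscLayoutSetLocalSize(map,n);
     PetscLayoutSetUp(map);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     PetscSFSetUp(sf);
     PetscLayoutDestroy(&map);
     PetscSFDestroy(&sf);
*/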

530: /*@
531:    PetscSFCreateInverseSF - given a PetscSF in which all roots have degree 1, creates the inverse map

533:    Collective

535:    Input Arguments:

537: .  sf - star forest to invert

539:    Output Arguments:
540: .  isf - inverse of sf

541:    Level: advanced

543:    Notes:
544:    All roots must have degree 1.

546:    The local space may be a permutation, but cannot be sparse.

548: .seealso: PetscSFSetGraph()
549: @*/
550: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
551: {
553:   PetscMPIInt    rank;
554:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
555:   const PetscInt *ilocal;
556:   PetscSFNode    *roots,*leaves;

560:   PetscSFCheckGraphSet(sf,1);

563:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
564:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

566:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
567:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
568:   for (i=0; i<maxlocal; i++) {
569:     leaves[i].rank  = rank;
570:     leaves[i].index = i;
571:   }
572:   for (i=0; i <nroots; i++) {
573:     roots[i].rank  = -1;
574:     roots[i].index = -1;
575:   }
576:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
577:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);

579:   /* Check whether our leaves are sparse */
580:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
581:   if (count == nroots) newilocal = NULL;
582:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
583:     PetscMalloc1(count,&newilocal);
584:     for (i=0,count=0; i<nroots; i++) {
585:       if (roots[i].rank >= 0) {
586:         newilocal[count]   = i;
587:         roots[count].rank  = roots[i].rank;
588:         roots[count].index = roots[i].index;
589:         count++;
590:       }
591:     }
592:   }

594:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
595:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
596:   PetscFree2(roots,leaves);
597:   return(0);
598: }
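
/*
   A sketch of inverting an SF in which every root has degree 1: in the inverse, roots and leaves
   exchange roles, so data flows in the opposite direction. The variable names are illustrative;
   error checking is elided as elsewhere in this listing.

     PetscSF isf;

     PetscSFCreateInverseSF(sf,&isf);
     PetscSFView(isf,PETSC_VIEWER_STDOUT_WORLD);
     PetscSFDestroy(&isf);
*/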

600: /*@
601:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

603:    Collective

605:    Input Arguments:
606: +  sf - communication object to duplicate
607: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

609:    Output Arguments:
610: .  newsf - new communication object

612:    Level: beginner

614: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
615: @*/
616: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
617: {
618:   PetscSFType    type;

625:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
626:   PetscSFGetType(sf,&type);
627:   if (type) {PetscSFSetType(*newsf,type);}
628:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
629:     PetscSFCheckGraphSet(sf,1);
630:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
631:       PetscInt          nroots,nleaves;
632:       const PetscInt    *ilocal;
633:       const PetscSFNode *iremote;
634:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
635:       PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
636:     } else {
637:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
638:     }
639:   }
640:   if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
641:   return(0);
642: }

644: /*@C
645:    PetscSFGetGraph - Get the graph specifying a parallel star forest

647:    Not Collective

649:    Input Arguments:
650: .  sf - star forest

652:    Output Arguments:
653: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
654: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
655: .  ilocal - locations of leaves in leafdata buffers
656: -  iremote - remote locations of root vertices for each leaf on the current process

658:    Notes:
659:    We do not currently require that the graph be set, so nroots=-1 is returned if it has not been set yet

661:    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
662:    want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

664:    Level: intermediate

666: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
667: @*/
668: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
669: {

674:   if (sf->ops->GetGraph) {
675:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
676:   } else {
677:     if (nroots) *nroots = sf->nroots;
678:     if (nleaves) *nleaves = sf->nleaves;
679:     if (ilocal) *ilocal = sf->mine;
680:     if (iremote) *iremote = sf->remote;
681:   }
682:   return(0);
683: }
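
/*
   A sketch of inspecting a graph with PetscSFGetGraph(). Note that ilocal may be NULL when leaf
   storage is contiguous, in which case leaf i is stored at location i. Error checking is elided
   as elsewhere in this listing.

     PetscInt          nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       PetscInt leaf = ilocal ? ilocal[i] : i;
       PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (%D,%D)\n",leaf,iremote[i].rank,iremote[i].index);
     }
*/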

685: /*@
686:    PetscSFGetLeafRange - Get the active leaf ranges

688:    Not Collective

690:    Input Arguments:
691: .  sf - star forest

693:    Output Arguments:
694: +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
695: -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

697:    Level: developer

699: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
700: @*/
701: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
702: {

706:   PetscSFCheckGraphSet(sf,1);
707:   if (minleaf) *minleaf = sf->minleaf;
708:   if (maxleaf) *maxleaf = sf->maxleaf;
709:   return(0);
710: }

712: /*@C
713:    PetscSFViewFromOptions - View a star forest based on values in the options database

715:    Collective on PetscSF

717:    Input Parameters:
718: +  A - the star forest
719: .  obj - Optional object
720: -  name - command line option

722:    Level: intermediate

723: .seealso:  PetscSF, PetscSFView(), PetscObjectViewFromOptions(), PetscSFCreate()
724: @*/
725: PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
726: {

731:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
732:   return(0);
733: }

735: /*@C
736:    PetscSFView - view a star forest

738:    Collective

740:    Input Arguments:
741: +  sf - star forest
742: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

744:    Level: beginner

746: .seealso: PetscSFCreate(), PetscSFSetGraph()
747: @*/
748: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
749: {
750:   PetscErrorCode    ierr;
751:   PetscBool         iascii;
752:   PetscViewerFormat format;

756:   if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
759:   if (sf->graphset) {PetscSFSetUp(sf);}
760:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
761:   if (iascii) {
762:     PetscMPIInt rank;
763:     PetscInt    ii,i,j;

765:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
766:     PetscViewerASCIIPushTab(viewer);
767:     if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
768:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
769:       if (!sf->graphset) {
770:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
771:         PetscViewerASCIIPopTab(viewer);
772:         return(0);
773:       }
774:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
775:       PetscViewerASCIIPushSynchronized(viewer);
776:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
777:       for (i=0; i<sf->nleaves; i++) {
778:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
779:       }
780:       PetscViewerFlush(viewer);
781:       PetscViewerGetFormat(viewer,&format);
782:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
783:         PetscMPIInt *tmpranks,*perm;
784:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
785:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
786:         for (i=0; i<sf->nranks; i++) perm[i] = i;
787:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
788:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
789:         for (ii=0; ii<sf->nranks; ii++) {
790:           i = perm[ii];
791:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
792:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
793:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
794:           }
795:         }
796:         PetscFree2(tmpranks,perm);
797:       }
798:       PetscViewerFlush(viewer);
799:       PetscViewerASCIIPopSynchronized(viewer);
800:     }
801:     PetscViewerASCIIPopTab(viewer);
802:   }
803:   return(0);
804: }

806: /*@C
807:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

809:    Not Collective

811:    Input Arguments:
812: .  sf - star forest

814:    Output Arguments:
815: +  nranks - number of ranks referenced by local part
816: .  ranks - array of ranks
817: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
818: .  rmine - concatenated array holding local indices referencing each remote rank
819: -  rremote - concatenated array holding remote indices referenced for each remote rank

821:    Level: developer

823: .seealso: PetscSFGetLeafRanks()
824: @*/
825: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
826: {

831:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
832:   if (sf->ops->GetRootRanks) {
833:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
834:   } else {
835:     /* The generic implementation */
836:     if (nranks)  *nranks  = sf->nranks;
837:     if (ranks)   *ranks   = sf->ranks;
838:     if (roffset) *roffset = sf->roffset;
839:     if (rmine)   *rmine   = sf->rmine;
840:     if (rremote) *rremote = sf->rremote;
841:   }
842:   return(0);
843: }
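
/*
   A sketch of walking the per-rank connectivity returned by PetscSFGetRootRanks(), mirroring the
   detailed ASCII output of PetscSFView(). Error checking is elided as elsewhere in this listing.

     PetscInt          nranks,i,j;
     const PetscMPIInt *ranks;
     const PetscInt    *roffset,*rmine,*rremote;

     PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
     for (i=0; i<nranks; i++) {
       for (j=roffset[i]; j<roffset[i+1]; j++) {
         PetscPrintf(PETSC_COMM_SELF,"local %D <- remote %D on rank %d\n",rmine[j],rremote[j],ranks[i]);
       }
     }
*/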

845: /*@C
846:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

848:    Not Collective

850:    Input Arguments:
851: .  sf - star forest

853:    Output Arguments:
854: +  niranks - number of leaf ranks referencing roots on this process
855: .  iranks - array of ranks
856: .  ioffset - offset in irootloc for each rank (length niranks+1)
857: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

859:    Level: developer

861: .seealso: PetscSFGetRootRanks()
862: @*/
863: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
864: {

869:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
870:   if (sf->ops->GetLeafRanks) {
871:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
872:   } else {
873:     PetscSFType type;
874:     PetscSFGetType(sf,&type);
875:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
876:   }
877:   return(0);
878: }

880: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
881:   PetscInt i;
882:   for (i=0; i<n; i++) {
883:     if (needle == list[i]) return PETSC_TRUE;
884:   }
885:   return PETSC_FALSE;
886: }

888: /*@C
889:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

891:    Collective

893:    Input Arguments:
894: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
895: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

897:    Level: developer

899: .seealso: PetscSFGetRootRanks()
900: @*/
901: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
902: {
903:   PetscErrorCode     ierr;
904:   PetscTable         table;
905:   PetscTablePosition pos;
906:   PetscMPIInt        size,groupsize,*groupranks;
907:   PetscInt           *rcount,*ranks;
908:   PetscInt           i, irank = -1,orank = -1;

912:   PetscSFCheckGraphSet(sf,1);
913:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
914:   PetscTableCreate(10,size,&table);
915:   for (i=0; i<sf->nleaves; i++) {
916:     /* Log 1-based rank */
917:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
918:   }
919:   PetscTableGetCount(table,&sf->nranks);
920:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
921:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
922:   PetscTableGetHeadPosition(table,&pos);
923:   for (i=0; i<sf->nranks; i++) {
924:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
925:     ranks[i]--;             /* Convert back to 0-based */
926:   }
927:   PetscTableDestroy(&table);

929:   /* We expect that dgroup is reliably "small" while nranks could be large */
930:   {
931:     MPI_Group group = MPI_GROUP_NULL;
932:     PetscMPIInt *dgroupranks;
933:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
934:     MPI_Group_size(dgroup,&groupsize);
935:     PetscMalloc1(groupsize,&dgroupranks);
936:     PetscMalloc1(groupsize,&groupranks);
937:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
938:     if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
939:     MPI_Group_free(&group);
940:     PetscFree(dgroupranks);
941:   }

943:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
944:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
945:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
946:       if (InList(ranks[i],groupsize,groupranks)) break;
947:     }
948:     for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
949:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
950:     }
951:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
952:       PetscInt    tmprank,tmpcount;

954:       tmprank             = ranks[i];
955:       tmpcount            = rcount[i];
956:       ranks[i]            = ranks[sf->ndranks];
957:       rcount[i]           = rcount[sf->ndranks];
958:       ranks[sf->ndranks]  = tmprank;
959:       rcount[sf->ndranks] = tmpcount;
960:       sf->ndranks++;
961:     }
962:   }
963:   PetscFree(groupranks);
964:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
965:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
966:   sf->roffset[0] = 0;
967:   for (i=0; i<sf->nranks; i++) {
968:     PetscMPIIntCast(ranks[i],sf->ranks+i);
969:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
970:     rcount[i]        = 0;
971:   }
972:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
973:     /* short circuit */
974:     if (orank != sf->remote[i].rank) {
975:       /* Search for index of iremote[i].rank in sf->ranks */
976:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
977:       if (irank < 0) {
978:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
979:         if (irank >= 0) irank += sf->ndranks;
980:       }
981:       orank = sf->remote[i].rank;
982:     }
983:     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
984:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
985:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
986:     rcount[irank]++;
987:   }
988:   PetscFree2(rcount,ranks);
989:   return(0);
990: }

992: /*@C
993:    PetscSFGetGroups - gets incoming and outgoing process groups

995:    Collective

997:    Input Argument:
998: .  sf - star forest

1000:    Output Arguments:
1001: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1002: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1004:    Level: developer

1006: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1007: @*/
1008: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1009: {
1011:   MPI_Group      group = MPI_GROUP_NULL;

1014:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
1015:   if (sf->ingroup == MPI_GROUP_NULL) {
1016:     PetscInt       i;
1017:     const PetscInt *indegree;
1018:     PetscMPIInt    rank,*outranks,*inranks;
1019:     PetscSFNode    *remote;
1020:     PetscSF        bgcount;

1022:     /* Compute the number of incoming ranks */
1023:     PetscMalloc1(sf->nranks,&remote);
1024:     for (i=0; i<sf->nranks; i++) {
1025:       remote[i].rank  = sf->ranks[i];
1026:       remote[i].index = 0;
1027:     }
1028:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1029:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1030:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1031:     PetscSFComputeDegreeEnd(bgcount,&indegree);

1033:     /* Enumerate the incoming ranks */
1034:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1035:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1036:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1037:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1038:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1039:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1040:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1041:     MPI_Group_free(&group);
1042:     PetscFree2(inranks,outranks);
1043:     PetscSFDestroy(&bgcount);
1044:   }
1045:   *incoming = sf->ingroup;

1047:   if (sf->outgroup == MPI_GROUP_NULL) {
1048:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1049:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1050:     MPI_Group_free(&group);
1051:   }
1052:   *outgoing = sf->outgroup;
1053:   return(0);
1054: }

1056: /*@
1057:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1059:    Collective

1061:    Input Argument:
1062: .  sf - star forest that may contain roots with 0 or with more than 1 incoming edge (i.e., roots whose degree is not 1)

1064:    Output Arguments:
1065: .  multi - star forest with split roots, such that each root has degree exactly 1

1067:    Level: developer

1069:    Notes:

1071:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1072:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1073:    edge, it is a candidate for future optimization that might involve its removal.

1075: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1076: @*/
1077: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1078: {

1084:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1085:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1086:     *multi = sf->multi;
1087:     return(0);
1088:   }
1089:   if (!sf->multi) {
1090:     const PetscInt *indegree;
1091:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1092:     PetscSFNode    *remote;
1093:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1094:     PetscSFComputeDegreeBegin(sf,&indegree);
1095:     PetscSFComputeDegreeEnd(sf,&indegree);
1096:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1097:     inoffset[0] = 0;
1098:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1099:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1100:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1101:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1102:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1103: #if 0
1104: #if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
1105:     for (i=0; i<sf->nroots; i++) {
1106:       if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1107:     }
1108: #endif
1109: #endif
1110:     PetscMalloc1(sf->nleaves,&remote);
1111:     for (i=0; i<sf->nleaves; i++) {
1112:       remote[i].rank  = sf->remote[i].rank;
1113:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1114:     }
1115:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1116:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1117:     if (sf->rankorder) {        /* Sort the ranks */
1118:       PetscMPIInt rank;
1119:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1120:       PetscSFNode *newremote;
1121:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1122:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1123:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1124:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1125:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1126:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1127:       /* Sort the incoming ranks at each vertex, build the inverse map */
1128:       for (i=0; i<sf->nroots; i++) {
1129:         PetscInt j;
1130:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1131:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1132:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1133:       }
1134:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1135:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1136:       PetscMalloc1(sf->nleaves,&newremote);
1137:       for (i=0; i<sf->nleaves; i++) {
1138:         newremote[i].rank  = sf->remote[i].rank;
1139:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1140:       }
1141:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1142:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1143:     }
1144:     PetscFree3(inoffset,outones,outoffset);
1145:   }
1146:   *multi = sf->multi;
1147:   return(0);
1148: }

1150: /*@C
1151:    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

1153:    Collective

1155:    Input Arguments:
1156: +  sf - original star forest
1157: .  nselected  - number of selected roots on this process
1158: -  selected   - indices of the selected roots on this process

1160:    Output Arguments:
1161: .  newsf - new star forest

1163:    Level: advanced

1165:    Note:
1166:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1167:    be done by calling PetscSFGetGraph().

1169: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1170: @*/
1171: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1172: {
1173:   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1174:   const PetscSFNode *iremote;
1175:   PetscSFNode       *new_iremote;
1176:   PetscSF           tmpsf;
1177:   MPI_Comm          comm;
1178:   PetscErrorCode    ierr;

1182:   PetscSFCheckGraphSet(sf,1);

1186:   PetscSFSetUp(sf);
1187:   PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1188:   /* Uniq selected[] and put results in roots[] */
1189:   PetscObjectGetComm((PetscObject)sf,&comm);
1190:   PetscMalloc1(nselected,&roots);
1191:   PetscArraycpy(roots,selected,nselected);
1192:   PetscSortedRemoveDupsInt(&nselected,roots);
1193:   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);

1195:   if (sf->ops->CreateEmbeddedSF) {
1196:     (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);
1197:   } else {
1198:     /* A generic version of creating an embedded sf. Note that we call PetscSFSetGraph() twice, which is certainly expensive */
1199:     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1200:     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
1201:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);
1202:     PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
1203:     PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);
1204:     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1205:     PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);
1206:     PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);
1207:     PetscSFDestroy(&tmpsf);

1209:     /* Build newsf with leaves that are still connected */
1210:     connected_leaves = 0;
1211:     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1212:     PetscMalloc1(connected_leaves,&new_ilocal);
1213:     PetscMalloc1(connected_leaves,&new_iremote);
1214:     for (i=0, n=0; i<nleaves; ++i) {
1215:       if (leafdata[i]) {
1216:         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1217:         new_iremote[n].rank  = sf->remote[i].rank;
1218:         new_iremote[n].index = sf->remote[i].index;
1219:         ++n;
1220:       }
1221:     }

1223:     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1224:     PetscSFCreate(comm,newsf);
1225:     PetscSFSetFromOptions(*newsf);
1226:     PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1227:     PetscFree2(rootdata,leafdata);
1228:   }
1229:   PetscFree(roots);
1230:   PetscSFSetUp(*newsf);
1231:   PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1232:   return(0);
1233: }
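
/*
   A sketch of embedding on a subset of roots: only edges whose root appears in selected[] are
   kept, and indices are not remapped. The selection assumes every process owns at least one root;
   error checking is elided as elsewhere in this listing.

     PetscSF  esf;
     PetscInt selected[1] = {0};

     PetscSFCreateEmbeddedSF(sf,1,selected,&esf);
     PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);
     PetscSFDestroy(&esf);
*/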

1235: /*@C
1236:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1238:   Collective

1240:   Input Arguments:
1241: + sf - original star forest
1242: . nselected  - number of selected leaves on this process
1243: - selected   - indices of the selected leaves on this process

1245:   Output Arguments:
1246: .  newsf - new star forest

1248:   Level: advanced

1250: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1251: @*/
1252: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1253: {
1254:   const PetscSFNode *iremote;
1255:   PetscSFNode       *new_iremote;
1256:   const PetscInt    *ilocal;
1257:   PetscInt          i,nroots,*leaves,*new_ilocal;
1258:   MPI_Comm          comm;
1259:   PetscErrorCode    ierr;

1263:   PetscSFCheckGraphSet(sf,1);

1267:   /* Uniq selected[] and put results in leaves[] */
1268:   PetscObjectGetComm((PetscObject)sf,&comm);
1269:   PetscMalloc1(nselected,&leaves);
1270:   PetscArraycpy(leaves,selected,nselected);
1271:   PetscSortedRemoveDupsInt(&nselected,leaves);
1272:   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

1274:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1275:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1276:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1277:   } else {
1278:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1279:     PetscMalloc1(nselected,&new_ilocal);
1280:     PetscMalloc1(nselected,&new_iremote);
1281:     for (i=0; i<nselected; ++i) {
1282:       const PetscInt l     = leaves[i];
1283:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1284:       new_iremote[i].rank  = iremote[l].rank;
1285:       new_iremote[i].index = iremote[l].index;
1286:     }
1287:     PetscSFCreate(comm,newsf);
1288:     PetscSFSetFromOptions(*newsf);
1289:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1290:   }
1291:   PetscFree(leaves);
1292:   return(0);
1293: }

1295: /*@C
1296:    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

1298:    Collective on PetscSF

1300:    Input Arguments:
1301: +  sf - star forest on which to communicate
1302: .  unit - data type associated with each node
1303: .  rootdata - buffer to broadcast
1304: -  op - operation to use for reduction

1306:    Output Arguments:
1307: .  leafdata - buffer to be reduced with values from each leaf's respective root

1309:    Level: intermediate

1311: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1312: @*/
1313: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1314: {
1316:   PetscMemType   rootmtype,leafmtype;

1320:   PetscSFSetUp(sf);
1321:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1322:   PetscGetMemType(rootdata,&rootmtype);
1323:   PetscGetMemType(leafdata,&leafmtype);
1324: #if defined(PETSC_HAVE_CUDA)
1325:   /*  Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)?
1326:     To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream.
1327:     But currently this assumption is not enforced in PETSc. To be safe, I do synchronization here. Otherwise, if
1328:     we do not sync now and call the Pack kernel directly on the default NULL stream (assuming petsc objects are also
1329:     computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks like a cudaDeviceSynchronize()
1330:     is inevitable. We do it now and put the pack/unpack kernels on non-NULL streams.
1331:    */
1332:   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1333: #endif
1334:   (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1335:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1336:   return(0);
1337: }

1339: /*@C
1340:    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

1342:    Collective

1344:    Input Arguments:
1345: +  sf - star forest
1346: .  unit - data type
1347: .  rootdata - buffer to broadcast
1348: -  op - operation to use for reduction

1350:    Output Arguments:
1351: .  leafdata - buffer to be reduced with values from each leaf's respective root

1353:    Level: intermediate

1355: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1356: @*/
1357: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1358: {
1360:   PetscMemType   rootmtype,leafmtype;

1364:   PetscSFSetUp(sf);
1365:   PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1366:   PetscGetMemType(rootdata,&rootmtype);
1367:   PetscGetMemType(leafdata,&leafmtype);
1368:   (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1369:   PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1370:   return(0);
1371: }
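
/*
   A sketch of the begin/end pair above used as a plain broadcast (MPIU_REPLACE simply overwrites
   each leaf with its root value); unrelated local work can be overlapped between the two calls.
   Leaf storage is assumed contiguous (ilocal == NULL), so the leaf buffer has length nleaves;
   error checking is elided as elsewhere in this listing.

     PetscInt nroots,nleaves,i,*rootdata,*leafdata;

     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
     PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);
     for (i=0; i<nroots; i++) rootdata[i] = i;
     PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPIU_REPLACE);
     PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPIU_REPLACE);
     PetscFree2(rootdata,leafdata);
*/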

1373: /*@C
1374:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1376:    Collective

1378:    Input Arguments:
1379: +  sf - star forest
1380: .  unit - data type
1381: .  leafdata - values to reduce
1382: -  op - reduction operation

1384:    Output Arguments:
1385: .  rootdata - result of reduction of values from all leaves of each root

1387:    Level: intermediate

1389: .seealso: PetscSFBcastBegin()
1390: @*/
1391: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1392: {
1394:   PetscMemType   rootmtype,leafmtype;

1398:   PetscSFSetUp(sf);
1399:   PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1400:   PetscGetMemType(rootdata,&rootmtype);
1401:   PetscGetMemType(leafdata,&leafmtype);
1402: #if defined(PETSC_HAVE_CUDA)
1403:   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1404: #endif
1405:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1406:   PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1407:   return(0);
1408: }

1410: /*@C
1411:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1413:    Collective

1415:    Input Arguments:
1416: +  sf - star forest
1417: .  unit - data type
1418: .  leafdata - values to reduce
1419: -  op - reduction operation

1421:    Output Arguments:
1422: .  rootdata - result of reduction of values from all leaves of each root

1424:    Level: intermediate

1426: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1427: @*/
1428: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1429: {
1431:   PetscMemType   rootmtype,leafmtype;

1435:   PetscSFSetUp(sf);
1436:   PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1437:   PetscGetMemType(rootdata,&rootmtype);
1438:   PetscGetMemType(leafdata,&leafmtype);
1439:   (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1440:   PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1441:   return(0);
1442: }
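
/*
   A sketch of the reduce pair above: each leaf contributes 1 and the contributions are summed into
   the roots, so afterwards rootdata[i] equals the degree of root i (compare
   PetscSFComputeDegreeBegin()). Leaf storage is assumed contiguous; error checking is elided as
   elsewhere in this listing.

     PetscInt nroots,nleaves,i,*rootdata,*leafdata;

     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
     PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);
     for (i=0; i<nroots; i++)  rootdata[i] = 0;
     for (i=0; i<nleaves; i++) leafdata[i] = 1;
     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscFree2(rootdata,leafdata);
*/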

1444: /*@C
1445:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1447:    Collective

1449:    Input Arguments:
1450: +  sf - star forest
1451: .  unit - data type
1452: .  leafdata - leaf values to use in reduction
1453: -  op - operation to use for reduction

1455:    Output Arguments:
1456: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1457: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1459:    Level: advanced

1461:    Note:
1462:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1463:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1464:    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1465:    integers.

1467: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1468: @*/
1469: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1470: {
1472:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1476:   PetscSFSetUp(sf);
1477:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1478:   PetscGetMemType(rootdata,&rootmtype);
1479:   PetscGetMemType(leafdata,&leafmtype);
1480:   PetscGetMemType(leafupdate,&leafupdatemtype);
1481:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1482: #if defined(PETSC_HAVE_CUDA)
1483:   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1484: #endif
1485:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1486:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1487:   return(0);
1488: }

1490: /*@C
1491:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1493:    Collective

1495:    Input Arguments:
1496: +  sf - star forest
1497: .  unit - data type
1498: .  leafdata - leaf values to use in reduction
1499: -  op - operation to use for reduction

1501:    Output Arguments:
1502: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1503: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1505:    Level: advanced

1507: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1508: @*/
1509: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1510: {
1512:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1516:   PetscSFSetUp(sf);
1517:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1518:   PetscGetMemType(rootdata,&rootmtype);
1519:   PetscGetMemType(leafdata,&leafmtype);
1520:   PetscGetMemType(leafupdate,&leafupdatemtype);
1521:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1522:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1523:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1524:   return(0);
1525: }
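
/*
   A sketch of the most common use of the pair above, fetch-and-add: every leaf adds 1 to its root
   and receives in leafupdate the root's value just before its own update, i.e. a unique 0-based
   slot among the leaves of that root. This is the same idiom used by PetscSFGetMultiSF() earlier
   in this file. Leaf storage is assumed contiguous; error checking is elided as elsewhere in this
   listing.

     PetscInt nroots,nleaves,i,*rootcount,*ones,*leafslot;

     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
     PetscMalloc3(nroots,&rootcount,nleaves,&ones,nleaves,&leafslot);
     for (i=0; i<nroots; i++)  rootcount[i] = 0;
     for (i=0; i<nleaves; i++) ones[i] = 1;
     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,ones,leafslot,MPI_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,ones,leafslot,MPI_SUM);
     PetscFree3(rootcount,ones,leafslot);
*/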

1527: /*@C
1528:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1530:    Collective

1532:    Input Arguments:
1533: .  sf - star forest

1535:    Output Arguments:
1536: .  degree - degree of each root vertex

1538:    Level: advanced

1540:    Notes:
1541:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1543: .seealso: PetscSFGatherBegin()
1544: @*/
1545: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1546: {

1551:   PetscSFCheckGraphSet(sf,1);
1553:   if (!sf->degreeknown) {
1554:     PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
1555:     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1556:     PetscMalloc1(nroots,&sf->degree);
1557:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1558:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1559:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1560:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1561:   }
1562:   *degree = NULL;
1563:   return(0);
1564: }

1566: /*@C
1567:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1569:    Collective

1571:    Input Arguments:
1572: .  sf - star forest

1574:    Output Arguments:
1575: .  degree - degree of each root vertex

1577:    Level: advanced

1579:    Notes:
1580:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1582: .seealso: PetscSFComputeDegreeBegin()
1583: @*/
1584: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1585: {

1590:   PetscSFCheckGraphSet(sf,1);
1592:   if (!sf->degreeknown) {
1593:     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1594:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1595:     PetscFree(sf->degreetmp);
1596:     sf->degreeknown = PETSC_TRUE;
1597:   }
1598:   *degree = sf->degree;
1599:   return(0);
1600: }
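
/* Editorial sketch (not part of sf.c): the degree pair above is normally called back to back; the
   returned array is owned by the SF, so the caller must not free it.  The helper name is hypothetical
   and the calls follow the bare-call style of this listing. */
static PetscErrorCode SFPrintRootDegrees_Sketch(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        i,nroots;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree); /* degree[i] = number of leaves attached to root i */
  for (i=0; i<nroots; i++) {
    PetscPrintf(PETSC_COMM_SELF,"root %D has degree %D\n",i,degree[i]);
  }
  return(0);
}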


1603: /*@C
1604:    PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-SF returned by PetscSFGetMultiSF()).
1605:    Each multi-root is assigned the index of the corresponding original root.

1607:    Collective

1609:    Input Arguments:
1610: +  sf - star forest
1611: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1613:    Output Arguments:
1614: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1615: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1617:    Level: developer

1619:    Notes:
1620:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1622: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1623: @*/
1624: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1625: {
1626:   PetscSF             msf;
1627:   PetscInt            i, j, k, nroots, nmroots;
1628:   PetscErrorCode      ierr;

1632:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1636:   PetscSFGetMultiSF(sf,&msf);
1637:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1638:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1639:   for (i=0,j=0,k=0; i<nroots; i++) {
1640:     if (!degree[i]) continue;
1641:     for (j=0; j<degree[i]; j++,k++) {
1642:       (*multiRootsOrigNumbering)[k] = i;
1643:     }
1644:   }
1645: #if defined(PETSC_USE_DEBUG)
1646:   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: number of multi-root indices does not match number of multi-roots");
1647: #endif
1648:   if (nMultiRoots) *nMultiRoots = nmroots;
1649:   return(0);
1650: }
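
/* Editorial sketch (not part of sf.c): typical use of the routine above is to first compute the root
   degrees and then map every multi-root back to its original root index.  Unlike the degree array, the
   numbering array is freshly allocated and must be freed by the caller.  The helper name is hypothetical. */
static PetscErrorCode SFMultiRootNumbering_Sketch(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        nmr,*orig;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmr,&orig);
  /* orig[k] is the original root index of multi-root k, for 0 <= k < nmr */
  PetscFree(orig);
  return(0);
}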

1652: /*@C
1653:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1655:    Collective

1657:    Input Arguments:
1658: +  sf - star forest
1659: .  unit - data type
1660: -  leafdata - leaf data to gather to roots

1662:    Output Argument:
1663: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1665:    Level: intermediate

1667: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1668: @*/
1669: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1670: {
1672:   PetscSF        multi;

1676:   PetscSFSetUp(sf);
1677:   PetscSFGetMultiSF(sf,&multi);
1678:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1679:   return(0);
1680: }

1682: /*@C
1683:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1685:    Collective

1687:    Input Arguments:
1688: +  sf - star forest
1689: .  unit - data type
1690: -  leafdata - leaf data to gather to roots

1692:    Output Argument:
1693: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1695:    Level: intermediate

1697: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1698: @*/
1699: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1700: {
1702:   PetscSF        multi;

1706:   PetscSFSetUp(sf);
1707:   PetscSFGetMultiSF(sf,&multi);
1708:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1709:   return(0);
1710: }
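
/* Editorial sketch (not part of sf.c): gathering one PetscInt per leaf into a multi-root buffer whose
   size is the sum of the root degrees, as required by PetscSFGatherBegin()/PetscSFGatherEnd() above.
   The helper is hypothetical and assumes contiguous leaf storage; the caller frees *multirootdata. */
static PetscErrorCode SFGatherLeafValues_Sketch(PetscSF sf,const PetscInt *leafdata,PetscInt **multirootdata)
{
  const PetscInt *degree;
  PetscInt        i,nroots,nmulti;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0,nmulti=0; i<nroots; i++) nmulti += degree[i]; /* one slot per (root,leaf) edge */
  PetscMalloc1(nmulti,multirootdata);
  PetscSFGatherBegin(sf,MPIU_INT,leafdata,*multirootdata);
  PetscSFGatherEnd(sf,MPIU_INT,leafdata,*multirootdata);
  return(0);
}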

1712: /*@C
1713:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1715:    Collective

1717:    Input Arguments:
1718: +  sf - star forest
1719: .  unit - data type
1720: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1722:    Output Argument:
1723: .  leafdata - leaf data to be updated with personal data from each respective root

1725:    Level: intermediate

1727: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1728: @*/
1729: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1730: {
1732:   PetscSF        multi;

1736:   PetscSFSetUp(sf);
1737:   PetscSFGetMultiSF(sf,&multi);
1738:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1739:   return(0);
1740: }

1742: /*@C
1743:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1745:    Collective

1747:    Input Arguments:
1748: +  sf - star forest
1749: .  unit - data type
1750: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1752:    Output Argument:
1753: .  leafdata - leaf data to be updated with personal data from each respective root

1755:    Level: intermediate

1757: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1758: @*/
1759: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1760: {
1762:   PetscSF        multi;

1766:   PetscSFSetUp(sf);
1767:   PetscSFGetMultiSF(sf,&multi);
1768:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1769:   return(0);
1770: }
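
/* Editorial sketch (not part of sf.c): the converse of the gather sketch above, sending one distinct
   unit from a degree-sized multi-root buffer back to each leaf.  The helper name is hypothetical and
   leafdata is assumed to be sized for the SF's leaf space. */
static PetscErrorCode SFScatterToLeaves_Sketch(PetscSF sf,const PetscInt *multirootdata,PetscInt *leafdata)
{
  PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata); /* each leaf receives its personal unit */
  return(0);
}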

1772: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1773: {
1774: #if defined(PETSC_USE_DEBUG)
1775:   PetscInt        i, n, nleaves;
1776:   const PetscInt *ilocal = NULL;
1777:   PetscHSetI      seen;
1778:   PetscErrorCode  ierr;

1781:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1782:   PetscHSetICreate(&seen);
1783:   for (i = 0; i < nleaves; i++) {
1784:     const PetscInt leaf = ilocal ? ilocal[i] : i;
1785:     PetscHSetIAdd(seen,leaf);
1786:   }
1787:   PetscHSetIGetSize(seen,&n);
1788:   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1789:   PetscHSetIDestroy(&seen);
1790:   return(0);
1791: #else
1793:   return(0);
1794: #endif
1795: }

1797: /*@
1798:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1800:   Input Parameters:
1801: + sfA - The first PetscSF
1802: - sfB - The second PetscSF

1804:   Output Parameters:
1805: . sfBA - The composite SF

1807:   Level: developer

1809:   Notes:
1810:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1811:   forests, i.e., the same leaf is not connected to different roots.

1813:   sfA's leaf space and sfB's root space may partially overlap. The composition builds
1814:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1815:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1816:   Bcast on sfA followed by a Bcast on sfB, restricted to the connected nodes.

1818: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1819: @*/
1820: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1821: {
1822:   PetscErrorCode    ierr;
1823:   const PetscSFNode *remotePointsA,*remotePointsB;
1824:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1825:   const PetscInt    *localPointsA,*localPointsB;
1826:   PetscInt          *localPointsBA;
1827:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1828:   PetscBool         denseB;

1832:   PetscSFCheckGraphSet(sfA,1);
1834:   PetscSFCheckGraphSet(sfB,2);
1837:   PetscSFCheckLeavesUnique_Private(sfA);
1838:   PetscSFCheckLeavesUnique_Private(sfB);

1840:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1841:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1842:   if (localPointsA) {
1843:     PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1844:     for (i=0; i<numRootsB; i++) {
1845:       reorderedRemotePointsA[i].rank = -1;
1846:       reorderedRemotePointsA[i].index = -1;
1847:     }
1848:     for (i=0; i<numLeavesA; i++) {
1849:       if (localPointsA[i] >= numRootsB) continue;
1850:       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1851:     }
1852:     remotePointsA = reorderedRemotePointsA;
1853:   }
1854:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1855:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1856:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1857:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1858:   PetscFree(reorderedRemotePointsA);

1860:   denseB = (PetscBool)!localPointsB;
1861:   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1862:     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1863:     else numLeavesBA++;
1864:   }
1865:   if (denseB) {
1866:     localPointsBA  = NULL;
1867:     remotePointsBA = leafdataB;
1868:   } else {
1869:     PetscMalloc1(numLeavesBA,&localPointsBA);
1870:     PetscMalloc1(numLeavesBA,&remotePointsBA);
1871:     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1872:       const PetscInt l = localPointsB ? localPointsB[i] : i;

1874:       if (leafdataB[l-minleaf].rank == -1) continue;
1875:       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1876:       localPointsBA[numLeavesBA] = l;
1877:       numLeavesBA++;
1878:     }
1879:     PetscFree(leafdataB);
1880:   }
1881:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1882:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1883:   return(0);
1884: }
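
/* Editorial sketch (not part of sf.c): composing two SFs and broadcasting through the composite, which,
   per the Notes above, is equivalent to a Bcast on sfA followed by a Bcast on sfB on the connected nodes.
   The helper is hypothetical; rootdataA is sized for sfA's roots and leafdataB for sfB's leaf space. */
static PetscErrorCode SFComposeAndBcast_Sketch(PetscSF sfA,PetscSF sfB,const PetscInt *rootdataA,PetscInt *leafdataB)
{
  PetscSF sfBA;

  PetscSFCompose(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,leafdataB); /* root data of sfA goes directly to leaves of sfB */
  PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,leafdataB);
  PetscSFDestroy(&sfBA);
  return(0);
}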

1886: /*@
1887:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

1889:   Input Parameters:
1890: + sfA - The first PetscSF
1891: - sfB - The second PetscSF

1893:   Output Parameters:
1894: . sfBA - The composite SF.

1896:   Level: developer

1898:   Notes:
1899:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1900:   forests, i.e., the same leaf is not connected to different roots. Moreover, all roots of the
1901:   second SF must have degree 1, i.e., no root has more than one leaf connected to it.

1903:   sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
1904:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
1905:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA followed
1906:   by a Reduce on sfB, restricted to the connected roots.

1908: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
1909: @*/
1910: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1911: {
1912:   PetscErrorCode    ierr;
1913:   const PetscSFNode *remotePointsA,*remotePointsB;
1914:   PetscSFNode       *remotePointsBA;
1915:   const PetscInt    *localPointsA,*localPointsB;
1916:   PetscSFNode       *reorderedRemotePointsA = NULL;
1917:   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

1921:   PetscSFCheckGraphSet(sfA,1);
1923:   PetscSFCheckGraphSet(sfB,2);
1926:   PetscSFCheckLeavesUnique_Private(sfA);
1927:   PetscSFCheckLeavesUnique_Private(sfB);
1928:   /* TODO: Check roots of sfB have degree of 1 */

1930:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1931:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1932:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1933:   PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
1934:   for (i=0; i<maxleaf - minleaf + 1; i++) {
1935:     reorderedRemotePointsA[i].rank = -1;
1936:     reorderedRemotePointsA[i].index = -1;
1937:   }
1938:   if (localPointsA) {
1939:     for (i=0; i<numLeavesA; i++) {
1940:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
1941:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
1942:     }
1943:   } else {
1944:     for (i=0; i<numLeavesA; i++) {
1945:       if (i > maxleaf || i < minleaf) continue;
1946:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
1947:     }
1948:   }

1950:   PetscMalloc1(numRootsB,&localPointsBA);
1951:   PetscMalloc1(numRootsB,&remotePointsBA);
1952:   for (i=0; i<numRootsB; i++) {
1953:     remotePointsBA[i].rank = -1;
1954:     remotePointsBA[i].index = -1;
1955:   }

1957:   /* Once the TODO above is implemented (checking that all roots of sfB have degree 1), the MPI_MAXLOC below
1958:      can be replaced with MPIU_REPLACE, since under that condition the two operations have the same effect.
1959:    */
1960:   PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);
1961:   PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);
1962:   PetscFree(reorderedRemotePointsA);
1963:   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
1964:     if (remotePointsBA[i].rank == -1) continue;
1965:     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
1966:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
1967:     localPointsBA[numLeavesBA] = i;
1968:     numLeavesBA++;
1969:   }
1970:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1971:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1972:   return(0);
1973: }
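
/* Editorial sketch (not part of sf.c): using the inverse composition above, whose Bcast corresponds to a
   Bcast on sfA followed by a Reduce on sfB, i.e. it moves data from sfA's roots to sfB's roots.  The
   helper is hypothetical; rootdataA is sized for sfA's roots and rootdataB for sfB's roots. */
static PetscErrorCode SFComposeInverseAndBcast_Sketch(PetscSF sfA,PetscSF sfB,const PetscInt *rootdataA,PetscInt *rootdataB)
{
  PetscSF sfBA;

  PetscSFComposeInverse(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,rootdataB); /* leaves of sfBA are sfB's roots */
  PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,rootdataB);
  PetscSFDestroy(&sfBA);
  return(0);
}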

1975: /*
1976:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

1978:   Input Parameters:
1979: . sf - The global PetscSF

1981:   Output Parameters:
1982: . out - The local PetscSF
1983:  */
1984: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1985: {
1986:   MPI_Comm           comm;
1987:   PetscMPIInt        myrank;
1988:   const PetscInt     *ilocal;
1989:   const PetscSFNode  *iremote;
1990:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1991:   PetscSFNode        *liremote;
1992:   PetscSF            lsf;
1993:   PetscErrorCode     ierr;

1997:   if (sf->ops->CreateLocalSF) {
1998:     (*sf->ops->CreateLocalSF)(sf,out);
1999:   } else {
2000:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2001:     PetscObjectGetComm((PetscObject)sf,&comm);
2002:     MPI_Comm_rank(comm,&myrank);

2004:     /* Find out local edges and build a local SF */
2005:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2006:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2007:     PetscMalloc1(lnleaves,&lilocal);
2008:     PetscMalloc1(lnleaves,&liremote);

2010:     for (i=j=0; i<nleaves; i++) {
2011:       if (iremote[i].rank == (PetscInt)myrank) {
2012:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2013:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2014:         liremote[j].index = iremote[i].index;
2015:         j++;
2016:       }
2017:     }
2018:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
2019:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2020:     PetscSFSetUp(lsf);
2021:     *out = lsf;
2022:   }
2023:   return(0);
2024: }

2026: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2027: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2028: {
2029:   PetscErrorCode     ierr;
2030:   PetscMemType       rootmtype,leafmtype;

2034:   PetscSFSetUp(sf);
2035:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2036:   PetscGetMemType(rootdata,&rootmtype);
2037:   PetscGetMemType(leafdata,&leafmtype);
2038:   if (sf->ops->BcastToZero) {
2039:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2040:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2041:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2042:   return(0);
2043: }