Actual source code: sf.c

petsc-master 2019-09-20
  1:  #include <petsc/private/sfimpl.h>
  2:  #include <petsc/private/hashseti.h>
  3:  #include <petscctable.h>

  5: #if defined(PETSC_USE_DEBUG)
  6: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
  7:     if (PetscUnlikely(!(sf)->graphset))                              \
  8:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  9:   } while (0)
 10: #else
 11: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 12: #endif

 14: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

 16: /*@
 17:    PetscSFCreate - create a star forest communication context

 19:    Collective

 21:    Input Arguments:
 22: .  comm - communicator on which the star forest will operate

 24:    Output Arguments:
 25: .  sf - new star forest context

 27:    Options Database Keys:
 28: +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
 29: .  -sf_type window    -Use MPI-3 one-sided window for communication
 30: -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

 32:    Level: intermediate

 34:    Notes:
 35:    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
 36:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 37:    SFs are optimized and have better performance than general SFs.

 39: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 40: @*/
 41: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 42: {
 44:   PetscSF        b;

 48:   PetscSFInitializePackage();

 50:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 52:   b->nroots    = -1;
 53:   b->nleaves   = -1;
 54:   b->minleaf   = PETSC_MAX_INT;
 55:   b->maxleaf   = PETSC_MIN_INT;
 56:   b->nranks    = -1;
 57:   b->rankorder = PETSC_TRUE;
 58:   b->ingroup   = MPI_GROUP_NULL;
 59:   b->outgroup  = MPI_GROUP_NULL;
 60:   b->graphset  = PETSC_FALSE;

 62:   *sf = b;
 63:   return(0);
 64: }
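/*
   Illustrative usage sketch (not part of the library source); error checking with
   ierr/CHKERRQ() is omitted for brevity:

     PetscSF sf;

     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);      (honors -sf_type basic|window|neighbor)
     ... describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern() ...
     PetscSFSetUp(sf);
     ... communicate with PetscSFBcastBegin()/End(), PetscSFReduceBegin()/End(), ...
     PetscSFDestroy(&sf);
*/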

 66: /*@
 67:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

 69:    Collective

 71:    Input Arguments:
 72: .  sf - star forest

 74:    Level: advanced

 76: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
 77: @*/
 78: PetscErrorCode PetscSFReset(PetscSF sf)
 79: {

 84:   if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
 85:   sf->nroots   = -1;
 86:   sf->nleaves  = -1;
 87:   sf->minleaf  = PETSC_MAX_INT;
 88:   sf->maxleaf  = PETSC_MIN_INT;
 89:   sf->mine     = NULL;
 90:   sf->remote   = NULL;
 91:   sf->graphset = PETSC_FALSE;
 92:   PetscFree(sf->mine_alloc);
 93:   PetscFree(sf->remote_alloc);
 94:   sf->nranks = -1;
 95:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
 96:   sf->degreeknown = PETSC_FALSE;
 97:   PetscFree(sf->degree);
 98:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
 99:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
100:   PetscSFDestroy(&sf->multi);
101:   PetscLayoutDestroy(&sf->map);
102:   sf->setupcalled = PETSC_FALSE;
103:   return(0);
104: }

106: /*@C
107:    PetscSFSetType - Set the PetscSF communication implementation

109:    Collective on PetscSF

111:    Input Parameters:
112: +  sf - the PetscSF context
113: -  type - a known method

115:    Options Database Key:
116: .  -sf_type <type> - Sets the method; use -help for a list
117:    of available methods (for instance, basic, window, neighbor)

119:    Notes:
120:    See "include/petscsf.h" for available methods (for instance)
121: +    PETSCSFWINDOW - MPI-2/3 one-sided
122: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

124:   Level: intermediate

126: .seealso: PetscSFType, PetscSFCreate()
127: @*/
128: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
129: {
130:   PetscErrorCode ierr,(*r)(PetscSF);
131:   PetscBool      match;


137:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
138:   if (match) return(0);

140:   PetscFunctionListFind(PetscSFList,type,&r);
141:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
142:   /* Destroy the previous PetscSF implementation context */
143:   if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
144:   PetscMemzero(sf->ops,sizeof(*sf->ops));
145:   PetscObjectChangeTypeName((PetscObject)sf,type);
146:   (*r)(sf);
147:   return(0);
148: }

150: /*@C
151:   PetscSFGetType - Get the PetscSF communication implementation

153:   Not Collective

155:   Input Parameter:
156: . sf  - the PetscSF context

158:   Output Parameter:
159: . type - the PetscSF type name

161:   Level: intermediate

163: .seealso: PetscSFSetType(), PetscSFCreate()
164: @*/
165: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
166: {
170:   *type = ((PetscObject)sf)->type_name;
171:   return(0);
172: }

174: /*@
175:    PetscSFDestroy - destroy star forest

177:    Collective

179:    Input Arguments:
180: .  sf - address of star forest

182:    Level: intermediate

184: .seealso: PetscSFCreate(), PetscSFReset()
185: @*/
186: PetscErrorCode PetscSFDestroy(PetscSF *sf)
187: {

191:   if (!*sf) return(0);
193:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
194:   PetscSFReset(*sf);
195:   if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
196:   PetscHeaderDestroy(sf);
197:   return(0);
198: }

200: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
201: {
202: #if defined(PETSC_USE_DEBUG)
203:   PetscInt           i, nleaves;
204:   PetscMPIInt        size;
205:   const PetscInt    *ilocal;
206:   const PetscSFNode *iremote;
207:   PetscErrorCode     ierr;

210:   if (!sf->graphset) return(0);
211:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
212:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
213:   for (i = 0; i < nleaves; i++) {
214:     const PetscInt rank = iremote[i].rank;
215:     const PetscInt remote = iremote[i].index;
216:     const PetscInt leaf = ilocal ? ilocal[i] : i;
217:     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
218:     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
219:     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
220:   }
221:   return(0);
222: #else
224:   return(0);
225: #endif
226: }

228: /*@
229:    PetscSFSetUp - set up communication structures

231:    Collective

233:    Input Arguments:
234: .  sf - star forest communication object

236:    Level: beginner

238: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
239: @*/
240: PetscErrorCode PetscSFSetUp(PetscSF sf)
241: {

246:   PetscSFCheckGraphSet(sf,1);
247:   if (sf->setupcalled) return(0);
248:   PetscSFCheckGraphValid_Private(sf);
249:   if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
250:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
251:   if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
252:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
253:   sf->setupcalled = PETSC_TRUE;
254:   return(0);
255: }

257: /*@
258:    PetscSFSetFromOptions - set PetscSF options using the options database

260:    Logically Collective

262:    Input Arguments:
263: .  sf - star forest

265:    Options Database Keys:
266: +  -sf_type - implementation type, see PetscSFSetType()
267: -  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

269:    Level: intermediate

271: .seealso: PetscSFWindowSetSyncType()
272: @*/
273: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
274: {
275:   PetscSFType    deft;
276:   char           type[256];
278:   PetscBool      flg;

282:   PetscObjectOptionsBegin((PetscObject)sf);
283:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
284:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
285:   PetscSFSetType(sf,flg ? type : deft);
286:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
287:   if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
288:   PetscOptionsEnd();
289:   return(0);
290: }
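/*
   Illustrative sketch (not part of the library source): with PetscSFSetFromOptions()
   called on the SF, the backend and rank ordering can be chosen at runtime, e.g.

     ./app -sf_type window -sf_rank_order false

   where ./app stands for any PETSc program; this has the same effect as calling
   PetscSFSetType(sf,PETSCSFWINDOW) and PetscSFSetRankOrder(sf,PETSC_FALSE) before
   PetscSFSetUp().
*/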

292: /*@
293:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

295:    Logically Collective

297:    Input Arguments:
298: +  sf - star forest
299: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

301:    Level: advanced

303: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
304: @*/
305: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
306: {

311:   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
312:   sf->rankorder = flg;
313:   return(0);
314: }

316: /*@
317:    PetscSFSetGraph - Set a parallel star forest

319:    Collective

321:    Input Arguments:
322: +  sf - star forest
323: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
324: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
325: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
326: during setup in debug mode)
327: .  localmode - copy mode for ilocal
328: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
329: during setup in debug mode)
330: -  remotemode - copy mode for iremote

332:    Level: intermediate

334:    Notes:
335:     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

337:    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
338:    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
339:    needed.

341:    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
342:    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

344: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
345: @*/
346: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
347: {

354:   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
355:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

357:   PetscSFReset(sf);
358:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

360:   sf->nroots  = nroots;
361:   sf->nleaves = nleaves;

363:   if (nleaves && ilocal) {
364:     PetscInt i;
365:     PetscInt minleaf = PETSC_MAX_INT;
366:     PetscInt maxleaf = PETSC_MIN_INT;
367:     int      contiguous = 1;
368:     for (i=0; i<nleaves; i++) {
369:       minleaf = PetscMin(minleaf,ilocal[i]);
370:       maxleaf = PetscMax(maxleaf,ilocal[i]);
371:       contiguous &= (ilocal[i] == i);
372:     }
373:     sf->minleaf = minleaf;
374:     sf->maxleaf = maxleaf;
375:     if (contiguous) {
376:       if (localmode == PETSC_OWN_POINTER) {
377:         PetscFree(ilocal);
378:       }
379:       ilocal = NULL;
380:     }
381:   } else {
382:     sf->minleaf = 0;
383:     sf->maxleaf = nleaves - 1;
384:   }

386:   if (ilocal) {
387:     switch (localmode) {
388:     case PETSC_COPY_VALUES:
389:       PetscMalloc1(nleaves,&sf->mine_alloc);
390:       PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
391:       sf->mine = sf->mine_alloc;
392:       break;
393:     case PETSC_OWN_POINTER:
394:       sf->mine_alloc = (PetscInt*)ilocal;
395:       sf->mine       = sf->mine_alloc;
396:       break;
397:     case PETSC_USE_POINTER:
398:       sf->mine_alloc = NULL;
399:       sf->mine       = (PetscInt*)ilocal;
400:       break;
401:     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
402:     }
403:   }

405:   switch (remotemode) {
406:   case PETSC_COPY_VALUES:
407:     PetscMalloc1(nleaves,&sf->remote_alloc);
408:     PetscArraycpy(sf->remote_alloc,iremote,nleaves);
409:     sf->remote = sf->remote_alloc;
410:     break;
411:   case PETSC_OWN_POINTER:
412:     sf->remote_alloc = (PetscSFNode*)iremote;
413:     sf->remote       = sf->remote_alloc;
414:     break;
415:   case PETSC_USE_POINTER:
416:     sf->remote_alloc = NULL;
417:     sf->remote       = (PetscSFNode*)iremote;
418:     break;
419:   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
420:   }

422:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
423:   sf->graphset = PETSC_TRUE;
424:   return(0);
425: }
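/*
   Illustrative sketch (not part of the library source): each rank owns one root and has
   one leaf per process, with leaf i referencing root 0 on rank i, so a later Bcast acts
   like an allgather of every rank's single root value. Error checking is omitted.

     PetscSF     sf;
     PetscSFNode *remote;
     PetscMPIInt size;
     PetscInt    i;

     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscMalloc1(size,&remote);
     for (i=0; i<size; i++) {remote[i].rank = i; remote[i].index = 0;}
     PetscSFSetGraph(sf,1,size,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
     PetscSFSetUp(sf);
*/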

427: /*@
428:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

430:   Collective

432:   Input Parameters:
433: + sf      - The PetscSF
434: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
435: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

437:   Notes:
438:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
439:   n and N are the local and global sizes of x, respectively.

441:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
442:   sequential vectors y on all ranks.

444:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
445:   sequential vector y on rank 0.

447:   In the above cases, entries of x are roots and entries of y are leaves.

449:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
450:   creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
451:   of rank j. Here 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that this does
452:   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
453:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

455:   In this case, roots and leaves are symmetric.

457:   Level: intermediate
458:  @*/
459: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
460: {
461:   MPI_Comm       comm;
462:   PetscInt       n,N,res[2];
463:   PetscMPIInt    rank,size;
464:   PetscSFType    type;

468:   PetscObjectGetComm((PetscObject)sf, &comm);
469:   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
470:   MPI_Comm_rank(comm,&rank);
471:   MPI_Comm_size(comm,&size);

473:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
474:     type = PETSCSFALLTOALL;
475:     PetscLayoutCreate(comm,&sf->map);
476:     PetscLayoutSetLocalSize(sf->map,size);
477:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
478:     PetscLayoutSetUp(sf->map);
479:   } else {
480:     PetscLayoutGetLocalSize(map,&n);
481:     PetscLayoutGetSize(map,&N);
482:     res[0] = n;
483:     res[1] = -n;
484:     /* Check if n is the same over all ranks so that we can optimize it */
485:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
486:     if (res[0] == -res[1]) { /* same n */
487:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
488:     } else {
489:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
490:     }
491:     PetscLayoutReference(map,&sf->map);
492:   }
493:   PetscSFSetType(sf,type);

495:   sf->pattern = pattern;
496:   sf->mine    = NULL; /* Contiguous */

498:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
499:      Also set other easy stuff.
500:    */
501:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
502:     sf->nleaves      = N;
503:     sf->nroots       = n;
504:     sf->nranks       = size;
505:     sf->minleaf      = 0;
506:     sf->maxleaf      = N - 1;
507:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
508:     sf->nleaves      = rank ? 0 : N;
509:     sf->nroots       = n;
510:     sf->nranks       = rank ? 0 : size;
511:     sf->minleaf      = 0;
512:     sf->maxleaf      = rank ? -1 : N - 1;
513:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
514:     sf->nleaves      = size;
515:     sf->nroots       = size;
516:     sf->nranks       = size;
517:     sf->minleaf      = 0;
518:     sf->maxleaf      = size - 1;
519:   }
520:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
521:   sf->graphset = PETSC_TRUE;
522:   return(0);
523: }
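/*
   Illustrative sketch (not part of the library source): an ALLGATHER pattern; a Bcast on
   the resulting SF copies the locally owned n entries of rootdata on each rank into the
   full N-entry leafdata array on every rank. Here n is the local size from the Notes
   above, and rootdata/leafdata are assumed to be PetscScalar arrays allocated elsewhere.
   Error checking is omitted.

     PetscSF     sf;
     PetscLayout map;

     PetscLayoutCreate(PETSC_COMM_WORLD,&map);
     PetscLayoutSetLocalSize(map,n);
     PetscLayoutSetUp(map);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata);
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata);
     PetscLayoutDestroy(&map);
*/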

525: /*@
526:    PetscSFCreateInverseSF - given a PetscSF in which all roots have degree 1, creates the inverse map

528:    Collective

530:    Input Arguments:

532: .  sf - star forest to invert

534:    Output Arguments:
535: .  isf - inverse of sf

536:    Level: advanced

538:    Notes:
539:    All roots must have degree 1.

541:    The local space may be a permutation, but cannot be sparse.

543: .seealso: PetscSFSetGraph()
544: @*/
545: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
546: {
548:   PetscMPIInt    rank;
549:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
550:   const PetscInt *ilocal;
551:   PetscSFNode    *roots,*leaves;

555:   PetscSFCheckGraphSet(sf,1);

558:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
559:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

561:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
562:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
563:   for (i=0; i<maxlocal; i++) {
564:     leaves[i].rank  = rank;
565:     leaves[i].index = i;
566:   }
567:   for (i=0; i <nroots; i++) {
568:     roots[i].rank  = -1;
569:     roots[i].index = -1;
570:   }
571:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
572:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);

574:   /* Check whether our leaves are sparse */
575:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
576:   if (count == nroots) newilocal = NULL;
577:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
578:     PetscMalloc1(count,&newilocal);
579:     for (i=0,count=0; i<nroots; i++) {
580:       if (roots[i].rank >= 0) {
581:         newilocal[count]   = i;
582:         roots[count].rank  = roots[i].rank;
583:         roots[count].index = roots[i].index;
584:         count++;
585:       }
586:     }
587:   }

589:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
590:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
591:   PetscFree2(roots,leaves);
592:   return(0);
593: }
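/*
   Illustrative sketch (not part of the library source): when every root of sf has degree
   exactly 1, the inverse SF swaps the roles of roots and leaves, so a Bcast on it moves
   data from sf's leaf space into sf's root space. The buffer names below are hypothetical
   PetscInt arrays sized for sf's leaf and root spaces. Error checking is omitted.

     PetscSF isf;

     PetscSFCreateInverseSF(sf,&isf);
     PetscSFBcastBegin(isf,MPIU_INT,data_on_sf_leaves,data_on_sf_roots);
     PetscSFBcastEnd(isf,MPIU_INT,data_on_sf_leaves,data_on_sf_roots);
     PetscSFDestroy(&isf);
*/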

595: /*@
596:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

598:    Collective

600:    Input Arguments:
601: +  sf - communication object to duplicate
602: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

604:    Output Arguments:
605: .  newsf - new communication object

607:    Level: beginner

609: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
610: @*/
611: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
612: {
613:   PetscSFType    type;

620:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
621:   PetscSFGetType(sf,&type);
622:   if (type) {PetscSFSetType(*newsf,type);}
623:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
624:     PetscSFCheckGraphSet(sf,1);
625:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
626:       PetscInt          nroots,nleaves;
627:       const PetscInt    *ilocal;
628:       const PetscSFNode *iremote;
629:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
630:       PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
631:     } else {
632:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
633:     }
634:   }
635:   if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
636:   return(0);
637: }

639: /*@C
640:    PetscSFGetGraph - Get the graph specifying a parallel star forest

642:    Not Collective

644:    Input Arguments:
645: .  sf - star forest

647:    Output Arguments:
648: +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
649: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
650: .  ilocal - locations of leaves in leafdata buffers
651: -  iremote - remote locations of root vertices for each leaf on the current process

653:    Notes:
654:    We do not currently require that the graph be set, thus nroots=-1 is returned if it has not been set yet

656:    Level: intermediate

658: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
659: @*/
660: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
661: {

666:   if (sf->ops->GetGraph) {
667:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
668:   } else {
669:     if (nroots) *nroots = sf->nroots;
670:     if (nleaves) *nleaves = sf->nleaves;
671:     if (ilocal) *ilocal = sf->mine;
672:     if (iremote) *iremote = sf->remote;
673:   }
674:   return(0);
675: }

677: /*@
678:    PetscSFGetLeafRange - Get the active leaf ranges

680:    Not Collective

682:    Input Arguments:
683: .  sf - star forest

685:    Output Arguments:
686: +  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
687: -  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

689:    Level: developer

691: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
692: @*/
693: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
694: {

698:   PetscSFCheckGraphSet(sf,1);
699:   if (minleaf) *minleaf = sf->minleaf;
700:   if (maxleaf) *maxleaf = sf->maxleaf;
701:   return(0);
702: }

704: /*@C
705:    PetscSFView - view a star forest

707:    Collective

709:    Input Arguments:
710: +  sf - star forest
711: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

713:    Level: beginner

715: .seealso: PetscSFCreate(), PetscSFSetGraph()
716: @*/
717: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
718: {
719:   PetscErrorCode    ierr;
720:   PetscBool         iascii;
721:   PetscViewerFormat format;

725:   if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
728:   if (sf->graphset) {PetscSFSetUp(sf);}
729:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
730:   if (iascii) {
731:     PetscMPIInt rank;
732:     PetscInt    ii,i,j;

734:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
735:     PetscViewerASCIIPushTab(viewer);
736:     if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
737:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
738:       if (!sf->graphset) {
739:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
740:         PetscViewerASCIIPopTab(viewer);
741:         return(0);
742:       }
743:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
744:       PetscViewerASCIIPushSynchronized(viewer);
745:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
746:       for (i=0; i<sf->nleaves; i++) {
747:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
748:       }
749:       PetscViewerFlush(viewer);
750:       PetscViewerGetFormat(viewer,&format);
751:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
752:         PetscMPIInt *tmpranks,*perm;
753:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
754:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
755:         for (i=0; i<sf->nranks; i++) perm[i] = i;
756:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
757:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
758:         for (ii=0; ii<sf->nranks; ii++) {
759:           i = perm[ii];
760:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
761:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
762:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
763:           }
764:         }
765:         PetscFree2(tmpranks,perm);
766:       }
767:       PetscViewerFlush(viewer);
768:       PetscViewerASCIIPopSynchronized(viewer);
769:     }
770:     PetscViewerASCIIPopTab(viewer);
771:   }
772:   return(0);
773: }

775: /*@C
776:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

778:    Not Collective

780:    Input Arguments:
781: .  sf - star forest

783:    Output Arguments:
784: +  nranks - number of ranks referenced by local part
785: .  ranks - array of ranks
786: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
787: .  rmine - concatenated array holding local indices referencing each remote rank
788: -  rremote - concatenated array holding remote indices referenced for each remote rank

790:    Level: developer

792: .seealso: PetscSFGetLeafRanks()
793: @*/
794: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
795: {

800:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
801:   if (sf->ops->GetRootRanks) {
802:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
803:   } else {
804:     /* The generic implementation */
805:     if (nranks)  *nranks  = sf->nranks;
806:     if (ranks)   *ranks   = sf->ranks;
807:     if (roffset) *roffset = sf->roffset;
808:     if (rmine)   *rmine   = sf->rmine;
809:     if (rremote) *rremote = sf->rremote;
810:   }
811:   return(0);
812: }

814: /*@C
815:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

817:    Not Collective

819:    Input Arguments:
820: .  sf - star forest

822:    Output Arguments:
823: +  niranks - number of leaf ranks referencing roots on this process
824: .  iranks - array of ranks
825: .  ioffset - offset in irootloc for each rank (length niranks+1)
826: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

828:    Level: developer

830: .seealso: PetscSFGetRootRanks()
831: @*/
832: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
833: {

838:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
839:   if (sf->ops->GetLeafRanks) {
840:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
841:   } else {
842:     PetscSFType type;
843:     PetscSFGetType(sf,&type);
844:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
845:   }
846:   return(0);
847: }

849: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
850:   PetscInt i;
851:   for (i=0; i<n; i++) {
852:     if (needle == list[i]) return PETSC_TRUE;
853:   }
854:   return PETSC_FALSE;
855: }

857: /*@C
858:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

860:    Collective

862:    Input Arguments:
863: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
864: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

866:    Level: developer

868: .seealso: PetscSFGetRootRanks()
869: @*/
870: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
871: {
872:   PetscErrorCode     ierr;
873:   PetscTable         table;
874:   PetscTablePosition pos;
875:   PetscMPIInt        size,groupsize,*groupranks;
876:   PetscInt           *rcount,*ranks;
877:   PetscInt           i, irank = -1,orank = -1;

881:   PetscSFCheckGraphSet(sf,1);
882:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
883:   PetscTableCreate(10,size,&table);
884:   for (i=0; i<sf->nleaves; i++) {
885:     /* Log 1-based rank */
886:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
887:   }
888:   PetscTableGetCount(table,&sf->nranks);
889:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
890:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
891:   PetscTableGetHeadPosition(table,&pos);
892:   for (i=0; i<sf->nranks; i++) {
893:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
894:     ranks[i]--;             /* Convert back to 0-based */
895:   }
896:   PetscTableDestroy(&table);

898:   /* We expect that dgroup is reliably "small" while nranks could be large */
899:   {
900:     MPI_Group group = MPI_GROUP_NULL;
901:     PetscMPIInt *dgroupranks;
902:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
903:     MPI_Group_size(dgroup,&groupsize);
904:     PetscMalloc1(groupsize,&dgroupranks);
905:     PetscMalloc1(groupsize,&groupranks);
906:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
907:     if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
908:     MPI_Group_free(&group);
909:     PetscFree(dgroupranks);
910:   }

912:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
913:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
914:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
915:       if (InList(ranks[i],groupsize,groupranks)) break;
916:     }
917:     for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
918:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
919:     }
920:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
921:       PetscInt    tmprank,tmpcount;

923:       tmprank             = ranks[i];
924:       tmpcount            = rcount[i];
925:       ranks[i]            = ranks[sf->ndranks];
926:       rcount[i]           = rcount[sf->ndranks];
927:       ranks[sf->ndranks]  = tmprank;
928:       rcount[sf->ndranks] = tmpcount;
929:       sf->ndranks++;
930:     }
931:   }
932:   PetscFree(groupranks);
933:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
934:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
935:   sf->roffset[0] = 0;
936:   for (i=0; i<sf->nranks; i++) {
937:     PetscMPIIntCast(ranks[i],sf->ranks+i);
938:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
939:     rcount[i]        = 0;
940:   }
941:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
942:     /* short circuit */
943:     if (orank != sf->remote[i].rank) {
944:       /* Search for index of iremote[i].rank in sf->ranks */
945:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
946:       if (irank < 0) {
947:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
948:         if (irank >= 0) irank += sf->ndranks;
949:       }
950:       orank = sf->remote[i].rank;
951:     }
952:     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
953:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
954:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
955:     rcount[irank]++;
956:   }
957:   PetscFree2(rcount,ranks);
958:   return(0);
959: }

961: /*@C
962:    PetscSFGetGroups - gets incoming and outgoing process groups

964:    Collective

966:    Input Argument:
967: .  sf - star forest

969:    Output Arguments:
970: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
971: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

973:    Level: developer

975: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
976: @*/
977: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
978: {
980:   MPI_Group      group = MPI_GROUP_NULL;

983:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
984:   if (sf->ingroup == MPI_GROUP_NULL) {
985:     PetscInt       i;
986:     const PetscInt *indegree;
987:     PetscMPIInt    rank,*outranks,*inranks;
988:     PetscSFNode    *remote;
989:     PetscSF        bgcount;

991:     /* Compute the number of incoming ranks */
992:     PetscMalloc1(sf->nranks,&remote);
993:     for (i=0; i<sf->nranks; i++) {
994:       remote[i].rank  = sf->ranks[i];
995:       remote[i].index = 0;
996:     }
997:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
998:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
999:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1000:     PetscSFComputeDegreeEnd(bgcount,&indegree);

1002:     /* Enumerate the incoming ranks */
1003:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1004:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1005:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1006:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1007:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1008:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1009:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1010:     MPI_Group_free(&group);
1011:     PetscFree2(inranks,outranks);
1012:     PetscSFDestroy(&bgcount);
1013:   }
1014:   *incoming = sf->ingroup;

1016:   if (sf->outgroup == MPI_GROUP_NULL) {
1017:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1018:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1019:     MPI_Group_free(&group);
1020:   }
1021:   *outgoing = sf->outgroup;
1022:   return(0);
1023: }

1025: /*@
1026:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1028:    Collective

1030:    Input Argument:
1031: .  sf - star forest that may contain roots with degree 0 or with degree greater than 1

1033:    Output Arguments:
1034: .  multi - star forest with split roots, such that each root has degree exactly 1

1036:    Level: developer

1038:    Notes:

1040:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1041:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1042:    edge, it is a candidate for future optimization that might involve its removal.

1044: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1045: @*/
1046: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1047: {

1053:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1054:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1055:     *multi = sf->multi;
1056:     return(0);
1057:   }
1058:   if (!sf->multi) {
1059:     const PetscInt *indegree;
1060:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1061:     PetscSFNode    *remote;
1062:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1063:     PetscSFComputeDegreeBegin(sf,&indegree);
1064:     PetscSFComputeDegreeEnd(sf,&indegree);
1065:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1066:     inoffset[0] = 0;
1067:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1068:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1069:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1070:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1071:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1072: #if 0
1073: #if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
1074:     for (i=0; i<sf->nroots; i++) {
1075:       if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1076:     }
1077: #endif
1078: #endif
1079:     PetscMalloc1(sf->nleaves,&remote);
1080:     for (i=0; i<sf->nleaves; i++) {
1081:       remote[i].rank  = sf->remote[i].rank;
1082:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1083:     }
1084:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1085:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1086:     if (sf->rankorder) {        /* Sort the ranks */
1087:       PetscMPIInt rank;
1088:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1089:       PetscSFNode *newremote;
1090:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1091:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1092:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1093:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1094:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1095:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1096:       /* Sort the incoming ranks at each vertex, build the inverse map */
1097:       for (i=0; i<sf->nroots; i++) {
1098:         PetscInt j;
1099:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1100:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1101:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1102:       }
1103:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1104:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1105:       PetscMalloc1(sf->nleaves,&newremote);
1106:       for (i=0; i<sf->nleaves; i++) {
1107:         newremote[i].rank  = sf->remote[i].rank;
1108:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1109:       }
1110:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1111:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1112:     }
1113:     PetscFree3(inoffset,outones,outoffset);
1114:   }
1115:   *multi = sf->multi;
1116:   return(0);
1117: }

1119: /*@C
1120:    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

1122:    Collective

1124:    Input Arguments:
1125: +  sf - original star forest
1126: .  nselected  - number of selected roots on this process
1127: -  selected   - indices of the selected roots on this process

1129:    Output Arguments:
1130: .  newsf - new star forest

1132:    Level: advanced

1134:    Note:
1135:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1136:    be done by calling PetscSFGetGraph().

1138: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1139: @*/
1140: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1141: {
1142:   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1143:   const PetscSFNode *iremote;
1144:   PetscSFNode       *new_iremote;
1145:   PetscSF           tmpsf;
1146:   MPI_Comm          comm;
1147:   PetscErrorCode    ierr;

1151:   PetscSFCheckGraphSet(sf,1);

1155:   PetscSFSetUp(sf);

1157:   /* Uniq selected[] and put results in roots[] */
1158:   PetscObjectGetComm((PetscObject)sf,&comm);
1159:   PetscMalloc1(nselected,&roots);
1160:   PetscArraycpy(roots,selected,nselected);
1161:   PetscSortedRemoveDupsInt(&nselected,roots);
1162:   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);

1164:   if (sf->ops->CreateEmbeddedSF) {
1165:     (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);
1166:   } else {
1167:     /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
1168:     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1169:     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
1170:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);
1171:     PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
1172:     PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);
1173:     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1174:     PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);
1175:     PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);
1176:     PetscSFDestroy(&tmpsf);

1178:     /* Build newsf with leaves that are still connected */
1179:     connected_leaves = 0;
1180:     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1181:     PetscMalloc1(connected_leaves,&new_ilocal);
1182:     PetscMalloc1(connected_leaves,&new_iremote);
1183:     for (i=0, n=0; i<nleaves; ++i) {
1184:       if (leafdata[i]) {
1185:         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1186:         new_iremote[n].rank  = sf->remote[i].rank;
1187:         new_iremote[n].index = sf->remote[i].index;
1188:         ++n;
1189:       }
1190:     }

1192:     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1193:     PetscSFCreate(comm,newsf);
1194:     PetscSFSetFromOptions(*newsf);
1195:     PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1196:     PetscFree2(rootdata,leafdata);
1197:   }
1198:   PetscFree(roots);
1199:   return(0);
1200: }
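/*
   Illustrative sketch (not part of the library source): keep only the edges ending at two
   locally selected roots (indices 0 and 5, assumed valid on this process); leaves pointing
   at any other root are dropped and indices are not remapped. Error checking is omitted.

     PetscSF  esf;
     PetscInt selected[2] = {0,5};

     PetscSFCreateEmbeddedSF(sf,2,selected,&esf);
     PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);
     PetscSFDestroy(&esf);
*/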

1202: /*@C
1203:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1205:   Collective

1207:   Input Arguments:
1208: + sf - original star forest
1209: . nselected  - number of selected leaves on this process
1210: - selected   - indices of the selected leaves on this process

1212:   Output Arguments:
1213: .  newsf - new star forest

1215:   Level: advanced

1217: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1218: @*/
1219: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1220: {
1221:   const PetscSFNode *iremote;
1222:   PetscSFNode       *new_iremote;
1223:   const PetscInt    *ilocal;
1224:   PetscInt          i,nroots,*leaves,*new_ilocal;
1225:   MPI_Comm          comm;
1226:   PetscErrorCode    ierr;

1230:   PetscSFCheckGraphSet(sf,1);

1234:   /* Uniq selected[] and put results in leaves[] */
1235:   PetscObjectGetComm((PetscObject)sf,&comm);
1236:   PetscMalloc1(nselected,&leaves);
1237:   PetscArraycpy(leaves,selected,nselected);
1238:   PetscSortedRemoveDupsInt(&nselected,leaves);
1239:   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

1241:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1242:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1243:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1244:   } else {
1245:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1246:     PetscMalloc1(nselected,&new_ilocal);
1247:     PetscMalloc1(nselected,&new_iremote);
1248:     for (i=0; i<nselected; ++i) {
1249:       const PetscInt l     = leaves[i];
1250:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1251:       new_iremote[i].rank  = iremote[l].rank;
1252:       new_iremote[i].index = iremote[l].index;
1253:     }
1254:     PetscSFCreate(comm,newsf);
1255:     PetscSFSetFromOptions(*newsf);
1256:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1257:   }
1258:   PetscFree(leaves);
1259:   return(0);
1260: }

1262: /*@C
1263:    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

1265:    Collective on PetscSF

1267:    Input Arguments:
1268: +  sf - star forest on which to communicate
1269: .  unit - data type associated with each node
1270: .  rootdata - buffer to broadcast
1271: -  op - operation to use for reduction

1273:    Output Arguments:
1274: .  leafdata - buffer to be reduced with values from each leaf's respective root

1276:    Level: intermediate

1278: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1279: @*/
1280: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1281: {

1286:   PetscSFSetUp(sf);
1287:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1288:   (*sf->ops->BcastAndOpBegin)(sf,unit,rootdata,leafdata,op);
1289:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1290:   return(0);
1291: }

1293: /*@C
1294:    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

1296:    Collective

1298:    Input Arguments:
1299: +  sf - star forest
1300: .  unit - data type
1301: .  rootdata - buffer to broadcast
1302: -  op - operation to use for reduction

1304:    Output Arguments:
1305: .  leafdata - buffer to be reduced with values from each leaf's respective root

1307:    Level: intermediate

1309: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1310: @*/
1311: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1312: {

1317:   PetscSFSetUp(sf);
1318:   PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1319:   (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);
1320:   PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1321:   return(0);
1322: }
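/*
   Illustrative sketch (not part of the library source): add each root's value into the
   corresponding leaf entries (a broadcast combined with MPI_SUM at the leaves); using
   MPIU_REPLACE instead overwrites the leaf entries, giving a plain broadcast. rootdata
   and leafdata are assumed to be PetscInt arrays sized for sf's roots and leaves.
   Error checking is omitted.

     PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
     PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
*/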

1324: /*@C
1325:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1327:    Collective

1329:    Input Arguments:
1330: +  sf - star forest
1331: .  unit - data type
1332: .  leafdata - values to reduce
1333: -  op - reduction operation

1335:    Output Arguments:
1336: .  rootdata - result of reduction of values from all leaves of each root

1338:    Level: intermediate

1340: .seealso: PetscSFBcastBegin()
1341: @*/
1342: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1343: {

1348:   PetscSFSetUp(sf);
1349:   PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1350:   (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);
1351:   PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1352:   return(0);
1353: }

1355: /*@C
1356:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1358:    Collective

1360:    Input Arguments:
1361: +  sf - star forest
1362: .  unit - data type
1363: .  leafdata - values to reduce
1364: -  op - reduction operation

1366:    Output Arguments:
1367: .  rootdata - result of reduction of values from all leaves of each root

1369:    Level: intermediate

1371: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1372: @*/
1373: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1374: {

1379:   PetscSFSetUp(sf);
1380:   PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1381:   (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1382:   PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1383:   return(0);
1384: }
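/*
   Illustrative sketch (not part of the library source): sum the contributions of all
   leaves into their respective roots, e.g. to accumulate ghost values back onto owners.
   leafdata and rootdata are assumed to be PetscScalar arrays sized for sf's leaves and
   roots. Error checking is omitted.

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
*/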

1386: /*@C
1387:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1389:    Collective

1391:    Input Arguments:
1392: .  sf - star forest

1394:    Output Arguments:
1395: .  degree - degree of each root vertex

1397:    Level: advanced

1399:    Notes:
1400:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1402: .seealso: PetscSFGatherBegin()
1403: @*/
1404: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1405: {

1410:   PetscSFCheckGraphSet(sf,1);
1412:   if (!sf->degreeknown) {
1413:     PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
1414:     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1415:     PetscMalloc1(nroots,&sf->degree);
1416:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1417:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1418:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1419:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1420:   }
1421:   *degree = NULL;
1422:   return(0);
1423: }

1425: /*@C
1426:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1428:    Collective

1430:    Input Arguments:
1431: .  sf - star forest

1433:    Output Arguments:
1434: .  degree - degree of each root vertex

1436:    Level: developer

1438:    Notes:
1439:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1441: .seealso: PetscSFComputeDegreeBegin()
1442: @*/
1443: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1444: {

1449:   PetscSFCheckGraphSet(sf,1);
1451:   if (!sf->degreeknown) {
1452:     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1453:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1454:     PetscFree(sf->degreetmp);
1455:     sf->degreeknown = PETSC_TRUE;
1456:   }
1457:   *degree = sf->degree;
1458:   return(0);
1459: }
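/*
   Illustrative sketch (not part of the library source): query how many leaves reference
   each local root. The returned array is owned by the PetscSF and must not be freed.
   Error checking is omitted.

     const PetscInt *degree;
     PetscInt       i,nroots;

     PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) PetscPrintf(PETSC_COMM_SELF,"root %D has degree %D\n",i,degree[i]);
*/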


1462: /*@C
1463:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1464:    Each multi-root is assigned the index of the corresponding original root.

1466:    Collective

1468:    Input Arguments:
1469: +  sf - star forest
1470: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1472:    Output Arguments:
1473: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1474: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1476:    Level: developer

1478:    Notes:
1479:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1481: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1482: @*/
1483: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1484: {
1485:   PetscSF             msf;
1486:   PetscInt            i, j, k, nroots, nmroots;
1487:   PetscErrorCode      ierr;

1491:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1495:   PetscSFGetMultiSF(sf,&msf);
1496:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1497:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1498:   for (i=0,j=0,k=0; i<nroots; i++) {
1499:     if (!degree[i]) continue;
1500:     for (j=0; j<degree[i]; j++,k++) {
1501:       (*multiRootsOrigNumbering)[k] = i;
1502:     }
1503:   }
1504: #if defined(PETSC_USE_DEBUG)
1505:   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1506: #endif
1507:   if (nMultiRoots) *nMultiRoots = nmroots;
1508:   return(0);
1509: }

1511: /*@C
1512:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1514:    Collective

1516:    Input Arguments:
1517: +  sf - star forest
1518: .  unit - data type
1519: .  leafdata - leaf values to use in reduction
1520: -  op - operation to use for reduction

1522:    Output Arguments:
1523: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1524: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1526:    Level: advanced

1528:    Note:
1529:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1530:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1531:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1532:    integers.

1534: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1535: @*/
1536: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1537: {

1542:   PetscSFSetUp(sf);
1543:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1544:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);
1545:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1546:   return(0);
1547: }

1549: /*@C
1550:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1552:    Collective

1554:    Input Arguments:
1555: +  sf - star forest
1556: .  unit - data type
1557: .  leafdata - leaf values to use in reduction
1558: -  op - operation to use for reduction

1560:    Output Arguments:
1561: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1562: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1564:    Level: advanced

1566: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1567: @*/
1568: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1569: {

1574:   PetscSFSetUp(sf);
1575:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1576:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1577:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1578:   return(0);
1579: }
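/*
   Illustrative sketch (not part of the library source): fetch-and-add, e.g. to hand out
   unique slots at each root. Every leaf adds 1 to its root's counter and receives in
   leafupdate the counter value seen just before its own update. rootdata, leafones and
   leafupdate are assumed to be PetscInt arrays of the appropriate sizes, with leafones
   filled with 1. Error checking is omitted.

     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafones,leafupdate,MPI_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafones,leafupdate,MPI_SUM);
*/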

1581: /*@C
1582:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1584:    Collective

1586:    Input Arguments:
1587: +  sf - star forest
1588: .  unit - data type
1589: -  leafdata - leaf data to gather to roots

1591:    Output Argument:
1592: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1594:    Level: intermediate

1596: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1597: @*/
1598: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1599: {
1601:   PetscSF        multi;

1605:   PetscSFSetUp(sf);
1606:   PetscSFGetMultiSF(sf,&multi);
1607:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1608:   return(0);
1609: }

1611: /*@C
1612:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1614:    Collective

1616:    Input Arguments:
1617: +  sf - star forest
1618: .  unit - data type
1619: -  leafdata - leaf data to gather to roots

1621:    Output Argument:
1622: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1624:    Level: intermediate

1626: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1627: @*/
1628: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1629: {
1631:   PetscSF        multi;

1635:   PetscSFSetUp(sf);
1636:   PetscSFGetMultiSF(sf,&multi);
1637:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1638:   return(0);
1639: }
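A sketch of the gather pair above (illustrative names, error checking omitted): collect one entry from every leaf into a root-side buffer sized by the root degrees.

  const PetscInt *degree;
  PetscScalar    *leafdata,*multirootdata;
  PetscInt        nroots,nleaves,nmulti,i;

  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0,nmulti=0; i<nroots; i++) nmulti += degree[i];   /* one slot per incoming leaf */
  PetscMalloc2(nleaves,&leafdata,nmulti,&multirootdata);
  /* ... fill leafdata ... */
  PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);
  PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);
  /* multirootdata now holds, grouped by root, one entry per leaf that points at that root */
  PetscFree2(leafdata,multirootdata);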

1641: /*@C
1642:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1644:    Collective

1646:    Input Arguments:
1647: +  sf - star forest
1648: .  unit - data type
1649: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1651:    Output Argument:
1652: .  leafdata - leaf data to be updated with personal data from each respective root

1654:    Level: intermediate

1656: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1657: @*/
1658: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1659: {
1661:   PetscSF        multi;

1665:   PetscSFSetUp(sf);
1666:   PetscSFGetMultiSF(sf,&multi);
1667:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1668:   return(0);
1669: }

1671: /*@C
1672:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1674:    Collective

1676:    Input Arguments:
1677: +  sf - star forest
1678: .  unit - data type
1679: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1681:    Output Argument:
1682: .  leafdata - leaf data to be updated with personal data from each respective root

1684:    Level: intermediate

1686: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1687: @*/
1688: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1689: {
1691:   PetscSF        multi;

1695:   PetscSFSetUp(sf);
1696:   PetscSFGetMultiSF(sf,&multi);
1697:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1698:   return(0);
1699: }
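Continuing the gather sketch, the scatter pair above goes the other way: each leaf receives the single entry that was reserved for it in the multi-root buffer (same illustrative names, error checking omitted).

  /* multirootdata and leafdata sized as in the gather sketch above */
  PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
  /* leafdata[l] now contains the multi-root entry corresponding to leaf l */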

1701: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1702: {
1703: #if defined(PETSC_USE_DEBUG)
1704:   PetscInt        i, n, nleaves;
1705:   const PetscInt *ilocal = NULL;
1706:   PetscHSetI      seen;
1707:   PetscErrorCode  ierr;

1710:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1711:   PetscHSetICreate(&seen);
1712:   for (i = 0; i < nleaves; i++) {
1713:     const PetscInt leaf = ilocal ? ilocal[i] : i;
1714:     PetscHSetIAdd(seen,leaf);
1715:   }
1716:   PetscHSetIGetSize(seen,&n);
1717:   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1718:   PetscHSetIDestroy(&seen);
1719:   return(0);
1720: #else
1722:   return(0);
1723: #endif
1724: }
1725: /*@
1726:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1728:   Input Parameters:
1729: + sfA - The first PetscSF, whose local space may be a permutation but cannot be sparse.
1730: - sfB - The second PetscSF, whose number of roots must equal the number of leaves of sfA on each processor

1732:   Output Parameters:
1733: . sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then a Bcast on sfB

1735:   Level: developer

1737:   Notes:
1738:   For the resulting composed SF to be valid, the input SFs must be true star forests: the leaves must be unique. This is
1739:   checked in debug mode.

1741: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1742: @*/
1743: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1744: {
1745:   PetscErrorCode    ierr;
1746:   MPI_Comm          comm;
1747:   const PetscSFNode *remotePointsA,*remotePointsB;
1748:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1749:   const PetscInt    *localPointsA,*localPointsB,*localPointsBA;
1750:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

1754:   PetscSFCheckGraphSet(sfA,1);
1756:   PetscSFCheckGraphSet(sfB,2);
1758:   PetscObjectGetComm((PetscObject)sfA,&comm);
1759:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1760:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1761:   PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);
1762:   if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
1763:   if (maxleaf+1 != numLeavesA || minleaf) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
1764:   /* The above check is fast, but not sufficient, since we cannot guarantee that the SF has unique leaves. So in debug
1765:    mode, check properly. */
1766:   PetscSFCheckLeavesUnique_Private(sfA);
1767:   if (localPointsA) {
1768:     /* Local space is a dense permutation of the identity. Reorder the remote points accordingly */
1769:     PetscMalloc1(numLeavesA,&reorderedRemotePointsA);
1770:     for (i=0; i<numLeavesA; i++) reorderedRemotePointsA[localPointsA[i]-minleaf] = remotePointsA[i];
1771:     remotePointsA = reorderedRemotePointsA;
1772:   }
1773:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1774:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1775:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1776:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1777:   PetscFree(reorderedRemotePointsA);

1779:   /* sfB's leaves must be unique, otherwise BcastAndOp(B o A) != BcastAndOp(B) o BcastAndOp(A) */
1780:   PetscSFCheckLeavesUnique_Private(sfB);
1781:   if (minleaf == 0 && maxleaf + 1 == numLeavesB) { /* Local space of sfB is an identity or permutation */
1782:     localPointsBA  = NULL;
1783:     remotePointsBA = leafdataB;
1784:   } else {
1785:     localPointsBA  = localPointsB;
1786:     PetscMalloc1(numLeavesB,&remotePointsBA);
1787:     for (i=0; i<numLeavesB; i++) remotePointsBA[i] = leafdataB[localPointsB[i]-minleaf];
1788:     PetscFree(leafdataB);
1789:   }
1790:   PetscSFCreate(comm, sfBA);
1791:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsBA,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);
1792:   return(0);
1793: }
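A sketch of composing two SFs (illustrative names; sfA and sfB are assumed to exist with their graphs set, and error checking is omitted): sfA's leaves serve as sfB's roots, so a broadcast on the composite goes from sfA's roots all the way to sfB's leaves.

  PetscSF   sfBA;
  PetscInt *rootdataA,*leafdataB;   /* sized by sfA's roots and sfB's leaves, allocated elsewhere */

  PetscSFCompose(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,leafdataB);
  PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,leafdataB);   /* same result as Bcast on sfA, then Bcast on sfB */
  PetscSFDestroy(&sfBA);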

1795: /*@
1796:   PetscSFComposeInverse - Compose a new PetscSF by putting inverse of the second SF under the first one

1798:   Input Parameters:
1799: + sfA - The first PetscSF, whose local space may be a permutation but cannot be sparse.
1800: - sfB - The second PetscSF, whose number of leaves must equal the number of leaves of sfA on each processor. All roots must have degree 1.

1802:   Output Parameters:
1803: . sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then a Bcast on the inverse of sfB

1805:   Level: developer

1807: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
1808: @*/
1809: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1810: {
1811:   PetscErrorCode    ierr;
1812:   MPI_Comm          comm;
1813:   const PetscSFNode *remotePointsA,*remotePointsB;
1814:   PetscSFNode       *remotePointsBA;
1815:   const PetscInt    *localPointsA,*localPointsB;
1816:   PetscInt          numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

1820:   PetscSFCheckGraphSet(sfA,1);
1822:   PetscSFCheckGraphSet(sfB,2);
1824:   PetscObjectGetComm((PetscObject) sfA, &comm);
1825:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1826:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1827:   PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);
1828:   if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
1829:   if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves");
1830:   PetscMalloc1(numRootsB,&remotePointsBA);
1831:   PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);
1832:   PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);
1833:   PetscSFCreate(comm,sfBA);
1834:   PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);
1835:   return(0);
1836: }
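A sketch for the inverse composition above (illustrative names; sfA and sfB assumed set up, error checking omitted): because sfB is inverted, data flows from sfA's roots to sfB's roots, which is why every root of sfB must have degree 1.

  PetscSF   sfBA;
  PetscInt *rootdataA,*rootdataB;   /* sized by sfA's roots and sfB's roots, allocated elsewhere */

  PetscSFComposeInverse(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,rootdataB);
  PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,rootdataB);   /* Bcast on sfA followed by the inverse (reduce) of sfB */
  PetscSFDestroy(&sfBA);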

1838: /*
1839:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

1841:   Input Parameters:
1842: . sf - The global PetscSF

1844:   Output Parameters:
1845: . out - The local PetscSF
1846:  */
1847: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1848: {
1849:   MPI_Comm           comm;
1850:   PetscMPIInt        myrank;
1851:   const PetscInt     *ilocal;
1852:   const PetscSFNode  *iremote;
1853:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1854:   PetscSFNode        *liremote;
1855:   PetscSF            lsf;
1856:   PetscErrorCode     ierr;

1860:   if (sf->ops->CreateLocalSF) {
1861:     (*sf->ops->CreateLocalSF)(sf,out);
1862:   } else {
1863:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1864:     PetscObjectGetComm((PetscObject)sf,&comm);
1865:     MPI_Comm_rank(comm,&myrank);

1867:     /* Find out local edges and build a local SF */
1868:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1869:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
1870:     PetscMalloc1(lnleaves,&lilocal);
1871:     PetscMalloc1(lnleaves,&liremote);

1873:     for (i=j=0; i<nleaves; i++) {
1874:       if (iremote[i].rank == (PetscInt)myrank) {
1875:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
1876:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
1877:         liremote[j].index = iremote[i].index;
1878:         j++;
1879:       }
1880:     }
1881:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
1882:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
1883:     PetscSFSetUp(lsf);
1884:     *out = lsf;
1885:   }
1886:   return(0);
1887: }
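A hedged sketch of what the private helper above provides (illustrative buffers, error checking omitted): the local SF lives on PETSC_COMM_SELF and keeps only the edges whose root is owned by the calling rank, so communication on it never leaves the process.

  PetscSF   lsf;
  PetscInt *rootdata,*leafdata;   /* allocated elsewhere with the same root/leaf sizes as for sf */

  PetscSFCreateLocalSF_Private(sf,&lsf);
  PetscSFBcastBegin(lsf,MPIU_INT,rootdata,leafdata);   /* purely intra-process */
  PetscSFBcastEnd(lsf,MPIU_INT,rootdata,leafdata);
  PetscSFDestroy(&lsf);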

1889: /* Similar to PetscSFBcastBegin/End, but only broadcasts to leaves on rank 0 */
1890: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1891: {
1892:   PetscErrorCode     ierr;

1896:   PetscSFSetUp(sf);
1897:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1898:   if (sf->ops->BcastToZero) {
1899:     (*sf->ops->BcastToZero)(sf,unit,rootdata,leafdata);
1900:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
1901:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1902:   return(0);
1903: }
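A short sketch of the private routine above (illustrative buffers, error checking omitted): the call looks like an ordinary broadcast, but only leaves owned by rank 0 receive data.

  PetscSFBcastToZero_Private(sf,MPIU_INT,rootdata,leafdata);
  /* leafdata is written only on rank 0; other ranks' leafdata is left untouched */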