Actual source code: sf.c

petsc-master 2020-04-03
Report Typos and Errors
  1:  #include <petsc/private/sfimpl.h>
  2:  #include <petsc/private/hashseti.h>
  3:  #include <petscctable.h>

  5: #if defined(PETSC_USE_DEBUG)
  6: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
  7:     if (PetscUnlikely(!(sf)->graphset))                              \
  8:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  9:   } while (0)
 10: #else
 11: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 12: #endif

 14: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

 16: /*@
 17:    PetscSFCreate - create a star forest communication context

 19:    Collective

 21:    Input Arguments:
 22: .  comm - communicator on which the star forest will operate

 24:    Output Arguments:
 25: .  sf - new star forest context

 27:    Options Database Keys:
 28: +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
 29: .  -sf_type window    -Use MPI-3 one-sided window for communication
 30: -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

 32:    Level: intermediate

 34:    Notes:
 35:    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
 36:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 37:    SFs are optimized and they have better performance than general SFs.

 39: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 40: @*/
 41: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 42: {
 44:   PetscSF        b;

 48:   PetscSFInitializePackage();

 50:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 52:   b->nroots    = -1;
 53:   b->nleaves   = -1;
 54:   b->minleaf   = PETSC_MAX_INT;
 55:   b->maxleaf   = PETSC_MIN_INT;
 56:   b->nranks    = -1;
 57:   b->rankorder = PETSC_TRUE;
 58:   b->ingroup   = MPI_GROUP_NULL;
 59:   b->outgroup  = MPI_GROUP_NULL;
 60:   b->graphset  = PETSC_FALSE;

 62:   *sf = b;
 63:   return(0);
 64: }

 66: /*@
 67:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

 69:    Collective

 71:    Input Arguments:
 72: .  sf - star forest

 74:    Level: advanced

 76: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
 77: @*/
 78: PetscErrorCode PetscSFReset(PetscSF sf)
 79: {

 84:   if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
 85:   sf->nroots   = -1;
 86:   sf->nleaves  = -1;
 87:   sf->minleaf  = PETSC_MAX_INT;
 88:   sf->maxleaf  = PETSC_MIN_INT;
 89:   sf->mine     = NULL;
 90:   sf->remote   = NULL;
 91:   sf->graphset = PETSC_FALSE;
 92:   PetscFree(sf->mine_alloc);
 93:   PetscFree(sf->remote_alloc);
 94:   sf->nranks = -1;
 95:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
 96: #if defined(PETSC_HAVE_CUDA)
 97:   {
 98:   PetscInt  i;
 99:   for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
100:   }
101: #endif
102:   sf->degreeknown = PETSC_FALSE;
103:   PetscFree(sf->degree);
104:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
105:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
106:   PetscSFDestroy(&sf->multi);
107:   PetscLayoutDestroy(&sf->map);
108:   sf->setupcalled = PETSC_FALSE;
109:   return(0);
110: }

112: /*@C
113:    PetscSFSetType - Set the PetscSF communication implementation

115:    Collective on PetscSF

117:    Input Parameters:
118: +  sf - the PetscSF context
119: -  type - a known method

121:    Options Database Key:
122: .  -sf_type <type> - Sets the method; use -help for a list
123:    of available methods (for instance, window, basic, neighbor)

125:    Notes:
126:    See "include/petscsf.h" for available methods (for instance)
127: +    PETSCSFWINDOW - MPI-2/3 one-sided
128: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

130:   Level: intermediate

132: .seealso: PetscSFType, PetscSFCreate()
133: @*/
134: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
135: {
136:   PetscErrorCode ierr,(*r)(PetscSF);
137:   PetscBool      match;


143:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
144:   if (match) return(0);

146:   PetscFunctionListFind(PetscSFList,type,&r);
147:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
148:   /* Destroy the previous PetscSF implementation context */
149:   if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
150:   PetscMemzero(sf->ops,sizeof(*sf->ops));
151:   PetscObjectChangeTypeName((PetscObject)sf,type);
152:   (*r)(sf);
153:   return(0);
154: }

156: /*@C
157:   PetscSFGetType - Get the PetscSF communication implementation

159:   Not Collective

161:   Input Parameter:
162: . sf  - the PetscSF context

164:   Output Parameter:
165: . type - the PetscSF type name

167:   Level: intermediate

169: .seealso: PetscSFSetType(), PetscSFCreate()
170: @*/
171: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
172: {
176:   *type = ((PetscObject)sf)->type_name;
177:   return(0);
178: }

180: /*@
181:    PetscSFDestroy - destroy star forest

183:    Collective

185:    Input Arguments:
186: .  sf - address of star forest

188:    Level: intermediate

190: .seealso: PetscSFCreate(), PetscSFReset()
191: @*/
192: PetscErrorCode PetscSFDestroy(PetscSF *sf)
193: {

197:   if (!*sf) return(0);
199:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
200:   PetscSFReset(*sf);
201:   if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
202:   PetscHeaderDestroy(sf);
203:   return(0);
204: }

206: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
207: {
208: #if defined(PETSC_USE_DEBUG)
209:   PetscInt           i, nleaves;
210:   PetscMPIInt        size;
211:   const PetscInt    *ilocal;
212:   const PetscSFNode *iremote;
213:   PetscErrorCode     ierr;

216:   if (!sf->graphset) return(0);
217:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
218:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
219:   for (i = 0; i < nleaves; i++) {
220:     const PetscInt rank = iremote[i].rank;
221:     const PetscInt remote = iremote[i].index;
222:     const PetscInt leaf = ilocal ? ilocal[i] : i;
223:     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
224:     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
225:     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
226:   }
227:   return(0);
228: #else
230:   return(0);
231: #endif
232: }

234: /*@
235:    PetscSFSetUp - set up communication structures

237:    Collective

239:    Input Arguments:
240: .  sf - star forest communication object

242:    Level: beginner

244: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
245: @*/
246: PetscErrorCode PetscSFSetUp(PetscSF sf)
247: {

252:   PetscSFCheckGraphSet(sf,1);
253:   if (sf->setupcalled) return(0);
254:   PetscSFCheckGraphValid_Private(sf);
255:   sf->use_gpu_aware_mpi = use_gpu_aware_mpi;
256:   if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
257:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
258:   if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
259:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
260:   sf->setupcalled = PETSC_TRUE;
261:   return(0);
262: }

264: /*@
265:    PetscSFSetFromOptions - set PetscSF options using the options database

267:    Logically Collective

269:    Input Arguments:
270: .  sf - star forest

272:    Options Database Keys:
273: +  -sf_type               - implementation type, see PetscSFSetType()
274: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
275: .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
276:                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
277:                             If true, this option only works with -use_cuda_aware_mpi 1.
278: -  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
279:                                If true, this option only works with -use_cuda_aware_mpi 1.

281:    Level: intermediate
282: @*/
283: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
284: {
285:   PetscSFType    deft;
286:   char           type[256];
288:   PetscBool      flg;

292:   PetscObjectOptionsBegin((PetscObject)sf);
293:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
294:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
295:   PetscSFSetType(sf,flg ? type : deft);
296:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);

298: #if defined(PETSC_HAVE_CUDA)
299:   sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
300:   PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);
301:   sf->use_stream_aware_mpi = PETSC_FALSE;
302:   PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
303: #endif
304:   if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
305:   PetscOptionsEnd();
306:   return(0);
307: }

309: /*@
310:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

312:    Logically Collective

314:    Input Arguments:
315: +  sf - star forest
316: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

318:    Level: advanced

320: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
321: @*/
322: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
323: {
327:   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
328:   sf->rankorder = flg;
329:   return(0);
330: }

332: /*@
333:    PetscSFSetGraph - Set a parallel star forest

335:    Collective

337:    Input Arguments:
338: +  sf - star forest
339: .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
340: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
341: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
342: during setup in debug mode)
343: .  localmode - copy mode for ilocal
344: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
345: during setup in debug mode)
346: -  remotemode - copy mode for iremote

348:    Level: intermediate

350:    Notes:
351:     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

353:    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
354:    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
355:    needed

357:    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
358:    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

360: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
361: @*/
362: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
363: {

370:   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
371:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

373:   if (sf->nroots >= 0) { /* Reset only if graph already set */
374:     PetscSFReset(sf);
375:   }

377:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

379:   sf->nroots  = nroots;
380:   sf->nleaves = nleaves;

382:   if (nleaves && ilocal) {
383:     PetscInt i;
384:     PetscInt minleaf = PETSC_MAX_INT;
385:     PetscInt maxleaf = PETSC_MIN_INT;
386:     int      contiguous = 1;
387:     for (i=0; i<nleaves; i++) {
388:       minleaf = PetscMin(minleaf,ilocal[i]);
389:       maxleaf = PetscMax(maxleaf,ilocal[i]);
390:       contiguous &= (ilocal[i] == i);
391:     }
392:     sf->minleaf = minleaf;
393:     sf->maxleaf = maxleaf;
394:     if (contiguous) {
395:       if (localmode == PETSC_OWN_POINTER) {
396:         PetscFree(ilocal);
397:       }
398:       ilocal = NULL;
399:     }
400:   } else {
401:     sf->minleaf = 0;
402:     sf->maxleaf = nleaves - 1;
403:   }

405:   if (ilocal) {
406:     switch (localmode) {
407:     case PETSC_COPY_VALUES:
408:       PetscMalloc1(nleaves,&sf->mine_alloc);
409:       PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
410:       sf->mine = sf->mine_alloc;
411:       break;
412:     case PETSC_OWN_POINTER:
413:       sf->mine_alloc = (PetscInt*)ilocal;
414:       sf->mine       = sf->mine_alloc;
415:       break;
416:     case PETSC_USE_POINTER:
417:       sf->mine_alloc = NULL;
418:       sf->mine       = (PetscInt*)ilocal;
419:       break;
420:     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
421:     }
422:   }

424:   switch (remotemode) {
425:   case PETSC_COPY_VALUES:
426:     PetscMalloc1(nleaves,&sf->remote_alloc);
427:     PetscArraycpy(sf->remote_alloc,iremote,nleaves);
428:     sf->remote = sf->remote_alloc;
429:     break;
430:   case PETSC_OWN_POINTER:
431:     sf->remote_alloc = (PetscSFNode*)iremote;
432:     sf->remote       = sf->remote_alloc;
433:     break;
434:   case PETSC_USE_POINTER:
435:     sf->remote_alloc = NULL;
436:     sf->remote       = (PetscSFNode*)iremote;
437:     break;
438:   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
439:   }

441:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
442:   sf->graphset = PETSC_TRUE;
443:   return(0);
444: }

446: /*@
447:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

449:   Collective

451:   Input Parameters:
452: + sf      - The PetscSF
453: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
454: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

456:   Notes:
457:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
458:   n and N are local and global sizes of x respectively.

460:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
461:   sequential vectors y on all ranks.

463:   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
464:   sequential vector y on rank 0.

466:   In above cases, entries of x are roots and entries of y are leaves.

468:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
469:   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
470:   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
471:   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
472:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

474:   In this case, roots and leaves are symmetric.

476:   Level: intermediate
477:  @*/
478: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
479: {
480:   MPI_Comm       comm;
481:   PetscInt       n,N,res[2];
482:   PetscMPIInt    rank,size;
483:   PetscSFType    type;

487:   PetscObjectGetComm((PetscObject)sf, &comm);
488:   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
489:   MPI_Comm_rank(comm,&rank);
490:   MPI_Comm_size(comm,&size);

492:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
493:     type = PETSCSFALLTOALL;
494:     PetscLayoutCreate(comm,&sf->map);
495:     PetscLayoutSetLocalSize(sf->map,size);
496:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
497:     PetscLayoutSetUp(sf->map);
498:   } else {
499:     PetscLayoutGetLocalSize(map,&n);
500:     PetscLayoutGetSize(map,&N);
501:     res[0] = n;
502:     res[1] = -n;
503:     /* Check if n are same over all ranks so that we can optimize it */
504:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
505:     if (res[0] == -res[1]) { /* same n */
506:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
507:     } else {
508:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
509:     }
510:     PetscLayoutReference(map,&sf->map);
511:   }
512:   PetscSFSetType(sf,type);

514:   sf->pattern = pattern;
515:   sf->mine    = NULL; /* Contiguous */

517:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
518:      Also set other easy stuff.
519:    */
520:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
521:     sf->nleaves      = N;
522:     sf->nroots       = n;
523:     sf->nranks       = size;
524:     sf->minleaf      = 0;
525:     sf->maxleaf      = N - 1;
526:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
527:     sf->nleaves      = rank ? 0 : N;
528:     sf->nroots       = n;
529:     sf->nranks       = rank ? 0 : size;
530:     sf->minleaf      = 0;
531:     sf->maxleaf      = rank ? -1 : N - 1;
532:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
533:     sf->nleaves      = size;
534:     sf->nroots       = size;
535:     sf->nranks       = size;
536:     sf->minleaf      = 0;
537:     sf->maxleaf      = size - 1;
538:   }
539:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
540:   sf->graphset = PETSC_TRUE;
541:   return(0);
542: }

544: /*@
545:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

547:    Collective

549:    Input Arguments:

551: .  sf - star forest to invert

553:    Output Arguments:
554: .  isf - inverse of sf
555:    Level: advanced

557:    Notes:
558:    All roots must have degree 1.

560:    The local space may be a permutation, but cannot be sparse.

562: .seealso: PetscSFSetGraph()
563: @*/
564: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
565: {
567:   PetscMPIInt    rank;
568:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
569:   const PetscInt *ilocal;
570:   PetscSFNode    *roots,*leaves;

574:   PetscSFCheckGraphSet(sf,1);

577:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
578:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

580:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
581:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
582:   for (i=0; i<maxlocal; i++) {
583:     leaves[i].rank  = rank;
584:     leaves[i].index = i;
585:   }
586:   for (i=0; i <nroots; i++) {
587:     roots[i].rank  = -1;
588:     roots[i].index = -1;
589:   }
590:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
591:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);

593:   /* Check whether our leaves are sparse */
594:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
595:   if (count == nroots) newilocal = NULL;
596:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
597:     PetscMalloc1(count,&newilocal);
598:     for (i=0,count=0; i<nroots; i++) {
599:       if (roots[i].rank >= 0) {
600:         newilocal[count]   = i;
601:         roots[count].rank  = roots[i].rank;
602:         roots[count].index = roots[i].index;
603:         count++;
604:       }
605:     }
606:   }

608:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
609:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
610:   PetscFree2(roots,leaves);
611:   return(0);
612: }

614: /*@
615:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

617:    Collective

619:    Input Arguments:
620: +  sf - communication object to duplicate
621: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

623:    Output Arguments:
624: .  newsf - new communication object

626:    Level: beginner

628: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
629: @*/
630: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
631: {
632:   PetscSFType    type;

639:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
640:   PetscSFGetType(sf,&type);
641:   if (type) {PetscSFSetType(*newsf,type);}
642:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
643:     PetscSFCheckGraphSet(sf,1);
644:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
645:       PetscInt          nroots,nleaves;
646:       const PetscInt    *ilocal;
647:       const PetscSFNode *iremote;
648:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
649:       PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
650:     } else {
651:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
652:     }
653:   }
654:   if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
655:   return(0);
656: }

658: /*@C
659:    PetscSFGetGraph - Get the graph specifying a parallel star forest

661:    Not Collective

663:    Input Arguments:
664: .  sf - star forest

666:    Output Arguments:
667: +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
668: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
669: .  ilocal - locations of leaves in leafdata buffers
670: -  iremote - remote locations of root vertices for each leaf on the current process

672:    Notes:
673:    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

675:    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
676:    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

678:    Level: intermediate

680: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
681: @*/
682: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
683: {

688:   if (sf->ops->GetGraph) {
689:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
690:   } else {
691:     if (nroots) *nroots = sf->nroots;
692:     if (nleaves) *nleaves = sf->nleaves;
693:     if (ilocal) *ilocal = sf->mine;
694:     if (iremote) *iremote = sf->remote;
695:   }
696:   return(0);
697: }

699: /*@
700:    PetscSFGetLeafRange - Get the active leaf ranges

702:    Not Collective

704:    Input Arguments:
705: .  sf - star forest

707:    Output Arguments:
708: +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
709: -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

711:    Level: developer

713: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
714: @*/
715: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
716: {

720:   PetscSFCheckGraphSet(sf,1);
721:   if (minleaf) *minleaf = sf->minleaf;
722:   if (maxleaf) *maxleaf = sf->maxleaf;
723:   return(0);
724: }

726: /*@C
727:    PetscSFViewFromOptions - View from Options

729:    Collective on PetscSF

731:    Input Parameters:
732: +  A - the star forest
733: .  obj - Optional object
734: -  name - command line option

736:    Level: intermediate
737: .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
738: @*/
739: PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
740: {

745:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
746:   return(0);
747: }

749: /*@C
750:    PetscSFView - view a star forest

752:    Collective

754:    Input Arguments:
755: +  sf - star forest
756: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

758:    Level: beginner

760: .seealso: PetscSFCreate(), PetscSFSetGraph()
761: @*/
762: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
763: {
764:   PetscErrorCode    ierr;
765:   PetscBool         iascii;
766:   PetscViewerFormat format;

770:   if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
773:   if (sf->graphset) {PetscSFSetUp(sf);}
774:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
775:   if (iascii) {
776:     PetscMPIInt rank;
777:     PetscInt    ii,i,j;

779:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
780:     PetscViewerASCIIPushTab(viewer);
781:     if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
782:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
783:       if (!sf->graphset) {
784:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
785:         PetscViewerASCIIPopTab(viewer);
786:         return(0);
787:       }
788:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
789:       PetscViewerASCIIPushSynchronized(viewer);
790:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
791:       for (i=0; i<sf->nleaves; i++) {
792:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
793:       }
794:       PetscViewerFlush(viewer);
795:       PetscViewerGetFormat(viewer,&format);
796:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
797:         PetscMPIInt *tmpranks,*perm;
798:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
799:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
800:         for (i=0; i<sf->nranks; i++) perm[i] = i;
801:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
802:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
803:         for (ii=0; ii<sf->nranks; ii++) {
804:           i = perm[ii];
805:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
806:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
807:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
808:           }
809:         }
810:         PetscFree2(tmpranks,perm);
811:       }
812:       PetscViewerFlush(viewer);
813:       PetscViewerASCIIPopSynchronized(viewer);
814:     }
815:     PetscViewerASCIIPopTab(viewer);
816:   }
817:   return(0);
818: }

820: /*@C
821:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

823:    Not Collective

825:    Input Arguments:
826: .  sf - star forest

828:    Output Arguments:
829: +  nranks - number of ranks referenced by local part
830: .  ranks - array of ranks
831: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
832: .  rmine - concatenated array holding local indices referencing each remote rank
833: -  rremote - concatenated array holding remote indices referenced for each remote rank

835:    Level: developer

837: .seealso: PetscSFGetLeafRanks()
838: @*/
839: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
840: {

845:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
846:   if (sf->ops->GetRootRanks) {
847:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
848:   } else {
849:     /* The generic implementation */
850:     if (nranks)  *nranks  = sf->nranks;
851:     if (ranks)   *ranks   = sf->ranks;
852:     if (roffset) *roffset = sf->roffset;
853:     if (rmine)   *rmine   = sf->rmine;
854:     if (rremote) *rremote = sf->rremote;
855:   }
856:   return(0);
857: }

859: /*@C
860:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

862:    Not Collective

864:    Input Arguments:
865: .  sf - star forest

867:    Output Arguments:
868: +  niranks - number of leaf ranks referencing roots on this process
869: .  iranks - array of ranks
870: .  ioffset - offset in irootloc for each rank (length niranks+1)
871: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

873:    Level: developer

875: .seealso: PetscSFGetRootRanks()
876: @*/
877: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
878: {

883:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
884:   if (sf->ops->GetLeafRanks) {
885:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
886:   } else {
887:     PetscSFType type;
888:     PetscSFGetType(sf,&type);
889:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
890:   }
891:   return(0);
892: }

894: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
895:   PetscInt i;
896:   for (i=0; i<n; i++) {
897:     if (needle == list[i]) return PETSC_TRUE;
898:   }
899:   return PETSC_FALSE;
900: }

902: /*@C
903:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

905:    Collective

907:    Input Arguments:
908: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
909: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

911:    Level: developer

913: .seealso: PetscSFGetRootRanks()
914: @*/
915: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
916: {
917:   PetscErrorCode     ierr;
918:   PetscTable         table;
919:   PetscTablePosition pos;
920:   PetscMPIInt        size,groupsize,*groupranks;
921:   PetscInt           *rcount,*ranks;
922:   PetscInt           i, irank = -1,orank = -1;

926:   PetscSFCheckGraphSet(sf,1);
927:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
928:   PetscTableCreate(10,size,&table);
929:   for (i=0; i<sf->nleaves; i++) {
930:     /* Log 1-based rank */
931:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
932:   }
933:   PetscTableGetCount(table,&sf->nranks);
934:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
935:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
936:   PetscTableGetHeadPosition(table,&pos);
937:   for (i=0; i<sf->nranks; i++) {
938:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
939:     ranks[i]--;             /* Convert back to 0-based */
940:   }
941:   PetscTableDestroy(&table);

943:   /* We expect that dgroup is reliably "small" while nranks could be large */
944:   {
945:     MPI_Group group = MPI_GROUP_NULL;
946:     PetscMPIInt *dgroupranks;
947:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
948:     MPI_Group_size(dgroup,&groupsize);
949:     PetscMalloc1(groupsize,&dgroupranks);
950:     PetscMalloc1(groupsize,&groupranks);
951:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
952:     if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
953:     MPI_Group_free(&group);
954:     PetscFree(dgroupranks);
955:   }

957:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
958:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
959:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
960:       if (InList(ranks[i],groupsize,groupranks)) break;
961:     }
962:     for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
963:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
964:     }
965:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
966:       PetscInt tmprank,tmpcount;

968:       tmprank             = ranks[i];
969:       tmpcount            = rcount[i];
970:       ranks[i]            = ranks[sf->ndranks];
971:       rcount[i]           = rcount[sf->ndranks];
972:       ranks[sf->ndranks]  = tmprank;
973:       rcount[sf->ndranks] = tmpcount;
974:       sf->ndranks++;
975:     }
976:   }
977:   PetscFree(groupranks);
978:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
979:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
980:   sf->roffset[0] = 0;
981:   for (i=0; i<sf->nranks; i++) {
982:     PetscMPIIntCast(ranks[i],sf->ranks+i);
983:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
984:     rcount[i]        = 0;
985:   }
986:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
987:     /* short circuit */
988:     if (orank != sf->remote[i].rank) {
989:       /* Search for index of iremote[i].rank in sf->ranks */
990:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
991:       if (irank < 0) {
992:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
993:         if (irank >= 0) irank += sf->ndranks;
994:       }
995:       orank = sf->remote[i].rank;
996:     }
997:     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
998:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
999:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1000:     rcount[irank]++;
1001:   }
1002:   PetscFree2(rcount,ranks);
1003:   return(0);
1004: }

1006: /*@C
1007:    PetscSFGetGroups - gets incoming and outgoing process groups

1009:    Collective

1011:    Input Argument:
1012: .  sf - star forest

1014:    Output Arguments:
1015: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1016: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1018:    Level: developer

1020: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1021: @*/
1022: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1023: {
1025:   MPI_Group      group = MPI_GROUP_NULL;

1028:   if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1029:   if (sf->ingroup == MPI_GROUP_NULL) {
1030:     PetscInt       i;
1031:     const PetscInt *indegree;
1032:     PetscMPIInt    rank,*outranks,*inranks;
1033:     PetscSFNode    *remote;
1034:     PetscSF        bgcount;

1036:     /* Compute the number of incoming ranks */
1037:     PetscMalloc1(sf->nranks,&remote);
1038:     for (i=0; i<sf->nranks; i++) {
1039:       remote[i].rank  = sf->ranks[i];
1040:       remote[i].index = 0;
1041:     }
1042:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1043:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1044:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1045:     PetscSFComputeDegreeEnd(bgcount,&indegree);
1046:     /* Enumerate the incoming ranks */
1047:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1048:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1049:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1050:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1051:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1052:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1053:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1054:     MPI_Group_free(&group);
1055:     PetscFree2(inranks,outranks);
1056:     PetscSFDestroy(&bgcount);
1057:   }
1058:   *incoming = sf->ingroup;

1060:   if (sf->outgroup == MPI_GROUP_NULL) {
1061:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1062:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1063:     MPI_Group_free(&group);
1064:   }
1065:   *outgoing = sf->outgroup;
1066:   return(0);
1067: }

1069: /*@
1070:    PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

1072:    Collective

1074:    Input Argument:
1075: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1077:    Output Arguments:
1078: .  multi - star forest with split roots, such that each root has degree exactly 1

1080:    Level: developer

1082:    Notes:

1084:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1085:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1086:    edge, it is a candidate for future optimization that might involve its removal.

1088: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1089: @*/
1090: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1091: {

1097:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1098:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1099:     *multi = sf->multi;
1100:     return(0);
1101:   }
1102:   if (!sf->multi) {
1103:     const PetscInt *indegree;
1104:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1105:     PetscSFNode    *remote;
1106:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1107:     PetscSFComputeDegreeBegin(sf,&indegree);
1108:     PetscSFComputeDegreeEnd(sf,&indegree);
1109:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1110:     inoffset[0] = 0;
1111:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1112:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1113:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1114:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1115:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1116: #if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
1117:     for (i=0; i<sf->nroots; i++) {
1118:       if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1119:     }
1120: #endif
1121:     PetscMalloc1(sf->nleaves,&remote);
1122:     for (i=0; i<sf->nleaves; i++) {
1123:       remote[i].rank  = sf->remote[i].rank;
1124:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1125:     }
1126:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1127:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1128:     if (sf->rankorder) {        /* Sort the ranks */
1129:       PetscMPIInt rank;
1130:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1131:       PetscSFNode *newremote;
1132:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1133:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1134:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1135:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1136:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1137:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1138:       /* Sort the incoming ranks at each vertex, build the inverse map */
1139:       for (i=0; i<sf->nroots; i++) {
1140:         PetscInt j;
1141:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1142:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1143:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1144:       }
1145:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1146:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1147:       PetscMalloc1(sf->nleaves,&newremote);
1148:       for (i=0; i<sf->nleaves; i++) {
1149:         newremote[i].rank  = sf->remote[i].rank;
1150:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1151:       }
1152:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1153:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1154:     }
1155:     PetscFree3(inoffset,outones,outoffset);
1156:   }
1157:   *multi = sf->multi;
1158:   return(0);
1159: }

1161: /*@C
1162:    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

1164:    Collective

1166:    Input Arguments:
1167: +  sf - original star forest
1168: .  nselected  - number of selected roots on this process
1169: -  selected   - indices of the selected roots on this process

1171:    Output Arguments:
1172: .  esf - new star forest

1174:    Level: advanced

1176:    Note:
1177:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1178:    be done by calling PetscSFGetGraph().

1180: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1181: @*/
1182: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1183: {
1184:   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1185:   const PetscInt    *ilocal;
1186:   signed char       *rootdata,*leafdata,*leafmem;
1187:   const PetscSFNode *iremote;
1188:   PetscSFNode       *new_iremote;
1189:   MPI_Comm          comm;
1190:   PetscErrorCode    ierr;

1194:   PetscSFCheckGraphSet(sf,1);

1198:   PetscSFSetUp(sf);

1200:   PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1201:   PetscObjectGetComm((PetscObject)sf,&comm);
1202:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);

1204: #if defined(PETSC_USE_DEBUG)
1205:   /* Error out if selected[] has dups or  out of range indices */
1206:   {
1207:     PetscBool dups;
1209:     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1210:     for (i=0; i<nselected; i++)
1211:       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
1212:   }
1213: #endif

1215:   if (sf->ops->CreateEmbeddedSF) {
1216:     (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);
1217:   } else {
1218:     /* A generic version of creating embedded sf */
1219:     PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1220:     maxlocal = maxleaf - minleaf + 1;
1221:     PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1222:     leafdata = leafmem - minleaf;
1223:     /* Tag selected roots and bcast to leaves */
1224:     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1225:     PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1226:     PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);

1228:     /* Build esf with leaves that are still connected */
1229:     esf_nleaves = 0;
1230:     for (i=0; i<nleaves; i++) {
1231:       j = ilocal ? ilocal[i] : i;
1232:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1233:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1234:       */
1235:       esf_nleaves += (leafdata[j] ? 1 : 0);
1236:     }
1237:     PetscMalloc1(esf_nleaves,&new_ilocal);
1238:     PetscMalloc1(esf_nleaves,&new_iremote);
1239:     for (i=n=0; i<nleaves; i++) {
1240:       j = ilocal ? ilocal[i] : i;
1241:       if (leafdata[j]) {
1242:         new_ilocal[n]        = j;
1243:         new_iremote[n].rank  = iremote[i].rank;
1244:         new_iremote[n].index = iremote[i].index;
1245:         ++n;
1246:       }
1247:     }
1248:     PetscSFCreate(comm,esf);
1249:     PetscSFSetFromOptions(*esf);
1250:     PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1251:     PetscFree2(rootdata,leafmem);
1252:   }
1253:   PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1254:   return(0);
1255: }

1257: /*@C
1258:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1260:   Collective

1262:   Input Arguments:
1263: + sf - original star forest
1264: . nselected  - number of selected leaves on this process
1265: - selected   - indices of the selected leaves on this process

1267:   Output Arguments:
1268: .  newsf - new star forest

1270:   Level: advanced

1272: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1273: @*/
1274: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1275: {
1276:   const PetscSFNode *iremote;
1277:   PetscSFNode       *new_iremote;
1278:   const PetscInt    *ilocal;
1279:   PetscInt          i,nroots,*leaves,*new_ilocal;
1280:   MPI_Comm          comm;
1281:   PetscErrorCode    ierr;

1285:   PetscSFCheckGraphSet(sf,1);

1289:   /* Uniq selected[] and put results in leaves[] */
1290:   PetscObjectGetComm((PetscObject)sf,&comm);
1291:   PetscMalloc1(nselected,&leaves);
1292:   PetscArraycpy(leaves,selected,nselected);
1293:   PetscSortedRemoveDupsInt(&nselected,leaves);
1294:   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

1296:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1297:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1298:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1299:   } else {
1300:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1301:     PetscMalloc1(nselected,&new_ilocal);
1302:     PetscMalloc1(nselected,&new_iremote);
1303:     for (i=0; i<nselected; ++i) {
1304:       const PetscInt l     = leaves[i];
1305:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1306:       new_iremote[i].rank  = iremote[l].rank;
1307:       new_iremote[i].index = iremote[l].index;
1308:     }
1309:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1310:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1311:   }
1312:   PetscFree(leaves);
1313:   return(0);
1314: }

1316: /*@C
1317:    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

1319:    Collective on PetscSF

1321:    Input Arguments:
1322: +  sf - star forest on which to communicate
1323: .  unit - data type associated with each node
1324: .  rootdata - buffer to broadcast
1325: -  op - operation to use for reduction

1327:    Output Arguments:
1328: .  leafdata - buffer to be reduced with values from each leaf's respective root

1330:    Level: intermediate

1332: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1333: @*/
1334: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1335: {
1337:   PetscMemType   rootmtype,leafmtype;

1341:   PetscSFSetUp(sf);
1342:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1343:   PetscGetMemType(rootdata,&rootmtype);
1344:   PetscGetMemType(leafdata,&leafmtype);
1345:   (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1346:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1347:   return(0);
1348: }

1350: /*@C
1351:    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

1353:    Collective

1355:    Input Arguments:
1356: +  sf - star forest
1357: .  unit - data type
1358: .  rootdata - buffer to broadcast
1359: -  op - operation to use for reduction

1361:    Output Arguments:
1362: .  leafdata - buffer to be reduced with values from each leaf's respective root

1364:    Level: intermediate

1366: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1367: @*/
1368: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1369: {

1374:   PetscSFSetUp(sf);
1375:   PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1376:   (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);
1377:   PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1378:   return(0);
1379: }

1381: /*@C
1382:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1384:    Collective

1386:    Input Arguments:
1387: +  sf - star forest
1388: .  unit - data type
1389: .  leafdata - values to reduce
1390: -  op - reduction operation

1392:    Output Arguments:
1393: .  rootdata - result of reduction of values from all leaves of each root

1395:    Level: intermediate

1397: .seealso: PetscSFBcastBegin()
1398: @*/
1399: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1400: {
1402:   PetscMemType   rootmtype,leafmtype;

1406:   PetscSFSetUp(sf);
1407:   PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1408:   PetscGetMemType(rootdata,&rootmtype);
1409:   PetscGetMemType(leafdata,&leafmtype);
1410:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1411:   PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1412:   return(0);
1413: }

1415: /*@C
1416:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1418:    Collective

1420:    Input Arguments:
1421: +  sf - star forest
1422: .  unit - data type
1423: .  leafdata - values to reduce
1424: -  op - reduction operation

1426:    Output Arguments:
1427: .  rootdata - result of reduction of values from all leaves of each root

1429:    Level: intermediate

1431: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1432: @*/
1433: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1434: {

1439:   PetscSFSetUp(sf);
1440:   PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1441:   (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1442:   PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1443:   return(0);
1444: }

1446: /*@C
1447:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1449:    Collective

1451:    Input Arguments:
1452: +  sf - star forest
1453: .  unit - data type
1454: .  leafdata - leaf values to use in reduction
1455: -  op - operation to use for reduction

1457:    Output Arguments:
1458: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1459: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1461:    Level: advanced

1463:    Note:
1464:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1465:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1466:    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1467:    integers.

1469: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1470: @*/
1471: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1472: {
1474:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1478:   PetscSFSetUp(sf);
1479:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1480:   PetscGetMemType(rootdata,&rootmtype);
1481:   PetscGetMemType(leafdata,&leafmtype);
1482:   PetscGetMemType(leafupdate,&leafupdatemtype);
1483:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1484:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1485:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1486:   return(0);
1487: }

1489: /*@C
1490:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1492:    Collective

1494:    Input Arguments:
1495: +  sf - star forest
1496: .  unit - data type
1497: .  leafdata - leaf values to use in reduction
1498: -  op - operation to use for reduction

1500:    Output Arguments:
1501: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1502: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1504:    Level: advanced

1506: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1507: @*/
1508: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1509: {

1514:   PetscSFSetUp(sf);
1515:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1516:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1517:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1518:   return(0);
1519: }

1521: /*@C
1522:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1524:    Collective

1526:    Input Arguments:
1527: .  sf - star forest

1529:    Output Arguments:
1530: .  degree - degree of each root vertex

1532:    Level: advanced

1534:    Notes:
1535:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1537: .seealso: PetscSFGatherBegin()
1538: @*/
1539: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1540: {

1545:   PetscSFCheckGraphSet(sf,1);
1547:   if (!sf->degreeknown) {
1548:     PetscInt i, nroots = sf->nroots, maxlocal;
1549:     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1550:     maxlocal = sf->maxleaf-sf->minleaf+1;
1551:     PetscMalloc1(nroots,&sf->degree);
1552:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1553:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1554:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1555:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1556:   }
1557:   *degree = NULL;
1558:   return(0);
1559: }

1561: /*@C
1562:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1564:    Collective

1566:    Input Arguments:
1567: .  sf - star forest

1569:    Output Arguments:
1570: .  degree - degree of each root vertex

1572:    Level: developer

1574:    Notes:
1575:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1577: .seealso:
1578: @*/
1579: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1580: {

1585:   PetscSFCheckGraphSet(sf,1);
1587:   if (!sf->degreeknown) {
1588:     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1589:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1590:     PetscFree(sf->degreetmp);
1591:     sf->degreeknown = PETSC_TRUE;
1592:   }
1593:   *degree = sf->degree;
1594:   return(0);
1595: }


1598: /*@C
1599:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1600:    Each multi-root is assigned index of the corresponding original root.

1602:    Collective

1604:    Input Arguments:
1605: +  sf - star forest
1606: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1608:    Output Arguments:
1609: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1610: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1612:    Level: developer

1614:    Notes:
1615:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1617: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1618: @*/
1619: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1620: {
1621:   PetscSF             msf;
1622:   PetscInt            i, j, k, nroots, nmroots;
1623:   PetscErrorCode      ierr;

1627:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1631:   PetscSFGetMultiSF(sf,&msf);
1632:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1633:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1634:   for (i=0,j=0,k=0; i<nroots; i++) {
1635:     if (!degree[i]) continue;
1636:     for (j=0; j<degree[i]; j++,k++) {
1637:       (*multiRootsOrigNumbering)[k] = i;
1638:     }
1639:   }
1640: #if defined(PETSC_USE_DEBUG)
1641:   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1642: #endif
1643:   if (nMultiRoots) *nMultiRoots = nmroots;
1644:   return(0);
1645: }

1647: /*@C
1648:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1650:    Collective

1652:    Input Arguments:
1653: +  sf - star forest
1654: .  unit - data type
1655: -  leafdata - leaf data to gather to roots

1657:    Output Argument:
1658: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1660:    Level: intermediate

1662: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1663: @*/
1664: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1665: {
1667:   PetscSF        multi = NULL;

1671:   PetscSFSetUp(sf);
1672:   PetscSFGetMultiSF(sf,&multi);
1673:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1674:   return(0);
1675: }

1677: /*@C
1678:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1680:    Collective

1682:    Input Arguments:
1683: +  sf - star forest
1684: .  unit - data type
1685: -  leafdata - leaf data to gather to roots

1687:    Output Argument:
1688: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1690:    Level: intermediate

1692: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1693: @*/
1694: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1695: {
1697:   PetscSF        multi = NULL;

1701:   PetscSFSetUp(sf);
1702:   PetscSFGetMultiSF(sf,&multi);
1703:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1704:   return(0);
1705: }

1707: /*@C
1708:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1710:    Collective

1712:    Input Arguments:
1713: +  sf - star forest
1714: .  unit - data type
1715: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1717:    Output Argument:
1718: .  leafdata - leaf data to be update with personal data from each respective root

1720:    Level: intermediate

1722: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1723: @*/
1724: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1725: {
1727:   PetscSF        multi = NULL;

1731:   PetscSFSetUp(sf);
1732:   PetscSFGetMultiSF(sf,&multi);
1733:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1734:   return(0);
1735: }

1737: /*@C
1738:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1740:    Collective

1742:    Input Arguments:
1743: +  sf - star forest
1744: .  unit - data type
1745: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1747:    Output Argument:
1748: .  leafdata - leaf data to be update with personal data from each respective root

1750:    Level: intermediate

1752: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1753: @*/
1754: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1755: {
1757:   PetscSF        multi = NULL;

1761:   PetscSFSetUp(sf);
1762:   PetscSFGetMultiSF(sf,&multi);
1763:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1764:   return(0);
1765: }

1767: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1768: {
1769: #if defined(PETSC_USE_DEBUG)
1770:   PetscInt        i, n, nleaves;
1771:   const PetscInt *ilocal = NULL;
1772:   PetscHSetI      seen;
1773:   PetscErrorCode  ierr;

1776:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1777:   PetscHSetICreate(&seen);
1778:   for (i = 0; i < nleaves; i++) {
1779:     const PetscInt leaf = ilocal ? ilocal[i] : i;
1780:     PetscHSetIAdd(seen,leaf);
1781:   }
1782:   PetscHSetIGetSize(seen,&n);
1783:   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1784:   PetscHSetIDestroy(&seen);
1785:   return(0);
1786: #else
1788:   return(0);
1789: #endif
1790: }

1792: /*@
1793:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1795:   Input Parameters:
1796: + sfA - The first PetscSF
1797: - sfB - The second PetscSF

1799:   Output Parameters:
1800: . sfBA - The composite SF

1802:   Level: developer

1804:   Notes:
1805:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1806:   forests, i.e. the same leaf is not connected with different roots.

1808:   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1809:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1810:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1811:   Bcast on sfA, then a Bcast on sfB, on connected nodes.

1813: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1814: @*/
1815: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1816: {
1817:   PetscErrorCode    ierr;
1818:   const PetscSFNode *remotePointsA,*remotePointsB;
1819:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1820:   const PetscInt    *localPointsA,*localPointsB;
1821:   PetscInt          *localPointsBA;
1822:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1823:   PetscBool         denseB;

1827:   PetscSFCheckGraphSet(sfA,1);
1829:   PetscSFCheckGraphSet(sfB,2);
1832:   PetscSFCheckLeavesUnique_Private(sfA);
1833:   PetscSFCheckLeavesUnique_Private(sfB);

1835:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1836:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1837:   if (localPointsA) {
1838:     PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1839:     for (i=0; i<numRootsB; i++) {
1840:       reorderedRemotePointsA[i].rank = -1;
1841:       reorderedRemotePointsA[i].index = -1;
1842:     }
1843:     for (i=0; i<numLeavesA; i++) {
1844:       if (localPointsA[i] >= numRootsB) continue;
1845:       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1846:     }
1847:     remotePointsA = reorderedRemotePointsA;
1848:   }
1849:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1850:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1851:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1852:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1853:   PetscFree(reorderedRemotePointsA);

1855:   denseB = (PetscBool)!localPointsB;
1856:   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1857:     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1858:     else numLeavesBA++;
1859:   }
1860:   if (denseB) {
1861:     localPointsBA  = NULL;
1862:     remotePointsBA = leafdataB;
1863:   } else {
1864:     PetscMalloc1(numLeavesBA,&localPointsBA);
1865:     PetscMalloc1(numLeavesBA,&remotePointsBA);
1866:     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1867:       const PetscInt l = localPointsB ? localPointsB[i] : i;

1869:       if (leafdataB[l-minleaf].rank == -1) continue;
1870:       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1871:       localPointsBA[numLeavesBA] = l;
1872:       numLeavesBA++;
1873:     }
1874:     PetscFree(leafdataB);
1875:   }
1876:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1877:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1878:   return(0);
1879: }

1881: /*@
1882:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

1884:   Input Parameters:
1885: + sfA - The first PetscSF
1886: - sfB - The second PetscSF

1888:   Output Parameters:
1889: . sfBA - The composite SF.

1891:   Level: developer

1893:   Notes:
1894:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1895:   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
1896:   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

1898:   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
1899:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
1900:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
1901:   a Reduce on sfB, on connected roots.

1903: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
1904: @*/
1905: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1906: {
1907:   PetscErrorCode    ierr;
1908:   const PetscSFNode *remotePointsA,*remotePointsB;
1909:   PetscSFNode       *remotePointsBA;
1910:   const PetscInt    *localPointsA,*localPointsB;
1911:   PetscSFNode       *reorderedRemotePointsA = NULL;
1912:   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
1913:   MPI_Op            op;
1914: #if defined(PETSC_USE_64BIT_INDICES)
1915:   PetscBool         iswin;
1916: #endif

1920:   PetscSFCheckGraphSet(sfA,1);
1922:   PetscSFCheckGraphSet(sfB,2);
1925:   PetscSFCheckLeavesUnique_Private(sfA);
1926:   PetscSFCheckLeavesUnique_Private(sfB);

1928:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1929:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

1931:   /* TODO: Check roots of sfB have degree of 1 */
1932:   /* Once we implement it, we can replace the MPI_MAXLOC
1933:      with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
1934:      We use MPI_MAXLOC only to have a deterministic output from this routine if
1935:      the root condition is not meet.
1936:    */
1937:   op = MPI_MAXLOC;
1938: #if defined(PETSC_USE_64BIT_INDICES)
1939:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
1940:   PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
1941:   if (iswin) op = MPIU_REPLACE;
1942: #endif

1944:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1945:   PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
1946:   for (i=0; i<maxleaf - minleaf + 1; i++) {
1947:     reorderedRemotePointsA[i].rank = -1;
1948:     reorderedRemotePointsA[i].index = -1;
1949:   }
1950:   if (localPointsA) {
1951:     for (i=0; i<numLeavesA; i++) {
1952:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
1953:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
1954:     }
1955:   } else {
1956:     for (i=0; i<numLeavesA; i++) {
1957:       if (i > maxleaf || i < minleaf) continue;
1958:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
1959:     }
1960:   }

1962:   PetscMalloc1(numRootsB,&localPointsBA);
1963:   PetscMalloc1(numRootsB,&remotePointsBA);
1964:   for (i=0; i<numRootsB; i++) {
1965:     remotePointsBA[i].rank = -1;
1966:     remotePointsBA[i].index = -1;
1967:   }

1969:   PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
1970:   PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
1971:   PetscFree(reorderedRemotePointsA);
1972:   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
1973:     if (remotePointsBA[i].rank == -1) continue;
1974:     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
1975:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
1976:     localPointsBA[numLeavesBA] = i;
1977:     numLeavesBA++;
1978:   }
1979:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1980:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1981:   return(0);
1982: }

1984: /*
1985:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

1987:   Input Parameters:
1988: . sf - The global PetscSF

1990:   Output Parameters:
1991: . out - The local PetscSF
1992:  */
1993: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1994: {
1995:   MPI_Comm           comm;
1996:   PetscMPIInt        myrank;
1997:   const PetscInt     *ilocal;
1998:   const PetscSFNode  *iremote;
1999:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2000:   PetscSFNode        *liremote;
2001:   PetscSF            lsf;
2002:   PetscErrorCode     ierr;

2006:   if (sf->ops->CreateLocalSF) {
2007:     (*sf->ops->CreateLocalSF)(sf,out);
2008:   } else {
2009:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2010:     PetscObjectGetComm((PetscObject)sf,&comm);
2011:     MPI_Comm_rank(comm,&myrank);

2013:     /* Find out local edges and build a local SF */
2014:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2015:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2016:     PetscMalloc1(lnleaves,&lilocal);
2017:     PetscMalloc1(lnleaves,&liremote);

2019:     for (i=j=0; i<nleaves; i++) {
2020:       if (iremote[i].rank == (PetscInt)myrank) {
2021:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2022:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2023:         liremote[j].index = iremote[i].index;
2024:         j++;
2025:       }
2026:     }
2027:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
2028:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2029:     PetscSFSetUp(lsf);
2030:     *out = lsf;
2031:   }
2032:   return(0);
2033: }

2035: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2036: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2037: {
2038:   PetscErrorCode     ierr;
2039:   PetscMemType       rootmtype,leafmtype;

2043:   PetscSFSetUp(sf);
2044:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2045:   PetscGetMemType(rootdata,&rootmtype);
2046:   PetscGetMemType(leafdata,&leafmtype);
2047:   if (sf->ops->BcastToZero) {
2048:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2049:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2050:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2051:   return(0);
2052: }