Actual source code: sf.c

petsc-master 2020-09-23
Report Typos and Errors
  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petscctable.h>

  5: #if defined(PETSC_HAVE_CUDA)
  6:   #include <cuda_runtime.h>
  7: #endif

  9: #if defined(PETSC_HAVE_HIP)
 10:   #include <hip/hip_runtime.h>
 11: #endif

 13: #if defined(PETSC_USE_DEBUG)
 14: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
 15:     if (PetscUnlikely(!(sf)->graphset))                              \
 16:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
 17:   } while (0)
 18: #else
 19: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 20: #endif

 22: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

 24: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
 25: {
 28:   *type = PETSC_MEMTYPE_HOST;
 29: #if defined(PETSC_HAVE_CUDA)
 30:   if (PetscCUDAInitialized && data) {
 31:     cudaError_t                  cerr;
 32:     struct cudaPointerAttributes attr;
 33:     enum cudaMemoryType          mtype;
 34:     cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
 35:     cudaGetLastError(); /* Reset the last error */
 36:     #if (CUDART_VERSION < 10000)
 37:       mtype = attr.memoryType;
 38:     #else
 39:       mtype = attr.type;
 40:     #endif
 41:     if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
 42:   }
 43: #endif

 45: #if defined(PETSC_HAVE_HIP)
 46:   if (PetscHIPInitialized && data) {
 47:     hipError_t                   cerr;
 48:     struct hipPointerAttribute_t attr;
 49:     enum hipMemoryType           mtype;
 50:     cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
 51:     hipGetLastError(); /* Reset the last error */
 52:     mtype = attr.memoryType;
 53:     if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
 54:   }
 55: #endif
 56:   return(0);
 57: }

 59: /*@
 60:    PetscSFCreate - create a star forest communication context

 62:    Collective

 64:    Input Arguments:
 65: .  comm - communicator on which the star forest will operate

 67:    Output Arguments:
 68: .  sf - new star forest context

 70:    Options Database Keys:
 71: +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
 72: .  -sf_type window    -Use MPI-3 one-sided window for communication
 73: -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

 75:    Level: intermediate

 77:    Notes:
 78:    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
 79:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 80:    SFs are optimized and they have better performance than general SFs.

 82: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 83: @*/
 84: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 85: {
 87:   PetscSF        b;

 91:   PetscSFInitializePackage();

 93:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 95:   b->nroots    = -1;
 96:   b->nleaves   = -1;
 97:   b->minleaf   = PETSC_MAX_INT;
 98:   b->maxleaf   = PETSC_MIN_INT;
 99:   b->nranks    = -1;
100:   b->rankorder = PETSC_TRUE;
101:   b->ingroup   = MPI_GROUP_NULL;
102:   b->outgroup  = MPI_GROUP_NULL;
103:   b->graphset  = PETSC_FALSE;
104: #if defined(PETSC_HAVE_DEVICE)
105:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
106:   b->use_stream_aware_mpi = PETSC_FALSE;
107:   b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
108:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
109:     b->backend = PETSCSF_BACKEND_KOKKOS;
110:   #elif defined(PETSC_HAVE_CUDA)
111:     b->backend = PETSCSF_BACKEND_CUDA;
112:   #endif
113: #endif
114:   *sf = b;
115:   return(0);
116: }

118: /*@
119:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

121:    Collective

123:    Input Arguments:
124: .  sf - star forest

126:    Level: advanced

128: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
129: @*/
130: PetscErrorCode PetscSFReset(PetscSF sf)
131: {

136:   if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
137:   sf->nroots   = -1;
138:   sf->nleaves  = -1;
139:   sf->minleaf  = PETSC_MAX_INT;
140:   sf->maxleaf  = PETSC_MIN_INT;
141:   sf->mine     = NULL;
142:   sf->remote   = NULL;
143:   sf->graphset = PETSC_FALSE;
144:   PetscFree(sf->mine_alloc);
145:   PetscFree(sf->remote_alloc);
146:   sf->nranks = -1;
147:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
148: #if defined(PETSC_HAVE_DEVICE)
149:   for (PetscInt i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);}
150: #endif
151:   sf->degreeknown = PETSC_FALSE;
152:   PetscFree(sf->degree);
153:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
154:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
155:   PetscSFDestroy(&sf->multi);
156:   PetscLayoutDestroy(&sf->map);
157:   sf->setupcalled = PETSC_FALSE;
158:   return(0);
159: }

161: /*@C
162:    PetscSFSetType - Set the PetscSF communication implementation

164:    Collective on PetscSF

166:    Input Parameters:
167: +  sf - the PetscSF context
168: -  type - a known method

170:    Options Database Key:
171: .  -sf_type <type> - Sets the method; use -help for a list
172:    of available methods (for instance, window, basic, neighbor)

174:    Notes:
175:    See "include/petscsf.h" for available methods (for instance)
176: +    PETSCSFWINDOW - MPI-2/3 one-sided
177: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

179:   Level: intermediate

181: .seealso: PetscSFType, PetscSFCreate()
182: @*/
183: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
184: {
185:   PetscErrorCode ierr,(*r)(PetscSF);
186:   PetscBool      match;


192:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
193:   if (match) return(0);

195:   PetscFunctionListFind(PetscSFList,type,&r);
196:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
197:   /* Destroy the previous PetscSF implementation context */
198:   if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
199:   PetscMemzero(sf->ops,sizeof(*sf->ops));
200:   PetscObjectChangeTypeName((PetscObject)sf,type);
201:   (*r)(sf);
202:   return(0);
203: }

205: /*@C
206:   PetscSFGetType - Get the PetscSF communication implementation

208:   Not Collective

210:   Input Parameter:
211: . sf  - the PetscSF context

213:   Output Parameter:
214: . type - the PetscSF type name

216:   Level: intermediate

218: .seealso: PetscSFSetType(), PetscSFCreate()
219: @*/
220: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
221: {
225:   *type = ((PetscObject)sf)->type_name;
226:   return(0);
227: }

229: /*@
230:    PetscSFDestroy - destroy star forest

232:    Collective

234:    Input Arguments:
235: .  sf - address of star forest

237:    Level: intermediate

239: .seealso: PetscSFCreate(), PetscSFReset()
240: @*/
241: PetscErrorCode PetscSFDestroy(PetscSF *sf)
242: {

246:   if (!*sf) return(0);
248:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
249:   PetscSFReset(*sf);
250:   if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
251:   PetscHeaderDestroy(sf);
252:   return(0);
253: }

255: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
256: {
257:   PetscInt           i, nleaves;
258:   PetscMPIInt        size;
259:   const PetscInt    *ilocal;
260:   const PetscSFNode *iremote;
261:   PetscErrorCode     ierr;

264:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) return(0);
265:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
266:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
267:   for (i = 0; i < nleaves; i++) {
268:     const PetscInt rank = iremote[i].rank;
269:     const PetscInt remote = iremote[i].index;
270:     const PetscInt leaf = ilocal ? ilocal[i] : i;
271:     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
272:     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
273:     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
274:   }
275:   return(0);
276: }




281: /*@
282:    PetscSFSetUp - set up communication structures

284:    Collective

286:    Input Arguments:
287: .  sf - star forest communication object

289:    Level: beginner

291: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
292: @*/
293: PetscErrorCode PetscSFSetUp(PetscSF sf)
294: {

299:   PetscSFCheckGraphSet(sf,1);
300:   if (sf->setupcalled) return(0);
301:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
302:   PetscSFCheckGraphValid_Private(sf);
303:   if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);} /* Zero all sf->ops */
304:   if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
305: #if defined(PETSC_HAVE_CUDA)
306:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
307:     sf->ops->Malloc = PetscSFMalloc_Cuda;
308:     sf->ops->Free   = PetscSFFree_Cuda;
309:   }
310: #endif

312: #if defined(PETSC_HAVE_KOKKOS)
313:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
314:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
315:     sf->ops->Free   = PetscSFFree_Kokkos;
316:   }
317: #endif
318:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
319:   sf->setupcalled = PETSC_TRUE;
320:   return(0);
321: }

323: /*@
324:    PetscSFSetFromOptions - set PetscSF options using the options database

326:    Logically Collective

328:    Input Arguments:
329: .  sf - star forest

331:    Options Database Keys:
332: +  -sf_type               - implementation type, see PetscSFSetType()
333: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
334: .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
335:                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
336:                             If true, this option only works with -use_gpu_aware_mpi 1.
337: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
338:                                If true, this option only works with -use_gpu_aware_mpi 1.

340: -  -sf_backend cuda | kokkos -Select the device backend SF uses. Currently SF has two backends: cuda and Kokkos.
341:                               On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices,
342:                               the only available is kokkos.

344:    Level: intermediate
345: @*/
346: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
347: {
348:   PetscSFType    deft;
349:   char           type[256];
351:   PetscBool      flg;

355:   PetscObjectOptionsBegin((PetscObject)sf);
356:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
357:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
358:   PetscSFSetType(sf,flg ? type : deft);
359:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
360: #if defined(PETSC_HAVE_DEVICE)
361:   {
362:     char        backendstr[32] = {0};
363:     PetscBool   isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
364:     /* Change the defaults set in PetscSFCreate() with command line options */
365:     PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);
366:     PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
367:     PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
368:     PetscStrcasecmp("cuda",backendstr,&isCuda);
369:     PetscStrcasecmp("kokkos",backendstr,&isKokkos);
370:   #if defined(PETSC_HAVE_CUDA)
371:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373:     else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
374:   #elif defined(PETSC_HAVE_KOKKOS)
375:     if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376:   #endif
377:   }
378: #endif
379:   if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
380:   PetscOptionsEnd();
381:   return(0);
382: }

384: /*@
385:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

387:    Logically Collective

389:    Input Arguments:
390: +  sf - star forest
391: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

393:    Level: advanced

395: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
396: @*/
397: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
398: {
402:   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
403:   sf->rankorder = flg;
404:   return(0);
405: }

407: /*@
408:    PetscSFSetGraph - Set a parallel star forest

410:    Collective

412:    Input Arguments:
413: +  sf - star forest
414: .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
415: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
416: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
417: during setup in debug mode)
418: .  localmode - copy mode for ilocal
419: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
420: during setup in debug mode)
421: -  remotemode - copy mode for iremote

423:    Level: intermediate

425:    Notes:
426:     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

428:    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
429:    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
430:    needed

432:    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
433:    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

435: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
436: @*/
437: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
438: {

445:   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
446:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

448:   if (sf->nroots >= 0) { /* Reset only if graph already set */
449:     PetscSFReset(sf);
450:   }

452:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

454:   sf->nroots  = nroots;
455:   sf->nleaves = nleaves;

457:   if (nleaves && ilocal) {
458:     PetscInt i;
459:     PetscInt minleaf = PETSC_MAX_INT;
460:     PetscInt maxleaf = PETSC_MIN_INT;
461:     int      contiguous = 1;
462:     for (i=0; i<nleaves; i++) {
463:       minleaf = PetscMin(minleaf,ilocal[i]);
464:       maxleaf = PetscMax(maxleaf,ilocal[i]);
465:       contiguous &= (ilocal[i] == i);
466:     }
467:     sf->minleaf = minleaf;
468:     sf->maxleaf = maxleaf;
469:     if (contiguous) {
470:       if (localmode == PETSC_OWN_POINTER) {
471:         PetscFree(ilocal);
472:       }
473:       ilocal = NULL;
474:     }
475:   } else {
476:     sf->minleaf = 0;
477:     sf->maxleaf = nleaves - 1;
478:   }

480:   if (ilocal) {
481:     switch (localmode) {
482:     case PETSC_COPY_VALUES:
483:       PetscMalloc1(nleaves,&sf->mine_alloc);
484:       PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
485:       sf->mine = sf->mine_alloc;
486:       break;
487:     case PETSC_OWN_POINTER:
488:       sf->mine_alloc = (PetscInt*)ilocal;
489:       sf->mine       = sf->mine_alloc;
490:       break;
491:     case PETSC_USE_POINTER:
492:       sf->mine_alloc = NULL;
493:       sf->mine       = (PetscInt*)ilocal;
494:       break;
495:     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
496:     }
497:   }

499:   switch (remotemode) {
500:   case PETSC_COPY_VALUES:
501:     PetscMalloc1(nleaves,&sf->remote_alloc);
502:     PetscArraycpy(sf->remote_alloc,iremote,nleaves);
503:     sf->remote = sf->remote_alloc;
504:     break;
505:   case PETSC_OWN_POINTER:
506:     sf->remote_alloc = (PetscSFNode*)iremote;
507:     sf->remote       = sf->remote_alloc;
508:     break;
509:   case PETSC_USE_POINTER:
510:     sf->remote_alloc = NULL;
511:     sf->remote       = (PetscSFNode*)iremote;
512:     break;
513:   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
514:   }

516:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
517:   sf->graphset = PETSC_TRUE;
518:   return(0);
519: }

521: /*@
522:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

524:   Collective

526:   Input Parameters:
527: + sf      - The PetscSF
528: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
529: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

531:   Notes:
532:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
533:   n and N are local and global sizes of x respectively.

535:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
536:   sequential vectors y on all ranks.

538:   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
539:   sequential vector y on rank 0.

541:   In above cases, entries of x are roots and entries of y are leaves.

543:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
544:   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
545:   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
546:   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
547:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

549:   In this case, roots and leaves are symmetric.

551:   Level: intermediate
552:  @*/
553: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
554: {
555:   MPI_Comm       comm;
556:   PetscInt       n,N,res[2];
557:   PetscMPIInt    rank,size;
558:   PetscSFType    type;

562:   PetscObjectGetComm((PetscObject)sf, &comm);
563:   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
564:   MPI_Comm_rank(comm,&rank);
565:   MPI_Comm_size(comm,&size);

567:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
568:     type = PETSCSFALLTOALL;
569:     PetscLayoutCreate(comm,&sf->map);
570:     PetscLayoutSetLocalSize(sf->map,size);
571:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
572:     PetscLayoutSetUp(sf->map);
573:   } else {
574:     PetscLayoutGetLocalSize(map,&n);
575:     PetscLayoutGetSize(map,&N);
576:     res[0] = n;
577:     res[1] = -n;
578:     /* Check if n are same over all ranks so that we can optimize it */
579:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
580:     if (res[0] == -res[1]) { /* same n */
581:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
582:     } else {
583:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
584:     }
585:     PetscLayoutReference(map,&sf->map);
586:   }
587:   PetscSFSetType(sf,type);

589:   sf->pattern = pattern;
590:   sf->mine    = NULL; /* Contiguous */

592:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
593:      Also set other easy stuff.
594:    */
595:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
596:     sf->nleaves      = N;
597:     sf->nroots       = n;
598:     sf->nranks       = size;
599:     sf->minleaf      = 0;
600:     sf->maxleaf      = N - 1;
601:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
602:     sf->nleaves      = rank ? 0 : N;
603:     sf->nroots       = n;
604:     sf->nranks       = rank ? 0 : size;
605:     sf->minleaf      = 0;
606:     sf->maxleaf      = rank ? -1 : N - 1;
607:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
608:     sf->nleaves      = size;
609:     sf->nroots       = size;
610:     sf->nranks       = size;
611:     sf->minleaf      = 0;
612:     sf->maxleaf      = size - 1;
613:   }
614:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
615:   sf->graphset = PETSC_TRUE;
616:   return(0);
617: }

619: /*@
620:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

622:    Collective

624:    Input Arguments:

626: .  sf - star forest to invert

628:    Output Arguments:
629: .  isf - inverse of sf
630:    Level: advanced

632:    Notes:
633:    All roots must have degree 1.

635:    The local space may be a permutation, but cannot be sparse.

637: .seealso: PetscSFSetGraph()
638: @*/
639: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
640: {
642:   PetscMPIInt    rank;
643:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
644:   const PetscInt *ilocal;
645:   PetscSFNode    *roots,*leaves;

649:   PetscSFCheckGraphSet(sf,1);

652:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
653:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

655:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
656:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
657:   for (i=0; i<maxlocal; i++) {
658:     leaves[i].rank  = rank;
659:     leaves[i].index = i;
660:   }
661:   for (i=0; i <nroots; i++) {
662:     roots[i].rank  = -1;
663:     roots[i].index = -1;
664:   }
665:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
666:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);

668:   /* Check whether our leaves are sparse */
669:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
670:   if (count == nroots) newilocal = NULL;
671:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
672:     PetscMalloc1(count,&newilocal);
673:     for (i=0,count=0; i<nroots; i++) {
674:       if (roots[i].rank >= 0) {
675:         newilocal[count]   = i;
676:         roots[count].rank  = roots[i].rank;
677:         roots[count].index = roots[i].index;
678:         count++;
679:       }
680:     }
681:   }

683:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
684:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
685:   PetscFree2(roots,leaves);
686:   return(0);
687: }

689: /*@
690:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

692:    Collective

694:    Input Arguments:
695: +  sf - communication object to duplicate
696: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

698:    Output Arguments:
699: .  newsf - new communication object

701:    Level: beginner

703: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
704: @*/
705: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
706: {
707:   PetscSFType    type;

714:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
715:   PetscSFGetType(sf,&type);
716:   if (type) {PetscSFSetType(*newsf,type);}
717:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
718:     PetscSFCheckGraphSet(sf,1);
719:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
720:       PetscInt          nroots,nleaves;
721:       const PetscInt    *ilocal;
722:       const PetscSFNode *iremote;
723:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
724:       PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
725:     } else {
726:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
727:     }
728:   }
729: #if defined(PETSC_HAVE_DEVICE)
730:   (*newsf)->backend              = sf->backend;
731:   (*newsf)->use_default_stream   = sf->use_default_stream;
732:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
733:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
734: #endif
735:   if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
736:   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
737:   return(0);
738: }

740: /*@C
741:    PetscSFGetGraph - Get the graph specifying a parallel star forest

743:    Not Collective

745:    Input Arguments:
746: .  sf - star forest

748:    Output Arguments:
749: +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
750: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
751: .  ilocal - locations of leaves in leafdata buffers
752: -  iremote - remote locations of root vertices for each leaf on the current process

754:    Notes:
755:    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

757:    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
758:    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

760:    Level: intermediate

762: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
763: @*/
764: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
765: {

770:   if (sf->ops->GetGraph) {
771:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
772:   } else {
773:     if (nroots) *nroots = sf->nroots;
774:     if (nleaves) *nleaves = sf->nleaves;
775:     if (ilocal) *ilocal = sf->mine;
776:     if (iremote) *iremote = sf->remote;
777:   }
778:   return(0);
779: }

781: /*@
782:    PetscSFGetLeafRange - Get the active leaf ranges

784:    Not Collective

786:    Input Arguments:
787: .  sf - star forest

789:    Output Arguments:
790: +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
791: -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

793:    Level: developer

795: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
796: @*/
797: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
798: {
801:   PetscSFCheckGraphSet(sf,1);
802:   if (minleaf) *minleaf = sf->minleaf;
803:   if (maxleaf) *maxleaf = sf->maxleaf;
804:   return(0);
805: }

807: /*@C
808:    PetscSFViewFromOptions - View from Options

810:    Collective on PetscSF

812:    Input Parameters:
813: +  A - the star forest
814: .  obj - Optional object
815: -  name - command line option

817:    Level: intermediate
818: .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
819: @*/
820: PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
821: {

826:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
827:   return(0);
828: }

830: /*@C
831:    PetscSFView - view a star forest

833:    Collective

835:    Input Arguments:
836: +  sf - star forest
837: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

839:    Level: beginner

841: .seealso: PetscSFCreate(), PetscSFSetGraph()
842: @*/
843: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
844: {
845:   PetscErrorCode    ierr;
846:   PetscBool         iascii;
847:   PetscViewerFormat format;

851:   if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
854:   if (sf->graphset) {PetscSFSetUp(sf);}
855:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
856:   if (iascii) {
857:     PetscMPIInt rank;
858:     PetscInt    ii,i,j;

860:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
861:     PetscViewerASCIIPushTab(viewer);
862:     if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
863:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
864:       if (!sf->graphset) {
865:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
866:         PetscViewerASCIIPopTab(viewer);
867:         return(0);
868:       }
869:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
870:       PetscViewerASCIIPushSynchronized(viewer);
871:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
872:       for (i=0; i<sf->nleaves; i++) {
873:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
874:       }
875:       PetscViewerFlush(viewer);
876:       PetscViewerGetFormat(viewer,&format);
877:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
878:         PetscMPIInt *tmpranks,*perm;
879:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
880:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
881:         for (i=0; i<sf->nranks; i++) perm[i] = i;
882:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
883:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
884:         for (ii=0; ii<sf->nranks; ii++) {
885:           i = perm[ii];
886:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
887:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
888:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
889:           }
890:         }
891:         PetscFree2(tmpranks,perm);
892:       }
893:       PetscViewerFlush(viewer);
894:       PetscViewerASCIIPopSynchronized(viewer);
895:     }
896:     PetscViewerASCIIPopTab(viewer);
897:   }
898:   return(0);
899: }

901: /*@C
902:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

904:    Not Collective

906:    Input Arguments:
907: .  sf - star forest

909:    Output Arguments:
910: +  nranks - number of ranks referenced by local part
911: .  ranks - array of ranks
912: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
913: .  rmine - concatenated array holding local indices referencing each remote rank
914: -  rremote - concatenated array holding remote indices referenced for each remote rank

916:    Level: developer

918: .seealso: PetscSFGetLeafRanks()
919: @*/
920: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
921: {

926:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
927:   if (sf->ops->GetRootRanks) {
928:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
929:   } else {
930:     /* The generic implementation */
931:     if (nranks)  *nranks  = sf->nranks;
932:     if (ranks)   *ranks   = sf->ranks;
933:     if (roffset) *roffset = sf->roffset;
934:     if (rmine)   *rmine   = sf->rmine;
935:     if (rremote) *rremote = sf->rremote;
936:   }
937:   return(0);
938: }

940: /*@C
941:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

943:    Not Collective

945:    Input Arguments:
946: .  sf - star forest

948:    Output Arguments:
949: +  niranks - number of leaf ranks referencing roots on this process
950: .  iranks - array of ranks
951: .  ioffset - offset in irootloc for each rank (length niranks+1)
952: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

954:    Level: developer

956: .seealso: PetscSFGetRootRanks()
957: @*/
958: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
959: {

964:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
965:   if (sf->ops->GetLeafRanks) {
966:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
967:   } else {
968:     PetscSFType type;
969:     PetscSFGetType(sf,&type);
970:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
971:   }
972:   return(0);
973: }

975: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
976:   PetscInt i;
977:   for (i=0; i<n; i++) {
978:     if (needle == list[i]) return PETSC_TRUE;
979:   }
980:   return PETSC_FALSE;
981: }

983: /*@C
984:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

986:    Collective

988:    Input Arguments:
989: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
990: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

992:    Level: developer

994: .seealso: PetscSFGetRootRanks()
995: @*/
996: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
997: {
998:   PetscErrorCode     ierr;
999:   PetscTable         table;
1000:   PetscTablePosition pos;
1001:   PetscMPIInt        size,groupsize,*groupranks;
1002:   PetscInt           *rcount,*ranks;
1003:   PetscInt           i, irank = -1,orank = -1;

1007:   PetscSFCheckGraphSet(sf,1);
1008:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
1009:   PetscTableCreate(10,size,&table);
1010:   for (i=0; i<sf->nleaves; i++) {
1011:     /* Log 1-based rank */
1012:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
1013:   }
1014:   PetscTableGetCount(table,&sf->nranks);
1015:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
1016:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
1017:   PetscTableGetHeadPosition(table,&pos);
1018:   for (i=0; i<sf->nranks; i++) {
1019:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
1020:     ranks[i]--;             /* Convert back to 0-based */
1021:   }
1022:   PetscTableDestroy(&table);

1024:   /* We expect that dgroup is reliably "small" while nranks could be large */
1025:   {
1026:     MPI_Group group = MPI_GROUP_NULL;
1027:     PetscMPIInt *dgroupranks;
1028:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1029:     MPI_Group_size(dgroup,&groupsize);
1030:     PetscMalloc1(groupsize,&dgroupranks);
1031:     PetscMalloc1(groupsize,&groupranks);
1032:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1033:     if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
1034:     MPI_Group_free(&group);
1035:     PetscFree(dgroupranks);
1036:   }

1038:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1039:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1040:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1041:       if (InList(ranks[i],groupsize,groupranks)) break;
1042:     }
1043:     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1044:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1045:     }
1046:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1047:       PetscInt tmprank,tmpcount;

1049:       tmprank             = ranks[i];
1050:       tmpcount            = rcount[i];
1051:       ranks[i]            = ranks[sf->ndranks];
1052:       rcount[i]           = rcount[sf->ndranks];
1053:       ranks[sf->ndranks]  = tmprank;
1054:       rcount[sf->ndranks] = tmpcount;
1055:       sf->ndranks++;
1056:     }
1057:   }
1058:   PetscFree(groupranks);
1059:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1060:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1061:   sf->roffset[0] = 0;
1062:   for (i=0; i<sf->nranks; i++) {
1063:     PetscMPIIntCast(ranks[i],sf->ranks+i);
1064:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1065:     rcount[i]        = 0;
1066:   }
1067:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1068:     /* short circuit */
1069:     if (orank != sf->remote[i].rank) {
1070:       /* Search for index of iremote[i].rank in sf->ranks */
1071:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1072:       if (irank < 0) {
1073:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1074:         if (irank >= 0) irank += sf->ndranks;
1075:       }
1076:       orank = sf->remote[i].rank;
1077:     }
1078:     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1079:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1080:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1081:     rcount[irank]++;
1082:   }
1083:   PetscFree2(rcount,ranks);
1084:   return(0);
1085: }

1087: /*@C
1088:    PetscSFGetGroups - gets incoming and outgoing process groups

1090:    Collective

1092:    Input Argument:
1093: .  sf - star forest

1095:    Output Arguments:
1096: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1097: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1099:    Level: developer

1101: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1102: @*/
1103: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1104: {
1106:   MPI_Group      group = MPI_GROUP_NULL;

1109:   if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1110:   if (sf->ingroup == MPI_GROUP_NULL) {
1111:     PetscInt       i;
1112:     const PetscInt *indegree;
1113:     PetscMPIInt    rank,*outranks,*inranks;
1114:     PetscSFNode    *remote;
1115:     PetscSF        bgcount;

1117:     /* Compute the number of incoming ranks */
1118:     PetscMalloc1(sf->nranks,&remote);
1119:     for (i=0; i<sf->nranks; i++) {
1120:       remote[i].rank  = sf->ranks[i];
1121:       remote[i].index = 0;
1122:     }
1123:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1124:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1125:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1126:     PetscSFComputeDegreeEnd(bgcount,&indegree);
1127:     /* Enumerate the incoming ranks */
1128:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1129:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1130:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1131:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1132:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1133:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1134:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1135:     MPI_Group_free(&group);
1136:     PetscFree2(inranks,outranks);
1137:     PetscSFDestroy(&bgcount);
1138:   }
1139:   *incoming = sf->ingroup;

1141:   if (sf->outgroup == MPI_GROUP_NULL) {
1142:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1143:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1144:     MPI_Group_free(&group);
1145:   }
1146:   *outgoing = sf->outgroup;
1147:   return(0);
1148: }

1150: /*@
1151:    PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

1153:    Collective

1155:    Input Argument:
1156: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1158:    Output Arguments:
1159: .  multi - star forest with split roots, such that each root has degree exactly 1

1161:    Level: developer

1163:    Notes:

1165:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1166:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1167:    edge, it is a candidate for future optimization that might involve its removal.

1169: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1170: @*/
1171: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1172: {

1178:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1179:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1180:     *multi = sf->multi;
1181:     return(0);
1182:   }
1183:   if (!sf->multi) {
1184:     const PetscInt *indegree;
1185:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1186:     PetscSFNode    *remote;
1187:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1188:     PetscSFComputeDegreeBegin(sf,&indegree);
1189:     PetscSFComputeDegreeEnd(sf,&indegree);
1190:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1191:     inoffset[0] = 0;
1192:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1193:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1194:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1195:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1196:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1197:     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1198:       for (i=0; i<sf->nroots; i++) {
1199:         if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1200:       }
1201:     }
1202:     PetscMalloc1(sf->nleaves,&remote);
1203:     for (i=0; i<sf->nleaves; i++) {
1204:       remote[i].rank  = sf->remote[i].rank;
1205:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1206:     }
1207:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1208:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1209:     if (sf->rankorder) {        /* Sort the ranks */
1210:       PetscMPIInt rank;
1211:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1212:       PetscSFNode *newremote;
1213:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1214:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1215:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1216:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1217:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1218:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1219:       /* Sort the incoming ranks at each vertex, build the inverse map */
1220:       for (i=0; i<sf->nroots; i++) {
1221:         PetscInt j;
1222:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1223:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1224:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1225:       }
1226:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1227:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1228:       PetscMalloc1(sf->nleaves,&newremote);
1229:       for (i=0; i<sf->nleaves; i++) {
1230:         newremote[i].rank  = sf->remote[i].rank;
1231:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1232:       }
1233:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1234:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1235:     }
1236:     PetscFree3(inoffset,outones,outoffset);
1237:   }
1238:   *multi = sf->multi;
1239:   return(0);
1240: }

1242: /*@C
1243:    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

1245:    Collective

1247:    Input Arguments:
1248: +  sf - original star forest
1249: .  nselected  - number of selected roots on this process
1250: -  selected   - indices of the selected roots on this process

1252:    Output Arguments:
1253: .  esf - new star forest

1255:    Level: advanced

1257:    Note:
1258:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1259:    be done by calling PetscSFGetGraph().

1261: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1262: @*/
1263: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1264: {
1265:   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1266:   const PetscInt    *ilocal;
1267:   signed char       *rootdata,*leafdata,*leafmem;
1268:   const PetscSFNode *iremote;
1269:   PetscSFNode       *new_iremote;
1270:   MPI_Comm          comm;
1271:   PetscErrorCode    ierr;

1275:   PetscSFCheckGraphSet(sf,1);

1279:   PetscSFSetUp(sf);
1280:   PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1281:   PetscObjectGetComm((PetscObject)sf,&comm);
1282:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);

1284:   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1285:     PetscBool dups;
1287:     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1288:     for (i=0; i<nselected; i++)
1289:       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
1290:   }

1292:   if (sf->ops->CreateEmbeddedSF) {
1293:     (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);
1294:   } else {
1295:     /* A generic version of creating embedded sf */
1296:     PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1297:     maxlocal = maxleaf - minleaf + 1;
1298:     PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1299:     leafdata = leafmem - minleaf;
1300:     /* Tag selected roots and bcast to leaves */
1301:     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1302:     PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1303:     PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);

1305:     /* Build esf with leaves that are still connected */
1306:     esf_nleaves = 0;
1307:     for (i=0; i<nleaves; i++) {
1308:       j = ilocal ? ilocal[i] : i;
1309:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1310:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1311:       */
1312:       esf_nleaves += (leafdata[j] ? 1 : 0);
1313:     }
1314:     PetscMalloc1(esf_nleaves,&new_ilocal);
1315:     PetscMalloc1(esf_nleaves,&new_iremote);
1316:     for (i=n=0; i<nleaves; i++) {
1317:       j = ilocal ? ilocal[i] : i;
1318:       if (leafdata[j]) {
1319:         new_ilocal[n]        = j;
1320:         new_iremote[n].rank  = iremote[i].rank;
1321:         new_iremote[n].index = iremote[i].index;
1322:         ++n;
1323:       }
1324:     }
1325:     PetscSFCreate(comm,esf);
1326:     PetscSFSetFromOptions(*esf);
1327:     PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1328:     PetscFree2(rootdata,leafmem);
1329:   }
1330:   PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1331:   return(0);
1332: }

1334: /*@C
1335:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1337:   Collective

1339:   Input Arguments:
1340: + sf - original star forest
1341: . nselected  - number of selected leaves on this process
1342: - selected   - indices of the selected leaves on this process

1344:   Output Arguments:
1345: .  newsf - new star forest

1347:   Level: advanced

1349: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1350: @*/
1351: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1352: {
1353:   const PetscSFNode *iremote;
1354:   PetscSFNode       *new_iremote;
1355:   const PetscInt    *ilocal;
1356:   PetscInt          i,nroots,*leaves,*new_ilocal;
1357:   MPI_Comm          comm;
1358:   PetscErrorCode    ierr;

1362:   PetscSFCheckGraphSet(sf,1);

1366:   /* Uniq selected[] and put results in leaves[] */
1367:   PetscObjectGetComm((PetscObject)sf,&comm);
1368:   PetscMalloc1(nselected,&leaves);
1369:   PetscArraycpy(leaves,selected,nselected);
1370:   PetscSortedRemoveDupsInt(&nselected,leaves);
1371:   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

1373:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1374:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1375:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1376:   } else {
1377:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1378:     PetscMalloc1(nselected,&new_ilocal);
1379:     PetscMalloc1(nselected,&new_iremote);
1380:     for (i=0; i<nselected; ++i) {
1381:       const PetscInt l     = leaves[i];
1382:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1383:       new_iremote[i].rank  = iremote[l].rank;
1384:       new_iremote[i].index = iremote[l].index;
1385:     }
1386:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1387:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1388:   }
1389:   PetscFree(leaves);
1390:   return(0);
1391: }

1393: /*@C
1394:    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

1396:    Collective on PetscSF

1398:    Input Arguments:
1399: +  sf - star forest on which to communicate
1400: .  unit - data type associated with each node
1401: .  rootdata - buffer to broadcast
1402: -  op - operation to use for reduction

1404:    Output Arguments:
1405: .  leafdata - buffer to be reduced with values from each leaf's respective root

1407:    Level: intermediate

1409: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1410: @*/
1411: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1412: {
1414:   PetscMemType   rootmtype,leafmtype;

1418:   PetscSFSetUp(sf);
1419:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1420:   PetscGetMemType(rootdata,&rootmtype);
1421:   PetscGetMemType(leafdata,&leafmtype);
1422:   (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1423:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1424:   return(0);
1425: }

1427: /*@C
1428:    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

1430:    Collective

1432:    Input Arguments:
1433: +  sf - star forest
1434: .  unit - data type
1435: .  rootdata - buffer to broadcast
1436: -  op - operation to use for reduction

1438:    Output Arguments:
1439: .  leafdata - buffer to be reduced with values from each leaf's respective root

1441:    Level: intermediate

1443: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1444: @*/
1445: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1446: {

1451:   PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1452:   (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);
1453:   PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1454:   return(0);
1455: }

1457: /*@C
1458:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1460:    Collective

1462:    Input Arguments:
1463: +  sf - star forest
1464: .  unit - data type
1465: .  leafdata - values to reduce
1466: -  op - reduction operation

1468:    Output Arguments:
1469: .  rootdata - result of reduction of values from all leaves of each root

1471:    Level: intermediate

1473: .seealso: PetscSFBcastBegin()
1474: @*/
1475: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1476: {
1478:   PetscMemType   rootmtype,leafmtype;

1482:   PetscSFSetUp(sf);
1483:   PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1484:   PetscGetMemType(rootdata,&rootmtype);
1485:   PetscGetMemType(leafdata,&leafmtype);
1486:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1487:   PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1488:   return(0);
1489: }

1491: /*@C
1492:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1494:    Collective

1496:    Input Arguments:
1497: +  sf - star forest
1498: .  unit - data type
1499: .  leafdata - values to reduce
1500: -  op - reduction operation

1502:    Output Arguments:
1503: .  rootdata - result of reduction of values from all leaves of each root

1505:    Level: intermediate

1507: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1508: @*/
1509: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1510: {

1515:   PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1516:   (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1517:   PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1518:   return(0);
1519: }

1521: /*@C
1522:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1524:    Collective

1526:    Input Arguments:
1527: +  sf - star forest
1528: .  unit - data type
1529: .  leafdata - leaf values to use in reduction
1530: -  op - operation to use for reduction

1532:    Output Arguments:
1533: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1534: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1536:    Level: advanced

1538:    Note:
1539:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1540:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1541:    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1542:    integers.

1544: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1545: @*/
1546: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1547: {
1549:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1553:   PetscSFSetUp(sf);
1554:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1555:   PetscGetMemType(rootdata,&rootmtype);
1556:   PetscGetMemType(leafdata,&leafmtype);
1557:   PetscGetMemType(leafupdate,&leafupdatemtype);
1558:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1559:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1560:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1561:   return(0);
1562: }

1564: /*@C
1565:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1567:    Collective

1569:    Input Arguments:
1570: +  sf - star forest
1571: .  unit - data type
1572: .  leafdata - leaf values to use in reduction
1573: -  op - operation to use for reduction

1575:    Output Arguments:
1576: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1577: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1579:    Level: advanced

1581: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1582: @*/
1583: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1584: {

1589:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1590:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1591:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1592:   return(0);
1593: }

1595: /*@C
1596:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1598:    Collective

1600:    Input Arguments:
1601: .  sf - star forest

1603:    Output Arguments:
1604: .  degree - degree of each root vertex

1606:    Level: advanced

1608:    Notes:
1609:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1611: .seealso: PetscSFGatherBegin()
1612: @*/
1613: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1614: {

1619:   PetscSFCheckGraphSet(sf,1);
1621:   if (!sf->degreeknown) {
1622:     PetscInt i, nroots = sf->nroots, maxlocal;
1623:     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1624:     maxlocal = sf->maxleaf-sf->minleaf+1;
1625:     PetscMalloc1(nroots,&sf->degree);
1626:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1627:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1628:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1629:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1630:   }
1631:   *degree = NULL;
1632:   return(0);
1633: }

1635: /*@C
1636:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1638:    Collective

1640:    Input Arguments:
1641: .  sf - star forest

1643:    Output Arguments:
1644: .  degree - degree of each root vertex

1646:    Level: developer

1648:    Notes:
1649:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1651: .seealso:
1652: @*/
1653: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1654: {

1659:   PetscSFCheckGraphSet(sf,1);
1661:   if (!sf->degreeknown) {
1662:     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1663:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1664:     PetscFree(sf->degreetmp);
1665:     sf->degreeknown = PETSC_TRUE;
1666:   }
1667:   *degree = sf->degree;
1668:   return(0);
1669: }


1672: /*@C
1673:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1674:    Each multi-root is assigned index of the corresponding original root.

1676:    Collective

1678:    Input Arguments:
1679: +  sf - star forest
1680: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1682:    Output Arguments:
1683: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1684: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1686:    Level: developer

1688:    Notes:
1689:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1691: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1692: @*/
1693: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1694: {
1695:   PetscSF             msf;
1696:   PetscInt            i, j, k, nroots, nmroots;
1697:   PetscErrorCode      ierr;

1701:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1705:   PetscSFGetMultiSF(sf,&msf);
1706:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1707:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1708:   for (i=0,j=0,k=0; i<nroots; i++) {
1709:     if (!degree[i]) continue;
1710:     for (j=0; j<degree[i]; j++,k++) {
1711:       (*multiRootsOrigNumbering)[k] = i;
1712:     }
1713:   }
1714:   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1715:   if (nMultiRoots) *nMultiRoots = nmroots;
1716:   return(0);
1717: }

1719: /*@C
1720:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1722:    Collective

1724:    Input Arguments:
1725: +  sf - star forest
1726: .  unit - data type
1727: -  leafdata - leaf data to gather to roots

1729:    Output Argument:
1730: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1732:    Level: intermediate

1734: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1735: @*/
1736: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1737: {
1739:   PetscSF        multi = NULL;

1743:   PetscSFSetUp(sf);
1744:   PetscSFGetMultiSF(sf,&multi);
1745:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1746:   return(0);
1747: }

1749: /*@C
1750:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1752:    Collective

1754:    Input Arguments:
1755: +  sf - star forest
1756: .  unit - data type
1757: -  leafdata - leaf data to gather to roots

1759:    Output Argument:
1760: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1762:    Level: intermediate

1764: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1765: @*/
1766: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1767: {
1769:   PetscSF        multi = NULL;

1773:   PetscSFGetMultiSF(sf,&multi);
1774:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1775:   return(0);
1776: }

1778: /*@C
1779:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1781:    Collective

1783:    Input Arguments:
1784: +  sf - star forest
1785: .  unit - data type
1786: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1788:    Output Argument:
1789: .  leafdata - leaf data to be update with personal data from each respective root

1791:    Level: intermediate

1793: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1794: @*/
1795: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1796: {
1798:   PetscSF        multi = NULL;

1802:   PetscSFSetUp(sf);
1803:   PetscSFGetMultiSF(sf,&multi);
1804:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1805:   return(0);
1806: }

1808: /*@C
1809:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1811:    Collective

1813:    Input Arguments:
1814: +  sf - star forest
1815: .  unit - data type
1816: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1818:    Output Argument:
1819: .  leafdata - leaf data to be update with personal data from each respective root

1821:    Level: intermediate

1823: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1824: @*/
1825: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1826: {
1828:   PetscSF        multi = NULL;

1832:   PetscSFGetMultiSF(sf,&multi);
1833:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1834:   return(0);
1835: }

1837: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1838: {
1839:   PetscInt        i, n, nleaves;
1840:   const PetscInt *ilocal = NULL;
1841:   PetscHSetI      seen;
1842:   PetscErrorCode  ierr;

1845:   if (!PetscDefined(USE_DEBUG)) return(0);
1846:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1847:   PetscHSetICreate(&seen);
1848:   for (i = 0; i < nleaves; i++) {
1849:     const PetscInt leaf = ilocal ? ilocal[i] : i;
1850:     PetscHSetIAdd(seen,leaf);
1851:   }
1852:   PetscHSetIGetSize(seen,&n);
1853:   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1854:   PetscHSetIDestroy(&seen);
1855:   return(0);
1856: }

1858: /*@
1859:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1861:   Input Parameters:
1862: + sfA - The first PetscSF
1863: - sfB - The second PetscSF

1865:   Output Parameters:
1866: . sfBA - The composite SF

1868:   Level: developer

1870:   Notes:
1871:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1872:   forests, i.e. the same leaf is not connected with different roots.

1874:   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1875:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1876:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1877:   Bcast on sfA, then a Bcast on sfB, on connected nodes.

1879: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1880: @*/
1881: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1882: {
1883:   PetscErrorCode    ierr;
1884:   const PetscSFNode *remotePointsA,*remotePointsB;
1885:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1886:   const PetscInt    *localPointsA,*localPointsB;
1887:   PetscInt          *localPointsBA;
1888:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1889:   PetscBool         denseB;

1893:   PetscSFCheckGraphSet(sfA,1);
1895:   PetscSFCheckGraphSet(sfB,2);
1898:   PetscSFCheckLeavesUnique_Private(sfA);
1899:   PetscSFCheckLeavesUnique_Private(sfB);

1901:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1902:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1903:   if (localPointsA) {
1904:     PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1905:     for (i=0; i<numRootsB; i++) {
1906:       reorderedRemotePointsA[i].rank = -1;
1907:       reorderedRemotePointsA[i].index = -1;
1908:     }
1909:     for (i=0; i<numLeavesA; i++) {
1910:       if (localPointsA[i] >= numRootsB) continue;
1911:       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1912:     }
1913:     remotePointsA = reorderedRemotePointsA;
1914:   }
1915:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1916:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1917:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1918:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1919:   PetscFree(reorderedRemotePointsA);

1921:   denseB = (PetscBool)!localPointsB;
1922:   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1923:     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1924:     else numLeavesBA++;
1925:   }
1926:   if (denseB) {
1927:     localPointsBA  = NULL;
1928:     remotePointsBA = leafdataB;
1929:   } else {
1930:     PetscMalloc1(numLeavesBA,&localPointsBA);
1931:     PetscMalloc1(numLeavesBA,&remotePointsBA);
1932:     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1933:       const PetscInt l = localPointsB ? localPointsB[i] : i;

1935:       if (leafdataB[l-minleaf].rank == -1) continue;
1936:       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1937:       localPointsBA[numLeavesBA] = l;
1938:       numLeavesBA++;
1939:     }
1940:     PetscFree(leafdataB);
1941:   }
1942:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1943:   PetscSFSetFromOptions(*sfBA);
1944:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1945:   return(0);
1946: }

1948: /*@
1949:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

1951:   Input Parameters:
1952: + sfA - The first PetscSF
1953: - sfB - The second PetscSF

1955:   Output Parameters:
1956: . sfBA - The composite SF.

1958:   Level: developer

1960:   Notes:
1961:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1962:   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
1963:   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

1965:   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
1966:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
1967:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
1968:   a Reduce on sfB, on connected roots.

1970: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
1971: @*/
1972: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1973: {
1974:   PetscErrorCode    ierr;
1975:   const PetscSFNode *remotePointsA,*remotePointsB;
1976:   PetscSFNode       *remotePointsBA;
1977:   const PetscInt    *localPointsA,*localPointsB;
1978:   PetscSFNode       *reorderedRemotePointsA = NULL;
1979:   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
1980:   MPI_Op            op;
1981: #if defined(PETSC_USE_64BIT_INDICES)
1982:   PetscBool         iswin;
1983: #endif

1987:   PetscSFCheckGraphSet(sfA,1);
1989:   PetscSFCheckGraphSet(sfB,2);
1992:   PetscSFCheckLeavesUnique_Private(sfA);
1993:   PetscSFCheckLeavesUnique_Private(sfB);

1995:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1996:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

1998:   /* TODO: Check roots of sfB have degree of 1 */
1999:   /* Once we implement it, we can replace the MPI_MAXLOC
2000:      with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
2001:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2002:      the root condition is not meet.
2003:    */
2004:   op = MPI_MAXLOC;
2005: #if defined(PETSC_USE_64BIT_INDICES)
2006:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2007:   PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2008:   if (iswin) op = MPIU_REPLACE;
2009: #endif

2011:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2012:   PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2013:   for (i=0; i<maxleaf - minleaf + 1; i++) {
2014:     reorderedRemotePointsA[i].rank = -1;
2015:     reorderedRemotePointsA[i].index = -1;
2016:   }
2017:   if (localPointsA) {
2018:     for (i=0; i<numLeavesA; i++) {
2019:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2020:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2021:     }
2022:   } else {
2023:     for (i=0; i<numLeavesA; i++) {
2024:       if (i > maxleaf || i < minleaf) continue;
2025:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2026:     }
2027:   }

2029:   PetscMalloc1(numRootsB,&localPointsBA);
2030:   PetscMalloc1(numRootsB,&remotePointsBA);
2031:   for (i=0; i<numRootsB; i++) {
2032:     remotePointsBA[i].rank = -1;
2033:     remotePointsBA[i].index = -1;
2034:   }

2036:   PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2037:   PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2038:   PetscFree(reorderedRemotePointsA);
2039:   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2040:     if (remotePointsBA[i].rank == -1) continue;
2041:     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2042:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2043:     localPointsBA[numLeavesBA] = i;
2044:     numLeavesBA++;
2045:   }
2046:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2047:   PetscSFSetFromOptions(*sfBA);
2048:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2049:   return(0);
2050: }

2052: /*
2053:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

2055:   Input Parameters:
2056: . sf - The global PetscSF

2058:   Output Parameters:
2059: . out - The local PetscSF
2060:  */
2061: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2062: {
2063:   MPI_Comm           comm;
2064:   PetscMPIInt        myrank;
2065:   const PetscInt     *ilocal;
2066:   const PetscSFNode  *iremote;
2067:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2068:   PetscSFNode        *liremote;
2069:   PetscSF            lsf;
2070:   PetscErrorCode     ierr;

2074:   if (sf->ops->CreateLocalSF) {
2075:     (*sf->ops->CreateLocalSF)(sf,out);
2076:   } else {
2077:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2078:     PetscObjectGetComm((PetscObject)sf,&comm);
2079:     MPI_Comm_rank(comm,&myrank);

2081:     /* Find out local edges and build a local SF */
2082:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2083:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2084:     PetscMalloc1(lnleaves,&lilocal);
2085:     PetscMalloc1(lnleaves,&liremote);

2087:     for (i=j=0; i<nleaves; i++) {
2088:       if (iremote[i].rank == (PetscInt)myrank) {
2089:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2090:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2091:         liremote[j].index = iremote[i].index;
2092:         j++;
2093:       }
2094:     }
2095:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
2096:     PetscSFSetFromOptions(lsf);
2097:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2098:     PetscSFSetUp(lsf);
2099:     *out = lsf;
2100:   }
2101:   return(0);
2102: }

2104: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2105: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2106: {
2107:   PetscErrorCode     ierr;
2108:   PetscMemType       rootmtype,leafmtype;

2112:   PetscSFSetUp(sf);
2113:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2114:   PetscGetMemType(rootdata,&rootmtype);
2115:   PetscGetMemType(leafdata,&leafmtype);
2116:   if (sf->ops->BcastToZero) {
2117:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2118:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2119:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2120:   return(0);
2121: }