Actual source code: sfbasic.c

petsc-master 2019-12-07

  2:  #include <../src/vec/is/sf/impls/basic/sfbasic.h>

  4: /*===================================================================================*/
  5: /*              Internal routines for PetscSFPack                              */
  6: /*===================================================================================*/

  8: /* Return the root and leaf MPI requests for communication in the given direction. Since we use persistent
  9:    requests, initialize them here if they have not been initialized yet.
 10: */
 11: static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
 12: {
 14:   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
 15:   PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
 16:   const PetscInt *rootoffset,*leafoffset;
 17:   PetscMPIInt    n;
 18:   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
 19:   MPI_Datatype   unit = link->unit;
 20:   PetscMemType   rootmtype,leafmtype;

 23:   if (use_gpu_aware_mpi) {
 24:     rootmtype = link->rootmtype;
 25:     leafmtype = link->leafmtype;
 26:   } else {
 27:     rootmtype = PETSC_MEMTYPE_HOST;
 28:     leafmtype = PETSC_MEMTYPE_HOST;
 29:   }

 31:   if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
 32:     PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
 33:     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
 34:       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
 35:         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
 36:         PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
 37:         MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);
 38:       }
 39:     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
 40:       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
 41:         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
 42:         PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
 43:         MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);
 44:       }
 45:     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
 46:     link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
 47:   }

 49:   if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
 50:     PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
 51:     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
 52:       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
 53:         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
 54:         PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
 55:         MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);
 56:       }
 57:     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
 58:       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
 59:         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
 60:         PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
 61:         MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);
 62:       }
 63:     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
 64:     link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
 65:   }

 67:   if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
 68:   if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
 69:   return(0);
 70: }
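The routine above relies on MPI persistent requests: each send or receive is set up once with MPI_Send_init/MPI_Recv_init and then restarted cheaply for every later communication on the same link. A minimal standalone sketch of that pattern (plain MPI, independent of PETSc; the buffer, peer, tag, and loop count are made up for illustration):

#include <mpi.h>

/* Sketch: set up a persistent send once, start it many times, free it at the end */
static int persistent_send_sketch(MPI_Comm comm, int peer, double *buf, int n)
{
  MPI_Request req;
  int         i;

  MPI_Send_init(buf, n, MPI_DOUBLE, peer, /*tag=*/0, comm, &req);  /* set up once */
  for (i = 0; i < 10; i++) {
    /* ... refill buf ... */
    MPI_Start(&req);                    /* restart the same request */
    MPI_Wait(&req, MPI_STATUS_IGNORE);  /* PetscSFPack uses MPI_Startall/MPI_Waitall on arrays of these */
  }
  MPI_Request_free(&req);               /* persistent requests must be freed explicitly */
  return 0;
}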

 72: /* Common part shared by SFBasic and SFNeighbor, based on the fact that they both deal with sparse graphs. */
 73: PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
 74: {
 75:   PetscErrorCode    ierr;
 76:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
 77:   PetscInt          i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
 78:   const PetscInt    *rootoffset,*leafoffset;
 79:   PetscSFPack       *p,link;
 80:   PetscBool         match;

 83:   PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);

 85:   /* Look for types in cache */
 86:   for (p=&bas->avail; (link=*p); p=&link->next) {
 87:     MPIPetsc_Type_compare(unit,link->unit,&match);
 88:     if (match) {
 89:       *p = link->next; /* Remove from available list */
 90:       goto found;
 91:     }
 92:   }

 94:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
 95:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
 96:   PetscNew(&link);
 97:   PetscSFPackSetUp_Host(sf,link,unit);
 98:   PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag); /* One tag per link */

100:   /* Allocate root, leaf, self buffers, and MPI requests */
101:   link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
102:   link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
103:   link->selfbuflen = rootoffset[ndrootranks];
104:   link->nrootreqs  = nrootreqs;
105:   link->nleafreqs  = nleafreqs;
106:   nreqs            = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
107:   PetscMalloc1(nreqs,&link->reqs);
108:   for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */

110:   for (i=0; i<2; i++) { /* Two communication directions */
111:     for (j=0; j<2; j++) { /* Two memory types */
112:       link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
113:       link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
114:     }
115:   }

117: found:
118:   link->rootmtype = rootmtype;
119:   link->leafmtype = leafmtype;
120: #if defined(PETSC_HAVE_CUDA)
121:   PetscSFPackSetUp_Device(sf,link,unit);
122:   if (!use_gpu_aware_mpi) {
123:     /* If not using GPU aware MPI, we always need buffers on host. In case root/leafdata is on device, we copy root/leafdata to/from
124:        these buffers for MPI. We only need buffers for remote neighbors since self-to-self communication is not done via MPI.
125:      */
126:     if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {
127:       if (rootmtype == PETSC_MEMTYPE_DEVICE && sf->use_pinned_buf) {
128:         PetscMallocPinnedMemory(link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);
129:       } else {
130:         PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);
131:       }
132:     }
133:     if (!link->leafbuf[PETSC_MEMTYPE_HOST]) {
134:       if (leafmtype == PETSC_MEMTYPE_DEVICE && sf->use_pinned_buf) {
135:         PetscMallocPinnedMemory(link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);
136:       } else {
137:         PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);
138:       }
139:     }
140:   }
141: #endif
142:   if (!link->rootbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);}
143:   if (!link->leafbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);}
144:   if (!link->selfbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);}
145:   if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);}
146:   link->rootdata = rootdata;
147:   link->leafdata = leafdata;
148:   link->next     = bas->inuse;
149:   bas->inuse     = link;
150:   *mylink        = link;
151:   return(0);
152: }
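The reqs array allocated in the function above is carved into eight contiguous blocks: four root blocks of length nrootreqs followed by four leaf blocks of length nleafreqs, indexed by communication direction and memory type as 2*direction+memtype. A small sketch of the same offset arithmetic, with made-up sizes, just to make the layout explicit:

#include <stdio.h>

/* Sketch of the request-array layout: [4 root blocks][4 leaf blocks], block index = 2*dir + mtype */
int main(void)
{
  const int nrootreqs = 3, nleafreqs = 5;  /* hypothetical sizes */
  int dir, mtype;

  for (dir = 0; dir < 2; dir++) {
    for (mtype = 0; mtype < 2; mtype++) {
      int rootoff = nrootreqs*(2*dir + mtype);
      int leafoff = nrootreqs*4 + nleafreqs*(2*dir + mtype);
      printf("dir=%d mtype=%d: rootreqs at reqs+%d, leafreqs at reqs+%d\n", dir, mtype, rootoff, leafoff);
    }
  }
  return 0;
}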

154: static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
155: {
156:   PetscErrorCode    ierr;
157:   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;

160:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);
161:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);
162:   PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);
163:   return(0);
164: }

166: /*===================================================================================*/
167: /*              SF public interface implementations                                  */
168: /*===================================================================================*/
169: PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
170: {
172:   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
173:   PetscInt       *rlengths,*ilengths,i;
174:   PetscMPIInt    rank,niranks,*iranks,tag;
175:   MPI_Comm       comm;
176:   MPI_Group      group;
177:   MPI_Request    *rootreqs,*leafreqs;

180:   MPI_Comm_group(PETSC_COMM_SELF,&group);
181:   PetscSFSetUpRanks(sf,group);
182:   MPI_Group_free(&group);
183:   PetscObjectGetComm((PetscObject)sf,&comm);
184:   PetscObjectGetNewTag((PetscObject)sf,&tag);
185:   MPI_Comm_rank(comm,&rank);
186:   /*
187:    * Inform roots about how many leaves and from which ranks
188:    */
189:   PetscMalloc1(sf->nranks,&rlengths);
190:   /* Determine the number, sending ranks, and lengths of incoming messages */
191:   for (i=0; i<sf->nranks; i++) {
192:     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
193:   }
194:   PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);

196:   /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
197:      We could instead sort the ranks there, at the price of allocating extra working arrays. Presumably,
198:      niranks is small and the sorting is cheap.
199:    */
200:   PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);

202:   /* Partition into distinguished and non-distinguished incoming ranks */
203:   bas->ndiranks = sf->ndranks;
204:   bas->niranks = bas->ndiranks + niranks;
205:   PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);
206:   bas->ioffset[0] = 0;
207:   for (i=0; i<bas->ndiranks; i++) {
208:     bas->iranks[i] = sf->ranks[i];
209:     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
210:   }
211:   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
212:   for ( ; i<bas->niranks; i++) {
213:     bas->iranks[i] = iranks[i-bas->ndiranks];
214:     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
215:   }
216:   bas->itotal = bas->ioffset[i];
217:   PetscFree(rlengths);
218:   PetscFree(iranks);
219:   PetscFree(ilengths);

221:   /* Send leaf identities to roots */
222:   PetscMalloc1(bas->itotal,&bas->irootloc);
223:   PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);
224:   for (i=bas->ndiranks; i<bas->niranks; i++) {
225:     MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);
226:   }
227:   for (i=0; i<sf->nranks; i++) {
228:     PetscMPIInt npoints;
229:     PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);
230:     if (i < sf->ndranks) {
231:       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
232:       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
233:       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
234:       PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);
235:       continue;
236:     }
237:     MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);
238:   }
239:   MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);
240:   MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);
241:   PetscFree2(rootreqs,leafreqs);

243:   sf->selfleafdups    = PETSC_TRUE; /* The conservative assumption is that there are data races */
244:   sf->remoteleafdups  = PETSC_TRUE;
245:   bas->selfrootdups   = PETSC_TRUE;
246:   bas->remoterootdups = PETSC_TRUE;

248:   /* Setup packing optimization for roots and leaves */
249:   PetscSFPackSetupOptimizations_Basic(sf);
250:   return(0);
251: }

253: PETSC_INTERN PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
254: {

258:   PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");
259:   PetscOptionsBool("-sf_use_pinned_buffer","Use pinned (nonpagable) memory for send/recv buffers on host","PetscSFSetFromOptions",sf->use_pinned_buf,&sf->use_pinned_buf,NULL);
260:   PetscOptionsTail();
261:   return(0);
262: }
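For illustration, the -sf_use_pinned_buffer flag registered above could also be set from code before the options are processed; a sketch (assuming sf is an already created PetscSF):

/* Sketch: enable pinned host send/recv buffers programmatically instead of on the command line */
ierr = PetscOptionsSetValue(NULL,"-sf_use_pinned_buffer","true");CHKERRQ(ierr);
ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);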

264: PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
265: {
266:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
267:   PetscErrorCode    ierr;

270:   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
271:   PetscFree2(bas->iranks,bas->ioffset);
272:   PetscFree(bas->irootloc);
273: #if defined(PETSC_HAVE_CUDA)
274:   if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d=NULL;}
275: #endif
276:   PetscSFPackDestroyAvailable(sf,&bas->avail);
277:   PetscSFPackDestroyOptimizations_Basic(sf);
278:   return(0);
279: }

281: PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
282: {

286:   PetscSFReset(sf);
287:   PetscFree(sf->data);
288:   return(0);
289: }

291: PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
292: {
294:   PetscBool      iascii;

297:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
298:   if (iascii) {PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");}
299:   return(0);
300: }

302: static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
303: {
304:   PetscErrorCode    ierr;
305:   PetscSFPack       link;
306:   const PetscInt    *rootloc = NULL;
307:   MPI_Request       *rootreqs,*leafreqs;

310:   PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);
311:   PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);

313:   PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);
314:   /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
315:   MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);

317:   /* Do Isend */
318:   PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);
319:   MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);

321:   /* Do self to self communication via memcpy only when rootdata and leafdata are in different memory */
322:   if (rootmtype != leafmtype) {PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);}
323:   return(0);
324: }

326: PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
327: {
328:   PetscErrorCode    ierr;
329:   PetscSFPack       link;
330:   const PetscInt    *leafloc = NULL;

333:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
334:   PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);
335:   PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
336:   PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);
337:   PetscSFPackReclaim(sf,&link);
338:   return(0);
339: }
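The two routines above are the Begin/End halves behind the public broadcast interface; a caller-side sketch of a root-to-leaf broadcast (rootdata and leafdata are hypothetical arrays sized to the local roots and leaves of sf):

/* Sketch: split-phase root-to-leaf broadcast; other work may overlap the communication */
ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);
/* ... computation that does not touch rootdata/leafdata ... */
ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);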

341: /* leaf -> root with reduction */
342: static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
343: {
344:   PetscErrorCode    ierr;
345:   PetscSFPack       link;
346:   const PetscInt    *leafloc = NULL;
347:   MPI_Request       *rootreqs = NULL,*leafreqs = NULL; /* initialized to NULL to suppress compiler warnings about uninitialized values */

350:   PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);

352:   PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);
353:   PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);
354:   /* Eagerly post root receives for non-distinguished ranks */
355:   MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);

357:   /* Pack and send leaf data */
358:   PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);
359:   MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);

361:   if (rootmtype != leafmtype) {PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);}
362:   return(0);
363: }

365: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
366: {
367:   PetscErrorCode    ierr;
368:   PetscSFPack       link;
369:   const PetscInt    *rootloc = NULL;

372:   PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);
373:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
374:   PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);
375:   PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);
376:   PetscSFPackReclaim(sf,&link);
377:   return(0);
378: }
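Correspondingly, a caller-side sketch of the leaf-to-root reduction implemented by the two routines above (array names are hypothetical):

/* Sketch: sum leaf contributions into their roots */
ierr = PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
ierr = PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);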

380: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
381: {

385:   PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);
386:   return(0);
387: }

389: static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
390: {
391:   PetscErrorCode    ierr;
392:   PetscSFPack       link;
393:   const PetscInt    *rootloc = NULL,*leafloc = NULL;
394:   MPI_Request       *rootreqs,*leafreqs;

397:   PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);
398:   PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
399:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
400:   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
401:   PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);
402:   PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);

404:   /* Post leaf receives */
405:   MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);

407:   /* Process local fetch-and-op, post root sends */
408:   PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);
409:   MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);
410:   if (rootmtype != leafmtype) {PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);}

412:   /* Unpack and insert fetched data into leaves */
413:   PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);
414:   PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);
415:   PetscSFPackReclaim(sf,&link);
416:   return(0);
417: }
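A caller-side sketch of the fetch-and-op operation backed by the two routines above: leaf values are combined into the roots while leafupdate receives the root values seen just before each contribution (array names are hypothetical):

/* Sketch: add leaf values into roots and fetch the pre-update root values */
ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);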

419: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
420: {
421:   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

424:   if (niranks)  *niranks  = bas->niranks;
425:   if (iranks)   *iranks   = bas->iranks;
426:   if (ioffset)  *ioffset  = bas->ioffset;
427:   if (irootloc) *irootloc = bas->irootloc;
428:   return(0);
429: }

431: /* An optimized PetscSFCreateEmbeddedSF. We aggressively reuse the communication already established on sf:
432:    we need one bcast on sf, and no further communication, to build the embedded sf. Note that selected[]
433:    was sorted before this routine was called.
434:  */
435: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
436: {
437:   PetscSF           esf;
438:   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
439:   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
440:   PetscMPIInt       *esf_ranks;
441:   const PetscMPIInt *ranks,*iranks;
442:   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
443:   PetscBool         connected;
444:   PetscSFPack       link;
445:   PetscSFNode       *new_iremote;
446:   PetscSF_Basic     *bas;
447:   PetscErrorCode    ierr;

450:   PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
451:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */

453:   /* Find out which leaves are still connected to roots in the embedded sf */
454:   PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
455:   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
456:   /* We abuse the term leafdata here: its size is usually the number of leaf data items, but here it is the number of leaves (always >= the number of leaf data items) */
457:   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
458:   PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
459:   /* Tag selected roots */
460:   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

462:   /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
463:      sf but skip the unpacking (from the leaf buffer to leafdata). The raw data in the leaf buffer is what we are
464:      interested in, since it tells which leaves are connected to which ranks.
465:    */
466:   PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE); /* Need to give leafdata but we won't use it */
467:   PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);
468:   PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);
469:   PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote); /* Get send info */
470:   esf_nranks = esf_ndranks = connected_leaves = 0;
471:   for (i=0; i<nranks; i++) {
472:     connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
473:     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
474:     count     = roffset[i+1] - roffset[i];
475:     for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
476:     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
477:   }

479:   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
480:   PetscMalloc1(connected_leaves,&new_ilocal);
481:   PetscMalloc1(connected_leaves,&new_iremote);
482:   PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);
483:   p    = 0; /* Counter for connected root ranks */
484:   q    = 0; /* Counter for connected leaves */
485:   esf_roffset[0] = 0;
486:   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
487:     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
488:     connected = PETSC_FALSE;
489:     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
490:         if (buffer[k]) {
491:         esf_rmine[q]         = new_ilocal[q] = rmine[j];
492:         esf_rremote[q]       = rremote[j];
493:         new_iremote[q].index = rremote[j];
494:         new_iremote[q].rank  = ranks[i];
495:         connected            = PETSC_TRUE;
496:         q++;
497:       }
498:     }
499:     if (connected) {
500:       esf_ranks[p]     = ranks[i];
501:       esf_roffset[p+1] = q;
502:       p++;
503:     }
504:   }

506:   PetscSFPackReclaim(sf,&link);

508:   /* SetGraph internally resets the SF, so we only set its fields after the call */
509:   PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
510:   esf->nranks  = esf_nranks;
511:   esf->ndranks = esf_ndranks;
512:   esf->ranks   = esf_ranks;
513:   esf->roffset = esf_roffset;
514:   esf->rmine   = esf_rmine;
515:   esf->rremote = esf_rremote;

517:   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
518:   bas  = (PetscSF_Basic*)esf->data;
519:   PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc); /* Get recv info */
520:   /* The embedded sf always has simpler communication than the original one, so we might allocate longer arrays than
521:      needed here; we expect these arrays to be short, so we do not care. The benefit is that we can fill them by parsing
522:      irootloc just once. */
523:   PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
524:   PetscMalloc1(ioffset[niranks],&bas->irootloc);
525:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
526:   p = 0; /* Counter for connected leaf ranks */
527:   q = 0; /* Counter for connected roots */
528:   for (i=0; i<niranks; i++) {
529:     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
530:     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
531:       PetscInt loc;
532:       PetscFindInt(irootloc[j],nselected,selected,&loc);
533:       if (loc >= 0) { /* Found in selected[], so this root is connected */
534:         bas->irootloc[q++] = irootloc[j];
535:         connected = PETSC_TRUE;
536:       }
537:     }
538:     if (connected) {
539:       bas->niranks++;
540:       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
541:       bas->iranks[p]    = iranks[i];
542:       bas->ioffset[p+1] = q;
543:       p++;
544:     }
545:   }
546:   bas->itotal = q;

548:   /* Setup packing optimizations */
549:   PetscSFPackSetupOptimizations_Basic(esf);
550:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

552:   PetscFree2(rootdata,leafdata);
553:   *newsf = esf;
554:   return(0);
555: }
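A caller-side sketch of using this routine through the public interface, with a made-up, sorted selection of roots:

/* Sketch: build an embedded sf that keeps only roots 0 and 3 of sf */
PetscInt selected[2] = {0,3};
PetscSF  esf;
ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
/* ... use esf like any other PetscSF ... */
ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);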

557: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
558: {
559:   PetscSF           esf;
560:   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
561:   const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
562:   const PetscMPIInt *iranks;
563:   PetscSFPack       link;
564:   PetscSFNode       *new_iremote;
565:   const PetscSFNode *iremote;
566:   PetscSF_Basic     *bas;
567:   MPI_Group         group;
568:   PetscErrorCode    ierr;

571:   PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
572:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */

574:   /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
575:   PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
576:   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
577:   PetscMalloc1(nselected,&new_ilocal);
578:   PetscMalloc1(nselected,&new_iremote);
579:   for (i=0; i<nselected; i++) {
580:     const PetscInt l     = selected[i];
581:     new_ilocal[i]        = ilocal ? ilocal[l] : l;
582:     new_iremote[i].rank  = iremote[l].rank;
583:     new_iremote[i].index = iremote[l].index;
584:   }

586:   /* Tag selected leaves before PetscSFSetGraph, since new_ilocal might become NULL because we use PETSC_OWN_POINTER below */
587:   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
588:   PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
589:   for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

591:   PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);

593:   /* Set up the outgoing communication (i.e., send info). We cannot reuse rmine etc. in sf since there is no way to
594:      map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices).
595:    */
596:   MPI_Comm_group(PETSC_COMM_SELF,&group);
597:   PetscSFSetUpRanks(esf,group);
598:   MPI_Group_free(&group);

600:   /* Set up the incoming communication (i.e., recv info) */
601:   PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);
602:   bas  = (PetscSF_Basic*)esf->data;
603:   PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
604:   PetscMalloc1(ioffset[niranks],&bas->irootloc);

606:   /* Pass info about selected leaves to root buffer */
607:   PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE); /* -minleaf to re-adjust start address of leafdata */
608:   PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);
609:   PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);

611:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
612:   p = 0; /* Counter for connected leaf ranks */
613:   q = 0; /* Counter for connected roots */
614:   for (i=0; i<niranks; i++) {
615:     PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
616:     buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
617:     for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
618:       if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
619:     }
620:     if (connected) {
621:       bas->niranks++;
622:       if (i<ndiranks) bas->ndiranks++;
623:       bas->iranks[p]    = iranks[i];
624:       bas->ioffset[p+1] = q;
625:       p++;
626:     }
627:   }
628:   bas->itotal = q;
629:   PetscSFPackReclaim(sf,&link);

631:   /* Setup packing optimizations */
632:   PetscSFPackSetupOptimizations_Basic(esf);
633:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

635:   PetscFree2(rootdata,leafdata);
636:   *newsf = esf;
637:   return(0);
638: }

640: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
641: {
642:   PetscSF_Basic  *dat;

646:   sf->ops->SetUp                = PetscSFSetUp_Basic;
647:   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
648:   sf->ops->Reset                = PetscSFReset_Basic;
649:   sf->ops->Destroy              = PetscSFDestroy_Basic;
650:   sf->ops->View                 = PetscSFView_Basic;
651:   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
652:   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
653:   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
654:   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
655:   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
656:   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
657:   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
658:   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
659:   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;

661:   PetscNewLog(sf,&dat);
662:   sf->data = (void*)dat;
663:   return(0);
664: }
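For context, a minimal self-contained sketch of creating and setting up an sf of this type from user code (the one-leaf graph is made up for illustration):

#include <petscsf.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscSF        sf;
  PetscSFNode    remote;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  remote.rank  = 0;                 /* every rank has one leaf pointing at root 0 on rank 0 */
  remote.index = 0;
  ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);          /* ends up in PetscSFSetUp_Basic above */
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}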