Actual source code: sfbasic.c

petsc-master 2020-09-26
Report Typos and Errors

  2: #include "petscsf.h"
  3: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
  4: #include <../src/vec/is/sf/impls/basic/sfpack.h>

  6: /*===================================================================================*/
  7: /*              SF public interface implementations                                  */
  8: /*===================================================================================*/
  9: PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
 10: {
 12:   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
 13:   PetscInt       *rlengths,*ilengths,i;
 14:   PetscMPIInt    rank,niranks,*iranks,tag;
 15:   MPI_Comm       comm;
 16:   MPI_Group      group;
 17:   MPI_Request    *rootreqs,*leafreqs;

 20:   MPI_Comm_group(PETSC_COMM_SELF,&group);
 21:   PetscSFSetUpRanks(sf,group);
 22:   MPI_Group_free(&group);
 23:   PetscObjectGetComm((PetscObject)sf,&comm);
 24:   PetscObjectGetNewTag((PetscObject)sf,&tag);
 25:   MPI_Comm_rank(comm,&rank);
 26:   /*
 27:    * Inform roots about how many leaves and from which ranks
 28:    */
 29:   PetscMalloc1(sf->nranks,&rlengths);
 30:   /* Determine number, sending ranks and length of incoming */
 31:   for (i=0; i<sf->nranks; i++) {
 32:     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
 33:   }
 34:   PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);

 36:   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
 37:      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
 38:      small and the sorting is cheap.
 39:    */
 40:   PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);

 42:   /* Partition into distinguished and non-distinguished incoming ranks */
 43:   bas->ndiranks = sf->ndranks;
 44:   bas->niranks = bas->ndiranks + niranks;
 45:   PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);
 46:   bas->ioffset[0] = 0;
 47:   for (i=0; i<bas->ndiranks; i++) {
 48:     bas->iranks[i] = sf->ranks[i];
 49:     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
 50:   }
 51:   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
 52:   for (; i<bas->niranks; i++) {
 53:     bas->iranks[i] = iranks[i-bas->ndiranks];
 54:     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
 55:   }
 56:   bas->itotal = bas->ioffset[i];
 57:   PetscFree(rlengths);
 58:   PetscFree(iranks);
 59:   PetscFree(ilengths);

 61:   /* Send leaf identities to roots */
 62:   PetscMalloc1(bas->itotal,&bas->irootloc);
 63:   PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);
 64:   for (i=bas->ndiranks; i<bas->niranks; i++) {
 65:     MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);
 66:   }
 67:   for (i=0; i<sf->nranks; i++) {
 68:     PetscMPIInt npoints;
 69:     PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);
 70:     if (i < sf->ndranks) {
 71:       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
 72:       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
 73:       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
 74:       PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);
 75:       continue;
 76:     }
 77:     MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);
 78:   }
 79:   MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);
 80:   MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);
 81:   PetscFree2(rootreqs,leafreqs);

 83:   sf->nleafreqs  = sf->nranks - sf->ndranks;
 84:   bas->nrootreqs = bas->niranks - bas->ndiranks;
 85:   sf->persistent = PETSC_TRUE;

 87:   /* Setup fields related to packing */
 88:   PetscSFSetUpPackFields(sf);
 89:   return(0);
 90: }

 92: PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
 93: {
 94:   PetscErrorCode    ierr;
 95:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

 98:   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
 99:   PetscFree2(bas->iranks,bas->ioffset);
100:   PetscFree(bas->irootloc);
101: #if defined(PETSC_HAVE_DEVICE)
102:   for (PetscInt i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);}
103: #endif
104:   PetscSFLinkDestroy(sf,&bas->avail);
105:   PetscSFResetPackFields(sf);
106:   return(0);
107: }

109: PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
110: {

114:   PetscSFReset_Basic(sf);
115:   PetscFree(sf->data);
116:   return(0);
117: }

119: PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
120: {
122:   PetscBool      iascii;

125:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
126:   if (iascii) {PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");}
127:   return(0);
128: }

130: static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
131: {
132:   PetscErrorCode    ierr;
133:   PetscSFLink       link = NULL;
134:   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
135:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

138:   /* Create a communication link, which provides buffers & MPI requests etc */
139:   PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);
140:   /* Get MPI requests from the link. We do not need buffers explicitly since we use persistent MPI */
141:   PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_../../../../../..2LEAF,NULL,NULL,&rootreqs,&leafreqs);
142:   /* Post Irecv for remote */
143:   MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);
144:   /* Pack rootdata and do Isend for remote */
145:   PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);
146:   MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);
147:   /* Do local BcastAndOp, which overlaps with the irecv/isend above */
148:   PetscSFLinkBcastAndOpLocal(sf,link,rootdata,leafdata,op);
149:   return(0);
150: }

152: PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
153: {
154:   PetscErrorCode    ierr;
155:   PetscSFLink       link = NULL;

158:   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
159:   PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
160:   /* Wait for the completion of mpi */
161:   PetscSFLinkMPIWaitall(sf,link,PETSCSF_../../../../../..2LEAF);
162:   /* Unpack leafdata and reclaim the link */
163:   PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);
164:   PetscSFLinkReclaim(sf,&link);
165:   return(0);
166: }

168: /* Shared by ReduceBegin and FetchAndOpBegin */
169: PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
170: {
171:   PetscErrorCode    ierr;
172:   PetscSFLink       link;
173:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
174:   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;

177:   PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);
178:   PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2../../../../../..,NULL,NULL,&rootreqs,&leafreqs);
179:   MPI_Startall_irecv(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);
180:   PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);
181:   MPI_Startall_isend(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);
182:   *out = link;
183:   return(0);
184: }

186: /* leaf -> root with reduction */
187: static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
188: {
189:   PetscErrorCode    ierr;
190:   PetscSFLink       link = NULL;

193:   PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);
194:   PetscSFLinkReduceLocal(sf,link,leafdata,rootdata,op);
195:   return(0);
196: }

198: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
199: {
200:   PetscErrorCode    ierr;
201:   PetscSFLink       link = NULL;

204:   PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
205:   PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2../../../../../..);
206:   PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);
207:   PetscSFLinkReclaim(sf,&link);
208:   return(0);
209: }

211: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
212: {
213:   PetscErrorCode    ierr;
214:   PetscSFLink       link = NULL;

217:   PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);
218:   PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);
219:   return(0);
220: }

222: static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
223: {
224:   PetscErrorCode    ierr;
225:   PetscSFLink       link = NULL;
226:   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
227:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

230:   PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
231:   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
232:   PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2../../../../../..);
233:   /* Do fetch-and-op, the (remote) update results are in rootbuf */
234:   PetscSFLinkFetchRootData(sf,link,PETSCSF_REMOTE,rootdata,op);

236:   /* Bcast rootbuf to leafupdate */
237:   PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_../../../../../..2LEAF,NULL,NULL,&rootreqs,&leafreqs);
238:   /* Post leaf receives and root sends */
239:   MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);
240:   MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);
241:   /* Unpack and insert fetched data into leaves */
242:   PetscSFLinkMPIWaitall(sf,link,PETSCSF_../../../../../..2LEAF);
243:   PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPIU_REPLACE);
244:   PetscSFLinkReclaim(sf,&link);
245:   return(0);
246: }

248: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
249: {
250:   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

253:   if (niranks)  *niranks  = bas->niranks;
254:   if (iranks)   *iranks   = bas->iranks;
255:   if (ioffset)  *ioffset  = bas->ioffset;
256:   if (irootloc) *irootloc = bas->irootloc;
257:   return(0);
258: }

260: /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
261:    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
262:    was sorted before calling the routine.
263:  */
264: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
265: {
266:   PetscSF           esf;
267:   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
268:   PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
269:   char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
270:   PetscMPIInt       *esf_ranks;
271:   const PetscMPIInt *ranks,*iranks;
272:   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
273:   PetscBool         connected;
274:   PetscSFNode       *new_iremote;
275:   PetscSF_Basic     *bas;
276:   PetscErrorCode    ierr;

279:   PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
280:   PetscSFSetFromOptions(esf);
281:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */

283:   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
284:   PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
285:   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
286:   maxlocal = maxleaf - minleaf + 1;
287:   PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
288:   leafdata = leafmem - minleaf;
289:   /* Tag selected roots */
290:   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

292:   PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata);
293:   PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata);
294:   PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote); /* Get send info */
295:   esf_nranks = esf_ndranks = esf_nleaves = 0;
296:   for (i=0; i<nranks; i++) {
297:     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
298:     for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
299:     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
300:   }

302:   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
303:   PetscMalloc1(esf_nleaves,&new_ilocal);
304:   PetscMalloc1(esf_nleaves,&new_iremote);
305:   PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);
306:   p    = 0; /* Counter for connected root ranks */
307:   q    = 0; /* Counter for connected leaves */
308:   esf_roffset[0] = 0;
309:   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
310:     connected = PETSC_FALSE;
311:     for (j=roffset[i]; j<roffset[i+1]; j++) {
312:       if (leafdata[rmine[j]]) {
313:         esf_rmine[q]         = new_ilocal[q] = rmine[j];
314:         esf_rremote[q]       = rremote[j];
315:         new_iremote[q].index = rremote[j];
316:         new_iremote[q].rank  = ranks[i];
317:         connected            = PETSC_TRUE;
318:         q++;
319:       }
320:     }
321:     if (connected) {
322:       esf_ranks[p]     = ranks[i];
323:       esf_roffset[p+1] = q;
324:       p++;
325:     }
326:   }

328:   /* SetGraph internally resets the SF, so we only set its fields after the call */
329:   PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
330:   esf->nranks    = esf_nranks;
331:   esf->ndranks   = esf_ndranks;
332:   esf->ranks     = esf_ranks;
333:   esf->roffset   = esf_roffset;
334:   esf->rmine     = esf_rmine;
335:   esf->rremote   = esf_rremote;
336:   esf->nleafreqs = esf_nranks - esf_ndranks;

338:   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
339:   bas  = (PetscSF_Basic*)esf->data;
340:   PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc); /* Get recv info */
341:   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
342:      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
343:    */
344:   PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
345:   PetscMalloc1(ioffset[niranks],&bas->irootloc);
346:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
347:   p = 0; /* Counter for connected leaf ranks */
348:   q = 0; /* Counter for connected roots */
349:   for (i=0; i<niranks; i++) {
350:     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
351:     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
352:       if (rootdata[irootloc[j]]) {
353:         bas->irootloc[q++] = irootloc[j];
354:         connected = PETSC_TRUE;
355:       }
356:     }
357:     if (connected) {
358:       bas->niranks++;
359:       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
360:       bas->iranks[p]    = iranks[i];
361:       bas->ioffset[p+1] = q;
362:       p++;
363:     }
364:   }
365:   bas->itotal     = q;
366:   bas->nrootreqs  = bas->niranks - bas->ndiranks;
367:   esf->persistent = PETSC_TRUE;
368:   /* Setup packing related fields */
369:   PetscSFSetUpPackFields(esf);

371:   /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
372: #if defined(PETSC_HAVE_CUDA)
373:   if (esf->backend == PETSCSF_BACKEND_CUDA) {
374:     esf->ops->Malloc = PetscSFMalloc_Cuda;
375:     esf->ops->Free   = PetscSFFree_Cuda;
376:   }
377: #endif

379: #if defined(PETSC_HAVE_KOKKOS)
380:   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
381:     esf->ops->Malloc = PetscSFMalloc_Kokkos;
382:     esf->ops->Free   = PetscSFFree_Kokkos;
383:   }
384: #endif
385:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
386:   PetscFree2(rootdata,leafmem);
387:   *newsf = esf;
388:   return(0);
389: }

391: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
392: {
393:   PetscSF_Basic  *dat;

397:   sf->ops->SetUp                = PetscSFSetUp_Basic;
398:   sf->ops->Reset                = PetscSFReset_Basic;
399:   sf->ops->Destroy              = PetscSFDestroy_Basic;
400:   sf->ops->View                 = PetscSFView_Basic;
401:   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
402:   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
403:   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
404:   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
405:   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
406:   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
407:   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
408:   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;

410:   PetscNewLog(sf,&dat);
411:   sf->data = (void*)dat;
412:   return(0);
413: }