Actual source code: sfbasic.c

petsc-master 2019-09-20

  2:  #include <../src/vec/is/sf/impls/basic/sfbasic.h>

  4: /*===================================================================================*/
  5: /*              Internal routines for PetscSFPack_Basic                              */
  6: /*===================================================================================*/

  8: /* Return the root and leaf MPI requests for communication in the given direction. Since we use persistent
  9:    requests, initialize them on first use.
 10: */
 11: static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
 12: {
 13:   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
 14:   MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half;
 15:   PetscInt       i,nrootranks,ndrootranks,nleafranks,ndleafranks;
 16:   const PetscInt *rootoffset,*leafoffset;
 17:   PetscMPIInt    n;
 18:   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);


 23:   if (!link->initialized[direction]) {
 24:     PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
 25:     PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
 26:     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
 27:       for (i=0; i<nrootranks; i++) {
 28:         if (i >= ndrootranks) {
 29:           PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
 30:           MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);
 31:         }
 32:       }
 33:       for (i=0; i<nleafranks; i++) {
 34:         if (i >= ndleafranks) {
 35:           PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
 36:           MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);
 37:         }
 38:       }
 39:     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
 40:       for (i=0; i<nrootranks; i++) {
 41:         if (i >= ndrootranks) {
 42:           PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
 43:           MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);
 44:         }
 45:       }
 46:       for (i=0; i<nleafranks; i++) {
 47:         if (i >= ndleafranks) {
 48:           PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
 49:           MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);
 50:         }
 51:       }
 52:     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction);

 54:     link->initialized[direction] = PETSC_TRUE;
 55:   }

 57:   if (rootreqs) *rootreqs = requests;
 58:   if (leafreqs) *leafreqs = requests + bas->niranks;
 59:   return(0);
 60: }
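
The routine above creates persistent MPI requests once per link and direction; subsequent communications merely start and wait on them. Below is a small stand-alone sketch (not part of sfbasic.c; the peer, tag, and buffers are invented) of that persistent-request lifecycle in plain MPI:

#include <mpi.h>
/* Illustrative sketch only: create persistent requests once, start/wait on them repeatedly,
   and free them at the end, mirroring how sfbasic.c manages link->requests. */
static void persistent_request_demo(MPI_Comm comm,int peer,int tag)
{
  int         sendbuf[16],recvbuf[16],it;
  MPI_Request reqs[2];
  for (it=0; it<16; it++) sendbuf[it] = it;
  MPI_Send_init(sendbuf,16,MPI_INT,peer,tag,comm,&reqs[0]);   /* created once, like MPI_Send_init() above */
  MPI_Recv_init(recvbuf,16,MPI_INT,peer,tag,comm,&reqs[1]);   /* created once, like MPI_Recv_init() above */
  for (it=0; it<10; it++) {
    MPI_Startall(2,reqs);                                     /* cf. MPI_Startall_irecv()/MPI_Start_isend() above */
    MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);                  /* requests stay allocated after completion */
  }
  MPI_Request_free(&reqs[0]);                                 /* freed only on reset, cf. PetscSFReset_Basic() */
  MPI_Request_free(&reqs[1]);
}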

 62: static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *rkey,const void *lkey,PetscSFDirection direction,PetscSFPack_Basic *mylink)
 63: {
 64:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
 65:   PetscErrorCode    ierr;
 66:   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
 67:   const PetscInt    *rootoffset,*leafoffset;
 68:   PetscSFPack       *p;
 69:   PetscSFPack_Basic link;

 72:   PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rkey,lkey);

 74:   /* Look for types in cache */
 75:   for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) {
 76:     PetscBool match;
 77:     MPIPetsc_Type_compare(unit,link->unit,&match);
 78:     if (match) {
 79:       *p = link->next; /* Remove from available list */
 80:       goto found;
 81:     }
 82:   }

 84:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
 85:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
 86:   PetscNew(&link);
 87:   PetscSFPackSetupType((PetscSFPack)link,unit);
 88:   PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);
 89:   /* Allocate twice the requests: the first half is used for reduce (leaf2root) communication, the second half for bcast (root2leaf) communication */
 90:   link->half = nrootranks + nleafranks;
 91:   PetscMalloc1(link->half*2,&link->requests);
 92:   for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be initialized since some requests are never used, yet we call MPI_Waitall on all of them */
 93:   /* One tag per link */
 94:   PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);

 96:   /* Allocate root and leaf buffers */
 97:   for (i=0; i<nrootranks; i++) {PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);}
 98:   for (i=0; i<nleafranks; i++) {
 99:     if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
100:       if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
101:       link->leaf[i] = link->root[0];
102:       continue;
103:     }
104:     PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);
105:   }

107: found:
108:   link->rkey = rkey;
109:   link->lkey = lkey;
110:   link->next = bas->inuse;
111:   bas->inuse = (PetscSFPack)link;

113:   *mylink    = link;
114:   return(0);
115: }

117: /*===================================================================================*/
118: /*              SF public interface implementations                                  */
119: /*===================================================================================*/
120: PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
121: {
122:   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
124:   PetscInt       *rlengths,*ilengths,i;
125:   PetscMPIInt    rank,niranks,*iranks,tag;
126:   MPI_Comm       comm;
127:   MPI_Group      group;
128:   MPI_Request    *rootreqs,*leafreqs;

131:   MPI_Comm_group(PETSC_COMM_SELF,&group);
132:   PetscSFSetUpRanks(sf,group);
133:   MPI_Group_free(&group);
134:   PetscObjectGetComm((PetscObject)sf,&comm);
135:   PetscObjectGetNewTag((PetscObject)sf,&tag);
136:   MPI_Comm_rank(comm,&rank);
137:   /*
138:    * Inform roots about how many leaves and from which ranks
139:    */
140:   PetscMalloc1(sf->nranks,&rlengths);
141:   /* Determine number, sending ranks, and length of incoming */
142:   for (i=0; i<sf->nranks; i++) {
143:     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
144:   }
145:   PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);

147:   /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
148:      We could instead sort the ranks there, at the price of allocating extra work arrays. Presumably niranks is
149:      small and the sorting is cheap.
150:    */
151:   PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);

153:   /* Partition into distinguished and non-distinguished incoming ranks */
154:   bas->ndiranks = sf->ndranks;
155:   bas->niranks = bas->ndiranks + niranks;
156:   PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);
157:   bas->ioffset[0] = 0;
158:   for (i=0; i<bas->ndiranks; i++) {
159:     bas->iranks[i] = sf->ranks[i];
160:     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
161:   }
162:   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
163:   for ( ; i<bas->niranks; i++) {
164:     bas->iranks[i] = iranks[i-bas->ndiranks];
165:     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
166:   }
167:   bas->itotal = bas->ioffset[i];
168:   PetscFree(rlengths);
169:   PetscFree(iranks);
170:   PetscFree(ilengths);

172:   /* Send leaf identities to roots */
173:   PetscMalloc1(bas->itotal,&bas->irootloc);
174:   PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);
175:   for (i=bas->ndiranks; i<bas->niranks; i++) {
176:     MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);
177:   }
178:   for (i=0; i<sf->nranks; i++) {
179:     PetscMPIInt npoints;
180:     PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);
181:     if (i < sf->ndranks) {
182:       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
183:       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
184:       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
185:       PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);
186:       continue;
187:     }
188:     MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);
189:   }
190:   MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);
191:   MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);
192:   PetscFree2(rootreqs,leafreqs);

194:   /* Setup packing optimization for root and leaf */
195:   PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);
196:   PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);
197:   return(0);
198: }
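
PetscSFSetUp_Basic() runs when the public PetscSFSetUp() is called on an sf of type PETSCSFBASIC that has a graph attached. A minimal caller sketch follows; the one-leaf ring graph is invented for illustration and error checks are elided, as they are throughout this listing:

#include <petscsf.h>
static PetscErrorCode setup_demo(MPI_Comm comm)
{
  PetscSF     sf;
  PetscSFNode remote;
  PetscMPIInt rank,size;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  remote.rank  = (rank+1)%size;  /* one leaf per process, referencing root 0 on the next rank */
  remote.index = 0;
  PetscSFCreate(comm,&sf);
  PetscSFSetType(sf,PETSCSFBASIC);
  PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
  PetscSFSetUp(sf);              /* dispatches to PetscSFSetUp_Basic() */
  PetscSFDestroy(&sf);
  return(0);
}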

200: static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
201: {

205:   PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");
206:   PetscOptionsTail();
207:   return(0);
208: }

210: PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
211: {
212:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
213:   PetscErrorCode    ierr;
214:   PetscSFPack_Basic link,next;

217:   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
218:   PetscFree2(bas->iranks,bas->ioffset);
219:   PetscFree(bas->irootloc);
220:   for (link=(PetscSFPack_Basic)bas->avail; link; link=next) {
221:     PetscInt i;
222:     next = (PetscSFPack_Basic)link->next;
223:     if (!link->isbuiltin) {MPI_Type_free(&link->unit);}
224:     for (i=0; i<bas->niranks; i++) {PetscFree(link->root[i]);}
225:     for (i=sf->ndranks; i<sf->nranks; i++) {PetscFree(link->leaf[i]);} /* Free only non-distinguished leaf buffers */
226:     PetscFree2(link->root,link->leaf);
227:     /* Free persistent requests using MPI_Request_free */
228:     for (i=0; i<link->half*2; i++) {
229:       if (link->requests[i] != MPI_REQUEST_NULL) {MPI_Request_free(&link->requests[i]);}
230:     }
231:     PetscFree(link->requests);
232:     PetscFree(link);
233:   }
234:   bas->avail = NULL;
235:   PetscSFPackDestoryOptimization(&sf->leafpackopt);
236:   PetscSFPackDestoryOptimization(&bas->rootpackopt);
237:   return(0);
238: }

240: PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
241: {

245:   PetscSFReset(sf);
246:   PetscFree(sf->data);
247:   return(0);
248: }

250: PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
251: {
253:   PetscBool      iascii;

256:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
257:   if (iascii) {
258:     PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");
259:   }
260:   return(0);
261: }

263: static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
264: {
265:   PetscErrorCode    ierr;
266:   PetscSFPack_Basic link;
267:   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
268:   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
269:   const PetscMPIInt *rootranks,*leafranks;
270:   MPI_Request       *rootreqs,*leafreqs;
271:   PetscMPIInt       n;
272:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

275:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);
276:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);
277:   PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);

279:   PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);
280:   /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */
281:   PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);
282:   MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks); /* One can wait but not start a null request */

284:   /* Pack and send root data */
285:   for (i=0; i<nrootranks; i++) {
286:     void *packstart = link->root[i];
287:     PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
288:     (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
289:     if (i < ndrootranks) continue; /* shared memory */
290:     MPI_Start_isend(n,unit,&rootreqs[i]);
291:   }
292:   return(0);
293: }

295: PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
296: {
297:   PetscErrorCode    ierr;
298:   PetscSFPack_Basic link;
299:   PetscInt          i,nleafranks,ndleafranks;
300:   const PetscInt    *leafoffset,*leafloc;
301:   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
302:   PetscMPIInt       typesize = -1;

305:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);
306:   PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);
307:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);
308:   PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);

310:   if (UnpackAndOp) { typesize = link->unitbytes; }
311:   else { MPI_Type_size(unit,&typesize); }

313:   for (i=0; i<nleafranks; i++) {
314:     PetscMPIInt n   = leafoffset[i+1] - leafoffset[i];
315:     char *packstart = (char *) link->leaf[i];
316:     if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); }
317: #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
318:     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
319:       PetscInt j;
320:       for (j=0; j<n; j++) { MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op); }
321:     }
322: #else
323:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
324: #endif
325:   }

327:   PetscSFPackReclaim(sf,(PetscSFPack*)&link);
328:   return(0);
329: }
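
From user code, the two routines above are driven as a Begin/End pair so that local work can overlap the communication. A hedged sketch; rootdata and leafdata are hypothetical arrays sized to match the graph on sf (nroots and nleaves as returned by PetscSFGetGraph()), and MPIU_REPLACE is just one possible op:

  PetscInt *rootdata,*leafdata;
  PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);
  /* ... fill rootdata on the processes that own the roots ... */
  PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPIU_REPLACE); /* posts leaf receives, packs and sends root data */
  /* ... independent local work can overlap the communication here ... */
  PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPIU_REPLACE);   /* waits on the requests and unpacks into leafdata */
  PetscFree2(rootdata,leafdata);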

331: /* leaf -> root with reduction */
332: static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
333: {
334:   PetscSFPack_Basic link;
335:   PetscErrorCode    ierr;
336:   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
337:   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
338:   const PetscMPIInt *rootranks,*leafranks;
339:   MPI_Request       *rootreqs,*leafreqs;
340:   PetscMPIInt       n;

343:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);
344:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);
345:   PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);

347:   PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);
348:   /* Eagerly post root receives for non-distinguished ranks */
349:   PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);
350:   MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);

352:   /* Pack and send leaf data */
353:   for (i=0; i<nleafranks; i++) {
354:     void *packstart = link->leaf[i];
355:     PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
356:     (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart);
357:     if (i < ndleafranks) continue; /* shared memory */
358:     MPI_Start_isend(n,unit,&leafreqs[i]);
359:   }
360:   return(0);
361: }

363: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
364: {
365:   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
366:   PetscErrorCode    ierr;
367:   PetscSFPack_Basic link;
368:   PetscInt          i,nrootranks;
369:   PetscMPIInt       typesize = -1;
370:   const PetscInt    *rootoffset,*rootloc;
371:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

374:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);
375:   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
376:   PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);
377:   PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);
378:   PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);
379:   if (UnpackAndOp) {
380:     typesize = link->unitbytes;
381:   }
382:   else {
383:     MPI_Type_size(unit,&typesize);
384:   }
385:   for (i=0; i<nrootranks; i++) {
386:     PetscMPIInt n   = rootoffset[i+1] - rootoffset[i];
387:     char *packstart = (char *) link->root[i];

389:     if (UnpackAndOp) {
390:       (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart);
391:     }
392: #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
393:     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
394:       PetscInt j;

396:       for (j = 0; j < n; j++) {
397:         MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);
398:       }
399:     }
400: #else
401:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
402: #endif
403:   }
404:   PetscSFPackReclaim(sf,(PetscSFPack*)&link);
405:   return(0);
406: }
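
The reduce pair mirrors the bcast pair in the opposite direction, combining leaf contributions into the roots with an MPI_Op. A minimal caller sketch; leafdata, rootdata, and the MPIU_SUM op are illustrative assumptions:

  PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM); /* packs leaf data, posts root receives and leaf sends */
  /* ... other local work ... */
  PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);   /* waits, then applies the SUM reduction into rootdata */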

408: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
409: {

413:   PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);
414:   return(0);
415: }

417: static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
418: {
419:   PetscErrorCode    (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
420:   PetscErrorCode    ierr;
421:   PetscSFPack_Basic link;
422:   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
423:   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
424:   const PetscMPIInt *rootranks,*leafranks;
425:   MPI_Request       *rootreqs,*leafreqs;
426:   PetscMPIInt       n;
427:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

430:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);
431:   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
432:   PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);
433:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);
434:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);

436:   PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);
437:   /* Post leaf receives */
438:   PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);
439:   MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);

441:   /* Process local fetch-and-op, post root sends */
442:   PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);
443:   for (i=0; i<nrootranks; i++) {
444:     void *packstart = link->root[i];
445:     PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
446:     (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
447:     if (i < ndrootranks) continue; /* shared memory */
448:     MPI_Start_isend(n,unit,&rootreqs[i]);
449:   }
450:   PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);
451:   for (i=0; i<nleafranks; i++) {
452:     const void  *packstart = link->leaf[i];
453:     PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
454:     (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart);
455:   }
456:   PetscSFPackReclaim(sf,(PetscSFPack*)&link);
457:   return(0);
458: }
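
Fetch-and-op combines a reduce into the roots with returning the pre-update root values to the leaves. A common use, sketched below with hypothetical variable names (rootcounter, leafcount, leafoffset) and error checks elided as in this listing, is handing out offsets from a shared counter:

  /* rootcounter: running counter at each root; leafcount: this leaf's contribution;
     leafoffset: receives the root value seen just before this leaf's contribution was added */
  PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcounter,leafcount,leafoffset,MPIU_SUM);
  PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcounter,leafcount,leafoffset,MPIU_SUM);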

460: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
461: {
462:   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

465:   if (niranks)  *niranks  = bas->niranks;
466:   if (iranks)   *iranks   = bas->iranks;
467:   if (ioffset)  *ioffset  = bas->ioffset;
468:   if (irootloc) *irootloc = bas->irootloc;
469:   return(0);
470: }

472: /* An optimized PetscSFCreateEmbeddedSF. We aggressively make use of the communication already established on sf:
473:    a single bcast on sf, and no further communication, suffices to build the embedded sf. Note that selected[]
474:    must be sorted before calling this routine.
475:  */
476: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
477: {
478:   PetscSF           esf;
479:   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
480:   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*leafbuf,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
481:   PetscMPIInt       *esf_ranks;
482:   const PetscMPIInt *ranks,*iranks;
483:   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
484:   PetscBool         connected;
485:   PetscSFPack_Basic link;
486:   PetscSFNode       *new_iremote;
487:   PetscSF_Basic     *bas;
488:   PetscErrorCode    ierr;

491:   PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
492:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */

494:   /* Find out which leaves are still connected to roots in the embedded sf */
495:   PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
496:   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
497:   /* We abuse the term leafdata here: its size is usually the number of leaf data items, but here it is the number of leaves (always >= the number of leaf data items) */
498:   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
499:   PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
500:   /* Tag selected roots */
501:   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

503:   /* Bcast from roots to leaves to tag connected leaves. We reuse the bcast communication already established on
504:      sf but skip the unpacking (from leaf buffer to leafdata). The raw data in the leaf buffers is what we are
505:      interested in, since it tells which leaves are connected to which ranks.
506:    */
507:   PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE); /* Need to give leafdata but we won't use it */
508:   PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);
509:   PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);
510:   PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote); /* Get send info */
511:   esf_nranks = esf_ndranks = connected_leaves = 0;
512:   for (i=0; i<nranks; i++) { /* Scan leaf data to calculate some counts */
513:     leafbuf   = (PetscInt*)link->leaf[i];
514:     connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
515:     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
516:       if (leafbuf[k]) {
517:         connected_leaves++;
518:         connected   = PETSC_TRUE;
519:       }
520:     }
521:     if (connected) {esf_nranks++; if (i<ndranks) esf_ndranks++;}
522:   }

524:   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
525:   PetscMalloc1(connected_leaves,&new_ilocal);
526:   PetscMalloc1(connected_leaves,&new_iremote);
527:   PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);
528:   p    = 0; /* Counter for connected root ranks */
529:   q    = 0; /* Counter for connected leaves */
530:   esf_roffset[0] = 0;
531:   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
532:     leafbuf   = (PetscInt*)link->leaf[i];
533:     connected = PETSC_FALSE;
534:     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
535:       if (leafbuf[k]) {
536:         esf_rmine[q]         = new_ilocal[q] = rmine[j];
537:         esf_rremote[q]       = rremote[j];
538:         new_iremote[q].index = rremote[j];
539:         new_iremote[q].rank  = ranks[i];
540:         connected            = PETSC_TRUE;
541:         q++;
542:       }
543:     }
544:     if (connected) {
545:       esf_ranks[p]     = ranks[i];
546:       esf_roffset[p+1] = q;
547:       p++;
548:     }
549:   }

551:   PetscSFPackReclaim(sf,(PetscSFPack*)&link);

553:   /* SetGraph internally resets the SF, so we only set its fields after the call */
554:   PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
555:   esf->nranks  = esf_nranks;
556:   esf->ndranks = esf_ndranks;
557:   esf->ranks   = esf_ranks;
558:   esf->roffset = esf_roffset;
559:   esf->rmine   = esf_rmine;
560:   esf->rremote = esf_rremote;

562:   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
563:   bas  = (PetscSF_Basic*)esf->data;
564:   PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc); /* Get recv info */
565:   /* The embedded sf always has simpler communication than the original one, so we might allocate longer arrays than needed here. We
566:      expect these arrays to be short, so we do not mind; the benefit is that we can fill them by parsing irootloc just once.
567:    */
568:   PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
569:   PetscMalloc1(ioffset[niranks],&bas->irootloc);
570:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
571:   p = 0; /* Counter for connected leaf ranks */
572:   q = 0; /* Counter for connected roots */
573:   for (i=0; i<niranks; i++) {
574:     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
575:     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
576:       PetscInt loc;
577:       PetscFindInt(irootloc[j],nselected,selected,&loc);
578:       if (loc >= 0) { /* Found in selected[], so this root is connected */
579:         bas->irootloc[q++] = irootloc[j];
580:         connected = PETSC_TRUE;
581:       }
582:     }
583:     if (connected) {
584:       bas->niranks++;
585:       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
586:       bas->iranks[p]    = iranks[i];
587:       bas->ioffset[p+1] = q;
588:       p++;
589:     }
590:   }
591:   bas->itotal = q;

593:   /* Setup packing optimizations */
594:   PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);
595:   PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);
596:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

598:   PetscFree2(rootdata,leafdata);
599:   *newsf = esf;
600:   return(0);
601: }
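
Users reach the routine above through the public PetscSFCreateEmbeddedSF(), which keeps only the selected roots and the leaves still connected to them. A sketch with invented root indices; error checks elided:

  PetscInt selected[2] = {0,3};                /* keep only roots 0 and 3 owned by this process; must be sorted */
  PetscSF  esf;
  PetscSFCreateEmbeddedSF(sf,2,selected,&esf); /* dispatches to the optimized routine above for PETSCSFBASIC */
  /* ... use esf like any other PetscSF ... */
  PetscSFDestroy(&esf);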

603: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
604: {
605:   PetscSF           esf;
606:   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
607:   const PetscInt    *ilocal,*ioffset,*irootloc;
608:   const PetscMPIInt *iranks;
609:   PetscSFPack_Basic link;
610:   PetscSFNode       *new_iremote;
611:   const PetscSFNode *iremote;
612:   PetscSF_Basic     *bas;
613:   MPI_Group         group;
614:   PetscErrorCode    ierr;

617:   PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
618:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */

620:   /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
621:   PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
622:   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
623:   PetscMalloc1(nselected,&new_ilocal);
624:   PetscMalloc1(nselected,&new_iremote);
625:   for (i=0; i<nselected; i++) {
626:     const PetscInt l     = selected[i];
627:     new_ilocal[i]        = ilocal ? ilocal[l] : l;
628:     new_iremote[i].rank  = iremote[l].rank;
629:     new_iremote[i].index = iremote[l].index;
630:   }

632:   /* Tag selected leaves before PetscSFSetGraph, since new_ilocal might become NULL once we pass it with PETSC_OWN_POINTER below */
633:   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
634:   PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
635:   for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

637:   PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);

639:   /* Set up the outgoing communication (i.e., send info). We cannot reuse rmine etc. from sf since there is no way to
640:      map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices).
641:    */
642:   MPI_Comm_group(PETSC_COMM_SELF,&group);
643:   PetscSFSetUpRanks(esf,group);
644:   MPI_Group_free(&group);

646:   /* Set up the incoming communication (i.e., recv info) */
647:   PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);
648:   bas  = (PetscSF_Basic*)esf->data;
649:   PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
650:   PetscMalloc1(ioffset[niranks],&bas->irootloc);

652:   /* Pass info about selected leaves to root buffer */
653:   PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE); /* -minleaf to re-adjust start address of leafdata */
654:   PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);
655:   PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);

657:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
658:   p = 0; /* Counter for connected leaf ranks */
659:   q = 0; /* Counter for connected roots */
660:   for (i=0; i<niranks; i++) {
661:     PetscInt *rootbuf = (PetscInt*)link->root[i];
662:     PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
663:     for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
664:       if (rootbuf[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
665:     }
666:     if (connected) {
667:       bas->niranks++;
668:       if (i<ndiranks) bas->ndiranks++;
669:       bas->iranks[p]    = iranks[i];
670:       bas->ioffset[p+1] = q;
671:       p++;
672:     }
673:   }
674:   bas->itotal = q;
675:   PetscSFPackReclaim(sf,(PetscSFPack*)&link);

677:   /* Setup packing optimizations */
678:   PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);
679:   PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);
680:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

682:   PetscFree2(rootdata,leafdata);
683:   *newsf = esf;
684:   return(0);
685: }
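
The leaf-selecting variant is reached through the public PetscSFCreateEmbeddedLeafSF(); here selected[] holds leaf indices rather than root indices. A sketch with invented leaf indices; error checks elided:

  PetscInt leaves[2] = {1,2};                  /* keep only leaves 1 and 2 of this process */
  PetscSF  lsf;
  PetscSFCreateEmbeddedLeafSF(sf,2,leaves,&lsf);
  PetscSFDestroy(&lsf);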

687: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
688: {
689:   PetscSF_Basic  *dat;

693:   sf->ops->SetUp                = PetscSFSetUp_Basic;
694:   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
695:   sf->ops->Reset                = PetscSFReset_Basic;
696:   sf->ops->Destroy              = PetscSFDestroy_Basic;
697:   sf->ops->View                 = PetscSFView_Basic;
698:   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
699:   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
700:   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
701:   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
702:   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
703:   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
704:   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
705:   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
706:   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;

708:   PetscNewLog(sf,&dat);
709:   sf->data = (void*)dat;
710:   return(0);
711: }