Actual source code: vscatsf.c

petsc-3.11.3 2019-06-26
Report Typos and Errors
  1:  #include <petsc/private/vecscatterimpl.h>
  2:  #include <petsc/private/sfimpl.h>

  4: typedef struct {
  5:   PetscSF           sf;     /* the whole scatter, including local and remote */
  6:   PetscSF           lsf;    /* the local part of the scatter, used for SCATTER_LOCAL */
  7:   PetscInt          bs;     /* block size */
  8:   MPI_Datatype      unit;   /* one unit = bs PetscScalars */
  9: } VecScatter_SF;

 11: static PetscErrorCode VecScatterBegin_SF(VecScatter vscat,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 12: {
 13:   VecScatter_SF  *data=(VecScatter_SF*)vscat->data;
 14:   PetscSF        sf;
 15:   MPI_Op         mop=MPI_OP_NULL;

 19:   if (x != y) {VecLockReadPush(x);}

 21:   {
 22: #if defined(PETSC_HAVE_CUDA)
 23:     PetscBool is_cudatype = PETSC_FALSE;
 24:     PetscObjectTypeCompareAny((PetscObject)x,&is_cudatype,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
 25:     if (is_cudatype) {
 26:       VecCUDAAllocateCheckHost(x);
 27:       if (x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
 28:         if (x->spptr && vscat->spptr) {VecCUDACopyFromGPUSome_Public(x,(PetscCUDAIndices)vscat->spptr,mode);}
 29:         else {VecCUDACopyFromGPU(x);}
 30:       }
 31:       vscat->xdata = *((PetscScalar**)x->data);
 32:     } else
 33: #endif
 34:     {
 35:       VecGetArrayRead(x,&vscat->xdata);
 36:     }
 37:   }

 39:   if (x != y) {VecGetArray(y,&vscat->ydata);}
 40:   else vscat->ydata = (PetscScalar *)vscat->xdata;
 41:   VecLockWriteSet_Private(y,PETSC_TRUE);

 43:   /* SCATTER_LOCAL indicates ignoring inter-process communication */
 44:   sf = (mode & SCATTER_LOCAL) ? data->lsf : data->sf;

 46:   if (addv == INSERT_VALUES)   mop = MPI_REPLACE;
 47:   else if (addv == ADD_VALUES) mop = MPI_SUM;
 48:   else if (addv == MAX_VALUES) mop = MPI_MAX;
 49:   else SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %D in VecScatterBegin/End",addv);

 51:   if (mode & SCATTER_REVERSE) { /* reverse scatter sends root to leaf. Note that x and y are swapped in input */
 52:     PetscSFBcastAndOpBegin(sf,data->unit,vscat->xdata,vscat->ydata,mop);
 53:   } else { /* forward scatter sends leaf to root, i.e., x to y */
 54:     PetscSFReduceBegin(sf,data->unit,vscat->xdata,vscat->ydata,mop);
 55:   }
 56:   return(0);
 57: }

 59: static PetscErrorCode VecScatterEnd_SF(VecScatter vscat,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 60: {
 61:   VecScatter_SF  *data=(VecScatter_SF*)vscat->data;
 62:   PetscSF        sf;
 63:   MPI_Op         mop=MPI_OP_NULL;

 67:   /* SCATTER_LOCAL indicates ignoring inter-process communication */
 68:   sf = (mode & SCATTER_LOCAL) ? data->lsf : data->sf;

 70:   if (addv == INSERT_VALUES)   mop = MPI_REPLACE;
 71:   else if (addv == ADD_VALUES) mop = MPI_SUM;
 72:   else if (addv == MAX_VALUES) mop = MPI_MAX;
 73:   else SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %D in VecScatterBegin/End",addv);

 75:   if (mode & SCATTER_REVERSE) {/* reverse scatter sends root to leaf. Note that x and y are swapped in input */
 76:     PetscSFBcastAndOpEnd(sf,data->unit,vscat->xdata,vscat->ydata,mop);
 77:   } else { /* forward scatter sends leaf to root, i.e., x to y */
 78:     PetscSFReduceEnd(sf,data->unit,vscat->xdata,vscat->ydata,mop);
 79:   }

 81:   if (x != y) {
 82:     VecRestoreArrayRead(x,&vscat->xdata);
 83:     VecLockReadPop(x);
 84:   }
 85:   VecRestoreArray(y,&vscat->ydata);
 86:   VecLockWriteSet_Private(y,PETSC_FALSE);
 87:   return(0);
 88: }

 90: static PetscErrorCode VecScatterCopy_SF(VecScatter vscat,VecScatter ctx)
 91: {
 92:   VecScatter_SF  *data=(VecScatter_SF*)vscat->data,*out;

 96:   PetscMemcpy(ctx->ops,vscat->ops,sizeof(vscat->ops));
 97:   PetscNewLog(ctx,&out);
 98:   PetscSFDuplicate(data->sf,PETSCSF_DUPLICATE_GRAPH,&out->sf);
 99:   PetscSFDuplicate(data->lsf,PETSCSF_DUPLICATE_GRAPH,&out->lsf);
100:   PetscSFSetUp(out->sf);
101:   PetscSFSetUp(out->lsf);

103:   out->bs = data->bs;
104:   if (out->bs > 1) {
105:     MPI_Type_dup(data->unit,&out->unit); /* Since oldtype is committed, so is newtype, according to MPI */
106:   } else {
107:     out->unit = MPIU_SCALAR;
108:   }
109:   ctx->data = (void*)out;
110:   return(0);
111: }

113: static PetscErrorCode VecScatterDestroy_SF(VecScatter vscat)
114: {
115:   VecScatter_SF *data = (VecScatter_SF *)vscat->data;

119:   PetscSFDestroy(&data->sf);
120:   PetscSFDestroy(&data->lsf);
121:   if (data->bs > 1) {MPI_Type_free(&data->unit);}
122:   PetscFree(vscat->data);
123:   return(0);
124: }

126: static PetscErrorCode VecScatterView_SF(VecScatter vscat,PetscViewer viewer)
127: {
128:   VecScatter_SF *data = (VecScatter_SF *)vscat->data;

132:   PetscSFView(data->sf,viewer);
133:   return(0);
134: }

136: /* VecScatterRemap provides a light way to slightly modify a VecScatter. Suppose the input vscat scatters
137:    x[i] to y[j], tomap gives a plan to change vscat to scatter x[tomap[i]] to y[j].
138:  */
139: static PetscErrorCode VecScatterRemap_SF(VecScatter vscat,const PetscInt *tomap,const PetscInt *frommap)
140: {
141:   VecScatter_SF  *data = (VecScatter_SF *)vscat->data;
142:   PetscSF        sfs[2],sf;
143:   PetscInt       i,j;
144:   PetscBool      ident;

148:   sfs[0] = data->sf;
149:   sfs[1] = data->lsf;

151:   if (tomap) {
152:     /* check if it is an identity map. If it is, do nothing */
153:     ident = PETSC_TRUE;
154:     for (i=0; i<data->sf->nleaves; i++) {if (i != tomap[i]) {ident = PETSC_FALSE; break; } }
155:     if (ident) return(0);

157:     for (j=0; j<2; j++) {
158:       sf   = sfs[j];
159:       PetscSFSetUp(sf); /* to bulid sf->rmine if SetUp is not yet called */
160:       if (!sf->mine) { /* the old SF uses contiguous ilocal. After the remapping, it may not be true */
161:         PetscMalloc1(sf->nleaves,&sf->mine);
162:         PetscMemcpy(sf->mine,tomap,sizeof(PetscInt)*sf->nleaves);
163:         sf->mine_alloc = sf->mine;
164:       } else {
165:         for (i=0; i<sf->nleaves; i++)             sf->mine[i]   = tomap[sf->mine[i]];
166:       }
167:       for (i=0; i<sf->roffset[sf->nranks]; i++)   sf->rmine[i]  = tomap[sf->rmine[i]];
168:     }
169:   }

171:   if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
172:   return(0);
173: }

175: static PetscErrorCode VecScatterGetRemoteCount_SF(VecScatter vscat,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
176: {
177:   VecScatter_SF     *data = (VecScatter_SF *)vscat->data;
178:   PetscSF           sf = data->sf;
179:   PetscInt          nranks,remote_start;
180:   PetscMPIInt       myrank;
181:   const PetscInt    *offset;
182:   const PetscMPIInt *ranks;
183:   PetscErrorCode    ierr;

186:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&myrank);

188:   if (send) { PetscSFGetRanks(sf,&nranks,&ranks,&offset,NULL,NULL); }
189:   else { PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,NULL); }

191:   if (nranks) {
192:     remote_start = (myrank == ranks[0])? 1 : 0;
193:     if (num_procs)   *num_procs   = nranks - remote_start;
194:     if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
195:   } else {
196:     if (num_procs)   *num_procs   = 0;
197:     if (num_entries) *num_entries = 0;
198:   }
199:   return(0);
200: }

202: static PetscErrorCode VecScatterGetRemote_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
203: {
204:   VecScatter_SF     *data = (VecScatter_SF *)vscat->data;
205:   PetscSF           sf = data->sf;
206:   PetscInt          nranks,remote_start;
207:   PetscMPIInt       myrank;
208:   const PetscInt    *offset,*location;
209:   const PetscMPIInt *ranks;
210:   PetscErrorCode    ierr;

213:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&myrank);

215:   if (send) { PetscSFGetRanks(sf,&nranks,&ranks,&offset,&location,NULL); }
216:   else { PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,&location); }

218:   if (nranks) {
219:     remote_start = (myrank == ranks[0])? 1 : 0;
220:     if (n)       *n       = nranks - remote_start;
221:     if (starts)  *starts  = &offset[remote_start];
222:     if (indices) *indices = location; /* not &location[offset[remote_start]]. Starts[0] may point to the middle of indices[] */
223:     if (procs)   *procs   = &ranks[remote_start];
224:   } else {
225:     if (n)       *n       = 0;
226:     if (starts)  *starts  = NULL;
227:     if (indices) *indices = NULL;
228:     if (procs)   *procs   = NULL;
229:   }

231:   if (bs) *bs = 1;
232:   return(0);
233: }

235: static PetscErrorCode VecScatterGetRemoteOrdered_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
236: {

240:   VecScatterGetRemote_SF(vscat,send,n,starts,indices,procs,bs);
241:   return(0);
242: }

244: static PetscErrorCode VecScatterRestoreRemote_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
245: {
247:   if (starts)   *starts  = NULL;
248:   if (indices)  *indices = NULL;
249:   if (procs)    *procs   = NULL;
250:   return(0);
251: }

253: static PetscErrorCode VecScatterRestoreRemoteOrdered_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
254: {
257:   VecScatterRestoreRemote_SF(vscat,send,n,starts,indices,procs,bs);
258:   return(0);
259: }

261: typedef enum {IS_INVALID, IS_GENERAL, IS_BLOCK, IS_STRIDE} ISTypeID;

263: PETSC_STATIC_INLINE PetscErrorCode ISGetTypeID_Private(IS is,ISTypeID *id)
264: {
266:   PetscBool      same;

269:   *id  = IS_INVALID;
270:   PetscObjectTypeCompare((PetscObject)is,ISGENERAL,&same);
271:   if (same) {*id = IS_GENERAL; goto functionend;}
272:   PetscObjectTypeCompare((PetscObject)is,ISBLOCK,&same);
273:   if (same) {*id = IS_BLOCK; goto functionend;}
274:   PetscObjectTypeCompare((PetscObject)is,ISSTRIDE,&same);
275:   if (same) {*id = IS_STRIDE; goto functionend;}
276: functionend:
277:   return(0);
278: }

280: static PetscErrorCode VecScatterSetUp_SF(VecScatter vscat)
281: {
282:   VecScatter_SF  *data;
283:   MPI_Comm       comm,xcomm,ycomm,bigcomm;
284:   Vec            x=vscat->from_v,y=vscat->to_v,xx,yy;
285:   IS             ix=vscat->from_is,iy=vscat->to_is,ixx,iyy;
286:   PetscMPIInt    size,xcommsize,ycommsize,myrank;
287:   PetscInt       i,j,n,N,nroots,nleaves,inedges=0,*leafdata,*rootdata,*ilocal,*lilocal,xstart,ystart,lnleaves,ixsize,iysize,xlen,ylen;
288:   const PetscInt *xindices,*yindices,*degree;
289:   PetscSFNode    *iremote,*liremote;
290:   PetscLayout    xlayout,ylayout;
291:   PetscSF        tmpsf;
292:   ISTypeID       ixid,iyid;
293:   PetscInt       bs,bsx,bsy,min=PETSC_MIN_INT,max=PETSC_MAX_INT,ixfirst,ixstep,iyfirst,iystep;
294:   PetscBool      can_do_block_opt=PETSC_FALSE;

298:   PetscNewLog(vscat,&data);

300:   /* Let P and S stand for parallel and sequential vectors respectively, there are four combinations of vecscatters: PtoP, PtoS, StoP and StoS.
301:     The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix contains global
302:     indices of x. If x is sequential, ix contains local indices of x. Similarily for y and iy.

304:     SF builds around concepts of local leaves and remote roots, which correspond to an StoP scatter. We transform PtoP and PtoS to StoP, and
305:     treat StoS as a trivial StoP.
306:   */
307:   PetscObjectGetComm((PetscObject)x,&xcomm);
308:   PetscObjectGetComm((PetscObject)y,&ycomm);
309:   MPI_Comm_size(xcomm,&xcommsize);
310:   MPI_Comm_size(ycomm,&ycommsize);

312:   /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
313:   if (!ix) {
314:     if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
315:       VecGetSize(x,&N);
316:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&ix);
317:     } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
318:       VecGetLocalSize(x,&n);
319:       VecGetOwnershipRange(x,&xstart,NULL);
320:       ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);
321:     }
322:   }

324:   if (!iy) {
325:     if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means the whole y will be scattered to from each seq x */
326:       VecGetSize(y,&N);
327:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&iy);
328:     } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
329:       VecGetLocalSize(y,&n);
330:       VecGetOwnershipRange(y,&ystart,NULL);
331:       ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);
332:     }
333:   }

335:   /* Do error checking immediately after we have non-empty ix, iy */
336:   ISGetLocalSize(ix,&ixsize);
337:   ISGetLocalSize(iy,&iysize);
338:   VecGetSize(x,&xlen);
339:   VecGetSize(y,&ylen);
340:   if (ixsize != iysize) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Scatter sizes of ix and iy don't match locally");
341:   ISGetMinMax(ix,&min,&max);
342:   if (min < 0 || max >= xlen) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scatter indices in ix are out of range");
343:   ISGetMinMax(iy,&min,&max);
344:   if (min < 0 || max >= ylen) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scatter indices in iy are out of range");

346:   /* Do block optimization by taking advantage of high level info available in ix, iy.
347:      The block optimization is valid when all of the following conditions are met:
348:      1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
349:      2) ix, iy have the same block size;
350:      3) all processors agree on one block size;
351:      4) no blocks span more than one process;
352:    */
353:   data->bs   = 1; /* default, no blocking */
354:   data->unit = MPIU_SCALAR;
355:   ISGetTypeID_Private(ix,&ixid);
356:   ISGetTypeID_Private(iy,&iyid);
357:   bigcomm    = (ycommsize == 1) ? xcomm : ycomm;

359:   if (ixid == IS_BLOCK)       {ISGetBlockSize(ix,&bsx);}
360:   else if (ixid == IS_STRIDE) {ISStrideGetInfo(ix,&ixfirst,&ixstep);}

362:   if ( iyid == IS_BLOCK)      {ISGetBlockSize(iy,&bsy);}
363:   else if (iyid == IS_STRIDE) {ISStrideGetInfo(iy,&iyfirst,&iystep);}

365:   /* Processors could go through different path in this if-else test */
366:   if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
367:     min = PetscMin(bsx,bsy);
368:     max = PetscMax(bsx,bsy);
369:   } else if (ixid == IS_BLOCK  && iyid == IS_STRIDE && iystep==1 && iyfirst%bsx==0) {
370:     min = max = bsx;
371:   } else if (ixid == IS_STRIDE && iyid == IS_BLOCK  && ixstep==1 && ixfirst%bsy==0) {
372:     min = max = bsy;
373:   }
374:   MPIU_Allreduce(MPI_IN_PLACE,&min,1,MPIU_INT,MPI_MIN,bigcomm);
375:   MPIU_Allreduce(MPI_IN_PLACE,&max,1,MPIU_INT,MPI_MAX,bigcomm);

377:   /* Since we used allreduce above, all ranks will have the same min and max. min==max
378:      implies all ranks have the same bs. Do further test to see if local vectors are dividable
379:      by bs on ALL ranks. If they are, we are ensured that no blocks span more than one processor.
380:    */
381:   if (min == max && min > 1) {
382:     PetscInt m[2];
383:     VecGetLocalSize(x,&xlen);
384:     VecGetLocalSize(y,&ylen);
385:     m[0] = xlen%min;
386:     m[1] = ylen%min;
387:     MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_LOR,bigcomm);
388:     if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
389:   }

391:   /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
392:      and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
393:      indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.

395:      yy is a little special. If y is seq, then yy is the concatenation of seq y's on xcomm. In this way,
396:      we can treat PtoP and PtoS uniformly as PtoP.
397:    */
398:   if (can_do_block_opt) {
399:     const PetscInt *indices;

401:     data->bs = bs = min;
402:     MPI_Type_contiguous(bs,MPIU_SCALAR,&data->unit);
403:     MPI_Type_commit(&data->unit);

405:     /* Shrink x and ix */
406:     VecCreateMPIWithArray(xcomm,1,xlen/bs,PETSC_DECIDE,NULL,&xx); /* We only care xx's layout */
407:     if (ixid == IS_BLOCK) {
408:       ISBlockGetIndices(ix,&indices);
409:       ISBlockGetLocalSize(ix,&ixsize);
410:       ISCreateGeneral(PETSC_COMM_SELF,ixsize,indices,PETSC_COPY_VALUES,&ixx);
411:       ISBlockRestoreIndices(ix,&indices);
412:     } else if (ixid == IS_STRIDE) {
413:       ISGetLocalSize(ix,&ixsize);
414:       ISCreateStride(PETSC_COMM_SELF,ixsize/bs,ixfirst/bs,1,&ixx);
415:     }

417:     /* Shrink y and iy */
418:     VecCreateMPIWithArray(bigcomm,1,ylen/bs,PETSC_DECIDE,NULL,&yy);
419:     if (iyid == IS_BLOCK) {
420:       ISBlockGetIndices(iy,&indices);
421:       ISBlockGetLocalSize(iy,&iysize);
422:       ISCreateGeneral(PETSC_COMM_SELF,iysize,indices,PETSC_COPY_VALUES,&iyy);
423:       ISBlockRestoreIndices(iy,&indices);
424:     } else if (iyid == IS_STRIDE) {
425:       ISGetLocalSize(iy,&iysize);
426:       ISCreateStride(PETSC_COMM_SELF,iysize/bs,iyfirst/bs,1,&iyy);
427:     }
428:   } else {
429:     ixx = ix;
430:     iyy = iy;
431:     xx  = x;
432:     if (ycommsize == 1) {VecCreateMPIWithArray(bigcomm,1,ylen,PETSC_DECIDE,NULL,&yy);} else yy = y;
433:   }

435:   /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
436:   ISGetIndices(ixx,&xindices);
437:   ISGetIndices(iyy,&yindices);

439:   if (xcommsize > 1) {
440:     /* PtoP or PtoS */
441:     VecGetLayout(xx,&xlayout);
442:     VecGetOwnershipRange(xx,&xstart,NULL);
443:     VecGetLayout(yy,&ylayout);
444:     VecGetOwnershipRange(yy,&ystart,NULL);

446:     /* Each process has a set of global index pairs (i, j) to scatter xx[i] to yy[j]. We first shift (i, j) to owner process of i through a tmp SF */
447:     VecGetLocalSize(xx,&nroots);
448:     ISGetLocalSize(ixx,&nleaves);
449:     PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata);

451:     for (i=0; i<nleaves; i++) {
452:       PetscLayoutFindOwnerIndex(xlayout,xindices[i],&iremote[i].rank,&iremote[i].index);
453:       leafdata[2*i]   = xindices[i];
454:       leafdata[2*i+1] = (ycommsize > 1)? yindices[i] : yindices[i] + ystart;
455:     }

457:     PetscSFCreate(xcomm,&tmpsf);
458:     PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);

460:     PetscSFComputeDegreeBegin(tmpsf,&degree);
461:     PetscSFComputeDegreeEnd(tmpsf,&degree);

463:     for (i=0; i<nroots; i++) inedges += degree[i];
464:     PetscMalloc1(inedges*2,&rootdata);
465:     PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata);
466:     PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata);

468:     PetscFree2(iremote,leafdata);
469:     PetscSFDestroy(&tmpsf);

471:     /* rootdata contains global index pairs (i, j). i's are owned by the current process, but j's can point to anywhere.
472:        We convert i to local, and convert j to (rank, index). In the end, we get an StoP suitable for building SF.
473:      */
474:     nleaves = inedges;
475:     VecGetLocalSize(yy,&nroots);
476:     PetscMalloc1(nleaves,&ilocal);
477:     PetscMalloc1(nleaves,&iremote);

479:     for (i=0; i<inedges; i++) {
480:       ilocal[i] = rootdata[2*i] - xstart; /* covert x's global index to local index */
481:       PetscLayoutFindOwnerIndex(ylayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index); /* convert y's global index to (rank, index) */
482:     }

484:     /* MUST build SF on yy's comm, which is not necessarily identical to xx's comm.
485:        In SF's view, yy contains the roots (i.e., the remote) and iremote[].rank are ranks in yy's comm.
486:        xx contains leaves, which are local and can be thought as part of PETSC_COMM_SELF. */
487:     PetscSFCreate(PetscObjectComm((PetscObject)yy),&data->sf);
488:     PetscSFSetGraph(data->sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
489:     PetscFree(rootdata);
490:   } else {
491:     /* StoP or StoS */
492:     VecGetLayout(yy,&ylayout);
493:     ISGetLocalSize(ixx,&nleaves);
494:     VecGetLocalSize(yy,&nroots);
495:     PetscMalloc1(nleaves,&ilocal);
496:     PetscMalloc1(nleaves,&iremote);
497:     PetscMemcpy(ilocal,xindices,nleaves*sizeof(PetscInt));
498:     for (i=0; i<nleaves; i++) {PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index);}
499:     PetscSFCreate(PetscObjectComm((PetscObject)yy),&data->sf);
500:     PetscSFSetGraph(data->sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
501:   }

503:   /* Free memory no longer needed */
504:   ISRestoreIndices(ixx,&xindices);
505:   ISRestoreIndices(iyy,&yindices);
506:   if (can_do_block_opt) {
507:     VecDestroy(&xx);
508:     VecDestroy(&yy);
509:     ISDestroy(&ixx);
510:     ISDestroy(&iyy);
511:   } else if (ycommsize == 1) {
512:     VecDestroy(&yy);
513:   }
514:   if (!vscat->from_is) {ISDestroy(&ix);}
515:   if (!vscat->to_is  ) {ISDestroy(&iy);}

517:   /* Create lsf, the local scatter. Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
518:   PetscObjectGetComm((PetscObject)data->sf,&comm);
519:   MPI_Comm_size(comm,&size);
520:   MPI_Comm_rank(comm,&myrank);

522:   /* Find out local edges and build a local SF */
523:   {
524:     const PetscInt    *ilocal;
525:     const PetscSFNode *iremote;
526:     PetscSFGetGraph(data->sf,&nroots,&nleaves,&ilocal,&iremote);
527:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
528:     PetscMalloc1(lnleaves,&lilocal);
529:     PetscMalloc1(lnleaves,&liremote);

531:     for (i=j=0; i<nleaves; i++) {
532:       if (iremote[i].rank == (PetscInt)myrank) {
533:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
534:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
535:         liremote[j].index = iremote[i].index;
536:         j++;
537:       }
538:     }
539:     PetscSFCreate(PETSC_COMM_SELF,&data->lsf);
540:     PetscSFSetGraph(data->lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
541:   }

543:   /* vecscatter uses eager setup */
544:   PetscSFSetUp(data->sf);
545:   PetscSFSetUp(data->lsf);

547:   vscat->data                      = (void*)data;
548:   vscat->ops->begin                = VecScatterBegin_SF;
549:   vscat->ops->end                  = VecScatterEnd_SF;
550:   vscat->ops->remap                = VecScatterRemap_SF;
551:   vscat->ops->copy                 = VecScatterCopy_SF;
552:   vscat->ops->destroy              = VecScatterDestroy_SF;
553:   vscat->ops->view                 = VecScatterView_SF;
554:   vscat->ops->getremotecount       = VecScatterGetRemoteCount_SF;
555:   vscat->ops->getremote            = VecScatterGetRemote_SF;
556:   vscat->ops->getremoteordered     = VecScatterGetRemoteOrdered_SF;
557:   vscat->ops->restoreremote        = VecScatterRestoreRemote_SF;
558:   vscat->ops->restoreremoteordered = VecScatterRestoreRemoteOrdered_SF;
559:   return(0);
560: }

562: PetscErrorCode VecScatterCreate_SF(VecScatter ctx)
563: {

567:   ctx->ops->setup = VecScatterSetUp_SF;
568:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSF);
569:   PetscInfo(ctx,"Using StarForest for vector scatter\n");
570:   return(0);
571: }