Actual source code: vscatfce.c

petsc-3.11.2 2019-05-18
Report Typos and Errors
  1:  #include <petsc/private/vecscatterimpl.h>
  2: #if defined(PETSC_HAVE_CUDA)
  3:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
  4: #endif
  5: /* ------------------------------------------------------------------*/
  6: /*@
  7:    VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
  8:       and the VecScatterEnd() does nothing

 10:    Not Collective

 12:    Input Parameter:
 13: .   ctx - scatter context created with VecScatterCreate()

 15:    Output Parameter:
 16: .   flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()

 18:    Level: developer

 20: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
 21: @*/
 22: PetscErrorCode  VecScatterGetMerged(VecScatter ctx,PetscBool  *flg)
 23: {
 26:   *flg = ctx->beginandendtogether;
 27:   return(0);
 28: }

 30: /*@
 31:    VecScatterBegin - Begins a generalized scatter from one vector to
 32:    another. Complete the scattering phase with VecScatterEnd().

 34:    Neighbor-wise Collective on VecScatter and Vec

 36:    Input Parameters:
 37: +  ctx - scatter context generated by VecScatterCreate()
 38: .  x - the vector from which we scatter
 39: .  y - the vector to which we scatter
 40: .  addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
 41:           not scattered to retains its old value; i.e. the vector is NOT first zeroed.
 42: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
 43:     SCATTER_FORWARD or SCATTER_REVERSE


 46:    Level: intermediate

 48:    Options Database: See VecScatterCreate()

 50:    Notes:
 51:    The vectors x and y need not be the same vectors used in the call
 52:    to VecScatterCreate(), but x must have the same parallel data layout
 53:    as that passed in as the x to VecScatterCreate(), similarly for the y.
 54:    Most likely they have been obtained from VecDuplicate().

 56:    You cannot change the values in the input vector between the calls to VecScatterBegin()
 57:    and VecScatterEnd().

 59:    If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
 60:    the SCATTER_FORWARD.

 62:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

 64:    This scatter is far more general than the conventional
 65:    scatter, since it can be a gather or a scatter or a combination,
 66:    depending on the indices ix and iy.  If x is a parallel vector and y
 67:    is sequential, VecScatterBegin() can serve to gather values to a
 68:    single processor.  Similarly, if y is parallel and x sequential, the
 69:    routine can scatter from one processor to many processors.

 71:    Concepts: scatter^between vectors
 72:    Concepts: gather^between vectors

 74: .seealso: VecScatterCreate(), VecScatterEnd()
 75: @*/
 76: PetscErrorCode  VecScatterBegin(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 77: {
 79: #if defined(PETSC_USE_DEBUG)
 80:   PetscInt       to_n,from_n;
 81: #endif
 86:   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");

 88: #if defined(PETSC_USE_DEBUG)
 89:   /*
 90:      Error checking to make sure these vectors match the vectors used
 91:    to create the vector scatter context. -1 in the from_n and to_n indicate the
 92:    vector lengths are unknown (for example with mapped scatters) and thus
 93:    no error checking is performed.
 94:   */
 95:   if (ctx->from_n >= 0 && ctx->to_n >= 0) {
 96:     VecGetLocalSize(x,&from_n);
 97:     VecGetLocalSize(y,&to_n);
 98:     if (mode & SCATTER_REVERSE) {
 99:       if (to_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,ctx->from_n);
100:       if (from_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,ctx->to_n);
101:     } else {
102:       if (to_n != ctx->to_n)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,ctx->to_n);
103:       if (from_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,ctx->from_n);
104:     }
105:   }
106: #endif

108:   ctx->inuse = PETSC_TRUE;
109:   PetscLogEventBegin(VEC_ScatterBegin,ctx,x,y,0);
110:   (*ctx->ops->begin)(ctx,x,y,addv,mode);
111:   if (ctx->beginandendtogether && ctx->ops->end) {
112:     ctx->inuse = PETSC_FALSE;
113:     (*ctx->ops->end)(ctx,x,y,addv,mode);
114:   }
115:   PetscLogEventEnd(VEC_ScatterBegin,ctx,x,y,0);
116:   return(0);
117: }

119: /* --------------------------------------------------------------------*/
120: /*@
121:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
122:    after first calling VecScatterBegin().

124:    Neighbor-wise Collective on VecScatter and Vec

126:    Input Parameters:
127: +  ctx - scatter context generated by VecScatterCreate()
128: .  x - the vector from which we scatter
129: .  y - the vector to which we scatter
130: .  addv - either ADD_VALUES or INSERT_VALUES.
131: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
132:      SCATTER_FORWARD, SCATTER_REVERSE

134:    Level: intermediate

136:    Notes:
137:    If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.

139:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

141: .seealso: VecScatterBegin(), VecScatterCreate()
142: @*/
143: PetscErrorCode  VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
144: {

151:   ctx->inuse = PETSC_FALSE;
152:   if (!ctx->ops->end) return(0);
153:   if (!ctx->beginandendtogether) {
154:     PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
155:     (*(ctx)->ops->end)(ctx,x,y,addv,mode);
156:     PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
157:   }
158:   return(0);
159: }

161: /*@
162:    VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()

164:    Collective on VecScatter

166:    Input Parameter:
167: .  ctx - the scatter context

169:    Level: intermediate

171: .seealso: VecScatterCreate(), VecScatterCopy()
172: @*/
173: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
174: {

178:   if (!*ctx) return(0);
180:   if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
181:   if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}

183:   /* if memory was published with SAWs then destroy it */
184:   PetscObjectSAWsViewOff((PetscObject)(*ctx));
185:   if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
186: #if defined(PETSC_HAVE_CUDA)
187:   VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
188: #endif
189:   PetscHeaderDestroy(ctx);
190:   return(0);
191: }

193: /*@
194:    VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors

196:    Collective on VecScatter

198:    Input Parameter:
199: .  ctx - the scatter context

201:    Level: intermediate

203: .seealso: VecScatterCreate(), VecScatterCopy()
204: @*/
205: PetscErrorCode VecScatterSetUp(VecScatter ctx)
206: {

211:   (*ctx->ops->setup)(ctx);
212:   return(0);
213: }

215: /*@
216:    VecScatterCopy - Makes a copy of a scatter context.

218:    Collective on VecScatter

220:    Input Parameter:
221: .  sctx - the scatter context

223:    Output Parameter:
224: .  ctx - the context copy

226:    Level: advanced

228: .seealso: VecScatterCreate(), VecScatterDestroy()
229: @*/
230: PetscErrorCode  VecScatterCopy(VecScatter sctx,VecScatter *ctx)
231: {
233:   VecScatterType type;

238:   if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
239:   PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
240:   (*ctx)->to_n   = sctx->to_n;
241:   (*ctx)->from_n = sctx->from_n;
242:   (*sctx->ops->copy)(sctx,*ctx);

244:   VecScatterGetType(sctx,&type);
245:   PetscObjectChangeTypeName((PetscObject)(*ctx),type);
246:   return(0);
247: }

249: /* ------------------------------------------------------------------*/
250: /*@C
251:    VecScatterView - Views a vector scatter context.

253:    Collective on VecScatter

255:    Input Parameters:
256: +  ctx - the scatter context
257: -  viewer - the viewer for displaying the context

259:    Level: intermediate

261: @*/
262: PetscErrorCode  VecScatterView(VecScatter ctx,PetscViewer viewer)
263: {

268:   if (!viewer) {
269:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
270:   }
272:   if (ctx->ops->view) {
273:     (*ctx->ops->view)(ctx,viewer);
274:   }
275:   return(0);
276: }

278: /*@C
279:    VecScatterRemap - Remaps the "from" and "to" indices in a
280:    vector scatter context. FOR EXPERTS ONLY!

282:    Collective on VecScatter

284:    Input Parameters:
285: +  scat    - vector scatter context
286: .  tomap   - remapping plan for "to" indices (may be NULL).
287: -  frommap - remapping plan for "from" indices (may be NULL)

289:    Level: developer

291:    Notes:
292:      In the parallel case the todata contains indices from where the data is taken
293:      (and then sent to others)! The fromdata contains indices from where the received
294:      data is finally put locally.

296:      In the sequential case the todata contains indices from where the data is put
297:      and the fromdata contains indices from where the data is taken from.
298:      This is backwards from the paralllel case!

300: @*/
301: PetscErrorCode  VecScatterRemap(VecScatter scat,PetscInt tomap[],PetscInt frommap[])
302: {
303:   VecScatter_MPI_General *to,*from;
304:   VecScatter_Seq_General *sgto,*sgfrom;
305:   VecScatter_Seq_Stride  *ssto;
306:   PetscInt               i,ierr;


313:   if (scat->ops->remap) {
314:     (*scat->ops->remap)(scat,tomap,frommap);
315:   } else {
316:     to     = (VecScatter_MPI_General*)scat->todata;
317:     from   = (VecScatter_MPI_General*)scat->fromdata;
318:     ssto   = (VecScatter_Seq_Stride*)scat->todata;
319:     sgto   = (VecScatter_Seq_General*)scat->todata;
320:     sgfrom = (VecScatter_Seq_General*)scat->fromdata;

322:     /* remap indices from where we take/read data */
323:     if (tomap) {
324:       if (to->format == VEC_SCATTER_MPI_TOALL) {
325:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
326:       } else if (to->format == VEC_SCATTER_MPI_GENERAL) {
327:         /* handle off processor parts */
328:         for (i=0; i<to->starts[to->n]; i++) to->indices[i] = tomap[to->indices[i]];

330:         /* handle local part */
331:         for (i=0; i<to->local.n; i++) to->local.vslots[i] = tomap[to->local.vslots[i]];

333:         /* the memcpy optimizations in vecscatter was based on index patterns it has.
334:            They need to be recalculated when indices are changed (remapped).
335:          */
336:         VecScatterMemcpyPlanDestroy_PtoP(to,from);
337:         VecScatterMemcpyPlanCreate_PtoP(to,from);
338:       } else if (sgfrom->format == VEC_SCATTER_SEQ_GENERAL) {
339:         /* remap indices*/
340:         for (i=0; i<sgfrom->n; i++) sgfrom->vslots[i] = tomap[sgfrom->vslots[i]];
341:         /* update optimizations, which happen when it is a Stride1toSG, SGtoStride1 or SGToSG vecscatter */
342:         if (ssto->format == VEC_SCATTER_SEQ_STRIDE && ssto->step == 1) {
343:           PetscInt tmp[2];
344:           tmp[0] = 0; tmp[1] = sgfrom->n;
345:           VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
346:           VecScatterMemcpyPlanCreate_Index(1,tmp,sgfrom->vslots,1/*bs*/,&sgfrom->memcpy_plan);
347:         } else if (sgto->format == VEC_SCATTER_SEQ_GENERAL) {
348:           VecScatterMemcpyPlanDestroy(&sgto->memcpy_plan);;
349:           VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
350:           VecScatterMemcpyPlanCreate_SGToSG(1/*bs*/,sgto,sgfrom);
351:         }
352:       } else if (sgfrom->format == VEC_SCATTER_SEQ_STRIDE) {
353:         VecScatter_Seq_Stride *ssto = (VecScatter_Seq_Stride*)sgfrom;

355:         /* if the remapping is the identity and stride is identity then skip remap */
356:         if (ssto->step == 1 && ssto->first == 0) {
357:           for (i=0; i<ssto->n; i++) {
358:             if (tomap[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
359:           }
360:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
361:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
362:     }
363:   }
364:   if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");

366:   /*
367:     Mark then vector lengths as unknown because we do not know the
368:     lengths of the remapped vectors
369:   */
370:   scat->from_n = -1;
371:   scat->to_n   = -1;
372:   return(0);
373: }

375: /* Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication

377:   Input Parameters:
378: + ctx   - the context (must be a parallel vecscatter)
379: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

381:   Output parameters:
382: + num_procs   - number of remote processors
383: - num_entries - number of vector entries to send or recv


386:   .seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()

388:   Notes:
389:   Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
390:   usually only uses MPI_Send/Recv. This group of subroutines provides info needed for such uses.
391:  */
392: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter ctx,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
393: {
394:   VecScatter_MPI_General *vs;
395:   PetscBool              par;
396:   PetscErrorCode         ierr;

399:   if (ctx->ops->getremotecount) {
400:     (*ctx->ops->getremotecount)(ctx,send,num_procs,num_entries);
401:   } else {
402:     vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
403:     par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
404:     if (num_procs)   *num_procs   = par ? vs->n             : 0;
405:     if (num_entries) *num_entries = par ? vs->starts[vs->n] : 0;
406:   }
407:   return(0);
408: }

410: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
411:    Any output parameter can be NULL.

413:   Input Parameters:
414: + ctx   - the context
415: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

417:   Output parameters:
418: + n        - number of remote processors
419: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
420:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
421:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
422:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
423: . indices  - indices of entries to send/recv
424: . procs    - ranks of remote processors
425: - bs       - block size

427:   .seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
428:  */
429: PetscErrorCode VecScatterGetRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
430: {
431:   VecScatter_MPI_General *vs;
432:   PetscBool              par;
433:   PetscErrorCode         ierr;

436:   if (ctx->ops->getremote) {
437:     (*ctx->ops->getremote)(ctx,send,n,starts,indices,procs,bs);
438:   } else {
439:     vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
440:     par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
441:     if (n)       *n       = par ? vs->n       : 0;
442:     if (indices) *indices = par ? vs->indices : NULL;
443:     if (starts)  *starts  = par ? vs->starts  : NULL;
444:     if (procs)   *procs   = par ? vs->procs   : NULL;
445:     if (bs)      *bs      = par ? vs->bs      : 0;
446:   }
447:   return(0);
448: }


451: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
452:    processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.

454:   Input Parameters:
455: + ctx   - the context
456: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

458:   Output parameters:
459: + n        - number of remote processors
460: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
461:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
462:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
463:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
464: . indices  - indices of entries to send/recv
465: . procs    - ranks of remote processors
466: - bs       - block size

468:   .seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()

470:   Notes:
471:   Output parameters like starts, indices must also be adapted according to the sorted ranks.
472:  */
473: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
474: {
475:   VecScatter_MPI_General *vs;
476:   PetscBool              par;
477:   PetscErrorCode         ierr;

480:   if (ctx->ops->getremoteordered) {
481:     (*ctx->ops->getremoteordered)(ctx,send,n,starts,indices,procs,bs);
482:   } else {
483:     vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
484:     par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
485:     if (n)       *n       = par ? vs->n       : 0;
486:     if (indices) *indices = par ? vs->indices : NULL;
487:     if (starts)  *starts  = par ? vs->starts  : NULL;
488:     if (procs)   *procs   = par ? vs->procs   : NULL;
489:     if (bs)      *bs      = par ? vs->bs      : 0;
490:   }
491: #if defined(PETSC_USE_DEBUG)
492:   if (n && procs) {
493:     PetscInt i;
494:     /* from back to front to also handle cases *n=0 */
495:     for (i=*n-1; i>0; i--) { if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered"); }
496:   }
497: #endif
498:   return(0);
499: }

501: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
502:    an implementation to free memory allocated in the VecScatterGetRemote_Private call.

504:   Input Parameters:
505: + ctx   - the context
506: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

508:   Output parameters:
509: + n        - number of remote processors
510: . starts   - starting point in indices for each proc
511: . indices  - indices of entries to send/recv
512: . procs    - ranks of remote processors
513: - bs       - block size

515:   .seealso: VecScatterGetRemote_Private()
516:  */
517: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
518: {
519:   PetscErrorCode         ierr;

522:   if (ctx->ops->restoreremote) {
523:     (*ctx->ops->restoreremote)(ctx,send,n,starts,indices,procs,bs);
524:   } else {
525:     if (starts)  *starts  = NULL;
526:     if (indices) *indices = NULL;
527:     if (procs)   *procs   = NULL;
528:   }
529:   return(0);
530: }

532: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
533:    an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.

535:   Input Parameters:
536: + ctx   - the context
537: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

539:   Output parameters:
540: + n        - number of remote processors
541: . starts   - starting point in indices for each proc
542: . indices  - indices of entries to send/recv
543: . procs    - ranks of remote processors
544: - bs       - block size

546:   .seealso: VecScatterGetRemoteOrdered_Private()
547:  */
548: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
549: {
552:   if (ctx->ops->restoreremoteordered) {
553:     (*ctx->ops->restoreremoteordered)(ctx,send,n,starts,indices,procs,bs);
554:   } else {
555:     VecScatterRestoreRemote_Private(ctx,send,n,starts,indices,procs,bs);
556:   }
557:   return(0);
558: }

560: #if defined(PETSC_HAVE_CUDA)

562: /*@C
563:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
564:    to another for GPU based computation.

566:    Input Parameters:
567: +  inctx - scatter context generated by VecScatterCreate()
568: -  x - the vector from which we scatter

570:   Level: intermediate

572:   Notes:
573:    Effectively, this function creates all the necessary indexing buffers and work
574:    vectors needed to move data only those data points in a vector which need to
575:    be communicated across ranks. This is done at the first time this function is
576:    called. Currently, this only used in the context of the parallel SpMV call in
577:    MatMult_MPIAIJCUSPARSE.

579:    This function is executed before the call to MatMult. This enables the memory
580:    transfers to be overlapped with the MatMult SpMV kernel call.

582: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
583: @*/
584: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x)
585: {
587:   PetscInt       i,nrecvs,nsends,sbs,rbs,ns,nr;
588:   const PetscInt *sstarts,*rstarts,*sindices,*rindices;

591:   VecScatterGetRemote_Private(inctx,PETSC_TRUE/*send*/, &nsends,&sstarts,&sindices,NULL/*procs*/,&sbs);
592:   VecScatterGetRemote_Private(inctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,&rindices,NULL/*procs*/,&rbs);
593:   ns   = nsends ? sstarts[nsends]-sstarts[0] : 0; /* s/rstarts[0] is not necessarily zero */
594:   nr   = nrecvs ? rstarts[nrecvs]-rstarts[0] : 0;

596:   if (x->valid_GPU_array != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
597:     if (!inctx->spptr) {
598:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
599:       /* Here we create indices for both the senders and receivers. */
600:       PetscMalloc1(ns,&tindicesSends);
601:       PetscMalloc1(nr,&tindicesRecvs);

603:       /* s/rindices and s/rstarts could be NULL when ns or nr is zero */
604:       if (ns) {PetscMemcpy(tindicesSends,&sindices[sstarts[0]],ns*sizeof(PetscInt));}
605:       if (nr) {PetscMemcpy(tindicesRecvs,&rindices[rstarts[0]],nr*sizeof(PetscInt));}

607:       PetscSortRemoveDupsInt(&ns,tindicesSends);
608:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);

610:       PetscMalloc1(sbs*ns,&sindicesSends);
611:       PetscMalloc1(rbs*nr,&sindicesRecvs);

613:       /* sender indices */
614:       for (i=0; i<ns; i++) {
615:         for (k=0; k<sbs; k++) sindicesSends[i*sbs+k] = tindicesSends[i]+k;
616:       }
617:       PetscFree(tindicesSends);

619:       /* receiver indices */
620:       for (i=0; i<nr; i++) {
621:         for (k=0; k<rbs; k++) sindicesRecvs[i*rbs+k] = tindicesRecvs[i]+k;
622:       }
623:       PetscFree(tindicesRecvs);

625:       /* create GPU indices, work vectors, ... */
626:       VecScatterCUDAIndicesCreate_PtoP(ns*sbs,sindicesSends,nr*rbs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
627:       PetscFree(sindicesSends);
628:       PetscFree(sindicesRecvs);
629:     }
630:   }
631:   return(0);
632: }

634: /*@C
635:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
636:    another for GPU based computation.

638:    Input Parameter:
639: +  inctx - scatter context generated by VecScatterCreate()

641:   Level: intermediate

643:   Notes:
644:    Effectively, this function resets the temporary buffer flags. Currently, this
645:    only used in the context of the parallel SpMV call in in MatMult_MPIAIJCUDA
646:    or MatMult_MPIAIJCUDAARSE. Once the MatMultAdd is finished, the GPU temporary
647:    buffers used for messaging are no longer valid.

649: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
650: @*/
651: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
652: {
654:   return(0);
655: }

657: #endif