Actual source code: vscatfce.c

petsc-master 2020-08-10
Report Typos and Errors
  1:  #include <petsc/private/vecscatterimpl.h>
  2: #if defined(PETSC_HAVE_CUDA)
  3:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
  4: #endif
  5: /* ------------------------------------------------------------------*/
  6: /*@
  7:    VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
  8:       and the VecScatterEnd() does nothing

 10:    Not Collective

 12:    Input Parameter:
 13: .   ctx - scatter context created with VecScatterCreate()

 15:    Output Parameter:
 16: .   flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()

 18:    Level: developer

 20: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
 21: @*/
 22: PetscErrorCode  VecScatterGetMerged(VecScatter ctx,PetscBool  *flg)
 23: {
 26:   *flg = ctx->beginandendtogether;
 27:   return(0);
 28: }

 30: /*@
 31:    VecScatterBegin - Begins a generalized scatter from one vector to
 32:    another. Complete the scattering phase with VecScatterEnd().

 34:    Neighbor-wise Collective on VecScatter

 36:    Input Parameters:
 37: +  ctx - scatter context generated by VecScatterCreate()
 38: .  x - the vector from which we scatter
 39: .  y - the vector to which we scatter
 40: .  addv - either ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
 41:           not scattered to retains its old value; i.e. the vector is NOT first zeroed.
 42: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
 43:     SCATTER_FORWARD or SCATTER_REVERSE


 46:    Level: intermediate

 48:    Options Database: See VecScatterCreate()

 50:    Notes:
 51:    The vectors x and y need not be the same vectors used in the call
 52:    to VecScatterCreate(), but x must have the same parallel data layout
 53:    as that passed in as the x to VecScatterCreate(), similarly for the y.
 54:    Most likely they have been obtained from VecDuplicate().

 56:    You cannot change the values in the input vector between the calls to VecScatterBegin()
 57:    and VecScatterEnd().

 59:    If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
 60:    the SCATTER_FORWARD.

 62:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

 64:    This scatter is far more general than the conventional
 65:    scatter, since it can be a gather or a scatter or a combination,
 66:    depending on the indices ix and iy.  If x is a parallel vector and y
 67:    is sequential, VecScatterBegin() can serve to gather values to a
 68:    single processor.  Similarly, if y is parallel and x sequential, the
 69:    routine can scatter from one processor to many processors.


 72: .seealso: VecScatterCreate(), VecScatterEnd()
 73: @*/
 74: PetscErrorCode  VecScatterBegin(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 75: {
 77:   PetscInt       to_n,from_n;

 83:   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");

 85:   if (PetscDefined(USE_DEBUG)) {
 86:     /*
 87:      Error checking to make sure these vectors match the vectors used
 88:      to create the vector scatter context. -1 in the from_n and to_n indicate the
 89:      vector lengths are unknown (for example with mapped scatters) and thus
 90:      no error checking is performed.
 91:      */
 92:     if (ctx->from_n >= 0 && ctx->to_n >= 0) {
 93:       VecGetLocalSize(x,&from_n);
 94:       VecGetLocalSize(y,&to_n);
 95:       if (mode & SCATTER_REVERSE) {
 96:         if (to_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,ctx->from_n);
 97:         if (from_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,ctx->to_n);
 98:       } else {
 99:         if (to_n != ctx->to_n)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,ctx->to_n);
100:         if (from_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,ctx->from_n);
101:       }
102:     }
103:   }

105:   ctx->inuse = PETSC_TRUE;
106:   PetscLogEventBegin(VEC_ScatterBegin,ctx,x,y,0);
107:   (*ctx->ops->begin)(ctx,x,y,addv,mode);
108:   if (ctx->beginandendtogether && ctx->ops->end) {
109:     ctx->inuse = PETSC_FALSE;
110:     (*ctx->ops->end)(ctx,x,y,addv,mode);
111:   }
112:   PetscLogEventEnd(VEC_ScatterBegin,ctx,x,y,0);
113:   return(0);
114: }

116: /* --------------------------------------------------------------------*/
117: /*@
118:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
119:    after first calling VecScatterBegin().

121:    Neighbor-wise Collective on VecScatter

123:    Input Parameters:
124: +  ctx - scatter context generated by VecScatterCreate()
125: .  x - the vector from which we scatter
126: .  y - the vector to which we scatter
127: .  addv - one of ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES
128: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
129:      SCATTER_FORWARD, SCATTER_REVERSE

131:    Level: intermediate

133:    Notes:
134:    If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.

136:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

138: .seealso: VecScatterBegin(), VecScatterCreate()
139: @*/
140: PetscErrorCode  VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
141: {

148:   ctx->inuse = PETSC_FALSE;
149:   if (!ctx->ops->end) return(0);
150:   if (!ctx->beginandendtogether) {
151:     PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
152:     (*(ctx)->ops->end)(ctx,x,y,addv,mode);
153:     PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
154:   }
155:   return(0);
156: }

158: /*@
159:    VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()

161:    Collective on VecScatter

163:    Input Parameter:
164: .  ctx - the scatter context

166:    Level: intermediate

168: .seealso: VecScatterCreate(), VecScatterCopy()
169: @*/
170: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
171: {

175:   if (!*ctx) return(0);
177:   if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
178:   if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}

180:   /* if memory was published with SAWs then destroy it */
181:   PetscObjectSAWsViewOff((PetscObject)(*ctx));
182:   if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
183: #if defined(PETSC_HAVE_CUDA)
184:   VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
185: #endif
186:   PetscHeaderDestroy(ctx);
187:   return(0);
188: }

190: /*@
191:    VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors

193:    Collective on VecScatter

195:    Input Parameter:
196: .  ctx - the scatter context

198:    Level: intermediate

200: .seealso: VecScatterCreate(), VecScatterCopy()
201: @*/
202: PetscErrorCode VecScatterSetUp(VecScatter ctx)
203: {

208:   (*ctx->ops->setup)(ctx);
209:   return(0);
210: }

212: /*@
213:    VecScatterCopy - Makes a copy of a scatter context.

215:    Collective on VecScatter

217:    Input Parameter:
218: .  sctx - the scatter context

220:    Output Parameter:
221: .  ctx - the context copy

223:    Level: advanced

225: .seealso: VecScatterCreate(), VecScatterDestroy()
226: @*/
227: PetscErrorCode  VecScatterCopy(VecScatter sctx,VecScatter *ctx)
228: {
230:   VecScatterType type;

235:   if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
236:   PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
237:   (*ctx)->to_n   = sctx->to_n;
238:   (*ctx)->from_n = sctx->from_n;
239:   (*sctx->ops->copy)(sctx,*ctx);

241:   VecScatterGetType(sctx,&type);
242:   PetscObjectChangeTypeName((PetscObject)(*ctx),type);
243:   return(0);
244: }

246: /*@C
247:    VecScatterViewFromOptions - View from Options

249:    Collective on VecScatter

251:    Input Parameters:
252: +  A - the scatter context
253: .  obj - Optional object
254: -  name - command line option

256:    Level: intermediate
257: .seealso:  VecScatter, VecScatterView, PetscObjectViewFromOptions(), VecScatterCreate()
258: @*/
259: PetscErrorCode  VecScatterViewFromOptions(VecScatter A,PetscObject obj,const char name[])
260: {

265:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
266:   return(0);
267: }

269: /* ------------------------------------------------------------------*/
270: /*@C
271:    VecScatterView - Views a vector scatter context.

273:    Collective on VecScatter

275:    Input Parameters:
276: +  ctx - the scatter context
277: -  viewer - the viewer for displaying the context

279:    Level: intermediate

281: @*/
282: PetscErrorCode  VecScatterView(VecScatter ctx,PetscViewer viewer)
283: {

288:   if (!viewer) {
289:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
290:   }
292:   if (ctx->ops->view) {
293:     (*ctx->ops->view)(ctx,viewer);
294:   }
295:   return(0);
296: }

298: /*@C
299:    VecScatterRemap - Remaps the "from" and "to" indices in a
300:    vector scatter context. FOR EXPERTS ONLY!

302:    Collective on VecScatter

304:    Input Parameters:
305: +  scat    - vector scatter context
306: .  tomap   - remapping plan for "to" indices (may be NULL).
307: -  frommap - remapping plan for "from" indices (may be NULL)

309:    Level: developer

311:    Notes:
312:      In the parallel case the todata contains indices from where the data is taken
313:      (and then sent to others)! The fromdata contains indices from where the received
314:      data is finally put locally.

316:      In the sequential case the todata contains indices from where the data is put
317:      and the fromdata contains indices from where the data is taken from.
318:      This is backwards from the paralllel case!

320: @*/
321: PetscErrorCode  VecScatterRemap(VecScatter scat,PetscInt tomap[],PetscInt frommap[])
322: {
323:   VecScatter_MPI_General *to,*from;
324:   VecScatter_Seq_General *sgto,*sgfrom;
325:   VecScatter_Seq_Stride  *ssto;
326:   PetscInt               i,ierr;


333:   if (scat->ops->remap) {
334:     (*scat->ops->remap)(scat,tomap,frommap);
335:   } else {
336:     to     = (VecScatter_MPI_General*)scat->todata;
337:     from   = (VecScatter_MPI_General*)scat->fromdata;
338:     ssto   = (VecScatter_Seq_Stride*)scat->todata;
339:     sgto   = (VecScatter_Seq_General*)scat->todata;
340:     sgfrom = (VecScatter_Seq_General*)scat->fromdata;

342:     /* remap indices from where we take/read data */
343:     if (tomap) {
344:       if (to->format == VEC_SCATTER_MPI_TOALL) {
345:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
346:       } else if (to->format == VEC_SCATTER_MPI_GENERAL) {
347:         /* handle off processor parts */
348:         for (i=0; i<to->starts[to->n]; i++) to->indices[i] = tomap[to->indices[i]];

350:         /* handle local part */
351:         for (i=0; i<to->local.n; i++) to->local.vslots[i] = tomap[to->local.vslots[i]];

353:         /* the memcpy optimizations in vecscatter was based on index patterns it has.
354:            They need to be recalculated when indices are changed (remapped).
355:          */
356:         VecScatterMemcpyPlanDestroy_PtoP(to,from);
357:         VecScatterMemcpyPlanCreate_PtoP(to,from);
358:       } else if (sgfrom->format == VEC_SCATTER_SEQ_GENERAL) {
359:         /* remap indices*/
360:         for (i=0; i<sgfrom->n; i++) sgfrom->vslots[i] = tomap[sgfrom->vslots[i]];
361:         /* update optimizations, which happen when it is a Stride1toSG, SGtoStride1 or SGToSG vecscatter */
362:         if (ssto->format == VEC_SCATTER_SEQ_STRIDE && ssto->step == 1) {
363:           PetscInt tmp[2];
364:           tmp[0] = 0; tmp[1] = sgfrom->n;
365:           VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
366:           VecScatterMemcpyPlanCreate_Index(1,tmp,sgfrom->vslots,1/*bs*/,&sgfrom->memcpy_plan);
367:         } else if (sgto->format == VEC_SCATTER_SEQ_GENERAL) {
368:           VecScatterMemcpyPlanDestroy(&sgto->memcpy_plan);
369:           VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
370:           VecScatterMemcpyPlanCreate_SGToSG(1/*bs*/,sgto,sgfrom);
371:         }
372:       } else if (sgfrom->format == VEC_SCATTER_SEQ_STRIDE) {
373:         VecScatter_Seq_Stride *ssto = (VecScatter_Seq_Stride*)sgfrom;

375:         /* if the remapping is the identity and stride is identity then skip remap */
376:         if (ssto->step == 1 && ssto->first == 0) {
377:           for (i=0; i<ssto->n; i++) {
378:             if (tomap[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
379:           }
380:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
381:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
382:     }
383:   }
384:   if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");

386:   /*
387:     Mark then vector lengths as unknown because we do not know the
388:     lengths of the remapped vectors
389:   */
390:   scat->from_n = -1;
391:   scat->to_n   = -1;
392:   return(0);
393: }

395: /* Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication

397:   Input Parameters:
398: + ctx   - the context (must be a parallel vecscatter)
399: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

401:   Output parameters:
402: + num_procs   - number of remote processors
403: - num_entries - number of vector entries to send or recv


406:   .seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()

408:   Notes:
409:   Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
410:   usually only uses MPI_Send/Recv. This group of subroutines provides info needed for such uses.
411:  */
412: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter ctx,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
413: {
414:   VecScatter_MPI_General *vs;
415:   PetscBool              par;
416:   PetscErrorCode         ierr;

419:   if (ctx->ops->getremotecount) {
420:     (*ctx->ops->getremotecount)(ctx,send,num_procs,num_entries);
421:   } else {
422:     vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
423:     par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
424:     if (num_procs)   *num_procs   = par ? vs->n             : 0;
425:     if (num_entries) *num_entries = par ? vs->starts[vs->n] : 0;
426:   }
427:   return(0);
428: }

430: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
431:    Any output parameter can be NULL.

433:   Input Parameters:
434: + ctx   - the context
435: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

437:   Output parameters:
438: + n        - number of remote processors
439: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
440:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
441:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
442:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
443: . indices  - indices of entries to send/recv
444: . procs    - ranks of remote processors
445: - bs       - block size

447:   .seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
448:  */
449: PetscErrorCode VecScatterGetRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
450: {
451:   VecScatter_MPI_General *vs;
452:   PetscBool              par;
453:   PetscErrorCode         ierr;

456:   if (ctx->ops->getremote) {
457:     (*ctx->ops->getremote)(ctx,send,n,starts,indices,procs,bs);
458:   } else {
459:     vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
460:     par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
461:     if (n)       *n       = par ? vs->n       : 0;
462:     if (indices) *indices = par ? vs->indices : NULL;
463:     if (starts)  *starts  = par ? vs->starts  : NULL;
464:     if (procs)   *procs   = par ? vs->procs   : NULL;
465:     if (bs)      *bs      = par ? vs->bs      : 0;
466:   }
467:   return(0);
468: }


471: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
472:    processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.

474:   Input Parameters:
475: + ctx   - the context
476: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

478:   Output parameters:
479: + n        - number of remote processors
480: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
481:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
482:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
483:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
484: . indices  - indices of entries to send/recv
485: . procs    - ranks of remote processors
486: - bs       - block size

488:   .seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()

490:   Notes:
491:   Output parameters like starts, indices must also be adapted according to the sorted ranks.
492:  */
493: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
494: {
495:   VecScatter_MPI_General *vs;
496:   PetscBool              par;
497:   PetscErrorCode         ierr;

500:   if (ctx->ops->getremoteordered) {
501:     (*ctx->ops->getremoteordered)(ctx,send,n,starts,indices,procs,bs);
502:   } else {
503:     vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
504:     par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
505:     if (n)       *n       = par ? vs->n       : 0;
506:     if (indices) *indices = par ? vs->indices : NULL;
507:     if (starts)  *starts  = par ? vs->starts  : NULL;
508:     if (procs)   *procs   = par ? vs->procs   : NULL;
509:     if (bs)      *bs      = par ? vs->bs      : 0;
510:   }
511:   if (PetscUnlikelyDebug(n && procs)) {
512:     PetscInt i;
513:     /* from back to front to also handle cases *n=0 */
514:     for (i=*n-1; i>0; i--) { if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered"); }
515:   }
516:   return(0);
517: }

519: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
520:    an implementation to free memory allocated in the VecScatterGetRemote_Private call.

522:   Input Parameters:
523: + ctx   - the context
524: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

526:   Output parameters:
527: + n        - number of remote processors
528: . starts   - starting point in indices for each proc
529: . indices  - indices of entries to send/recv
530: . procs    - ranks of remote processors
531: - bs       - block size

533:   .seealso: VecScatterGetRemote_Private()
534:  */
535: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
536: {
537:   PetscErrorCode         ierr;

540:   if (ctx->ops->restoreremote) {
541:     (*ctx->ops->restoreremote)(ctx,send,n,starts,indices,procs,bs);
542:   } else {
543:     if (starts)  *starts  = NULL;
544:     if (indices) *indices = NULL;
545:     if (procs)   *procs   = NULL;
546:   }
547:   return(0);
548: }

550: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
551:    an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.

553:   Input Parameters:
554: + ctx   - the context
555: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

557:   Output parameters:
558: + n        - number of remote processors
559: . starts   - starting point in indices for each proc
560: . indices  - indices of entries to send/recv
561: . procs    - ranks of remote processors
562: - bs       - block size

564:   .seealso: VecScatterGetRemoteOrdered_Private()
565:  */
566: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
567: {
570:   if (ctx->ops->restoreremoteordered) {
571:     (*ctx->ops->restoreremoteordered)(ctx,send,n,starts,indices,procs,bs);
572:   } else {
573:     VecScatterRestoreRemote_Private(ctx,send,n,starts,indices,procs,bs);
574:   }
575:   return(0);
576: }

578: #if defined(PETSC_HAVE_CUDA)

580: /*@C
581:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
582:    to another for GPU based computation.

584:    Input Parameters:
585: +  inctx - scatter context generated by VecScatterCreate()
586: -  x - the vector from which we scatter

588:   Level: intermediate

590:   Notes:
591:    Effectively, this function creates all the necessary indexing buffers and work
592:    vectors needed to move only those data points in a vector which need to
593:    be communicated across ranks. This is done at the first time this function is
594:    called. Currently, this only used in the context of the parallel SpMV call in
595:    MatMult_MPIAIJCUSPARSE.

597:    This function is executed before the call to MatMult. This enables the memory
598:    transfers to be overlapped with the MatMult SpMV kernel call.

600: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
601: @*/
602: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x)
603: {
605:   PetscInt       i,nrecvs,nsends,sbs,rbs,ns,nr;
606:   const PetscInt *sstarts,*rstarts,*sindices,*rindices;
607:   VecScatterType type;
608:   PetscBool      isSF;

611:   VecScatterGetType(inctx,&type);
612:   PetscStrcmp(type,VECSCATTERSF,&isSF);
613:   if (isSF) return(0);

615:   VecScatterGetRemote_Private(inctx,PETSC_TRUE/*send*/, &nsends,&sstarts,&sindices,NULL/*procs*/,&sbs);
616:   VecScatterGetRemote_Private(inctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,&rindices,NULL/*procs*/,&rbs);
617:   ns   = nsends ? sstarts[nsends]-sstarts[0] : 0; /* s/rstarts[0] is not necessarily zero */
618:   nr   = nrecvs ? rstarts[nrecvs]-rstarts[0] : 0;

620:   if (x->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
621:     if (!inctx->spptr) {
622:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
623:       /* Here we create indices for both the senders and receivers. */
624:       PetscMalloc1(ns,&tindicesSends);
625:       PetscMalloc1(nr,&tindicesRecvs);

627:       /* s/rindices and s/rstarts could be NULL when ns or nr is zero */
628:       if (ns) {PetscArraycpy(tindicesSends,&sindices[sstarts[0]],ns);}
629:       if (nr) {PetscArraycpy(tindicesRecvs,&rindices[rstarts[0]],nr);}

631:       PetscSortRemoveDupsInt(&ns,tindicesSends);
632:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);

634:       PetscMalloc1(sbs*ns,&sindicesSends);
635:       PetscMalloc1(rbs*nr,&sindicesRecvs);

637:       /* sender indices */
638:       for (i=0; i<ns; i++) {
639:         for (k=0; k<sbs; k++) sindicesSends[i*sbs+k] = tindicesSends[i]+k;
640:       }
641:       PetscFree(tindicesSends);

643:       /* receiver indices */
644:       for (i=0; i<nr; i++) {
645:         for (k=0; k<rbs; k++) sindicesRecvs[i*rbs+k] = tindicesRecvs[i]+k;
646:       }
647:       PetscFree(tindicesRecvs);

649:       /* create GPU indices, work vectors, ... */
650:       VecScatterCUDAIndicesCreate_PtoP(ns*sbs,sindicesSends,nr*rbs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
651:       PetscFree(sindicesSends);
652:       PetscFree(sindicesRecvs);
653:     }
654:   }
655:   return(0);
656: }

658: /*@C
659:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
660:    another for GPU based computation.

662:    Input Parameter:
663: .  inctx - scatter context generated by VecScatterCreate()

665:   Level: intermediate

667:   Notes:
668:    Effectively, this function resets the temporary buffer flags. Currently, this
669:    only used in the context of the parallel SpMV call in in MatMult_MPIAIJCUSPARSE
670:    Once the MatMultAdd is finished, the GPU temporary buffers used for messaging are no longer valid.

672: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
673: @*/
674: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
675: {
677:   return(0);
678: }

680: #endif