Actual source code: vscat.c

petsc-3.8.3 2017-12-09

  2: /*
  3:      Code for creating scatters between vectors. This file
  4:   includes the code for scattering between sequential vectors and
  5:   some special cases for parallel scatters.
  6: */

  8:  #include <petsc/private/isimpl.h>
  9:  #include <petsc/private/vecimpl.h>

 11: #if defined(PETSC_HAVE_CUSP)
 12: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUSPIndices*);
 13: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
 14: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
 15: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);
 16: #elif defined(PETSC_HAVE_VECCUDA)
 17: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUDAIndices*);
 18: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*);
 19: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*);
 20: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode);
 21: #endif

 23: /* Logging support */
 24: PetscClassId VEC_SCATTER_CLASSID;

 26: #if defined(PETSC_USE_DEBUG)
 27: /*
  28:      Checks if any indices are negative or greater than or equal to the given maximum (nmax) and generates an error
 29: */
 30: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
 31: {
 32:   PetscInt i;

 35:   for (i=0; i<n; i++) {
 36:     if (idx[i] < 0)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
 37:     if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
 38:   }
 39:   return(0);
 40: }
 41: #endif
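
/*
   A minimal sketch of what the check above catches (illustrative only: the
   helper name and sizes are ours, and error handling is elided as in the
   rest of this listing).  In a debug build, VecScatterCreate() runs
   VecScatterCheckIndices_Private() on the supplied index sets.
*/
static PetscErrorCode SketchNegativeIndexCaught(void)
{
  Vec        x;
  IS         bad,one;
  VecScatter ctx;
  PetscInt   idx[1] = {-1};                      /* out of range: negative */

  VecCreateSeq(PETSC_COMM_SELF,4,&x);
  ISCreateGeneral(PETSC_COMM_SELF,1,idx,PETSC_COPY_VALUES,&bad);
  ISCreateStride(PETSC_COMM_SELF,1,0,1,&one);
  /* fails here with "Negative index -1 at 0 location" when PETSC_USE_DEBUG is set */
  VecScatterCreate(x,bad,x,one,&ctx);
  ISDestroy(&bad);
  ISDestroy(&one);
  VecDestroy(&x);
  return(0);
}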

 43: /*
 44:       This is special scatter code for when the entire parallel vector is copied to each processor.

 46:    This code was written by Cameron Cooper, Occidental College, Fall 1995,
 47:    while working at ANL as a SERS student.
 48: */
 49: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 50: {
 52:   PetscInt       yy_n,xx_n;
 53:   PetscScalar    *xv,*yv;

 56:   VecGetLocalSize(y,&yy_n);
 57:   VecGetLocalSize(x,&xx_n);
 58:   VecGetArrayPair(x,y,&xv,&yv);

 60:   if (mode & SCATTER_REVERSE) {
 61:     PetscScalar          *xvt,*xvt2;
 62:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 63:     PetscInt             i;
 64:     PetscMPIInt          *disply = scat->displx;

 66:     if (addv == INSERT_VALUES) {
 67:       PetscInt rstart,rend;
 68:       /*
 69:          copy the correct part of the local vector into the local storage of
 70:          the MPI one.  Note: this operation only makes sense if all the local
 71:          vectors have the same values
 72:       */
 73:       VecGetOwnershipRange(y,&rstart,&rend);
 74:       PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
 75:     } else {
 76:       MPI_Comm    comm;
 77:       PetscMPIInt rank;
 78:       PetscObjectGetComm((PetscObject)y,&comm);
 79:       MPI_Comm_rank(comm,&rank);
 80:       if (scat->work1) xvt = scat->work1;
 81:       else {
 82:         PetscMalloc1(xx_n,&xvt);
 83:         scat->work1 = xvt;
 84:       }
 85:       if (!rank) { /* I am the zeroth processor, values are accumulated here */
 86:         if   (scat->work2) xvt2 = scat->work2;
 87:         else {
 88:           PetscMalloc1(xx_n,&xvt2);
 89:           scat->work2 = xvt2;
 90:         }
 91:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 92:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 93:         if (addv == ADD_VALUES) {
 94:           for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
 95: #if !defined(PETSC_USE_COMPLEX)
 96:         } else if (addv == MAX_VALUES) {
 97:           for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
 98: #endif
 99:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
100:         MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
101:       } else {
102:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
103:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
104:         MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
105:       }
106:     }
107:   } else {
108:     PetscScalar          *yvt;
109:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
110:     PetscInt             i;
111:     PetscMPIInt          *displx = scat->displx;

113:     if (addv == INSERT_VALUES) {
114:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
115:     } else {
116:       if (scat->work1) yvt = scat->work1;
117:       else {
118:         PetscMalloc1(yy_n,&yvt);
119:         scat->work1 = yvt;
120:       }
121:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
122:       if (addv == ADD_VALUES) {
123:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
124: #if !defined(PETSC_USE_COMPLEX)
125:       } else if (addv == MAX_VALUES) {
126:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
127: #endif
128:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
129:     }
130:   }
131:   VecRestoreArrayPair(x,y,&xv,&yv);
132:   return(0);
133: }
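
/*
   Usage sketch for the to-all case handled above (illustrative: the helper
   name is ours; error handling is elided as in this listing).
   VecScatterCreateToAll() builds both the scatter context and the
   full-length sequential work vector; SCATTER_REVERSE moves data back into
   the parallel vector.
*/
static PetscErrorCode SketchScatterToAll(Vec mpivec)
{
  Vec        seqcopy;
  VecScatter toall;

  VecScatterCreateToAll(mpivec,&toall,&seqcopy);
  VecScatterBegin(toall,mpivec,seqcopy,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(toall,mpivec,seqcopy,INSERT_VALUES,SCATTER_FORWARD);
  /* every process now holds the entire vector in seqcopy */
  VecScatterDestroy(&toall);
  VecDestroy(&seqcopy);
  return(0);
}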

135: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
136: {
138:   PetscBool      isascii;

141:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
142:   if (isascii) {
143:     PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
144:   }
145:   return(0);
146: }

148: /*
149:       This is special scatter code for when the entire parallel vector is copied to processor 0.

151: */
152: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
153: {
154:   PetscErrorCode    ierr;
155:   PetscMPIInt       rank;
156:   PetscInt          yy_n,xx_n;
157:   PetscScalar       *yv;
158:   const PetscScalar *xv;
159:   MPI_Comm          comm;

162:   VecGetLocalSize(y,&yy_n);
163:   VecGetLocalSize(x,&xx_n);
164:   VecGetArrayRead(x,&xv);
165:   VecGetArray(y,&yv);

167:   PetscObjectGetComm((PetscObject)x,&comm);
168:   MPI_Comm_rank(comm,&rank);

170:   /* --------  Reverse scatter; spread from processor 0 to other processors */
171:   if (mode & SCATTER_REVERSE) {
172:     PetscScalar          *yvt;
173:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
174:     PetscInt             i;
175:     PetscMPIInt          *disply = scat->displx;

177:     if (addv == INSERT_VALUES) {
178:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
179:     } else {
180:       if (scat->work2) yvt = scat->work2;
181:       else {
182:         PetscMalloc1(xx_n,&yvt);
183:         scat->work2 = yvt;
184:       }
185:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
186:       if (addv == ADD_VALUES) {
187:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
188: #if !defined(PETSC_USE_COMPLEX)
189:       } else if (addv == MAX_VALUES) {
190:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
191: #endif
192:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
193:     }
194:   /* ---------  Forward scatter; gather all values onto processor 0 */
195:   } else {
196:     PetscScalar          *yvt  = 0;
197:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
198:     PetscInt             i;
199:     PetscMPIInt          *displx = scat->displx;

201:     if (addv == INSERT_VALUES) {
202:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
203:     } else {
204:       if (!rank) {
205:         if (scat->work1) yvt = scat->work1;
206:         else {
207:           PetscMalloc1(yy_n,&yvt);
208:           scat->work1 = yvt;
209:         }
210:       }
211:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
212:       if (!rank) {
213:         if (addv == ADD_VALUES) {
214:           for (i=0; i<yy_n; i++) yv[i] += yvt[i];
215: #if !defined(PETSC_USE_COMPLEX)
216:         } else if (addv == MAX_VALUES) {
217:           for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
218: #endif
219:         }  else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
220:       }
221:     }
222:   }
223:   VecRestoreArrayRead(x,&xv);
224:   VecRestoreArray(y,&yv);
225:   return(0);
226: }
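
/*
   Usage sketch for the to-processor-0 case above (illustrative: the helper
   name is ours; error handling elided).  VecScatterCreateToZero() creates
   the context plus a sequential vector that has the full length on rank 0
   and length zero elsewhere.
*/
static PetscErrorCode SketchScatterToZero(Vec mpivec)
{
  Vec        onroot;
  VecScatter tozero;

  VecScatterCreateToZero(mpivec,&tozero,&onroot);
  VecScatterBegin(tozero,mpivec,onroot,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(tozero,mpivec,onroot,INSERT_VALUES,SCATTER_FORWARD);
  /* rank 0 now holds the entire vector in onroot */
  VecScatterDestroy(&tozero);
  VecDestroy(&onroot);
  return(0);
}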

228: /*
229:        The following two routines are used by both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
230: */
231: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
232: {
233:   VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
234:   PetscErrorCode       ierr;

237:   PetscFree(scat->work1);
238:   PetscFree(scat->work2);
239:   PetscFree3(ctx->todata,scat->count,scat->displx);
240:   return(0);
241: }

243: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
244: {

248:   PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
249:   PetscFree2(ctx->todata,ctx->fromdata);
250:   return(0);
251: }

253: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
254: {

258:   PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
259:   PetscFree2(ctx->todata,ctx->fromdata);
260:   return(0);
261: }

263: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
264: {

268:   PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
269:   PetscFree2(ctx->todata,ctx->fromdata);
270:   return(0);
271: }

273: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
274: {

278:   PetscFree2(ctx->todata,ctx->fromdata);
279:   return(0);
280: }

282: /* -------------------------------------------------------------------------*/
283: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
284: {
285:   VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
286:   PetscErrorCode       ierr;
287:   PetscMPIInt          size,*count,*displx;
288:   PetscInt             i;

291:   out->ops->begin   = in->ops->begin;
292:   out->ops->end     = in->ops->end;
293:   out->ops->copy    = in->ops->copy;
294:   out->ops->destroy = in->ops->destroy;
295:   out->ops->view    = in->ops->view;

297:   MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
298:   PetscMalloc3(1,&sto,size,&count,size,&displx);
299:   sto->type   = in_to->type;
300:   sto->count  = count;
301:   sto->displx = displx;
302:   for (i=0; i<size; i++) {
303:     sto->count[i]  = in_to->count[i];
304:     sto->displx[i] = in_to->displx[i];
305:   }
306:   sto->work1    = 0;
307:   sto->work2    = 0;
308:   out->todata   = (void*)sto;
309:   out->fromdata = (void*)0;
310:   return(0);
311: }
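
/*
   Sketch of why the copy above is useful (the helper name is ours; error
   handling elided): a single VecScatter context cannot be used by two
   scatters at once, but VecScatterCopy() produces an independent context,
   with its own work arrays, that can run concurrently with the original.
*/
static PetscErrorCode SketchIndependentScatter(VecScatter in,Vec x,Vec y)
{
  VecScatter dup;

  VecScatterCopy(in,&dup);
  VecScatterBegin(dup,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(dup,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterDestroy(&dup);
  return(0);
}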

313: /* --------------------------------------------------------------------------------------*/
314: /*
315:         Scatter: sequential general to sequential general
316: */
317: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
318: {
319:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
320:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
321:   PetscErrorCode         ierr;
322:   PetscInt               i,n = gen_from->n,*fslots,*tslots;
323:   PetscScalar            *xv,*yv;

326: #if defined(PETSC_HAVE_CUSP)
327:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
328:     /* create the scatter indices if not done already */
329:     if (!ctx->spptr) {
330:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
331:       fslots = gen_from->vslots;
332:       tslots = gen_to->vslots;
333:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
334:     }
335:     /* next do the scatter */
336:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
337:     return(0);
338:   }
339: #elif defined(PETSC_HAVE_VECCUDA)
340:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
341:     /* create the scatter indices if not done already */
342:     if (!ctx->spptr) {
343:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
344:       fslots = gen_from->vslots;
345:       tslots = gen_to->vslots;
346:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
347:     }
348:     /* next do the scatter */
349:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
350:     return(0);
351:   }
352: #endif

354:   VecGetArrayPair(x,y,&xv,&yv);
355:   if (mode & SCATTER_REVERSE) {
356:     gen_to   = (VecScatter_Seq_General*)ctx->fromdata;
357:     gen_from = (VecScatter_Seq_General*)ctx->todata;
358:   }
359:   fslots = gen_from->vslots;
360:   tslots = gen_to->vslots;

362:   if (addv == INSERT_VALUES) {
363:     for (i=0; i<n; i++) yv[tslots[i]]  = xv[fslots[i]];
364:   } else if (addv == ADD_VALUES) {
365:     for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]];
366: #if !defined(PETSC_USE_COMPLEX)
367:   } else if (addv == MAX_VALUES) {
368:     for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]);
369: #endif
370:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
371:   VecRestoreArrayPair(x,y,&xv,&yv);
372:   return(0);
373: }
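
/*
   Sketch of a scatter that dispatches to the routine above (names and index
   values are ours; error handling elided): two sequential vectors with
   general index sets on both sides.
*/
static PetscErrorCode SketchSeqGeneralToGeneral(void)
{
  Vec        x,y;
  IS         ix,iy;
  VecScatter ctx;
  PetscInt   from[3] = {0,2,4},to[3] = {5,1,3};

  VecCreateSeq(PETSC_COMM_SELF,6,&x);
  VecCreateSeq(PETSC_COMM_SELF,6,&y);
  ISCreateGeneral(PETSC_COMM_SELF,3,from,PETSC_COPY_VALUES,&ix);
  ISCreateGeneral(PETSC_COMM_SELF,3,to,PETSC_COPY_VALUES,&iy);
  VecScatterCreate(x,ix,y,iy,&ctx);
  /* y[5] = x[0], y[1] = x[2], y[3] = x[4] */
  VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterDestroy(&ctx);
  ISDestroy(&ix);
  ISDestroy(&iy);
  VecDestroy(&x);
  VecDestroy(&y);
  return(0);
}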

375: /*
376:     Scatter: sequential general to sequential stride 1
377: */
378: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
379: {
380:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
381:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
382:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
383:   PetscErrorCode         ierr;
384:   PetscInt               first = gen_to->first;
385:   PetscScalar            *xv,*yv;

388: #if defined(PETSC_HAVE_CUSP)
389:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
390:     /* create the scatter indices if not done already */
391:     if (!ctx->spptr) {
392:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
393:       PetscInt *tslots = 0;
394:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
395:     }
396:     /* next do the scatter */
397:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
398:     return(0);
399:   }
400: #elif defined(PETSC_HAVE_VECCUDA)
401:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
402:     /* create the scatter indices if not done already */
403:     if (!ctx->spptr) {
404:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
405:       PetscInt *tslots = 0;
406:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
407:     }
408:     /* next do the scatter */
409:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
410:     return(0);
411:   }
412: #endif

414:   VecGetArrayPair(x,y,&xv,&yv);
415:   if (mode & SCATTER_REVERSE) {
416:     xv += first;
417:     if (addv == INSERT_VALUES) {
418:       for (i=0; i<n; i++) yv[fslots[i]] = xv[i];
419:     } else if (addv == ADD_VALUES) {
420:       for (i=0; i<n; i++) yv[fslots[i]] += xv[i];
421: #if !defined(PETSC_USE_COMPLEX)
422:     } else if (addv == MAX_VALUES) {
423:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[i]);
424: #endif
425:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
426:   } else {
427:     yv += first;
428:     if (addv == INSERT_VALUES) {
429:       for (i=0; i<n; i++) yv[i] = xv[fslots[i]];
430:     } else if (addv == ADD_VALUES) {
431:       for (i=0; i<n; i++) yv[i] += xv[fslots[i]];
432: #if !defined(PETSC_USE_COMPLEX)
433:     } else if (addv == MAX_VALUES) {
434:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[fslots[i]]);
435: #endif
436:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
437:   }
438:   VecRestoreArrayPair(x,y,&xv,&yv);
439:   return(0);
440: }

442: /*
443:    Scatter: sequential general to sequential stride
444: */
445: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
446: {
447:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
448:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
449:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
450:   PetscErrorCode         ierr;
451:   PetscInt               first = gen_to->first,step = gen_to->step;
452:   PetscScalar            *xv,*yv;

455: #if defined(PETSC_HAVE_CUSP)
456:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
457:     /* create the scatter indices if not done already */
458:     if (!ctx->spptr) {
459:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
 460:       PetscInt *tslots = 0;
461:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
462:     }
463:     /* next do the scatter */
464:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
465:     return(0);
466:   }
467: #elif defined(PETSC_HAVE_VECCUDA)
468:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
469:     /* create the scatter indices if not done already */
470:     if (!ctx->spptr) {
471:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
 472:       PetscInt *tslots = 0;
473:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
474:     }
475:     /* next do the scatter */
476:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
477:     return(0);
478:   }
479: #endif

481:   VecGetArrayPair(x,y,&xv,&yv);
482:   if (mode & SCATTER_REVERSE) {
483:     if (addv == INSERT_VALUES) {
484:       for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
485:     } else if (addv == ADD_VALUES) {
486:       for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
487: #if !defined(PETSC_USE_COMPLEX)
488:     } else if (addv == MAX_VALUES) {
489:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
490: #endif
491:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
492:   } else {
493:     if (addv == INSERT_VALUES) {
494:       for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
495:     } else if (addv == ADD_VALUES) {
496:       for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
497: #if !defined(PETSC_USE_COMPLEX)
498:     } else if (addv == MAX_VALUES) {
499:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
500: #endif
501:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
502:   }
503:   VecRestoreArrayPair(x,y,&xv,&yv);
504:   return(0);
505: }
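
/*
   Sketch of a general-to-stride scatter (names and values are ours; error
   handling elided): gathering arbitrary entries of x into the front of y.
   A destination stride of 1, as here, dispatches to the Stride1 variant
   above it; any other step uses this general routine.
*/
static PetscErrorCode SketchGatherToFront(Vec x,Vec y)
{
  IS         ix,iy;
  VecScatter ctx;
  PetscInt   from[3] = {1,4,7};

  ISCreateGeneral(PETSC_COMM_SELF,3,from,PETSC_COPY_VALUES,&ix);
  ISCreateStride(PETSC_COMM_SELF,3,0,1,&iy);   /* y[0],y[1],y[2] */
  VecScatterCreate(x,ix,y,iy,&ctx);
  VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterDestroy(&ctx);
  ISDestroy(&ix);
  ISDestroy(&iy);
  return(0);
}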

507: /*
508:     Scatter: sequential stride 1 to sequential general
509: */
510: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
511: {
512:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
513:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
514:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
515:   PetscErrorCode         ierr;
516:   PetscInt               first = gen_from->first;
517:   PetscScalar            *xv,*yv;

520: #if defined(PETSC_HAVE_CUSP)
521:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
522:     /* create the scatter indices if not done already */
523:     if (!ctx->spptr) {
524:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
525:       PetscInt *fslots = 0;
526:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
527:     }
528:     /* next do the scatter */
529:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
530:     return(0);
531:   }
532: #elif defined(PETSC_HAVE_VECCUDA)
533:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
534:     /* create the scatter indices if not done already */
535:     if (!ctx->spptr) {
536:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
537:       PetscInt *fslots = 0;
538:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
539:     }
540:     /* next do the scatter */
541:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
542:     return(0);
543:   }
544: #endif

546:   VecGetArrayPair(x,y,&xv,&yv);
547:   if (mode & SCATTER_REVERSE) {
548:     yv += first;
549:     if (addv == INSERT_VALUES) {
550:       for (i=0; i<n; i++) yv[i] = xv[tslots[i]];
551:     } else  if (addv == ADD_VALUES) {
552:       for (i=0; i<n; i++) yv[i] += xv[tslots[i]];
553: #if !defined(PETSC_USE_COMPLEX)
554:     } else  if (addv == MAX_VALUES) {
555:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[tslots[i]]);
556: #endif
557:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
558:   } else {
559:     xv += first;
560:     if (addv == INSERT_VALUES) {
561:       for (i=0; i<n; i++) yv[tslots[i]] = xv[i];
562:     } else  if (addv == ADD_VALUES) {
563:       for (i=0; i<n; i++) yv[tslots[i]] += xv[i];
564: #if !defined(PETSC_USE_COMPLEX)
565:     } else  if (addv == MAX_VALUES) {
566:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[i]);
567: #endif
568:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
569:   }
570:   VecRestoreArrayPair(x,y,&xv,&yv);
571:   return(0);
572: }

574: /*
575:    Scatter: sequential stride to sequential general
576: */
577: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
578: {
579:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
580:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
581:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
582:   PetscErrorCode         ierr;
583:   PetscInt               first = gen_from->first,step = gen_from->step;
584:   PetscScalar            *xv,*yv;

587: #if defined(PETSC_HAVE_CUSP)
588:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
589:     /* create the scatter indices if not done already */
590:     if (!ctx->spptr) {
591:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
592:       PetscInt *fslots = 0;
593:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
594:     }
595:     /* next do the scatter */
596:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
597:     return(0);
598:   }
599: #elif defined(PETSC_HAVE_VECCUDA)
600:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
601:     /* create the scatter indices if not done already */
602:     if (!ctx->spptr) {
603:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
604:       PetscInt *fslots = 0;
605:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
606:     }
607:     /* next do the scatter */
608:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
609:     return(0);
610:   }
611: #endif

613:   VecGetArrayPair(x,y,&xv,&yv);
614:   if (mode & SCATTER_REVERSE) {
615:     if (addv == INSERT_VALUES) {
616:       for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
617:     } else if (addv == ADD_VALUES) {
618:       for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
619: #if !defined(PETSC_USE_COMPLEX)
620:     } else if (addv == MAX_VALUES) {
621:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
622: #endif
623:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
624:   } else {
625:     if (addv == INSERT_VALUES) {
626:       for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
627:     } else if (addv == ADD_VALUES) {
628:       for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
629: #if !defined(PETSC_USE_COMPLEX)
630:     } else if (addv == MAX_VALUES) {
631:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
632: #endif
633:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
634:   }
635:   VecRestoreArrayPair(x,y,&xv,&yv);
636:   return(0);
637: }
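
/*
   Sketch of the stride-to-general case above, and of SCATTER_REVERSE (names
   and values are ours; error handling elided): the forward scatter spreads a
   contiguous block into scattered slots, and the reverse performs the
   inverse mapping with the same context and the vector arguments swapped.
*/
static PetscErrorCode SketchSpreadFromFront(Vec x,Vec y)
{
  IS         ix,iy;
  VecScatter ctx;
  PetscInt   to[3] = {5,1,3};

  ISCreateStride(PETSC_COMM_SELF,3,0,1,&ix);   /* x[0],x[1],x[2] */
  ISCreateGeneral(PETSC_COMM_SELF,3,to,PETSC_COPY_VALUES,&iy);
  VecScatterCreate(x,ix,y,iy,&ctx);
  VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  /* the inverse: pull y[5],y[1],y[3] back into x[0],x[1],x[2] */
  VecScatterBegin(ctx,y,x,INSERT_VALUES,SCATTER_REVERSE);
  VecScatterEnd(ctx,y,x,INSERT_VALUES,SCATTER_REVERSE);
  VecScatterDestroy(&ctx);
  ISDestroy(&ix);
  ISDestroy(&iy);
  return(0);
}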

639: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
640: {
641:   PetscErrorCode         ierr;
642:   VecScatter_Seq_Stride  *in_from = (VecScatter_Seq_Stride*)in->fromdata;
643:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
644:   PetscInt               i;
645:   PetscBool              isascii;

648:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
649:   if (isascii) {
650:     PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
651:     for (i=0; i<in_to->n; i++) {
652:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
653:     }
654:   }
655:   return(0);
656: }

657: /*
658:      Scatter: sequential stride to sequential stride
659: */
660: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
661: {
662:   VecScatter_Seq_Stride *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
663:   VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
664:   PetscInt              i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
665:   PetscErrorCode        ierr;
666:   PetscInt              from_first = gen_from->first,from_step = gen_from->step;
667:   PetscScalar           *xv,*yv;

670: #if defined(PETSC_HAVE_CUSP)
671:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
672:     /* create the scatter indices if not done already */
673:     if (!ctx->spptr) {
674:       PetscInt *tslots = 0,*fslots = 0;
675:       VecScatterCUSPIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
676:     }
677:     /* next do the scatter */
678:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
679:     return(0);
680:   }
681: #elif defined(PETSC_HAVE_VECCUDA)
682:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
683:     /* create the scatter indices if not done already */
684:     if (!ctx->spptr) {
685:       PetscInt *tslots = 0,*fslots = 0;
686:       VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
687:     }
688:     /* next do the scatter */
689:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
690:     return(0);
691:   }
692: #endif

694:   VecGetArrayPair(x,y,&xv,&yv);
695:   if (mode & SCATTER_REVERSE) {
696:     from_first = gen_to->first;
697:     to_first   = gen_from->first;
698:     from_step  = gen_to->step;
699:     to_step    = gen_from->step;
700:   }

702:   if (addv == INSERT_VALUES) {
703:     if (to_step == 1 && from_step == 1) {
704:       PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
705:     } else  {
706:       for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
707:     }
708:   } else if (addv == ADD_VALUES) {
709:     if (to_step == 1 && from_step == 1) {
710:       yv += to_first; xv += from_first;
711:       for (i=0; i<n; i++) yv[i] += xv[i];
712:     } else {
713:       for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
714:     }
715: #if !defined(PETSC_USE_COMPLEX)
716:   } else if (addv == MAX_VALUES) {
717:     if (to_step == 1 && from_step == 1) {
718:       yv += to_first; xv += from_first;
719:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[i]);
720:     } else {
721:       for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
722:     }
723: #endif
724:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
725:   VecRestoreArrayPair(x,y,&xv,&yv);
726:   return(0);
727: }
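
/*
   Sketch of a stride-to-stride scatter (the helper name is ours; error
   handling elided): copying every other entry of x into a contiguous y.
   When both steps are 1 and the mode is INSERT_VALUES, the routine above
   reduces to a single PetscMemcpy().
*/
static PetscErrorCode SketchEveryOtherEntry(Vec x,Vec y)
{
  IS         ix,iy;
  VecScatter ctx;
  PetscInt   n;

  VecGetLocalSize(y,&n);                       /* assumes x has length 2n */
  ISCreateStride(PETSC_COMM_SELF,n,0,2,&ix);   /* x[0],x[2],x[4],... */
  ISCreateStride(PETSC_COMM_SELF,n,0,1,&iy);   /* y[0],y[1],y[2],... */
  VecScatterCreate(x,ix,y,iy,&ctx);
  VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterDestroy(&ctx);
  ISDestroy(&ix);
  ISDestroy(&iy);
  return(0);
}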

729: /* --------------------------------------------------------------------------*/


732: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
733: {
734:   PetscErrorCode         ierr;
735:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
736:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

739:   out->ops->begin   = in->ops->begin;
740:   out->ops->end     = in->ops->end;
741:   out->ops->copy    = in->ops->copy;
742:   out->ops->destroy = in->ops->destroy;
743:   out->ops->view    = in->ops->view;

745:   PetscMalloc2(1,&out_to,1,&out_from);
746:   PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
747:   out_to->n                    = in_to->n;
748:   out_to->type                 = in_to->type;
749:   out_to->nonmatching_computed = PETSC_FALSE;
750:   out_to->slots_nonmatching    = 0;
751:   out_to->is_copy              = PETSC_FALSE;
752:   PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));

754:   out_from->n                    = in_from->n;
755:   out_from->type                 = in_from->type;
756:   out_from->nonmatching_computed = PETSC_FALSE;
757:   out_from->slots_nonmatching    = 0;
758:   out_from->is_copy              = PETSC_FALSE;
759:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

761:   out->todata   = (void*)out_to;
762:   out->fromdata = (void*)out_from;
763:   return(0);
764: }

766: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
767: {
768:   PetscErrorCode         ierr;
769:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
770:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
771:   PetscInt               i;
772:   PetscBool              isascii;

775:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
776:   if (isascii) {
777:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
778:     for (i=0; i<in_to->n; i++) {
779:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
780:     }
781:   }
782:   return(0);
783: }


786: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
787: {
788:   PetscErrorCode         ierr;
789:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
790:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

793:   out->ops->begin   = in->ops->begin;
794:   out->ops->end     = in->ops->end;
795:   out->ops->copy    = in->ops->copy;
796:   out->ops->destroy = in->ops->destroy;
797:   out->ops->view    = in->ops->view;

799:   PetscMalloc2(1,&out_to,1,&out_from);
800:   PetscMalloc1(in_from->n,&out_from->vslots);
801:   out_to->n     = in_to->n;
802:   out_to->type  = in_to->type;
803:   out_to->first = in_to->first;
804:   out_to->step  = in_to->step;

807:   out_from->n                    = in_from->n;
808:   out_from->type                 = in_from->type;
809:   out_from->nonmatching_computed = PETSC_FALSE;
810:   out_from->slots_nonmatching    = 0;
811:   out_from->is_copy              = PETSC_FALSE;
812:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

814:   out->todata   = (void*)out_to;
815:   out->fromdata = (void*)out_from;
816:   return(0);
817: }

819: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
820: {
821:   PetscErrorCode         ierr;
822:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata;
823:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
824:   PetscInt               i;
825:   PetscBool              isascii;

828:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
829:   if (isascii) {
830:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
831:     for (i=0; i<in_to->n; i++) {
832:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
833:     }
834:   }
835:   return(0);
836: }

838: /* --------------------------------------------------------------------------*/
839: /*
840:     Scatter: sequential stride to sequential stride (this also covers the parallel-to-sequential case where both sides are local strides).
841: */
842: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
843: {
844:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
845:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
846:   PetscErrorCode        ierr;

849:   out->ops->begin   = in->ops->begin;
850:   out->ops->end     = in->ops->end;
851:   out->ops->copy    = in->ops->copy;
852:   out->ops->destroy = in->ops->destroy;
853:   out->ops->view    = in->ops->view;

855:   PetscMalloc2(1,&out_to,1,&out_from);
856:   out_to->n       = in_to->n;
857:   out_to->type    = in_to->type;
858:   out_to->first   = in_to->first;
859:   out_to->step    = in_to->step;
861:   out_from->n     = in_from->n;
862:   out_from->type  = in_from->type;
863:   out_from->first = in_from->first;
864:   out_from->step  = in_from->step;
866:   out->todata     = (void*)out_to;
867:   out->fromdata   = (void*)out_from;
868:   return(0);
869: }

871: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
872: {
873:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata;
874:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
875:   PetscErrorCode        ierr;
876:   PetscBool             isascii;

879:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
880:   if (isascii) {
 881:     PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step %D to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
882:   }
883:   return(0);
884: }


887: extern PetscErrorCode VecScatterCreate_PtoS(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
888: extern PetscErrorCode VecScatterCreate_PtoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
889: extern PetscErrorCode VecScatterCreate_StoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);

891: /* =======================================================================*/
892: #define VEC_SEQ_ID 0
893: #define VEC_MPI_ID 1
894: #define IS_GENERAL_ID 0
895: #define IS_STRIDE_ID  1
896: #define IS_BLOCK_ID   2

898: /*
 899:    Block sizes for which we have optimized scatters
900: */

902: #define VecScatterOptimizedBS(mbs) (2 <= mbs)


905: PetscErrorCode  VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
906: {
907:   VecScatter     ctx;

911:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
912:   ctx->inuse               = PETSC_FALSE;
913:   ctx->beginandendtogether = PETSC_FALSE;
914:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
915:   if (ctx->beginandendtogether) {
916:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
917:   }
918:   ctx->packtogether = PETSC_FALSE;
919:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
920:   if (ctx->packtogether) {
921:     PetscInfo(ctx,"Pack all messages before sending\n");
922:   }
923:   *newctx = ctx;
924:   return(0);
925: }
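
/*
   Sketch (assumes the global options database; the helper name is ours):
   the two options read above can also be set programmatically before the
   scatter is created, instead of on the command line.
*/
static PetscErrorCode SketchSetScatterOptions(void)
{
  PetscOptionsSetValue(NULL,"-vecscatter_merge",NULL);
  PetscOptionsSetValue(NULL,"-vecscatter_packtogether",NULL);
  return(0);
}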

927: /*@C
928:    VecScatterCreate - Creates a vector scatter context.

930:    Collective on Vec

932:    Input Parameters:
933: +  xin - a vector that defines the shape (parallel data layout of the vector)
934:          of vectors from which we scatter
935: .  yin - a vector that defines the shape (parallel data layout of the vector)
936:          of vectors to which we scatter
937: .  ix - the indices of xin to scatter (if NULL scatters all values)
938: -  iy - the indices of yin to hold results (if NULL fills entire vector yin)

940:    Output Parameter:
941: .  newctx - location to store the new scatter context

943:    Options Database Keys: (uses regular MPI_Sends by default)
 944: +  -vecscatter_view         - Prints details of the communications
 945: .  -vecscatter_view ::ascii_info    - Prints fewer details about the communications
 946: .  -vecscatter_ssend        - Uses MPI_Ssend_init() instead of MPI_Send_init()
 947: .  -vecscatter_rsend        - Uses ready-receiver mode for MPI sends
 948: .  -vecscatter_merge        - VecScatterBegin() handles all of the communication and VecScatterEnd() is a no-op;
 949:                               this eliminates the chance of overlapping computation and communication
 950: .  -vecscatter_sendfirst    - Posts sends before receives
 951: .  -vecscatter_packtogether - Packs all messages before sending, receives all messages before unpacking
 952: .  -vecscatter_alltoall     - Uses MPI all-to-all communication for the scatter
 953: .  -vecscatter_window       - Uses MPI-2 window operations to move the data
 954: .  -vecscatter_nopack       - Avoids packing to the work vector when possible (if used with -vecscatter_alltoall then MPI_Alltoallw() is used)
 955: -  -vecscatter_reproduce    - Ensures that the communications are done in the same order for each scatter; under certain circumstances
 956:                               this will make the results of scatters deterministic when they otherwise are not (it may also be slower).

958: $
959: $                                                                                    --When packing is used--
960: $                               MPI Datatypes (no packing)  sendfirst   merge        packtogether  persistent*
961: $                                _nopack                   _sendfirst    _merge      _packtogether                -vecscatter_
962: $ ----------------------------------------------------------------------------------------------------------------------------
963: $    Message passing    Send       p                           X            X           X         always
964: $                      Ssend       p                           X            X           X         always          _ssend
965: $                      Rsend       p                        nonsense        X           X         always          _rsend
966: $    AlltoAll  v or w              X                        nonsense     always         X         nonsense        _alltoall
967: $    MPI_Win                       p                        nonsense        p           p         nonsense        _window
968: $
 969: $   Since persistent sends and receives require a constant memory address, they can only be used when the data is packed into the work vector,
 970: $   because the in and out arrays may differ for each call to VecScatterBegin()/VecScatterEnd().
971: $
972: $    p indicates possible, but not implemented. X indicates implemented
973: $

975:     Level: intermediate

977:   Notes:
 978:    In calls to VecScatterBegin() and VecScatterEnd() you can use different vectors than the xin and
 979:    yin you used above; BUT they must have the same parallel data layout: for example,
 980:    they could be obtained from VecDuplicate().
 981:    A VecScatter context CANNOT be used in two or more simultaneous scatters;
 982:    that is, you cannot call a second VecScatterBegin() with the same scatter
 983:    context until VecScatterEnd() has been called on the first VecScatterBegin().
984:    In this case a separate VecScatter is needed for each concurrent scatter.

 986:    Currently MPI_Send(), MPI_Ssend() and MPI_Rsend() all use PERSISTENT versions.
 987:    (This unfortunately requires that the same in and out arrays be used for each use; this
 988:     is why, when not using MPI_Alltoallw(), we always need to pack the input into the work array before sending
 989:     and unpack upon receiving, instead of using MPI datatypes to avoid the packing/unpacking.)

 991:    ix and iy cannot both be NULL.

993:    Concepts: scatter^between vectors
994:    Concepts: gather^between vectors

996: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero()
997: @*/
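
/*
   Usage sketch for VecScatterCreate() (the helper name and the sample global
   indices are ours; error handling elided as in this listing): gather two
   globally-numbered entries of a parallel vector into a small sequential
   vector on every process.  ix is in the global numbering of mpivec; iy is
   in the local numbering of the sequential vector.
*/
static PetscErrorCode SketchGatherTwoGlobalEntries(Vec mpivec)
{
  Vec        local;
  IS         ix,iy;
  VecScatter ctx;
  PetscInt   ghosts[2] = {0,11};   /* global indices, assumed valid for mpivec */

  VecCreateSeq(PETSC_COMM_SELF,2,&local);
  ISCreateGeneral(PETSC_COMM_SELF,2,ghosts,PETSC_COPY_VALUES,&ix);
  ISCreateStride(PETSC_COMM_SELF,2,0,1,&iy);
  VecScatterCreate(mpivec,ix,local,iy,&ctx);
  VecScatterBegin(ctx,mpivec,local,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(ctx,mpivec,local,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterDestroy(&ctx);
  ISDestroy(&ix);
  ISDestroy(&iy);
  VecDestroy(&local);
  return(0);
}
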
998: PetscErrorCode  VecScatterCreate(Vec xin,IS ix,Vec yin,IS iy,VecScatter *newctx)
999: {
1000:   VecScatter        ctx;
1001:   PetscErrorCode    ierr;
1002:   PetscMPIInt       size;
1003:   PetscInt          xin_type = VEC_SEQ_ID,yin_type = VEC_SEQ_ID,*range;
1004:   PetscInt          ix_type  = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1005:   MPI_Comm          comm,ycomm;
1006:   PetscBool         totalv,ixblock,iyblock,iystride,islocal,cando,flag;
1007:   IS                tix = 0,tiy = 0;

1010:   if (!ix && !iy) SETERRQ(PetscObjectComm((PetscObject)xin),PETSC_ERR_SUP,"Cannot pass default in for both input and output indices");

1012:   /*
1013:       Determine if the vectors are "parallel", i.e. they share a communicator with other
1014:       processes, or sequential (they do not share a comm). The difference is that parallel
1015:       vectors treat the index set as providing indices in the global parallel numbering of
1016:       the vector, while sequential vectors treat the index set as providing indices in the
1017:       local sequential numbering.
1018:   */
1019:   PetscObjectGetComm((PetscObject)xin,&comm);
1020:   MPI_Comm_size(comm,&size);
1021:   if (size > 1) xin_type = VEC_MPI_ID;

1023:   PetscObjectGetComm((PetscObject)yin,&ycomm);
1024:   MPI_Comm_size(ycomm,&size);
1025:   if (size > 1) {comm = ycomm; yin_type = VEC_MPI_ID;}

1027:   /* generate the Scatter context */
1028:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
1029:   ctx->inuse               = PETSC_FALSE;

1031:   ctx->beginandendtogether = PETSC_FALSE;
1032:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
1033:   if (ctx->beginandendtogether) {
1034:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
1035:   }
1036:   ctx->packtogether = PETSC_FALSE;
1037:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
1038:   if (ctx->packtogether) {
1039:     PetscInfo(ctx,"Pack all messages before sending\n");
1040:   }

1042:   VecGetLocalSize(xin,&ctx->from_n);
1043:   VecGetLocalSize(yin,&ctx->to_n);

1045:   /*
1046:       if ix or iy is not given, assume we are just grabbing the entire vector
1047:   */
1048:   if (!ix && xin_type == VEC_SEQ_ID) {
1049:     ISCreateStride(comm,ctx->from_n,0,1,&ix);
1050:     tix  = ix;
1051:   } else if (!ix && xin_type == VEC_MPI_ID) {
1052:     if (yin_type == VEC_MPI_ID) {
1053:       PetscInt ntmp, low;
1054:       VecGetLocalSize(xin,&ntmp);
1055:       VecGetOwnershipRange(xin,&low,NULL);
1056:       ISCreateStride(comm,ntmp,low,1,&ix);
1057:     } else {
1058:       PetscInt Ntmp;
1059:       VecGetSize(xin,&Ntmp);
1060:       ISCreateStride(comm,Ntmp,0,1,&ix);
1061:     }
1062:     tix = ix;
1063:   } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");

1065:   if (!iy && yin_type == VEC_SEQ_ID) {
1066:     ISCreateStride(comm,ctx->to_n,0,1,&iy);
1067:     tiy  = iy;
1068:   } else if (!iy && yin_type == VEC_MPI_ID) {
1069:     if (xin_type == VEC_MPI_ID) {
1070:       PetscInt ntmp, low;
1071:       VecGetLocalSize(yin,&ntmp);
1072:       VecGetOwnershipRange(yin,&low,NULL);
1073:       ISCreateStride(comm,ntmp,low,1,&iy);
1074:     } else {
1075:       PetscInt Ntmp;
1076:       VecGetSize(yin,&Ntmp);
1077:       ISCreateStride(comm,Ntmp,0,1,&iy);
1078:     }
1079:     tiy = iy;
1080:   } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");

1082:   /*
1083:      Determine types of index sets
1084:   */
1085:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1086:   if (flag) ix_type = IS_BLOCK_ID;
1087:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1088:   if (flag) iy_type = IS_BLOCK_ID;
1089:   PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1090:   if (flag) ix_type = IS_STRIDE_ID;
1091:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1092:   if (flag) iy_type = IS_STRIDE_ID;

1094:   /* ===========================================================================================================
1095:         Check for special cases
1096:      ==========================================================================================================*/
1097:   /* ---------------------------------------------------------------------------*/
1098:   if (xin_type == VEC_SEQ_ID && yin_type == VEC_SEQ_ID) {
1099:     if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
1100:       PetscInt               nx,ny;
1101:       const PetscInt         *idx,*idy;
1102:       VecScatter_Seq_General *to = NULL,*from = NULL;

1104:       ISGetLocalSize(ix,&nx);
1105:       ISGetLocalSize(iy,&ny);
1106:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1107:       ISGetIndices(ix,&idx);
1108:       ISGetIndices(iy,&idy);
1109:       PetscMalloc2(1,&to,1,&from);
1110:       PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
1111:       to->n = nx;
1112: #if defined(PETSC_USE_DEBUG)
1113:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1114: #endif
1115:       PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
1116:       from->n = nx;
1117: #if defined(PETSC_USE_DEBUG)
1118:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1119: #endif
1120:       PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
1121:       to->type          = VEC_SCATTER_SEQ_GENERAL;
1122:       from->type        = VEC_SCATTER_SEQ_GENERAL;
1123:       ctx->todata       = (void*)to;
1124:       ctx->fromdata     = (void*)from;
1125:       ctx->ops->begin   = VecScatterBegin_SGToSG;
1126:       ctx->ops->end     = 0;
1127:       ctx->ops->destroy = VecScatterDestroy_SGToSG;
1128:       ctx->ops->copy    = VecScatterCopy_SGToSG;
1129:       ctx->ops->view    = VecScatterView_SGToSG;
1130:       PetscInfo(xin,"Special case: sequential vector general scatter\n");
1131:       goto functionend;
1132:     } else if (ix_type == IS_STRIDE_ID &&  iy_type == IS_STRIDE_ID) {
1133:       PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1134:       VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;

1136:       ISGetLocalSize(ix,&nx);
1137:       ISGetLocalSize(iy,&ny);
1138:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1139:       ISStrideGetInfo(iy,&to_first,&to_step);
1140:       ISStrideGetInfo(ix,&from_first,&from_step);
1141:       PetscMalloc2(1,&to8,1,&from8);
1142:       to8->n            = nx;
1143:       to8->first        = to_first;
1144:       to8->step         = to_step;
1145:       from8->n          = nx;
1146:       from8->first      = from_first;
1147:       from8->step       = from_step;
1148:       to8->type         = VEC_SCATTER_SEQ_STRIDE;
1149:       from8->type       = VEC_SCATTER_SEQ_STRIDE;
1150:       ctx->todata       = (void*)to8;
1151:       ctx->fromdata     = (void*)from8;
1152:       ctx->ops->begin   = VecScatterBegin_SSToSS;
1153:       ctx->ops->end     = 0;
1154:       ctx->ops->destroy = VecScatterDestroy_SSToSS;
1155:       ctx->ops->copy    = VecScatterCopy_SSToSS;
1156:       ctx->ops->view    = VecScatterView_SSToSS;
1157:       PetscInfo(xin,"Special case: sequential vector stride to stride\n");
1158:       goto functionend;
1159:     } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
1160:       PetscInt               nx,ny,first,step;
1161:       const PetscInt         *idx;
1162:       VecScatter_Seq_General *from9 = NULL;
1163:       VecScatter_Seq_Stride  *to9   = NULL;

1165:       ISGetLocalSize(ix,&nx);
1166:       ISGetIndices(ix,&idx);
1167:       ISGetLocalSize(iy,&ny);
1168:       ISStrideGetInfo(iy,&first,&step);
1169:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1170:       PetscMalloc2(1,&to9,1,&from9);
1171:       PetscMalloc1(nx,&from9->vslots);
1172:       to9->n     = nx;
1173:       to9->first = first;
1174:       to9->step  = step;
1175:       from9->n   = nx;
1176: #if defined(PETSC_USE_DEBUG)
1177:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1178: #endif
1179:       PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
1180:       ctx->todata    = (void*)to9; ctx->fromdata = (void*)from9;
1181:       if (step == 1)  ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
1182:       else            ctx->ops->begin = VecScatterBegin_SGToSS;
1183:       ctx->ops->destroy   = VecScatterDestroy_SGToSS;
1184:       ctx->ops->end       = 0;
1185:       ctx->ops->copy      = VecScatterCopy_SGToSS;
1186:       ctx->ops->view      = VecScatterView_SGToSS;
1187:       to9->type      = VEC_SCATTER_SEQ_STRIDE;
1188:       from9->type    = VEC_SCATTER_SEQ_GENERAL;
1189:       PetscInfo(xin,"Special case: sequential vector general to stride\n");
1190:       goto functionend;
1191:     } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
1192:       PetscInt               nx,ny,first,step;
1193:       const PetscInt         *idy;
1194:       VecScatter_Seq_General *to10 = NULL;
1195:       VecScatter_Seq_Stride  *from10 = NULL;

1197:       ISGetLocalSize(ix,&nx);
1198:       ISGetIndices(iy,&idy);
1199:       ISGetLocalSize(iy,&ny);
1200:       ISStrideGetInfo(ix,&first,&step);
1201:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1202:       PetscMalloc2(1,&to10,1,&from10);
1203:       PetscMalloc1(nx,&to10->vslots);
1204:       from10->n     = nx;
1205:       from10->first = first;
1206:       from10->step  = step;
1207:       to10->n       = nx;
1208: #if defined(PETSC_USE_DEBUG)
1209:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1210: #endif
1211:       PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1212:       ctx->todata   = (void*)to10;
1213:       ctx->fromdata = (void*)from10;
1214:       if (step == 1) ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1215:       else           ctx->ops->begin = VecScatterBegin_SSToSG;
1216:       ctx->ops->destroy = VecScatterDestroy_SSToSG;
1217:       ctx->ops->end     = 0;
1218:       ctx->ops->copy    = 0;
1219:       ctx->ops->view    = VecScatterView_SSToSG;
1220:       to10->type   = VEC_SCATTER_SEQ_GENERAL;
1221:       from10->type = VEC_SCATTER_SEQ_STRIDE;
1222:       PetscInfo(xin,"Special case: sequential vector stride to general\n");
1223:       goto functionend;
1224:     } else {
1225:       PetscInt               nx,ny;
1226:       const PetscInt         *idx,*idy;
1227:       VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1228:       PetscBool              idnx,idny;

1230:       ISGetLocalSize(ix,&nx);
1231:       ISGetLocalSize(iy,&ny);
1232:       if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);

1234:       ISIdentity(ix,&idnx);
1235:       ISIdentity(iy,&idny);
1236:       if (idnx && idny) {
1237:         VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1238:         PetscMalloc2(1,&to13,1,&from13);
1239:         to13->n       = nx;
1240:         to13->first   = 0;
1241:         to13->step    = 1;
1242:         from13->n     = nx;
1243:         from13->first = 0;
1244:         from13->step  = 1;
1245:         to13->type    = VEC_SCATTER_SEQ_STRIDE;
1246:         from13->type  = VEC_SCATTER_SEQ_STRIDE;
1247:         ctx->todata   = (void*)to13;
1248:         ctx->fromdata = (void*)from13;
1249:         ctx->ops->begin    = VecScatterBegin_SSToSS;
1250:         ctx->ops->end      = 0;
1251:         ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1252:         ctx->ops->copy     = VecScatterCopy_SSToSS;
1253:         ctx->ops->view     = VecScatterView_SSToSS;
1254:         PetscInfo(xin,"Special case: sequential copy\n");
1255:         goto functionend;
1256:       }

1258:       ISGetIndices(iy,&idy);
1259:       ISGetIndices(ix,&idx);
1260:       PetscMalloc2(1,&to11,1,&from11);
1261:       PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1262:       to11->n = nx;
1263: #if defined(PETSC_USE_DEBUG)
1264:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1265: #endif
1266:       PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1267:       from11->n = nx;
1268: #if defined(PETSC_USE_DEBUG)
1269:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1270: #endif
1271:       PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1272:       to11->type        = VEC_SCATTER_SEQ_GENERAL;
1273:       from11->type      = VEC_SCATTER_SEQ_GENERAL;
1274:       ctx->todata       = (void*)to11;
1275:       ctx->fromdata     = (void*)from11;
1276:       ctx->ops->begin   = VecScatterBegin_SGToSG;
1277:       ctx->ops->end     = 0;
1278:       ctx->ops->destroy = VecScatterDestroy_SGToSG;
1279:       ctx->ops->copy    = VecScatterCopy_SGToSG;
1280:       ctx->ops->view    = VecScatterView_SGToSG;
1281:       ISRestoreIndices(ix,&idx);
1282:       ISRestoreIndices(iy,&idy);
1283:       PetscInfo(xin,"Sequential vector scatter with block indices\n");
1284:       goto functionend;
1285:     }
1286:   }
1287:   /* ---------------------------------------------------------------------------*/
1288:   if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {

1290:     /* ===========================================================================================================
1291:           Check for special cases
1292:        ==========================================================================================================*/
1293:     islocal = PETSC_FALSE;
1294:     /* special case extracting (subset of) local portion */
1295:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1296:       PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1297:       PetscInt              start,end,min,max;
1298:       VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;

1300:       VecGetOwnershipRange(xin,&start,&end);
1301:       ISGetLocalSize(ix,&nx);
1302:       ISStrideGetInfo(ix,&from_first,&from_step);
1303:       ISGetLocalSize(iy,&ny);
1304:       ISStrideGetInfo(iy,&to_first,&to_step);
1305:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1306:       ISGetMinMax(ix,&min,&max);
1307:       if (min >= start && max < end) islocal = PETSC_TRUE;
1308:       else islocal = PETSC_FALSE;
1309:       /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1310:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1311:       if (cando) {
1312:         PetscMalloc2(1,&to12,1,&from12);
1313:         to12->n            = nx;
1314:         to12->first        = to_first;
1315:         to12->step         = to_step;
1316:         from12->n          = nx;
1317:         from12->first      = from_first-start;
1318:         from12->step       = from_step;
1319:         to12->type         = VEC_SCATTER_SEQ_STRIDE;
1320:         from12->type       = VEC_SCATTER_SEQ_STRIDE;
1321:         ctx->todata        = (void*)to12;
1322:         ctx->fromdata      = (void*)from12;
1323:         ctx->ops->begin    = VecScatterBegin_SSToSS;
1324:         ctx->ops->end      = 0;
1325:         ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1326:         ctx->ops->copy     = VecScatterCopy_SSToSS;
1327:         ctx->ops->view     = VecScatterView_SSToSS;
1328:         PetscInfo(xin,"Special case: processors only getting local values\n");
1329:         goto functionend;
1330:       }
1331:     } else {
1332:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1333:     }

1335:     /* test for special case of all processors getting entire vector */
1336:     /* contains check that PetscMPIInt can handle the sizes needed */
1337:     totalv = PETSC_FALSE;
1338:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1339:       PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1340:       PetscMPIInt          *count = NULL,*displx;
1341:       VecScatter_MPI_ToAll *sto   = NULL;

1343:       ISGetLocalSize(ix,&nx);
1344:       ISStrideGetInfo(ix,&from_first,&from_step);
1345:       ISGetLocalSize(iy,&ny);
1346:       ISStrideGetInfo(iy,&to_first,&to_step);
1347:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1348:       VecGetSize(xin,&N);
1349:       if (nx != N) totalv = PETSC_FALSE;
1350:       else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1351:       else totalv = PETSC_FALSE;
1352:       /* cannot use MPIU_Allreduce() since this call must match the MPI_Allreduce() in the else branch below */
1353:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1355: #if defined(PETSC_USE_64BIT_INDICES)
1356:       if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1357: #else
1358:       if (cando) {
1359: #endif
1360:         MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1361:         PetscMalloc3(1,&sto,size,&count,size,&displx);
1362:         range = xin->map->range;
1363:         for (i=0; i<size; i++) {
1364:           PetscMPIIntCast(range[i+1] - range[i],count+i);
1365:           PetscMPIIntCast(range[i],displx+i);
1366:         }
1367:         sto->count        = count;
1368:         sto->displx       = displx;
1369:         sto->work1        = 0;
1370:         sto->work2        = 0;
1371:         sto->type         = VEC_SCATTER_MPI_TOALL;
1372:         ctx->todata       = (void*)sto;
1373:         ctx->fromdata     = 0;
1374:         ctx->ops->begin   = VecScatterBegin_MPI_ToAll;
1375:         ctx->ops->end     = 0;
1376:         ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1377:         ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1378:         ctx->ops->view    = VecScatterView_MPI_ToAll;
1379:         PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1380:         goto functionend;
1381:       }
1382:     } else {
1383:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1384:     }

1386:     /* test for special case of processor 0 getting entire vector */
1387:     /* contains check that PetscMPIInt can handle the sizes needed */
1388:     totalv = PETSC_FALSE;
1389:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1390:       PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1391:       PetscMPIInt          rank,*count = NULL,*displx;
1392:       VecScatter_MPI_ToAll *sto = NULL;

1394:       PetscObjectGetComm((PetscObject)xin,&comm);
1395:       MPI_Comm_rank(comm,&rank);
1396:       ISGetLocalSize(ix,&nx);
1397:       ISStrideGetInfo(ix,&from_first,&from_step);
1398:       ISGetLocalSize(iy,&ny);
1399:       ISStrideGetInfo(iy,&to_first,&to_step);
1400:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1401:       if (!rank) {
1402:         VecGetSize(xin,&N);
1403:         if (nx != N) totalv = PETSC_FALSE;
1404:         else if (from_first == 0        && from_step == 1 &&
1405:                  from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1406:         else totalv = PETSC_FALSE;
1407:       } else {
1408:         if (!nx) totalv = PETSC_TRUE;
1409:         else     totalv = PETSC_FALSE;
1410:       }
1411:       /* cannot use MPIU_Allreduce() since this call must match the MPI_Allreduce() in the else branch below */
1412:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1414: #if defined(PETSC_USE_64BIT_INDICES)
1415:       if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1416: #else
1417:       if (cando) {
1418: #endif
1419:         MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1420:         PetscMalloc3(1,&sto,size,&count,size,&displx);
1421:         range = xin->map->range;
1422:         for (i=0; i<size; i++) {
1423:           PetscMPIIntCast(range[i+1] - range[i],count+i);
1424:           PetscMPIIntCast(range[i],displx+i);
1425:         }
1426:         sto->count        = count;
1427:         sto->displx       = displx;
1428:         sto->work1        = 0;
1429:         sto->work2        = 0;
1430:         sto->type         = VEC_SCATTER_MPI_TOONE;
1431:         ctx->todata       = (void*)sto;
1432:         ctx->fromdata     = 0;
1433:         ctx->ops->begin   = VecScatterBegin_MPI_ToOne;
1434:         ctx->ops->end     = 0;
1435:         ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1436:         ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1437:         ctx->ops->view    = VecScatterView_MPI_ToAll;
1438:         PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1439:         goto functionend;
1440:       }
1441:     } else {
1442:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1443:     }

1445:     PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1446:     PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1447:     PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1448:     if (ixblock) {
1449:       /* special case block to block */
1450:       if (iyblock) {
1451:         PetscInt       nx,ny,bsx,bsy;
1452:         const PetscInt *idx,*idy;
1453:         ISGetBlockSize(iy,&bsy);
1454:         ISGetBlockSize(ix,&bsx);
1455:         if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1456:           ISBlockGetLocalSize(ix,&nx);
1457:           ISBlockGetIndices(ix,&idx);
1458:           ISBlockGetLocalSize(iy,&ny);
1459:           ISBlockGetIndices(iy,&idy);
1460:           if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1461:           VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,bsx,ctx);
1462:           ISBlockRestoreIndices(ix,&idx);
1463:           ISBlockRestoreIndices(iy,&idy);
1464:           PetscInfo(xin,"Special case: blocked indices\n");
1465:           goto functionend;
1466:         }
1467:         /* special case block to stride */
1468:       } else if (iystride) {
1469:         PetscInt ystart,ystride,ysize,bsx;
1470:         ISStrideGetInfo(iy,&ystart,&ystride);
1471:         ISGetLocalSize(iy,&ysize);
1472:         ISGetBlockSize(ix,&bsx);
1473:         /* see if stride index set is equivalent to block index set */
1474:         if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1475:           PetscInt       nx,il,*idy;
1476:           const PetscInt *idx;
1477:           ISBlockGetLocalSize(ix,&nx);
1478:           ISBlockGetIndices(ix,&idx);
1479:           if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1480:           PetscMalloc1(nx,&idy);
1481:           if (nx) {
1482:             idy[0] = ystart/bsx;
1483:             for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1484:           }
1485:           VecScatterCreate_PtoS(nx,idx,nx,idy,xin,yin,bsx,ctx);
1486:           PetscFree(idy);
1487:           ISBlockRestoreIndices(ix,&idx);
1488:           PetscInfo(xin,"Special case: blocked indices to stride\n");
1489:           goto functionend;
1490:         }
1491:       }
1492:     }
1493:     /* left over general case */
1494:     {
1495:       PetscInt       nx,ny;
1496:       const PetscInt *idx,*idy;
1497:       ISGetLocalSize(ix,&nx);
1498:       ISGetIndices(ix,&idx);
1499:       ISGetLocalSize(iy,&ny);
1500:       ISGetIndices(iy,&idy);
1501:       if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%d %d)",nx,ny);
1502:       VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,1,ctx);
1503:       ISRestoreIndices(ix,&idx);
1504:       ISRestoreIndices(iy,&idy);
1505:       PetscInfo(xin,"General case: MPI to Seq\n");
1506:       goto functionend;
1507:     }
1508:   }
1509:   /* ---------------------------------------------------------------------------*/
1510:   if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1511:     /* ===========================================================================================================
1512:           Check for special cases
1513:        ==========================================================================================================*/
1514:     /* special case local copy portion */
1515:     islocal = PETSC_FALSE;
1516:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1517:       PetscInt              nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1518:       VecScatter_Seq_Stride *from = NULL,*to = NULL;

1520:       VecGetOwnershipRange(yin,&start,&end);
1521:       ISGetLocalSize(ix,&nx);
1522:       ISStrideGetInfo(ix,&from_first,&from_step);
1523:       ISGetLocalSize(iy,&ny);
1524:       ISStrideGetInfo(iy,&to_first,&to_step);
1525:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1526:       ISGetMinMax(iy,&min,&max);
1527:       if (min >= start && max < end) islocal = PETSC_TRUE;
1528:       else islocal = PETSC_FALSE;
1529:       /* cannot use MPIU_Allreduce() since this call must match the MPI_Allreduce() in the else branch below */
1530:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1531:       if (cando) {
1532:         PetscMalloc2(1,&to,1,&from);
1533:         to->n             = nx;
1534:         to->first         = to_first-start;
1535:         to->step          = to_step;
1536:         from->n           = nx;
1537:         from->first       = from_first;
1538:         from->step        = from_step;
1539:         to->type          = VEC_SCATTER_SEQ_STRIDE;
1540:         from->type        = VEC_SCATTER_SEQ_STRIDE;
1541:         ctx->todata       = (void*)to;
1542:         ctx->fromdata     = (void*)from;
1543:         ctx->ops->begin   = VecScatterBegin_SSToSS;
1544:         ctx->ops->end     = 0;
1545:         ctx->ops->destroy = VecScatterDestroy_SSToSS;
1546:         ctx->ops->copy    = VecScatterCopy_SSToSS;
1547:         ctx->ops->view    = VecScatterView_SSToSS;
1548:         PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1549:         goto functionend;
1550:       }
1551:     } else {
1552:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1553:     }
1554:     /* special case block to stride */
1555:     if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1556:       PetscInt ystart,ystride,ysize,bsx;
1557:       ISStrideGetInfo(iy,&ystart,&ystride);
1558:       ISGetLocalSize(iy,&ysize);
1559:       ISGetBlockSize(ix,&bsx);
1560:       /* see if stride index set is equivalent to block index set */
1561:       if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1562:         PetscInt       nx,il,*idy;
1563:         const PetscInt *idx;
1564:         ISBlockGetLocalSize(ix,&nx);
1565:         ISBlockGetIndices(ix,&idx);
1566:         if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1567:         PetscMalloc1(nx,&idy);
1568:         if (nx) {
1569:           idy[0] = ystart/bsx;
1570:           for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1571:         }
1572:         VecScatterCreate_StoP(nx,idx,nx,idy,xin,yin,bsx,ctx);
1573:         PetscFree(idy);
1574:         ISBlockRestoreIndices(ix,&idx);
1575:         PetscInfo(xin,"Special case: blocked indices to stride\n");
1576:         goto functionend;
1577:       }
1578:     }

1580:     /* general case */
1581:     {
1582:       PetscInt       nx,ny;
1583:       const PetscInt *idx,*idy;
1584:       ISGetLocalSize(ix,&nx);
1585:       ISGetIndices(ix,&idx);
1586:       ISGetLocalSize(iy,&ny);
1587:       ISGetIndices(iy,&idy);
1588:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1589:       VecScatterCreate_StoP(nx,idx,ny,idy,xin,yin,1,ctx);
1590:       ISRestoreIndices(ix,&idx);
1591:       ISRestoreIndices(iy,&idy);
1592:       PetscInfo(xin,"General case: Seq to MPI\n");
1593:       goto functionend;
1594:     }
1595:   }
1596:   /* ---------------------------------------------------------------------------*/
1597:   if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1598:     /* no special cases for now */
1599:     PetscInt       nx,ny;
1600:     const PetscInt *idx,*idy;
1601:     ISGetLocalSize(ix,&nx);
1602:     ISGetIndices(ix,&idx);
1603:     ISGetLocalSize(iy,&ny);
1604:     ISGetIndices(iy,&idy);
1605:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1606:     VecScatterCreate_PtoP(nx,idx,ny,idy,xin,yin,1,ctx);
1607:     ISRestoreIndices(ix,&idx);
1608:     ISRestoreIndices(iy,&idy);
1609:     PetscInfo(xin,"General case: MPI to MPI\n");
1610:     goto functionend;
1611:   }

1613:   functionend:
1614:   *newctx = ctx;
1615:   ISDestroy(&tix);
1616:   ISDestroy(&tiy);
1617:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1618:   return(0);
1619: }
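/*
   Example (a minimal sketch, not part of the library itself): creating a
   scatter that triggers the "all processors get entire parallel vector"
   special case detected above.  Error checking is omitted; xpar is an
   assumed existing parallel (MPI) vector.

     Vec        xpar,yall;
     VecScatter toall;
     IS         ix;
     PetscInt   N;

     VecGetSize(xpar,&N);
     VecCreateSeq(PETSC_COMM_SELF,N,&yall);
     ISCreateStride(PETSC_COMM_SELF,N,0,1,&ix);
     VecScatterCreate(xpar,ix,yall,ix,&toall);
     VecScatterBegin(toall,xpar,yall,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(toall,xpar,yall,INSERT_VALUES,SCATTER_FORWARD);
     ISDestroy(&ix);
     VecScatterDestroy(&toall);
     VecDestroy(&yall);
*/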

1621: /* ------------------------------------------------------------------*/
1622: /*@
1623:    VecScatterGetMerged - Returns true if the scatter is completed during VecScatterBegin()
1624:       so that VecScatterEnd() does nothing

1626:    Not Collective

1628:    Input Parameter:
1629: .   ctx - scatter context created with VecScatterCreate()

1631:    Output Parameter:
1632: .   flg - PETSC_TRUE if all the work of VecScatterBegin()/VecScatterEnd() is done during VecScatterBegin()

1634:    Level: developer

1636: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
1637: @*/
1638: PetscErrorCode  VecScatterGetMerged(VecScatter ctx,PetscBool  *flg)
1639: {
1642:   *flg = ctx->beginandendtogether;
1643:   return(0);
1644: }
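/*
   Example (a minimal sketch): querying the flag before deciding whether
   there is communication left to overlap with local work.  ctx is an
   assumed existing scatter context; VecScatterEnd() must still be called
   either way.

     PetscBool merged;
     VecScatterGetMerged(ctx,&merged);
     if (!merged) {
       .. do independent local work here while messages are in flight ..
     }
*/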

1646: /*@
1647:    VecScatterBegin - Begins a generalized scatter from one vector to
1648:    another. Complete the scattering phase with VecScatterEnd().

1650:    Neighbor-wise Collective on VecScatter and Vec

1652:    Input Parameters:
1653: +  inctx - scatter context generated by VecScatterCreate()
1654: .  x - the vector from which we scatter
1655: .  y - the vector to which we scatter
1656: .  addv - either ADD_VALUES or INSERT_VALUES; with INSERT_VALUES any location
1657:           not scattered to retains its old value, i.e. the vector is NOT first zeroed.
1658: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1659:     SCATTER_FORWARD or SCATTER_REVERSE


1662:    Level: intermediate

1664:    Options Database: See VecScatterCreate()

1666:    Notes:
1667:    The vectors x and y need not be the same vectors used in the call
1668:    to VecScatterCreate(), but x must have the same parallel data layout
1669:    as that passed in as the x to VecScatterCreate(), similarly for the y.
1670:    Most likely they have been obtained from VecDuplicate().

1672:    You cannot change the values in the input vector between the calls to VecScatterBegin()
1673:    and VecScatterEnd().

1675:    If you use SCATTER_REVERSE the two arguments x and y should be reversed relative
1676:    to the SCATTER_FORWARD case.

1678:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1680:    This scatter is far more general than the conventional
1681:    scatter, since it can be a gather or a scatter or a combination,
1682:    depending on the indices ix and iy.  If x is a parallel vector and y
1683:    is sequential, VecScatterBegin() can serve to gather values to a
1684:    single processor.  Similarly, if y is parallel and x sequential, the
1685:    routine can scatter from one processor to many processors.

1687:    Concepts: scatter^between vectors
1688:    Concepts: gather^between vectors

1690: .seealso: VecScatterCreate(), VecScatterEnd()
1691: @*/
1692: PetscErrorCode  VecScatterBegin(VecScatter inctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1693: {
1695: #if defined(PETSC_USE_DEBUG)
1696:   PetscInt       to_n,from_n;
1697: #endif
1702:   if (inctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");

1704: #if defined(PETSC_USE_DEBUG)
1705:   /*
1706:      Error checking to make sure these vectors match the vectors used
1707:    to create the vector scatter context. -1 in the from_n and to_n indicate the
1708:    vector lengths are unknown (for example with mapped scatters) and thus
1709:    no error checking is performed.
1710:   */
1711:   if (inctx->from_n >= 0 && inctx->to_n >= 0) {
1712:     VecGetLocalSize(x,&from_n);
1713:     VecGetLocalSize(y,&to_n);
1714:     if (mode & SCATTER_REVERSE) {
1715:       if (to_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,inctx->from_n);
1716:       if (from_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,inctx->to_n);
1717:     } else {
1718:       if (to_n != inctx->to_n)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,inctx->to_n);
1719:       if (from_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,inctx->from_n);
1720:     }
1721:   }
1722: #endif

1724:   inctx->inuse = PETSC_TRUE;
1725:   PetscLogEventBarrierBegin(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1726:   (*inctx->ops->begin)(inctx,x,y,addv,mode);
1727:   if (inctx->beginandendtogether && inctx->ops->end) {
1728:     inctx->inuse = PETSC_FALSE;
1729:     (*inctx->ops->end)(inctx,x,y,addv,mode);
1730:   }
1731:   PetscLogEventBarrierEnd(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1732:   return(0);
1733: }

1735: /* --------------------------------------------------------------------*/
1736: /*@
1737:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
1738:    after first calling VecScatterBegin().

1740:    Neighbor-wise Collective on VecScatter and Vec

1742:    Input Parameters:
1743: +  ctx - scatter context generated by VecScatterCreate()
1744: .  x - the vector from which we scatter
1745: .  y - the vector to which we scatter
1746: .  addv - either ADD_VALUES or INSERT_VALUES.
1747: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1748:      SCATTER_FORWARD, SCATTER_REVERSE

1750:    Level: intermediate

1752:    Notes:
1753:    If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD case.

1755:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1757: .seealso: VecScatterBegin(), VecScatterCreate()
1758: @*/
1759: PetscErrorCode  VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1760: {

1767:   ctx->inuse = PETSC_FALSE;
1768:   if (!ctx->ops->end) return(0);
1769:   if (!ctx->beginandendtogether) {
1770:     PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
1771:     (*(ctx)->ops->end)(ctx,x,y,addv,mode);
1772:     PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
1773:   }
1774:   return(0);
1775: }
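/*
   Example (a minimal sketch): a forward scatter followed by the matching
   reverse scatter.  ctx, x and y are assumed to exist; note the swapped
   vector arguments in the reverse direction, as documented above.

     VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);

     VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);
     VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);
*/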

1777: /*@C
1778:    VecScatterDestroy - Destroys a scatter context created by
1779:    VecScatterCreate().

1781:    Collective on VecScatter

1783:    Input Parameter:
1784: .  ctx - the scatter context

1786:    Level: intermediate

1788: .seealso: VecScatterCreate(), VecScatterCopy()
1789: @*/
1790: PetscErrorCode  VecScatterDestroy(VecScatter *ctx)
1791: {

1795:   if (!*ctx) return(0);
1797:   if ((*ctx)->inuse) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
1798:   if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}

1800:   /* if memory was published with SAWs then destroy it */
1801:   PetscObjectSAWsViewOff((PetscObject)(*ctx));
1802:   (*(*ctx)->ops->destroy)(*ctx);
1803: #if defined(PETSC_HAVE_CUSP)
1804:   VecScatterCUSPIndicesDestroy((PetscCUSPIndices*)&((*ctx)->spptr));
1805: #elif defined(PETSC_HAVE_VECCUDA)
1806:   VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
1807: #endif
1808:   PetscHeaderDestroy(ctx);
1809:   return(0);
1810: }

1812: /*@
1813:    VecScatterCopy - Makes a copy of a scatter context.

1815:    Collective on VecScatter

1817:    Input Parameter:
1818: .  sctx - the scatter context

1820:    Output Parameter:
1821: .  ctx - the context copy

1823:    Level: advanced

1825: .seealso: VecScatterCreate(), VecScatterDestroy()
1826: @*/
1827: PetscErrorCode  VecScatterCopy(VecScatter sctx,VecScatter *ctx)
1828: {

1834:   if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
1835:   PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
1836:   (*ctx)->to_n   = sctx->to_n;
1837:   (*ctx)->from_n = sctx->from_n;
1838:   (*sctx->ops->copy)(sctx,*ctx);
1839:   return(0);
1840: }
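/*
   Example (a minimal sketch): duplicating a scatter context so that two
   independent communications can be pending at once, then destroying
   both.  sctx is an assumed existing scatter context.

     VecScatter ctx2;
     VecScatterCopy(sctx,&ctx2);
     .. use sctx and ctx2 independently ..
     VecScatterDestroy(&ctx2);
     VecScatterDestroy(&sctx);
*/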


1843: /* ------------------------------------------------------------------*/
1844: /*@C
1845:    VecScatterView - Views a vector scatter context.

1847:    Collective on VecScatter

1849:    Input Parameters:
1850: +  ctx - the scatter context
1851: -  viewer - the viewer for displaying the context

1853:    Level: intermediate

1855: @*/
1856: PetscErrorCode  VecScatterView(VecScatter ctx,PetscViewer viewer)
1857: {

1862:   if (!viewer) {
1863:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
1864:   }
1866:   if (ctx->ops->view) {
1867:     (*ctx->ops->view)(ctx,viewer);
1868:   }
1869:   return(0);
1870: }
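/*
   Example (a minimal sketch): viewing a scatter context.  Passing NULL
   selects the default ASCII viewer on the context's communicator, as
   handled above; the same output can also be requested at runtime with
   -vecscatter_view.

     VecScatterView(ctx,NULL);
*/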

1872: /*@C
1873:    VecScatterRemap - Remaps the "from" and "to" indices in a
1874:    vector scatter context. FOR EXPERTS ONLY!

1876:    Collective on VecScatter

1878:    Input Parameters:
1879: +  scat  - vector scatter context
1880: .  rto   - remapping for "to" indices (may be NULL)
1881: -  rfrom - remapping for "from" indices (must currently be NULL, see below)

1883:    Level: developer

1885:    Notes: In the parallel case the todata is actually the indices
1886:           from which the data is TAKEN! The fromdata is where the
1887:           data is finally put. This is VERY VERY confusing!

1889:           In the sequential case the todata is the indices where the
1890:           data is put and the fromdata is where it is taken from.
1891:           This is backwards from the parallel case! CRY! CRY! CRY!

1893: @*/
1894: PetscErrorCode  VecScatterRemap(VecScatter scat,PetscInt *rto,PetscInt *rfrom)
1895: {
1896:   VecScatter_Seq_General *to,*from;
1897:   VecScatter_MPI_General *mto;
1898:   PetscInt               i;


1905:   from = (VecScatter_Seq_General*)scat->fromdata;
1906:   mto  = (VecScatter_MPI_General*)scat->todata;

1908:   if (mto->type == VEC_SCATTER_MPI_TOALL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");

1910:   if (rto) {
1911:     if (mto->type == VEC_SCATTER_MPI_GENERAL) {
1912:       /* handle off processor parts */
1913:       for (i=0; i<mto->starts[mto->n]; i++) mto->indices[i] = rto[mto->indices[i]];

1915:       /* handle local part */
1916:       to = &mto->local;
1917:       for (i=0; i<to->n; i++) to->vslots[i] = rto[to->vslots[i]];
1918:     } else if (from->type == VEC_SCATTER_SEQ_GENERAL) {
1919:       for (i=0; i<from->n; i++) from->vslots[i] = rto[from->vslots[i]];
1920:     } else if (from->type == VEC_SCATTER_SEQ_STRIDE) {
1921:       VecScatter_Seq_Stride *sto = (VecScatter_Seq_Stride*)from;

1923:       /* if the remapping is the identity and stride is identity then skip remap */
1924:       if (sto->step == 1 && sto->first == 0) {
1925:         for (i=0; i<sto->n; i++) {
1926:           if (rto[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1927:         }
1928:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1929:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1930:   }

1932:   if (rfrom) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");

1934:   /*
1935:      Mark the vector lengths as unknown because we do not know the
1936:    lengths of the remapped vectors
1937:   */
1938:   scat->from_n = -1;
1939:   scat->to_n   = -1;
1940:   return(0);
1941: }
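/*
   Example (a minimal sketch): remapping the "to" indices through a
   permutation array.  perm is a hypothetical caller-provided array with
   one entry per index currently stored in the scatter; the rfrom argument
   must be NULL since remapping the "from" side is not yet supported.

     PetscInt *perm;   .. perm[i] gives the new index for old index i ..
     VecScatterRemap(scat,perm,NULL);
*/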

1943: /*
1944:  VecScatterGetTypes_Private - Returns the scatter types.

1946:  scatter - The scatter.
1947:  from    - Upon exit this contains the type of the from scatter.
1948:  to      - Upon exit this contains the type of the to scatter.
1949: */
1950: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterType *from,VecScatterType *to)
1951: {
1952:   VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
1953:   VecScatter_Common* todata   = (VecScatter_Common*)scatter->todata;

1956:   *from = fromdata->type;
1957:   *to = todata->type;
1958:   return(0);
1959: }


1962: /*
1963:   VecScatterIsSequential_Private - Returns true if the scatter is sequential.

1965:   scatter - The scatter.
1966:   flag    - Upon exit flag is true if the scatter is of type VecScatter_Seq_General 
1967:             or VecScatter_Seq_Stride; otherwise flag is false.
1968: */
1969: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
1970: {
1971:   VecScatterType scatterType = scatter->type;

1974:   if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
1975:     *flag = PETSC_TRUE;
1976:   } else {
1977:     *flag = PETSC_FALSE;
1978:   }
1979:   return(0);
1980: }

1982: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA)

1984: /*@C
1985:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
1986:    to another for GPU based computation.

1988:    Input Parameters:
1989: +  inctx - scatter context generated by VecScatterCreate()
1990: .  x - the vector from which we scatter
1991: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1992:     SCATTER_FORWARD or SCATTER_REVERSE

1994:   Level: intermediate

1996:   Notes:
1997:    Effectively, this function creates all the necessary indexing buffers and work
1998:    vectors needed to move only those data points in a vector which need to
1999:    be communicated across ranks. This is done the first time this function is
2000:    called. Currently, this is only used in the context of the parallel SpMV call in
2001:    MatMult_MPIAIJCUSP or MatMult_MPIAIJCUSPARSE.

2003:    This function is executed before the call to MatMult. This enables the memory
2004:    transfers to be overlapped with the MatMult SpMV kernel call.

2006: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
2007: @*/
2008: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
2009: {
2011:   VecScatter_MPI_General *to,*from;
2012:   PetscErrorCode         ierr;
2013:   PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
2014:   PetscBool              isSeq1,isSeq2;

2017:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
2018:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
2019:   if (isSeq1 || isSeq2) {
2020:     return(0);
2021:   }
2022:   if (mode & SCATTER_REVERSE) {
2023:     to     = (VecScatter_MPI_General*)inctx->fromdata;
2024:     from   = (VecScatter_MPI_General*)inctx->todata;
2025:   } else {
2026:     to     = (VecScatter_MPI_General*)inctx->todata;
2027:     from   = (VecScatter_MPI_General*)inctx->fromdata;
2028:   }
2029:   bs           = to->bs;
2030:   nrecvs       = from->n;
2031:   nsends       = to->n;
2032:   indices      = to->indices;
2033:   sstartsSends = to->starts;
2034:   sstartsRecvs = from->starts;
2035: #if defined(PETSC_HAVE_CUSP)
2036:   if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2037: #else
2038:   if (x->valid_GPU_array != PETSC_CUDA_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2039: #endif
2040:     if (!inctx->spptr) {
2041:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
2042:       PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
2043:       /* Here we create indices for both the senders and receivers. */
2044:       PetscMalloc1(ns,&tindicesSends);
2045:       PetscMalloc1(nr,&tindicesRecvs);

2047:       PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
2048:       PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));

2050:       PetscSortRemoveDupsInt(&ns,tindicesSends);
2051:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);

2053:       PetscMalloc1(bs*ns,&sindicesSends);
2054:       PetscMalloc1(from->bs*nr,&sindicesRecvs);

2056:       /* sender indices */
2057:       for (i=0; i<ns; i++) {
2058:         for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
2059:       }
2060:       PetscFree(tindicesSends);

2062:       /* receiver indices */
2063:       for (i=0; i<nr; i++) {
2064:         for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
2065:       }
2066:       PetscFree(tindicesRecvs);

2068:       /* create GPU indices, work vectors, ... */
2069: #if defined(PETSC_HAVE_CUSP)
2070:       VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);
2071: #else
2072:       VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
2073: #endif
2074:       PetscFree(sindicesSends);
2075:       PetscFree(sindicesRecvs);
2076:     }
2077:   }
2078:   return(0);
2079: }

2081: /*@C
2082:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
2083:    another for GPU based computation.

2085:    Input Parameter:
2086: .  inctx - scatter context generated by VecScatterCreate()

2088:   Level: intermediate

2090:   Notes:
2091:    Effectively, this function resets the temporary buffer flags. Currently, this
2092:    is only used in the context of the parallel SpMV call in MatMult_MPIAIJCUSP
2093:    or MatMult_MPIAIJCUSPARSE. Once the MatMultAdd is finished, the GPU temporary
2094:    buffers used for messaging are no longer valid.

2096: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
2097: @*/
2098: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
2099: {
2101:   return(0);
2102: }
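/*
   Example (a minimal sketch of the assumed internal call sequence, not a
   user-facing recipe): bracketing the communication phase so that the
   host-device transfers overlap with the local SpMV kernel, as described
   in the notes above.  ctx and x are assumed to come from a GPU-enabled
   parallel matrix-vector product; xlocal is an assumed work vector for
   the off-process values.

     VecScatterInitializeForGPU(ctx,x,SCATTER_FORWARD);
     VecScatterBegin(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);
     .. launch the local (diagonal-block) SpMV kernel here ..
     VecScatterEnd(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);
     .. off-diagonal SpMV using xlocal ..
     VecScatterFinalizeForGPU(ctx);
*/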

2104: #endif