Actual source code: vscat.c

petsc-3.7.3 2016-07-24
Report Typos and Errors
  2: /*
  3:      Code for creating scatters between vectors. This file
  4:   includes the code for scattering between sequential vectors and
  5:   some special cases for parallel scatters.
  6: */

  8: #include <petsc/private/isimpl.h>
  9: #include <petsc/private/vecimpl.h>    /*I   "petscvec.h"    I*/

 11: #if defined(PETSC_HAVE_CUSP)
 12: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUSPIndices*);
 13: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
 14: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
 15: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);
 16: #elif defined(PETSC_HAVE_VECCUDA)
 17: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUDAIndices*);
 18: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*);
 19: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*);
 20: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode);
 21: #endif

 23: /* Logging support */
 24: PetscClassId VEC_SCATTER_CLASSID;

 26: #if defined(PETSC_USE_DEBUG)
 27: /*
 28:      Checks if any indices are less than zero and generates an error
 29: */
 32: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
 33: {
 34:   PetscInt i;

 37:   for (i=0; i<n; i++) {
 38:     if (idx[i] < 0)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
 39:     if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
 40:   }
 41:   return(0);
 42: }
 43: #endif

 45: /*
 46:       This is special scatter code for when the entire parallel vector is copied to each processor.

 48:    This code was written by Cameron Cooper, Occidental College, Fall 1995,
 49:    will working at ANL as a SERS student.
 50: */
 53: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 54: {
 56:   PetscInt       yy_n,xx_n;
 57:   PetscScalar    *xv,*yv;

 60:   VecGetLocalSize(y,&yy_n);
 61:   VecGetLocalSize(x,&xx_n);
 62:   VecGetArrayPair(x,y,&xv,&yv);

 64:   if (mode & SCATTER_REVERSE) {
 65:     PetscScalar          *xvt,*xvt2;
 66:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 67:     PetscInt             i;
 68:     PetscMPIInt          *disply = scat->displx;

 70:     if (addv == INSERT_VALUES) {
 71:       PetscInt rstart,rend;
 72:       /*
 73:          copy the correct part of the local vector into the local storage of
 74:          the MPI one.  Note: this operation only makes sense if all the local
 75:          vectors have the same values
 76:       */
 77:       VecGetOwnershipRange(y,&rstart,&rend);
 78:       PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
 79:     } else {
 80:       MPI_Comm    comm;
 81:       PetscMPIInt rank;
 82:       PetscObjectGetComm((PetscObject)y,&comm);
 83:       MPI_Comm_rank(comm,&rank);
 84:       if (scat->work1) xvt = scat->work1;
 85:       else {
 86:         PetscMalloc1(xx_n,&xvt);
 87:         scat->work1 = xvt;
 88:       }
 89:       if (!rank) { /* I am the zeroth processor, values are accumulated here */
 90:         if   (scat->work2) xvt2 = scat->work2;
 91:         else {
 92:           PetscMalloc1(xx_n,&xvt2);
 93:           scat->work2 = xvt2;
 94:         }
 95:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 96:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 97:         if (addv == ADD_VALUES) {
 98:           for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
 99: #if !defined(PETSC_USE_COMPLEX)
100:         } else if (addv == MAX_VALUES) {
101:           for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
102: #endif
103:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
104:         MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
105:       } else {
106:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
107:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
108:         MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
109:       }
110:     }
111:   } else {
112:     PetscScalar          *yvt;
113:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
114:     PetscInt             i;
115:     PetscMPIInt          *displx = scat->displx;

117:     if (addv == INSERT_VALUES) {
118:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
119:     } else {
120:       if (scat->work1) yvt = scat->work1;
121:       else {
122:         PetscMalloc1(yy_n,&yvt);
123:         scat->work1 = yvt;
124:       }
125:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
126:       if (addv == ADD_VALUES) {
127:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
128: #if !defined(PETSC_USE_COMPLEX)
129:       } else if (addv == MAX_VALUES) {
130:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
131: #endif
132:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
133:     }
134:   }
135:   VecRestoreArrayPair(x,y,&xv,&yv);
136:   return(0);
137: }

141: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
142: {
144:   PetscBool      isascii;

147:   PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
148:   if (isascii) {
149:     PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
150:   }
151:   return(0);
152: }

154: /*
155:       This is special scatter code for when the entire parallel vector is  copied to processor 0.

157: */
160: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
161: {
162:   PetscErrorCode    ierr;
163:   PetscMPIInt       rank;
164:   PetscInt          yy_n,xx_n;
165:   PetscScalar       *yv;
166:   const PetscScalar *xv;
167:   MPI_Comm          comm;

170:   VecGetLocalSize(y,&yy_n);
171:   VecGetLocalSize(x,&xx_n);
172:   VecGetArrayRead(x,&xv);
173:   VecGetArray(y,&yv);

175:   PetscObjectGetComm((PetscObject)x,&comm);
176:   MPI_Comm_rank(comm,&rank);

178:   /* --------  Reverse scatter; spread from processor 0 to other processors */
179:   if (mode & SCATTER_REVERSE) {
180:     PetscScalar          *yvt;
181:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
182:     PetscInt             i;
183:     PetscMPIInt          *disply = scat->displx;

185:     if (addv == INSERT_VALUES) {
186:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
187:     } else {
188:       if (scat->work2) yvt = scat->work2;
189:       else {
190:         PetscMalloc1(xx_n,&yvt);
191:         scat->work2 = yvt;
192:       }
193:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
194:       if (addv == ADD_VALUES) {
195:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
196: #if !defined(PETSC_USE_COMPLEX)
197:       } else if (addv == MAX_VALUES) {
198:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
199: #endif
200:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
201:     }
202:   /* ---------  Forward scatter; gather all values onto processor 0 */
203:   } else {
204:     PetscScalar          *yvt  = 0;
205:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
206:     PetscInt             i;
207:     PetscMPIInt          *displx = scat->displx;

209:     if (addv == INSERT_VALUES) {
210:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
211:     } else {
212:       if (!rank) {
213:         if (scat->work1) yvt = scat->work1;
214:         else {
215:           PetscMalloc1(yy_n,&yvt);
216:           scat->work1 = yvt;
217:         }
218:       }
219:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
220:       if (!rank) {
221:         if (addv == ADD_VALUES) {
222:           for (i=0; i<yy_n; i++) yv[i] += yvt[i];
223: #if !defined(PETSC_USE_COMPLEX)
224:         } else if (addv == MAX_VALUES) {
225:           for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
226: #endif
227:         }  else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
228:       }
229:     }
230:   }
231:   VecRestoreArrayRead(x,&xv);
232:   VecRestoreArray(y,&yv);
233:   return(0);
234: }

236: /*
237:        The follow to are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
238: */
241: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
242: {
243:   VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
244:   PetscErrorCode       ierr;

247:   PetscFree(scat->work1);
248:   PetscFree(scat->work2);
249:   PetscFree3(ctx->todata,scat->count,scat->displx);
250:   return(0);
251: }

255: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
256: {

260:   PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
261:   PetscFree2(ctx->todata,ctx->fromdata);
262:   return(0);
263: }

267: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
268: {

272:   PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
273:   PetscFree2(ctx->todata,ctx->fromdata);
274:   return(0);
275: }

279: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
280: {

284:   PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
285:   PetscFree2(ctx->todata,ctx->fromdata);
286:   return(0);
287: }

291: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
292: {

296:   PetscFree2(ctx->todata,ctx->fromdata);
297:   return(0);
298: }

300: /* -------------------------------------------------------------------------*/
303: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
304: {
305:   VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
306:   PetscErrorCode       ierr;
307:   PetscMPIInt          size,*count,*displx;
308:   PetscInt             i;

311:   out->ops->begin   = in->ops->begin;
312:   out->ops->end     = in->ops->end;
313:   out->ops->copy    = in->ops->copy;
314:   out->ops->destroy = in->ops->destroy;
315:   out->ops->view    = in->ops->view;

317:   MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
318:   PetscMalloc3(1,&sto,size,&count,size,&displx);
319:   sto->type   = in_to->type;
320:   sto->count  = count;
321:   sto->displx = displx;
322:   for (i=0; i<size; i++) {
323:     sto->count[i]  = in_to->count[i];
324:     sto->displx[i] = in_to->displx[i];
325:   }
326:   sto->work1    = 0;
327:   sto->work2    = 0;
328:   out->todata   = (void*)sto;
329:   out->fromdata = (void*)0;
330:   return(0);
331: }

333: /* --------------------------------------------------------------------------------------*/
334: /*
335:         Scatter: sequential general to sequential general
336: */
339: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
340: {
341:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
342:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
343:   PetscErrorCode         ierr;
344:   PetscInt               i,n = gen_from->n,*fslots,*tslots;
345:   PetscScalar            *xv,*yv;

348: #if defined(PETSC_HAVE_CUSP)
349:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
350:     /* create the scatter indices if not done already */
351:     if (!ctx->spptr) {
352:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
353:       fslots = gen_from->vslots;
354:       tslots = gen_to->vslots;
355:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
356:     }
357:     /* next do the scatter */
358:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
359:     return(0);
360:   }
361: #elif defined(PETSC_HAVE_VECCUDA)
362:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
363:     /* create the scatter indices if not done already */
364:     if (!ctx->spptr) {
365:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
366:       fslots = gen_from->vslots;
367:       tslots = gen_to->vslots;
368:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
369:     }
370:     /* next do the scatter */
371:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
372:     return(0);
373:   }
374: #endif

376:   VecGetArrayPair(x,y,&xv,&yv);
377:   if (mode & SCATTER_REVERSE) {
378:     gen_to   = (VecScatter_Seq_General*)ctx->fromdata;
379:     gen_from = (VecScatter_Seq_General*)ctx->todata;
380:   }
381:   fslots = gen_from->vslots;
382:   tslots = gen_to->vslots;

384:   if (addv == INSERT_VALUES) {
385:     for (i=0; i<n; i++) yv[tslots[i]]  = xv[fslots[i]];
386:   } else if (addv == ADD_VALUES) {
387:     for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]];
388: #if !defined(PETSC_USE_COMPLEX)
389:   } else if (addv == MAX_VALUES) {
390:     for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]);
391: #endif
392:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
393:   VecRestoreArrayPair(x,y,&xv,&yv);
394:   return(0);
395: }

397: /*
398:     Scatter: sequential general to sequential stride 1
399: */
402: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
403: {
404:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
405:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
406:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
407:   PetscErrorCode         ierr;
408:   PetscInt               first = gen_to->first;
409:   PetscScalar            *xv,*yv;

412: #if defined(PETSC_HAVE_CUSP)
413:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
414:     /* create the scatter indices if not done already */
415:     if (!ctx->spptr) {
416:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
417:       PetscInt *tslots = 0;
418:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
419:     }
420:     /* next do the scatter */
421:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
422:     return(0);
423:   }
424: #elif defined(PETSC_HAVE_VECCUDA)
425:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
426:     /* create the scatter indices if not done already */
427:     if (!ctx->spptr) {
428:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
429:       PetscInt *tslots = 0;
430:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
431:     }
432:     /* next do the scatter */
433:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
434:     return(0);
435:   }
436: #endif

438:   VecGetArrayPair(x,y,&xv,&yv);
439:   if (mode & SCATTER_REVERSE) {
440:     xv += first;
441:     if (addv == INSERT_VALUES) {
442:       for (i=0; i<n; i++) yv[fslots[i]] = xv[i];
443:     } else if (addv == ADD_VALUES) {
444:       for (i=0; i<n; i++) yv[fslots[i]] += xv[i];
445: #if !defined(PETSC_USE_COMPLEX)
446:     } else if (addv == MAX_VALUES) {
447:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[i]);
448: #endif
449:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
450:   } else {
451:     yv += first;
452:     if (addv == INSERT_VALUES) {
453:       for (i=0; i<n; i++) yv[i] = xv[fslots[i]];
454:     } else if (addv == ADD_VALUES) {
455:       for (i=0; i<n; i++) yv[i] += xv[fslots[i]];
456: #if !defined(PETSC_USE_COMPLEX)
457:     } else if (addv == MAX_VALUES) {
458:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[fslots[i]]);
459: #endif
460:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
461:   }
462:   VecRestoreArrayPair(x,y,&xv,&yv);
463:   return(0);
464: }

466: /*
467:    Scatter: sequential general to sequential stride
468: */
471: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
472: {
473:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
474:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
475:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
476:   PetscErrorCode         ierr;
477:   PetscInt               first = gen_to->first,step = gen_to->step;
478:   PetscScalar            *xv,*yv;

481: #if defined(PETSC_HAVE_CUSP)
482:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
483:     /* create the scatter indices if not done already */
484:     if (!ctx->spptr) {
485:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
486:       PetscInt * tslots = 0;
487:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
488:     }
489:     /* next do the scatter */
490:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
491:     return(0);
492:   }
493: #elif defined(PETSC_HAVE_VECCUDA)
494:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
495:     /* create the scatter indices if not done already */
496:     if (!ctx->spptr) {
497:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
498:       PetscInt * tslots = 0;
499:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
500:     }
501:     /* next do the scatter */
502:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
503:     return(0);
504:   }
505: #endif

507:   VecGetArrayPair(x,y,&xv,&yv);
508:   if (mode & SCATTER_REVERSE) {
509:     if (addv == INSERT_VALUES) {
510:       for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
511:     } else if (addv == ADD_VALUES) {
512:       for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
513: #if !defined(PETSC_USE_COMPLEX)
514:     } else if (addv == MAX_VALUES) {
515:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
516: #endif
517:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
518:   } else {
519:     if (addv == INSERT_VALUES) {
520:       for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
521:     } else if (addv == ADD_VALUES) {
522:       for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
523: #if !defined(PETSC_USE_COMPLEX)
524:     } else if (addv == MAX_VALUES) {
525:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
526: #endif
527:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
528:   }
529:   VecRestoreArrayPair(x,y,&xv,&yv);
530:   return(0);
531: }

533: /*
534:     Scatter: sequential stride 1 to sequential general
535: */
538: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
539: {
540:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
541:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
542:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
543:   PetscErrorCode         ierr;
544:   PetscInt               first = gen_from->first;
545:   PetscScalar            *xv,*yv;

548: #if defined(PETSC_HAVE_CUSP)
549:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
550:     /* create the scatter indices if not done already */
551:     if (!ctx->spptr) {
552:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
553:       PetscInt *fslots = 0;
554:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
555:     }
556:     /* next do the scatter */
557:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
558:     return(0);
559:   }
560: #elif defined(PETSC_HAVE_VECCUDA)
561:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
562:     /* create the scatter indices if not done already */
563:     if (!ctx->spptr) {
564:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
565:       PetscInt *fslots = 0;
566:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
567:     }
568:     /* next do the scatter */
569:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
570:     return(0);
571:   }
572: #endif

574:   VecGetArrayPair(x,y,&xv,&yv);
575:   if (mode & SCATTER_REVERSE) {
576:     yv += first;
577:     if (addv == INSERT_VALUES) {
578:       for (i=0; i<n; i++) yv[i] = xv[tslots[i]];
579:     } else  if (addv == ADD_VALUES) {
580:       for (i=0; i<n; i++) yv[i] += xv[tslots[i]];
581: #if !defined(PETSC_USE_COMPLEX)
582:     } else  if (addv == MAX_VALUES) {
583:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[tslots[i]]);
584: #endif
585:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
586:   } else {
587:     xv += first;
588:     if (addv == INSERT_VALUES) {
589:       for (i=0; i<n; i++) yv[tslots[i]] = xv[i];
590:     } else  if (addv == ADD_VALUES) {
591:       for (i=0; i<n; i++) yv[tslots[i]] += xv[i];
592: #if !defined(PETSC_USE_COMPLEX)
593:     } else  if (addv == MAX_VALUES) {
594:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[i]);
595: #endif
596:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
597:   }
598:   VecRestoreArrayPair(x,y,&xv,&yv);
599:   return(0);
600: }

604: /*
605:    Scatter: sequential stride to sequential general
606: */
607: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
608: {
609:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
610:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
611:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
612:   PetscErrorCode         ierr;
613:   PetscInt               first = gen_from->first,step = gen_from->step;
614:   PetscScalar            *xv,*yv;

617: #if defined(PETSC_HAVE_CUSP)
618:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
619:     /* create the scatter indices if not done already */
620:     if (!ctx->spptr) {
621:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
622:       PetscInt *fslots = 0;
623:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
624:     }
625:     /* next do the scatter */
626:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
627:     return(0);
628:   }
629: #elif defined(PETSC_HAVE_VECCUDA)
630:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
631:     /* create the scatter indices if not done already */
632:     if (!ctx->spptr) {
633:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
634:       PetscInt *fslots = 0;
635:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
636:     }
637:     /* next do the scatter */
638:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
639:     return(0);
640:   }
641: #endif

643:   VecGetArrayPair(x,y,&xv,&yv);
644:   if (mode & SCATTER_REVERSE) {
645:     if (addv == INSERT_VALUES) {
646:       for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
647:     } else if (addv == ADD_VALUES) {
648:       for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
649: #if !defined(PETSC_USE_COMPLEX)
650:     } else if (addv == MAX_VALUES) {
651:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
652: #endif
653:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
654:   } else {
655:     if (addv == INSERT_VALUES) {
656:       for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
657:     } else if (addv == ADD_VALUES) {
658:       for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
659: #if !defined(PETSC_USE_COMPLEX)
660:     } else if (addv == MAX_VALUES) {
661:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
662: #endif
663:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
664:   }
665:   VecRestoreArrayPair(x,y,&xv,&yv);
666:   return(0);
667: }

671: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
672: {
673:   PetscErrorCode         ierr;
674:   VecScatter_Seq_Stride  *in_from = (VecScatter_Seq_Stride*)in->todata;
675:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->fromdata;
676:   PetscInt               i;
677:   PetscBool              isascii;

680:   PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
681:   if (isascii) {
682:     PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
683:     for (i=0; i<in_to->n; i++) {
684:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
685:     }
686:   }
687:   return(0);
688: }
689: /*
690:      Scatter: sequential stride to sequential stride
691: */
694: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
695: {
696:   VecScatter_Seq_Stride *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
697:   VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
698:   PetscInt              i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
699:   PetscErrorCode        ierr;
700:   PetscInt              from_first = gen_from->first,from_step = gen_from->step;
701:   PetscScalar           *xv,*yv;

704: #if defined(PETSC_HAVE_CUSP)
705:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
706:     /* create the scatter indices if not done already */
707:     if (!ctx->spptr) {
708:       PetscInt *tslots = 0,*fslots = 0;
709:       VecScatterCUSPIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
710:     }
711:     /* next do the scatter */
712:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
713:     return(0);
714:   }
715: #elif defined(PETSC_HAVE_VECCUDA)
716:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
717:     /* create the scatter indices if not done already */
718:     if (!ctx->spptr) {
719:       PetscInt *tslots = 0,*fslots = 0;
720:       VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
721:     }
722:     /* next do the scatter */
723:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
724:     return(0);
725:   }
726: #endif

728:   VecGetArrayPair(x,y,&xv,&yv);
729:   if (mode & SCATTER_REVERSE) {
730:     from_first = gen_to->first;
731:     to_first   = gen_from->first;
732:     from_step  = gen_to->step;
733:     to_step    = gen_from->step;
734:   }

736:   if (addv == INSERT_VALUES) {
737:     if (to_step == 1 && from_step == 1) {
738:       PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
739:     } else  {
740:       for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
741:     }
742:   } else if (addv == ADD_VALUES) {
743:     if (to_step == 1 && from_step == 1) {
744:       yv += to_first; xv += from_first;
745:       for (i=0; i<n; i++) yv[i] += xv[i];
746:     } else {
747:       for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
748:     }
749: #if !defined(PETSC_USE_COMPLEX)
750:   } else if (addv == MAX_VALUES) {
751:     if (to_step == 1 && from_step == 1) {
752:       yv += to_first; xv += from_first;
753:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[i]);
754:     } else {
755:       for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
756:     }
757: #endif
758:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
759:   VecRestoreArrayPair(x,y,&xv,&yv);
760:   return(0);
761: }

763: /* --------------------------------------------------------------------------*/


768: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
769: {
770:   PetscErrorCode         ierr;
771:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
772:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

775:   out->ops->begin   = in->ops->begin;
776:   out->ops->end     = in->ops->end;
777:   out->ops->copy    = in->ops->copy;
778:   out->ops->destroy = in->ops->destroy;
779:   out->ops->view    = in->ops->view;

781:   PetscMalloc2(1,&out_to,1,&out_from);
782:   PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
783:   out_to->n                    = in_to->n;
784:   out_to->type                 = in_to->type;
785:   out_to->nonmatching_computed = PETSC_FALSE;
786:   out_to->slots_nonmatching    = 0;
787:   out_to->is_copy              = PETSC_FALSE;
788:   PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));

790:   out_from->n                    = in_from->n;
791:   out_from->type                 = in_from->type;
792:   out_from->nonmatching_computed = PETSC_FALSE;
793:   out_from->slots_nonmatching    = 0;
794:   out_from->is_copy              = PETSC_FALSE;
795:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

797:   out->todata   = (void*)out_to;
798:   out->fromdata = (void*)out_from;
799:   return(0);
800: }

804: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
805: {
806:   PetscErrorCode         ierr;
807:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
808:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
809:   PetscInt               i;
810:   PetscBool              isascii;

813:   PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
814:   if (isascii) {
815:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
816:     for (i=0; i<in_to->n; i++) {
817:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
818:     }
819:   }
820:   return(0);
821: }


826: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
827: {
828:   PetscErrorCode         ierr;
829:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
830:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

833:   out->ops->begin   = in->ops->begin;
834:   out->ops->end     = in->ops->end;
835:   out->ops->copy    = in->ops->copy;
836:   out->ops->destroy = in->ops->destroy;
837:   out->ops->view    = in->ops->view;

839:   PetscMalloc2(1,&out_to,1,&out_from);
840:   PetscMalloc1(in_from->n,&out_from->vslots);
841:   out_to->n     = in_to->n;
842:   out_to->type  = in_to->type;
843:   out_to->first = in_to->first;
844:   out_to->step  = in_to->step;
845:   out_to->type  = in_to->type;

847:   out_from->n                    = in_from->n;
848:   out_from->type                 = in_from->type;
849:   out_from->nonmatching_computed = PETSC_FALSE;
850:   out_from->slots_nonmatching    = 0;
851:   out_from->is_copy              = PETSC_FALSE;
852:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

854:   out->todata   = (void*)out_to;
855:   out->fromdata = (void*)out_from;
856:   return(0);
857: }

861: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
862: {
863:   PetscErrorCode         ierr;
864:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata;
865:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
866:   PetscInt               i;
867:   PetscBool              isascii;

870:   PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
871:   if (isascii) {
872:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
873:     for (i=0; i<in_to->n; i++) {
874:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
875:     }
876:   }
877:   return(0);
878: }

880: /* --------------------------------------------------------------------------*/
881: /*
882:     Scatter: parallel to sequential vector, sequential strides for both.
883: */
886: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
887: {
888:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
889:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
890:   PetscErrorCode        ierr;

893:   out->ops->begin   = in->ops->begin;
894:   out->ops->end     = in->ops->end;
895:   out->ops->copy    = in->ops->copy;
896:   out->ops->destroy = in->ops->destroy;
897:   out->ops->view    = in->ops->view;

899:   PetscMalloc2(1,&out_to,1,&out_from);
900:   out_to->n       = in_to->n;
901:   out_to->type    = in_to->type;
902:   out_to->first   = in_to->first;
903:   out_to->step    = in_to->step;
904:   out_to->type    = in_to->type;
905:   out_from->n     = in_from->n;
906:   out_from->type  = in_from->type;
907:   out_from->first = in_from->first;
908:   out_from->step  = in_from->step;
909:   out_from->type  = in_from->type;
910:   out->todata     = (void*)out_to;
911:   out->fromdata   = (void*)out_from;
912:   return(0);
913: }

917: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
918: {
919:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata;
920:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
921:   PetscErrorCode        ierr;
922:   PetscBool             isascii;

925:   PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
926:   if (isascii) {
927:     PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
928:   }
929:   return(0);
930: }


933: extern PetscErrorCode VecScatterCreate_PtoS(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
934: extern PetscErrorCode VecScatterCreate_PtoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
935: extern PetscErrorCode VecScatterCreate_StoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);

937: /* =======================================================================*/
938: #define VEC_SEQ_ID 0
939: #define VEC_MPI_ID 1
940: #define IS_GENERAL_ID 0
941: #define IS_STRIDE_ID  1
942: #define IS_BLOCK_ID   2

944: /*
945:    Blocksizes we have optimized scatters for
946: */

948: #define VecScatterOptimizedBS(mbs) (2 <= mbs)


953: PetscErrorCode  VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
954: {
955:   VecScatter     ctx;

959:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
960:   ctx->inuse               = PETSC_FALSE;
961:   ctx->beginandendtogether = PETSC_FALSE;
962:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
963:   if (ctx->beginandendtogether) {
964:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
965:   }
966:   ctx->packtogether = PETSC_FALSE;
967:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
968:   if (ctx->packtogether) {
969:     PetscInfo(ctx,"Pack all messages before sending\n");
970:   }
971:   *newctx = ctx;
972:   return(0);
973: }

977: /*@C
978:    VecScatterCreate - Creates a vector scatter context.

980:    Collective on Vec

982:    Input Parameters:
983: +  xin - a vector that defines the shape (parallel data layout of the vector)
984:          of vectors from which we scatter
985: .  yin - a vector that defines the shape (parallel data layout of the vector)
986:          of vectors to which we scatter
987: .  ix - the indices of xin to scatter (if NULL scatters all values)
988: -  iy - the indices of yin to hold results (if NULL fills entire vector yin)

990:    Output Parameter:
991: .  newctx - location to store the new scatter context

993:    Options Database Keys: (uses regular MPI_Sends by default)
994: +  -vecscatter_view         - Prints detail of communications
995: .  -vecscatter_view ::ascii_info    - Print less details about communication
996: .  -vecscatter_ssend        - Uses MPI_Ssend_init() instead of MPI_Send_init()
997: .  -vecscatter_rsend           - use ready receiver mode for MPI sends
998: .  -vecscatter_merge        - VecScatterBegin() handles all of the communication, VecScatterEnd() is a nop
999:                               eliminates the chance for overlap of computation and communication
1000: .  -vecscatter_sendfirst    - Posts sends before receives
1001: .  -vecscatter_packtogether - Pack all messages before sending, receive all messages before unpacking
1002: .  -vecscatter_alltoall     - Uses MPI all to all communication for scatter
1003: .  -vecscatter_window       - Use MPI 2 window operations to move data
1004: .  -vecscatter_nopack       - Avoid packing to work vector when possible (if used with -vecscatter_alltoall then will use MPI_Alltoallw()
1005: -  -vecscatter_reproduce    - insure that the order of the communications are done the same for each scatter, this under certain circumstances
1006:                               will make the results of scatters deterministic when otherwise they are not (it may be slower also).

1008: $
1009: $                                                                                    --When packing is used--
1010: $                               MPI Datatypes (no packing)  sendfirst   merge        packtogether  persistent*
1011: $                                _nopack                   _sendfirst    _merge      _packtogether                -vecscatter_
1012: $ ----------------------------------------------------------------------------------------------------------------------------
1013: $    Message passing    Send       p                           X            X           X         always
1014: $                      Ssend       p                           X            X           X         always          _ssend
1015: $                      Rsend       p                        nonsense        X           X         always          _rsend
1016: $    AlltoAll  v or w              X                        nonsense     always         X         nonsense        _alltoall
1017: $    MPI_Win                       p                        nonsense        p           p         nonsense        _window
1018: $
1019: $   Since persistent sends and receives require a constant memory address they can only be used when data is packed into the work vector
1020: $   because the in and out array may be different for each call to VecScatterBegin/End().
1021: $
1022: $    p indicates possible, but not implemented. X indicates implemented
1023: $

1025:     Level: intermediate

1027:   Notes:
1028:    In calls to VecScatter() you can use different vectors than the xin and
1029:    yin you used above; BUT they must have the same parallel data layout, for example,
1030:    they could be obtained from VecDuplicate().
1031:    A VecScatter context CANNOT be used in two or more simultaneous scatters;
1032:    that is you cannot call a second VecScatterBegin() with the same scatter
1033:    context until the VecScatterEnd() has been called on the first VecScatterBegin().
1034:    In this case a separate VecScatter is needed for each concurrent scatter.

1036:    Currently the MPI_Send(), MPI_Ssend() and MPI_Rsend() all use PERSISTENT versions.
1037:    (this unfortunately requires that the same in and out arrays be used for each use, this
1038:     is why when not using MPI_alltoallw() we always need to pack the input into the work array before sending
1039:     and unpack upon receeving instead of using MPI datatypes to avoid the packing/unpacking).

1041:    Both ix and iy cannot be NULL at the same time.

1043:    Concepts: scatter^between vectors
1044:    Concepts: gather^between vectors

1046: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero()
1047: @*/
1048: PetscErrorCode  VecScatterCreate(Vec xin,IS ix,Vec yin,IS iy,VecScatter *newctx)
1049: {
1050:   VecScatter        ctx;
1051:   PetscErrorCode    ierr;
1052:   PetscMPIInt       size;
1053:   PetscInt          xin_type = VEC_SEQ_ID,yin_type = VEC_SEQ_ID,*range;
1054:   PetscInt          ix_type  = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1055:   MPI_Comm          comm,ycomm;
1056:   PetscBool         totalv,ixblock,iyblock,iystride,islocal,cando,flag;
1057:   IS                tix = 0,tiy = 0;

1060:   if (!ix && !iy) SETERRQ(PetscObjectComm((PetscObject)xin),PETSC_ERR_SUP,"Cannot pass default in for both input and output indices");

1062:   /*
1063:       Determine if the vectors are "parallel", ie. it shares a comm with other processors, or
1064:       sequential (it does not share a comm). The difference is that parallel vectors treat the
1065:       index set as providing indices in the global parallel numbering of the vector, with
1066:       sequential vectors treat the index set as providing indices in the local sequential
1067:       numbering
1068:   */
1069:   PetscObjectGetComm((PetscObject)xin,&comm);
1070:   MPI_Comm_size(comm,&size);
1071:   if (size > 1) xin_type = VEC_MPI_ID;

1073:   PetscObjectGetComm((PetscObject)yin,&ycomm);
1074:   MPI_Comm_size(ycomm,&size);
1075:   if (size > 1) {comm = ycomm; yin_type = VEC_MPI_ID;}

1077:   /* generate the Scatter context */
1078:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
1079:   ctx->inuse               = PETSC_FALSE;

1081:   ctx->beginandendtogether = PETSC_FALSE;
1082:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
1083:   if (ctx->beginandendtogether) {
1084:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
1085:   }
1086:   ctx->packtogether = PETSC_FALSE;
1087:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
1088:   if (ctx->packtogether) {
1089:     PetscInfo(ctx,"Pack all messages before sending\n");
1090:   }

1092:   VecGetLocalSize(xin,&ctx->from_n);
1093:   VecGetLocalSize(yin,&ctx->to_n);

1095:   /*
1096:       if ix or iy is not included; assume just grabbing entire vector
1097:   */
1098:   if (!ix && xin_type == VEC_SEQ_ID) {
1099:     ISCreateStride(comm,ctx->from_n,0,1,&ix);
1100:     tix  = ix;
1101:   } else if (!ix && xin_type == VEC_MPI_ID) {
1102:     if (yin_type == VEC_MPI_ID) {
1103:       PetscInt ntmp, low;
1104:       VecGetLocalSize(xin,&ntmp);
1105:       VecGetOwnershipRange(xin,&low,NULL);
1106:       ISCreateStride(comm,ntmp,low,1,&ix);
1107:     } else {
1108:       PetscInt Ntmp;
1109:       VecGetSize(xin,&Ntmp);
1110:       ISCreateStride(comm,Ntmp,0,1,&ix);
1111:     }
1112:     tix = ix;
1113:   } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");

1115:   if (!iy && yin_type == VEC_SEQ_ID) {
1116:     ISCreateStride(comm,ctx->to_n,0,1,&iy);
1117:     tiy  = iy;
1118:   } else if (!iy && yin_type == VEC_MPI_ID) {
1119:     if (xin_type == VEC_MPI_ID) {
1120:       PetscInt ntmp, low;
1121:       VecGetLocalSize(yin,&ntmp);
1122:       VecGetOwnershipRange(yin,&low,NULL);
1123:       ISCreateStride(comm,ntmp,low,1,&iy);
1124:     } else {
1125:       PetscInt Ntmp;
1126:       VecGetSize(yin,&Ntmp);
1127:       ISCreateStride(comm,Ntmp,0,1,&iy);
1128:     }
1129:     tiy = iy;
1130:   } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");

1132:   /*
1133:      Determine types of index sets
1134:   */
1135:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1136:   if (flag) ix_type = IS_BLOCK_ID;
1137:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1138:   if (flag) iy_type = IS_BLOCK_ID;
1139:   PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1140:   if (flag) ix_type = IS_STRIDE_ID;
1141:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1142:   if (flag) iy_type = IS_STRIDE_ID;

1144:   /* ===========================================================================================================
1145:         Check for special cases
1146:      ==========================================================================================================*/
1147:   /* ---------------------------------------------------------------------------*/
1148:   if (xin_type == VEC_SEQ_ID && yin_type == VEC_SEQ_ID) {
1149:     if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
1150:       PetscInt               nx,ny;
1151:       const PetscInt         *idx,*idy;
1152:       VecScatter_Seq_General *to = NULL,*from = NULL;

1154:       ISGetLocalSize(ix,&nx);
1155:       ISGetLocalSize(iy,&ny);
1156:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1157:       ISGetIndices(ix,&idx);
1158:       ISGetIndices(iy,&idy);
1159:       PetscMalloc2(1,&to,1,&from);
1160:       PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
1161:       to->n = nx;
1162: #if defined(PETSC_USE_DEBUG)
1163:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1164: #endif
1165:       PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
1166:       from->n = nx;
1167: #if defined(PETSC_USE_DEBUG)
1168:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1169: #endif
1170:        PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
1171:       to->type          = VEC_SCATTER_SEQ_GENERAL;
1172:       from->type        = VEC_SCATTER_SEQ_GENERAL;
1173:       ctx->todata       = (void*)to;
1174:       ctx->fromdata     = (void*)from;
1175:       ctx->ops->begin   = VecScatterBegin_SGToSG;
1176:       ctx->ops->end     = 0;
1177:       ctx->ops->destroy = VecScatterDestroy_SGToSG;
1178:       ctx->ops->copy    = VecScatterCopy_SGToSG;
1179:       ctx->ops->view    = VecScatterView_SGToSG;
1180:       PetscInfo(xin,"Special case: sequential vector general scatter\n");
1181:       goto functionend;
1182:     } else if (ix_type == IS_STRIDE_ID &&  iy_type == IS_STRIDE_ID) {
1183:       PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1184:       VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;

1186:       ISGetLocalSize(ix,&nx);
1187:       ISGetLocalSize(iy,&ny);
1188:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1189:       ISStrideGetInfo(iy,&to_first,&to_step);
1190:       ISStrideGetInfo(ix,&from_first,&from_step);
1191:       PetscMalloc2(1,&to8,1,&from8);
1192:       to8->n            = nx;
1193:       to8->first        = to_first;
1194:       to8->step         = to_step;
1195:       from8->n          = nx;
1196:       from8->first      = from_first;
1197:       from8->step       = from_step;
1198:       to8->type         = VEC_SCATTER_SEQ_STRIDE;
1199:       from8->type       = VEC_SCATTER_SEQ_STRIDE;
1200:       ctx->todata       = (void*)to8;
1201:       ctx->fromdata     = (void*)from8;
1202:       ctx->ops->begin   = VecScatterBegin_SSToSS;
1203:       ctx->ops->end     = 0;
1204:       ctx->ops->destroy = VecScatterDestroy_SSToSS;
1205:       ctx->ops->copy    = VecScatterCopy_SSToSS;
1206:       ctx->ops->view    = VecScatterView_SSToSS;
1207:       PetscInfo(xin,"Special case: sequential vector stride to stride\n");
1208:       goto functionend;
1209:     } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
1210:       PetscInt               nx,ny,first,step;
1211:       const PetscInt         *idx;
1212:       VecScatter_Seq_General *from9 = NULL;
1213:       VecScatter_Seq_Stride  *to9   = NULL;

1215:       ISGetLocalSize(ix,&nx);
1216:       ISGetIndices(ix,&idx);
1217:       ISGetLocalSize(iy,&ny);
1218:       ISStrideGetInfo(iy,&first,&step);
1219:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1220:       PetscMalloc2(1,&to9,1,&from9);
1221:       PetscMalloc1(nx,&from9->vslots);
1222:       to9->n     = nx;
1223:       to9->first = first;
1224:       to9->step  = step;
1225:       from9->n   = nx;
1226: #if defined(PETSC_USE_DEBUG)
1227:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1228: #endif
1229:       PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
1230:       ctx->todata    = (void*)to9; ctx->fromdata = (void*)from9;
1231:       if (step == 1)  ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
1232:       else            ctx->ops->begin = VecScatterBegin_SGToSS;
1233:       ctx->ops->destroy   = VecScatterDestroy_SGToSS;
1234:       ctx->ops->end       = 0;
1235:       ctx->ops->copy      = VecScatterCopy_SGToSS;
1236:       ctx->ops->view      = VecScatterView_SGToSS;
1237:       to9->type      = VEC_SCATTER_SEQ_STRIDE;
1238:       from9->type    = VEC_SCATTER_SEQ_GENERAL;
1239:       PetscInfo(xin,"Special case: sequential vector general to stride\n");
1240:       goto functionend;
1241:     } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
1242:       PetscInt               nx,ny,first,step;
1243:       const PetscInt         *idy;
1244:       VecScatter_Seq_General *to10 = NULL;
1245:       VecScatter_Seq_Stride  *from10 = NULL;

1247:       ISGetLocalSize(ix,&nx);
1248:       ISGetIndices(iy,&idy);
1249:       ISGetLocalSize(iy,&ny);
1250:       ISStrideGetInfo(ix,&first,&step);
1251:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1252:       PetscMalloc2(1,&to10,1,&from10);
1253:       PetscMalloc1(nx,&to10->vslots);
1254:       from10->n     = nx;
1255:       from10->first = first;
1256:       from10->step  = step;
1257:       to10->n       = nx;
1258: #if defined(PETSC_USE_DEBUG)
1259:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1260: #endif
1261:       PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1262:       ctx->todata   = (void*)to10;
1263:       ctx->fromdata = (void*)from10;
1264:       if (step == 1) ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1265:       else           ctx->ops->begin = VecScatterBegin_SSToSG;
1266:       ctx->ops->destroy = VecScatterDestroy_SSToSG;
1267:       ctx->ops->end     = 0;
1268:       ctx->ops->copy    = 0;
1269:       ctx->ops->view    = VecScatterView_SSToSG;
1270:       to10->type   = VEC_SCATTER_SEQ_GENERAL;
1271:       from10->type = VEC_SCATTER_SEQ_STRIDE;
1272:       PetscInfo(xin,"Special case: sequential vector stride to general\n");
1273:       goto functionend;
1274:     } else {
1275:       PetscInt               nx,ny;
1276:       const PetscInt         *idx,*idy;
1277:       VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1278:       PetscBool              idnx,idny;

1280:       ISGetLocalSize(ix,&nx);
1281:       ISGetLocalSize(iy,&ny);
1282:       if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);

1284:       ISIdentity(ix,&idnx);
1285:       ISIdentity(iy,&idny);
1286:       if (idnx && idny) {
1287:         VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1288:         PetscMalloc2(1,&to13,1,&from13);
1289:         to13->n       = nx;
1290:         to13->first   = 0;
1291:         to13->step    = 1;
1292:         from13->n     = nx;
1293:         from13->first = 0;
1294:         from13->step  = 1;
1295:         to13->type    = VEC_SCATTER_SEQ_STRIDE;
1296:         from13->type  = VEC_SCATTER_SEQ_STRIDE;
1297:         ctx->todata   = (void*)to13;
1298:         ctx->fromdata = (void*)from13;
1299:         ctx->ops->begin    = VecScatterBegin_SSToSS;
1300:         ctx->ops->end      = 0;
1301:         ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1302:         ctx->ops->copy     = VecScatterCopy_SSToSS;
1303:         ctx->ops->view     = VecScatterView_SSToSS;
1304:         PetscInfo(xin,"Special case: sequential copy\n");
1305:         goto functionend;
1306:       }

1308:       ISGetIndices(iy,&idy);
1309:       ISGetIndices(ix,&idx);
1310:       PetscMalloc2(1,&to11,1,&from11);
1311:       PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1312:       to11->n = nx;
1313: #if defined(PETSC_USE_DEBUG)
1314:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1315: #endif
1316:       PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1317:       from11->n = nx;
1318: #if defined(PETSC_USE_DEBUG)
1319:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1320: #endif
1321:       PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1322:       to11->type        = VEC_SCATTER_SEQ_GENERAL;
1323:       from11->type      = VEC_SCATTER_SEQ_GENERAL;
1324:       ctx->todata       = (void*)to11;
1325:       ctx->fromdata     = (void*)from11;
1326:       ctx->ops->begin   = VecScatterBegin_SGToSG;
1327:       ctx->ops->end     = 0;
1328:       ctx->ops->destroy = VecScatterDestroy_SGToSG;
1329:       ctx->ops->copy    = VecScatterCopy_SGToSG;
1330:       ctx->ops->view    = VecScatterView_SGToSG;
1331:       ISRestoreIndices(ix,&idx);
1332:       ISRestoreIndices(iy,&idy);
1333:       PetscInfo(xin,"Sequential vector scatter with block indices\n");
1334:       goto functionend;
1335:     }
1336:   }
1337:   /* ---------------------------------------------------------------------------*/
1338:   if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {

1340:     /* ===========================================================================================================
1341:           Check for special cases
1342:        ==========================================================================================================*/
1343:     islocal = PETSC_FALSE;
1344:     /* special case extracting (subset of) local portion */
1345:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1346:       PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1347:       PetscInt              start,end,min,max;
1348:       VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;

1350:       VecGetOwnershipRange(xin,&start,&end);
1351:       ISGetLocalSize(ix,&nx);
1352:       ISStrideGetInfo(ix,&from_first,&from_step);
1353:       ISGetLocalSize(iy,&ny);
1354:       ISStrideGetInfo(iy,&to_first,&to_step);
1355:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1356:       ISGetMinMax(ix,&min,&max);
1357:       if (min >= start && max < end) islocal = PETSC_TRUE;
1358:       else islocal = PETSC_FALSE;
1359:       /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1360:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1361:       if (cando) {
1362:         PetscMalloc2(1,&to12,1,&from12);
1363:         to12->n            = nx;
1364:         to12->first        = to_first;
1365:         to12->step         = to_step;
1366:         from12->n          = nx;
1367:         from12->first      = from_first-start;
1368:         from12->step       = from_step;
1369:         to12->type         = VEC_SCATTER_SEQ_STRIDE;
1370:         from12->type       = VEC_SCATTER_SEQ_STRIDE;
1371:         ctx->todata        = (void*)to12;
1372:         ctx->fromdata      = (void*)from12;
1373:         ctx->ops->begin    = VecScatterBegin_SSToSS;
1374:         ctx->ops->end      = 0;
1375:         ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1376:         ctx->ops->copy     = VecScatterCopy_SSToSS;
1377:         ctx->ops->view     = VecScatterView_SSToSS;
1378:         PetscInfo(xin,"Special case: processors only getting local values\n");
1379:         goto functionend;
1380:       }
1381:     } else {
1382:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1383:     }

1385:     /* test for special case of all processors getting entire vector */
1386:     /* contains check that PetscMPIInt can handle the sizes needed */
1387:     totalv = PETSC_FALSE;
1388:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1389:       PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1390:       PetscMPIInt          *count = NULL,*displx;
1391:       VecScatter_MPI_ToAll *sto   = NULL;

1393:       ISGetLocalSize(ix,&nx);
1394:       ISStrideGetInfo(ix,&from_first,&from_step);
1395:       ISGetLocalSize(iy,&ny);
1396:       ISStrideGetInfo(iy,&to_first,&to_step);
1397:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1398:       VecGetSize(xin,&N);
1399:       if (nx != N) totalv = PETSC_FALSE;
1400:       else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1401:       else totalv = PETSC_FALSE;
1402:       /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1403:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1405: #if defined(PETSC_USE_64BIT_INDICES)
1406:       if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1407: #else
1408:       if (cando) {
1409: #endif
1410:         MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1411:         PetscMalloc3(1,&sto,size,&count,size,&displx);
1412:         range = xin->map->range;
1413:         for (i=0; i<size; i++) {
1414:           PetscMPIIntCast(range[i+1] - range[i],count+i);
1415:           PetscMPIIntCast(range[i],displx+i);
1416:         }
1417:         sto->count        = count;
1418:         sto->displx       = displx;
1419:         sto->work1        = 0;
1420:         sto->work2        = 0;
1421:         sto->type         = VEC_SCATTER_MPI_TOALL;
1422:         ctx->todata       = (void*)sto;
1423:         ctx->fromdata     = 0;
1424:         ctx->ops->begin   = VecScatterBegin_MPI_ToAll;
1425:         ctx->ops->end     = 0;
1426:         ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1427:         ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1428:         ctx->ops->view    = VecScatterView_MPI_ToAll;
1429:         PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1430:         goto functionend;
1431:       }
1432:     } else {
1433:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1434:     }

1436:     /* test for special case of processor 0 getting entire vector */
1437:     /* contains check that PetscMPIInt can handle the sizes needed */
1438:     totalv = PETSC_FALSE;
1439:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1440:       PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1441:       PetscMPIInt          rank,*count = NULL,*displx;
1442:       VecScatter_MPI_ToAll *sto = NULL;

1444:       PetscObjectGetComm((PetscObject)xin,&comm);
1445:       MPI_Comm_rank(comm,&rank);
1446:       ISGetLocalSize(ix,&nx);
1447:       ISStrideGetInfo(ix,&from_first,&from_step);
1448:       ISGetLocalSize(iy,&ny);
1449:       ISStrideGetInfo(iy,&to_first,&to_step);
1450:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1451:       if (!rank) {
1452:         VecGetSize(xin,&N);
1453:         if (nx != N) totalv = PETSC_FALSE;
1454:         else if (from_first == 0        && from_step == 1 &&
1455:                  from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1456:         else totalv = PETSC_FALSE;
1457:       } else {
1458:         if (!nx) totalv = PETSC_TRUE;
1459:         else     totalv = PETSC_FALSE;
1460:       }
1461:       /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1462:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1464: #if defined(PETSC_USE_64BIT_INDICES)
1465:       if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1466: #else
1467:       if (cando) {
1468: #endif
1469:         MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1470:         PetscMalloc3(1,&sto,size,&count,size,&displx);
1471:         range = xin->map->range;
1472:         for (i=0; i<size; i++) {
1473:           PetscMPIIntCast(range[i+1] - range[i],count+i);
1474:           PetscMPIIntCast(range[i],displx+i);
1475:         }
1476:         sto->count        = count;
1477:         sto->displx       = displx;
1478:         sto->work1        = 0;
1479:         sto->work2        = 0;
1480:         sto->type         = VEC_SCATTER_MPI_TOONE;
1481:         ctx->todata       = (void*)sto;
1482:         ctx->fromdata     = 0;
1483:         ctx->ops->begin   = VecScatterBegin_MPI_ToOne;
1484:         ctx->ops->end     = 0;
1485:         ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1486:         ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1487:         ctx->ops->view    = VecScatterView_MPI_ToAll;
1488:         PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1489:         goto functionend;
1490:       }
1491:     } else {
1492:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1493:     }

1495:     PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1496:     PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1497:     PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1498:     if (ixblock) {
1499:       /* special case block to block */
1500:       if (iyblock) {
1501:         PetscInt       nx,ny,bsx,bsy;
1502:         const PetscInt *idx,*idy;
1503:         ISGetBlockSize(iy,&bsy);
1504:         ISGetBlockSize(ix,&bsx);
1505:         if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1506:           ISBlockGetLocalSize(ix,&nx);
1507:           ISBlockGetIndices(ix,&idx);
1508:           ISBlockGetLocalSize(iy,&ny);
1509:           ISBlockGetIndices(iy,&idy);
1510:           if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1511:           VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,bsx,ctx);
1512:           ISBlockRestoreIndices(ix,&idx);
1513:           ISBlockRestoreIndices(iy,&idy);
1514:           PetscInfo(xin,"Special case: blocked indices\n");
1515:           goto functionend;
1516:         }
1517:         /* special case block to stride */
1518:       } else if (iystride) {
1519:         PetscInt ystart,ystride,ysize,bsx;
1520:         ISStrideGetInfo(iy,&ystart,&ystride);
1521:         ISGetLocalSize(iy,&ysize);
1522:         ISGetBlockSize(ix,&bsx);
1523:         /* see if stride index set is equivalent to block index set */
1524:         if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1525:           PetscInt       nx,il,*idy;
1526:           const PetscInt *idx;
1527:           ISBlockGetLocalSize(ix,&nx);
1528:           ISBlockGetIndices(ix,&idx);
1529:           if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1530:           PetscMalloc1(nx,&idy);
1531:           if (nx) {
1532:             idy[0] = ystart/bsx;
1533:             for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1534:           }
1535:           VecScatterCreate_PtoS(nx,idx,nx,idy,xin,yin,bsx,ctx);
1536:           PetscFree(idy);
1537:           ISBlockRestoreIndices(ix,&idx);
1538:           PetscInfo(xin,"Special case: blocked indices to stride\n");
1539:           goto functionend;
1540:         }
1541:       }
1542:     }
1543:     /* left over general case */
1544:     {
1545:       PetscInt       nx,ny;
1546:       const PetscInt *idx,*idy;
1547:       ISGetLocalSize(ix,&nx);
1548:       ISGetIndices(ix,&idx);
1549:       ISGetLocalSize(iy,&ny);
1550:       ISGetIndices(iy,&idy);
1551:       if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%d %d)",nx,ny);
1552:       VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,1,ctx);
1553:       ISRestoreIndices(ix,&idx);
1554:       ISRestoreIndices(iy,&idy);
1555:       PetscInfo(xin,"General case: MPI to Seq\n");
1556:       goto functionend;
1557:     }
1558:   }
1559:   /* ---------------------------------------------------------------------------*/
1560:   if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1561:     /* ===========================================================================================================
1562:           Check for special cases
1563:        ==========================================================================================================*/
1564:     /* special case local copy portion */
1565:     islocal = PETSC_FALSE;
1566:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1567:       PetscInt              nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1568:       VecScatter_Seq_Stride *from = NULL,*to = NULL;

1570:       VecGetOwnershipRange(yin,&start,&end);
1571:       ISGetLocalSize(ix,&nx);
1572:       ISStrideGetInfo(ix,&from_first,&from_step);
1573:       ISGetLocalSize(iy,&ny);
1574:       ISStrideGetInfo(iy,&to_first,&to_step);
1575:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1576:       ISGetMinMax(iy,&min,&max);
1577:       if (min >= start && max < end) islocal = PETSC_TRUE;
1578:       else islocal = PETSC_FALSE;
1579:       /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1580:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1581:       if (cando) {
1582:         PetscMalloc2(1,&to,1,&from);
1583:         to->n             = nx;
1584:         to->first         = to_first-start;
1585:         to->step          = to_step;
1586:         from->n           = nx;
1587:         from->first       = from_first;
1588:         from->step        = from_step;
1589:         to->type          = VEC_SCATTER_SEQ_STRIDE;
1590:         from->type        = VEC_SCATTER_SEQ_STRIDE;
1591:         ctx->todata       = (void*)to;
1592:         ctx->fromdata     = (void*)from;
1593:         ctx->ops->begin   = VecScatterBegin_SSToSS;
1594:         ctx->ops->end     = 0;
1595:         ctx->ops->destroy = VecScatterDestroy_SSToSS;
1596:         ctx->ops->copy    = VecScatterCopy_SSToSS;
1597:         ctx->ops->view    = VecScatterView_SSToSS;
1598:         PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1599:         goto functionend;
1600:       }
1601:     } else {
1602:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1603:     }
1604:     /* special case block to stride */
1605:     if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1606:       PetscInt ystart,ystride,ysize,bsx;
1607:       ISStrideGetInfo(iy,&ystart,&ystride);
1608:       ISGetLocalSize(iy,&ysize);
1609:       ISGetBlockSize(ix,&bsx);
1610:       /* see if stride index set is equivalent to block index set */
1611:       if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1612:         PetscInt       nx,il,*idy;
1613:         const PetscInt *idx;
1614:         ISBlockGetLocalSize(ix,&nx);
1615:         ISBlockGetIndices(ix,&idx);
1616:         if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1617:         PetscMalloc1(nx,&idy);
1618:         if (nx) {
1619:           idy[0] = ystart/bsx;
1620:           for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1621:         }
1622:         VecScatterCreate_StoP(nx,idx,nx,idy,xin,yin,bsx,ctx);
1623:         PetscFree(idy);
1624:         ISBlockRestoreIndices(ix,&idx);
1625:         PetscInfo(xin,"Special case: Blocked indices to stride\n");
1626:         goto functionend;
1627:       }
1628:     }

1630:     /* general case */
1631:     {
1632:       PetscInt       nx,ny;
1633:       const PetscInt *idx,*idy;
1634:       ISGetLocalSize(ix,&nx);
1635:       ISGetIndices(ix,&idx);
1636:       ISGetLocalSize(iy,&ny);
1637:       ISGetIndices(iy,&idy);
1638:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1639:       VecScatterCreate_StoP(nx,idx,ny,idy,xin,yin,1,ctx);
1640:       ISRestoreIndices(ix,&idx);
1641:       ISRestoreIndices(iy,&idy);
1642:       PetscInfo(xin,"General case: Seq to MPI\n");
1643:       goto functionend;
1644:     }
1645:   }
1646:   /* ---------------------------------------------------------------------------*/
1647:   if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1648:     /* no special cases for now */
1649:     PetscInt       nx,ny;
1650:     const PetscInt *idx,*idy;
1651:     ISGetLocalSize(ix,&nx);
1652:     ISGetIndices(ix,&idx);
1653:     ISGetLocalSize(iy,&ny);
1654:     ISGetIndices(iy,&idy);
1655:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1656:     VecScatterCreate_PtoP(nx,idx,ny,idy,xin,yin,1,ctx);
1657:     ISRestoreIndices(ix,&idx);
1658:     ISRestoreIndices(iy,&idy);
1659:     PetscInfo(xin,"General case: MPI to MPI\n");
1660:     goto functionend;
1661:   }

1663:   functionend:
1664:   *newctx = ctx;
1665:   ISDestroy(&tix);
1666:   ISDestroy(&tiy);
1667:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1668:   return(0);
1669: }

1671: /* ------------------------------------------------------------------*/
1674: /*@
1675:    VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
1676:       and the VecScatterEnd() does nothing

1678:    Not Collective

1680:    Input Parameter:
1681: .   ctx - scatter context created with VecScatterCreate()

1683:    Output Parameter:
1684: .   flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()

1686:    Level: developer

1688: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
1689: @*/
1690: PetscErrorCode  VecScatterGetMerged(VecScatter ctx,PetscBool  *flg)
1691: {
1694:   *flg = ctx->beginandendtogether;
1695:   return(0);
1696: }

1700: /*@
1701:    VecScatterBegin - Begins a generalized scatter from one vector to
1702:    another. Complete the scattering phase with VecScatterEnd().

1704:    Neighbor-wise Collective on VecScatter and Vec

1706:    Input Parameters:
1707: +  inctx - scatter context generated by VecScatterCreate()
1708: .  x - the vector from which we scatter
1709: .  y - the vector to which we scatter
1710: .  addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
1711:           not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1712: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1713:     SCATTER_FORWARD or SCATTER_REVERSE


1716:    Level: intermediate

1718:    Options Database: See VecScatterCreate()

1720:    Notes:
1721:    The vectors x and y need not be the same vectors used in the call
1722:    to VecScatterCreate(), but x must have the same parallel data layout
1723:    as that passed in as the x to VecScatterCreate(), similarly for the y.
1724:    Most likely they have been obtained from VecDuplicate().

1726:    You cannot change the values in the input vector between the calls to VecScatterBegin()
1727:    and VecScatterEnd().

1729:    If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
1730:    the SCATTER_FORWARD.

1732:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1734:    This scatter is far more general than the conventional
1735:    scatter, since it can be a gather or a scatter or a combination,
1736:    depending on the indices ix and iy.  If x is a parallel vector and y
1737:    is sequential, VecScatterBegin() can serve to gather values to a
1738:    single processor.  Similarly, if y is parallel and x sequential, the
1739:    routine can scatter from one processor to many processors.

1741:    Concepts: scatter^between vectors
1742:    Concepts: gather^between vectors

1744: .seealso: VecScatterCreate(), VecScatterEnd()
1745: @*/
1746: PetscErrorCode  VecScatterBegin(VecScatter inctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1747: {
1749: #if defined(PETSC_USE_DEBUG)
1750:   PetscInt       to_n,from_n;
1751: #endif
1756:   if (inctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");

1758: #if defined(PETSC_USE_DEBUG)
1759:   /*
1760:      Error checking to make sure these vectors match the vectors used
1761:    to create the vector scatter context. -1 in the from_n and to_n indicate the
1762:    vector lengths are unknown (for example with mapped scatters) and thus
1763:    no error checking is performed.
1764:   */
1765:   if (inctx->from_n >= 0 && inctx->to_n >= 0) {
1766:     VecGetLocalSize(x,&from_n);
1767:     VecGetLocalSize(y,&to_n);
1768:     if (mode & SCATTER_REVERSE) {
1769:       if (to_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,inctx->from_n);
1770:       if (from_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,inctx->to_n);
1771:     } else {
1772:       if (to_n != inctx->to_n)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,inctx->to_n);
1773:       if (from_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,inctx->from_n);
1774:     }
1775:   }
1776: #endif

1778:   inctx->inuse = PETSC_TRUE;
1779:   PetscLogEventBarrierBegin(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1780:   (*inctx->ops->begin)(inctx,x,y,addv,mode);
1781:   if (inctx->beginandendtogether && inctx->ops->end) {
1782:     inctx->inuse = PETSC_FALSE;
1783:     (*inctx->ops->end)(inctx,x,y,addv,mode);
1784:   }
1785:   PetscLogEventBarrierEnd(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1786:   return(0);
1787: }

1789: /* --------------------------------------------------------------------*/
1792: /*@
1793:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
1794:    after first calling VecScatterBegin().

1796:    Neighbor-wise Collective on VecScatter and Vec

1798:    Input Parameters:
1799: +  ctx - scatter context generated by VecScatterCreate()
1800: .  x - the vector from which we scatter
1801: .  y - the vector to which we scatter
1802: .  addv - either ADD_VALUES or INSERT_VALUES.
1803: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1804:      SCATTER_FORWARD, SCATTER_REVERSE

1806:    Level: intermediate

1808:    Notes:
1809:    If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.

1811:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1813: .seealso: VecScatterBegin(), VecScatterCreate()
1814: @*/
1815: PetscErrorCode  VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1816: {

1823:   ctx->inuse = PETSC_FALSE;
1824:   if (!ctx->ops->end) return(0);
1825:   if (!ctx->beginandendtogether) {
1826:     PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
1827:     (*(ctx)->ops->end)(ctx,x,y,addv,mode);
1828:     PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
1829:   }
1830:   return(0);
1831: }

1835: /*@C
1836:    VecScatterDestroy - Destroys a scatter context created by
1837:    VecScatterCreate().

1839:    Collective on VecScatter

1841:    Input Parameter:
1842: .  ctx - the scatter context

1844:    Level: intermediate

1846: .seealso: VecScatterCreate(), VecScatterCopy()
1847: @*/
1848: PetscErrorCode  VecScatterDestroy(VecScatter *ctx)
1849: {

1853:   if (!*ctx) return(0);
1855:   if ((*ctx)->inuse) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
1856:   if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}

1858:   /* if memory was published with SAWs then destroy it */
1859:   PetscObjectSAWsViewOff((PetscObject)(*ctx));
1860:   (*(*ctx)->ops->destroy)(*ctx);
1861: #if defined(PETSC_HAVE_CUSP)
1862:   VecScatterCUSPIndicesDestroy((PetscCUSPIndices*)&((*ctx)->spptr));
1863: #elif defined(PETSC_HAVE_VECCUDA)
1864:   VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
1865: #endif
1866:   PetscHeaderDestroy(ctx);
1867:   return(0);
1868: }

1872: /*@
1873:    VecScatterCopy - Makes a copy of a scatter context.

1875:    Collective on VecScatter

1877:    Input Parameter:
1878: .  sctx - the scatter context

1880:    Output Parameter:
1881: .  ctx - the context copy

1883:    Level: advanced

1885: .seealso: VecScatterCreate(), VecScatterDestroy()
1886: @*/
1887: PetscErrorCode  VecScatterCopy(VecScatter sctx,VecScatter *ctx)
1888: {

1894:   if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
1895:   PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
1896:   (*ctx)->to_n   = sctx->to_n;
1897:   (*ctx)->from_n = sctx->from_n;
1898:   (*sctx->ops->copy)(sctx,*ctx);
1899:   return(0);
1900: }


1903: /* ------------------------------------------------------------------*/
1906: /*@
1907:    VecScatterView - Views a vector scatter context.

1909:    Collective on VecScatter

1911:    Input Parameters:
1912: +  ctx - the scatter context
1913: -  viewer - the viewer for displaying the context

1915:    Level: intermediate

1917: @*/
1918: PetscErrorCode  VecScatterView(VecScatter ctx,PetscViewer viewer)
1919: {

1924:   if (!viewer) {
1925:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
1926:   }
1928:   if (ctx->ops->view) {
1929:     (*ctx->ops->view)(ctx,viewer);
1930:   }
1931:   return(0);
1932: }

1936: /*@C
1937:    VecScatterRemap - Remaps the "from" and "to" indices in a
1938:    vector scatter context. FOR EXPERTS ONLY!

1940:    Collective on VecScatter

1942:    Input Parameters:
1943: +  scat - vector scatter context
1944: .  from - remapping for "from" indices (may be NULL)
1945: -  to   - remapping for "to" indices (may be NULL)

1947:    Level: developer

1949:    Notes: In the parallel case the todata is actually the indices
1950:           from which the data is TAKEN! The from stuff is where the
1951:           data is finally put. This is VERY VERY confusing!

1953:           In the sequential case the todata is the indices where the
1954:           data is put and the fromdata is where it is taken from.
1955:           This is backwards from the paralllel case! CRY! CRY! CRY!

1957: @*/
1958: PetscErrorCode  VecScatterRemap(VecScatter scat,PetscInt *rto,PetscInt *rfrom)
1959: {
1960:   VecScatter_Seq_General *to,*from;
1961:   VecScatter_MPI_General *mto;
1962:   PetscInt               i;


1969:   from = (VecScatter_Seq_General*)scat->fromdata;
1970:   mto  = (VecScatter_MPI_General*)scat->todata;

1972:   if (mto->type == VEC_SCATTER_MPI_TOALL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");

1974:   if (rto) {
1975:     if (mto->type == VEC_SCATTER_MPI_GENERAL) {
1976:       /* handle off processor parts */
1977:       for (i=0; i<mto->starts[mto->n]; i++) mto->indices[i] = rto[mto->indices[i]];

1979:       /* handle local part */
1980:       to = &mto->local;
1981:       for (i=0; i<to->n; i++) to->vslots[i] = rto[to->vslots[i]];
1982:     } else if (from->type == VEC_SCATTER_SEQ_GENERAL) {
1983:       for (i=0; i<from->n; i++) from->vslots[i] = rto[from->vslots[i]];
1984:     } else if (from->type == VEC_SCATTER_SEQ_STRIDE) {
1985:       VecScatter_Seq_Stride *sto = (VecScatter_Seq_Stride*)from;

1987:       /* if the remapping is the identity and stride is identity then skip remap */
1988:       if (sto->step == 1 && sto->first == 0) {
1989:         for (i=0; i<sto->n; i++) {
1990:           if (rto[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1991:         }
1992:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1993:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1994:   }

1996:   if (rfrom) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");

1998:   /*
1999:      Mark then vector lengths as unknown because we do not know the
2000:    lengths of the remapped vectors
2001:   */
2002:   scat->from_n = -1;
2003:   scat->to_n   = -1;
2004:   return(0);
2005: }

2009: /*
2010:  VecScatterGetTypes_Private - Returns the scatter types.

2012:  scatter - The scatter.
2013:  from    - Upon exit this contains the type of the from scatter.
2014:  to      - Upon exit this contains the type of the to scatter.
2015: */
2016: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterType *from,VecScatterType *to)
2017: {
2018:   VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
2019:   VecScatter_Common* todata   = (VecScatter_Common*)scatter->todata;

2022:   *from = fromdata->type;
2023:   *to = todata->type;
2024:   return(0);
2025: }


2030: /*
2031:   VecScatterIsSequential_Private - Returns true if the scatter is sequential.

2033:   scatter - The scatter.
2034:   flag    - Upon exit flag is true if the scatter is of type VecScatter_Seq_General 
2035:             or VecScatter_Seq_Stride; otherwise flag is false.
2036: */
2037: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
2038: {
2039:   VecScatterType scatterType = scatter->type;

2042:   if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
2043:     *flag = PETSC_TRUE;
2044:   } else {
2045:     *flag = PETSC_FALSE;
2046:   }
2047:   return(0);
2048: }

2050: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA)

2054: /*@C
2055:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
2056:    to another for GPU based computation.

2058:    Input Parameters:
2059: +  inctx - scatter context generated by VecScatterCreate()
2060: .  x - the vector from which we scatter
2061: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
2062:     SCATTER_FORWARD or SCATTER_REVERSE

2064:   Level: intermediate

2066:   Notes:
2067:    Effectively, this function creates all the necessary indexing buffers and work
2068:    vectors needed to move data only those data points in a vector which need to
2069:    be communicated across ranks. This is done at the first time this function is
2070:    called. Currently, this only used in the context of the parallel SpMV call in
2071:    MatMult_MPIAIJCUSP or MatMult_MPIAIJCUSPARSE.

2073:    This function is executed before the call to MatMult. This enables the memory
2074:    transfers to be overlapped with the MatMult SpMV kernel call.

2076: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
2077: @*/
2078: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
2079: {
2081:   VecScatter_MPI_General *to,*from;
2082:   PetscErrorCode         ierr;
2083:   PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
2084:   PetscBool              isSeq1,isSeq2;

2087:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
2088:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
2089:   if (isSeq1 || isSeq2) {
2090:     return(0);
2091:   }
2092:   if (mode & SCATTER_REVERSE) {
2093:     to     = (VecScatter_MPI_General*)inctx->fromdata;
2094:     from   = (VecScatter_MPI_General*)inctx->todata;
2095:   } else {
2096:     to     = (VecScatter_MPI_General*)inctx->todata;
2097:     from   = (VecScatter_MPI_General*)inctx->fromdata;
2098:   }
2099:   bs           = to->bs;
2100:   nrecvs       = from->n;
2101:   nsends       = to->n;
2102:   indices      = to->indices;
2103:   sstartsSends = to->starts;
2104:   sstartsRecvs = from->starts;
2105: #if defined(PETSC_HAVE_CUSP)
2106:   if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2107: #else
2108:   if (x->valid_GPU_array != PETSC_CUDA_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2109: #endif
2110:     if (!inctx->spptr) {
2111:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
2112:       PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
2113:       /* Here we create indices for both the senders and receivers. */
2114:       PetscMalloc1(ns,&tindicesSends);
2115:       PetscMalloc1(nr,&tindicesRecvs);

2117:       PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
2118:       PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));

2120:       PetscSortRemoveDupsInt(&ns,tindicesSends);
2121:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);

2123:       PetscMalloc1(bs*ns,&sindicesSends);
2124:       PetscMalloc1(from->bs*nr,&sindicesRecvs);

2126:       /* sender indices */
2127:       for (i=0; i<ns; i++) {
2128:         for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
2129:       }
2130:       PetscFree(tindicesSends);

2132:       /* receiver indices */
2133:       for (i=0; i<nr; i++) {
2134:         for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
2135:       }
2136:       PetscFree(tindicesRecvs);

2138:       /* create GPU indices, work vectors, ... */
2139: #if defined(PETSC_HAVE_CUSP)
2140:       VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);
2141: #else
2142:       VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
2143: #endif
2144:       PetscFree(sindicesSends);
2145:       PetscFree(sindicesRecvs);
2146:     }
2147:   }
2148:   return(0);
2149: }

2153: /*@C
2154:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
2155:    another for GPU based computation.

2157:    Input Parameter:
2158: +  inctx - scatter context generated by VecScatterCreate()

2160:   Level: intermediate

2162:   Notes:
2163:    Effectively, this function resets the temporary buffer flags. Currently, this
2164:    only used in the context of the parallel SpMV call in in MatMult_MPIAIJCUDA
2165:    or MatMult_MPIAIJCUDAARSE. Once the MatMultAdd is finished, the GPU temporary
2166:    buffers used for messaging are no longer valid.

2168: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
2169: @*/
2170: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
2171: {
2173:   return(0);
2174: }

2176: #endif