Actual source code: vscat.c

petsc-3.10.2 2018-10-09
Report Typos and Errors

  2: /*
  3:      Code for creating scatters between vectors. This file
  4:   includes the code for scattering between sequential vectors and
  5:   some special cases for parallel scatters.
  6: */

  8:  #include <petsc/private/vecscatterimpl.h>

 10: #if defined(PETSC_HAVE_VECCUDA)
 11:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
 12: #endif

 14: #if defined(PETSC_USE_DEBUG)
 15: /*
 16:      Checks if any indices are less than zero and generates an error
 17: */
 18: PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
 19: {
 20:   PetscInt i;

 23:   for (i=0; i<n; i++) {
 24:     if (idx[i] < 0)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
 25:     if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
 26:   }
 27:   return(0);
 28: }
 29: #endif

 31: /*
 32:       This is special scatter code for when the entire parallel vector is copied to each processor.

 34:    This code was written by Cameron Cooper, Occidental College, Fall 1995,
 35:    will working at ANL as a SERS student.
 36: */
 37: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 38: {
 40:   PetscInt       yy_n,xx_n;
 41:   PetscScalar    *xv,*yv;

 44:   VecGetLocalSize(y,&yy_n);
 45:   VecGetLocalSize(x,&xx_n);
 46:   VecGetArrayPair(x,y,&xv,&yv);

 48:   if (mode & SCATTER_REVERSE) {
 49:     PetscScalar          *xvt,*xvt2;
 50:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 51:     PetscInt             i;
 52:     PetscMPIInt          *disply = scat->displx;

 54:     if (addv == INSERT_VALUES) {
 55:       PetscInt rstart,rend;
 56:       /*
 57:          copy the correct part of the local vector into the local storage of
 58:          the MPI one.  Note: this operation only makes sense if all the local
 59:          vectors have the same values
 60:       */
 61:       VecGetOwnershipRange(y,&rstart,&rend);
 62:       PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
 63:     } else {
 64:       MPI_Comm    comm;
 65:       PetscMPIInt rank;
 66:       PetscObjectGetComm((PetscObject)y,&comm);
 67:       MPI_Comm_rank(comm,&rank);
 68:       if (scat->work1) xvt = scat->work1;
 69:       else {
 70:         PetscMalloc1(xx_n,&xvt);
 71:         scat->work1 = xvt;
 72:       }
 73:       if (!rank) { /* I am the zeroth processor, values are accumulated here */
 74:         if   (scat->work2) xvt2 = scat->work2;
 75:         else {
 76:           PetscMalloc1(xx_n,&xvt2);
 77:           scat->work2 = xvt2;
 78:         }
 79:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 80:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 81:         if (addv == ADD_VALUES) {
 82:           for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
 83: #if !defined(PETSC_USE_COMPLEX)
 84:         } else if (addv == MAX_VALUES) {
 85:           for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
 86: #endif
 87:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
 88:         MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 89:       } else {
 90:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 91:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 92:         MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 93:       }
 94:     }
 95:   } else {
 96:     PetscScalar          *yvt;
 97:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 98:     PetscInt             i;
 99:     PetscMPIInt          *displx = scat->displx;

101:     if (addv == INSERT_VALUES) {
102:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
103:     } else {
104:       if (scat->work1) yvt = scat->work1;
105:       else {
106:         PetscMalloc1(yy_n,&yvt);
107:         scat->work1 = yvt;
108:       }
109:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
110:       if (addv == ADD_VALUES) {
111:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
112: #if !defined(PETSC_USE_COMPLEX)
113:       } else if (addv == MAX_VALUES) {
114:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
115: #endif
116:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
117:     }
118:   }
119:   VecRestoreArrayPair(x,y,&xv,&yv);
120:   return(0);
121: }

123: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
124: {
126:   PetscBool      isascii;

129:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
130:   if (isascii) {
131:     PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
132:   }
133:   return(0);
134: }

136: /*
137:       This is special scatter code for when the entire parallel vector is  copied to processor 0.

139: */
140: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
141: {
142:   PetscErrorCode    ierr;
143:   PetscMPIInt       rank;
144:   PetscInt          yy_n,xx_n;
145:   PetscScalar       *yv;
146:   const PetscScalar *xv;
147:   MPI_Comm          comm;

150:   VecGetLocalSize(y,&yy_n);
151:   VecGetLocalSize(x,&xx_n);
152:   VecGetArrayRead(x,&xv);
153:   VecGetArray(y,&yv);

155:   PetscObjectGetComm((PetscObject)x,&comm);
156:   MPI_Comm_rank(comm,&rank);

158:   /* --------  Reverse scatter; spread from processor 0 to other processors */
159:   if (mode & SCATTER_REVERSE) {
160:     PetscScalar          *yvt;
161:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
162:     PetscInt             i;
163:     PetscMPIInt          *disply = scat->displx;

165:     if (addv == INSERT_VALUES) {
166:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
167:     } else {
168:       if (scat->work2) yvt = scat->work2;
169:       else {
170:         PetscInt xx_nt;
171:         MPI_Allreduce(&xx_n,&xx_nt,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)y));
172:         PetscMalloc1(xx_nt,&yvt);
173:         scat->work2 = yvt;
174:       }
175:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
176:       if (addv == ADD_VALUES) {
177:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
178: #if !defined(PETSC_USE_COMPLEX)
179:       } else if (addv == MAX_VALUES) {
180:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
181: #endif
182:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
183:     }
184:   /* ---------  Forward scatter; gather all values onto processor 0 */
185:   } else {
186:     PetscScalar          *yvt  = 0;
187:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
188:     PetscInt             i;
189:     PetscMPIInt          *displx = scat->displx;

191:     if (addv == INSERT_VALUES) {
192:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
193:     } else {
194:       if (!rank) {
195:         if (scat->work1) yvt = scat->work1;
196:         else {
197:           PetscMalloc1(yy_n,&yvt);
198:           scat->work1 = yvt;
199:         }
200:       }
201:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
202:       if (!rank) {
203:         if (addv == ADD_VALUES) {
204:           for (i=0; i<yy_n; i++) yv[i] += yvt[i];
205: #if !defined(PETSC_USE_COMPLEX)
206:         } else if (addv == MAX_VALUES) {
207:           for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
208: #endif
209:         }  else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
210:       }
211:     }
212:   }
213:   VecRestoreArrayRead(x,&xv);
214:   VecRestoreArray(y,&yv);
215:   return(0);
216: }

218: /*
219:        The follow to are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
220: */
221: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
222: {
223:   VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
224:   PetscErrorCode       ierr;

227:   PetscFree(scat->work1);
228:   PetscFree(scat->work2);
229:   PetscFree3(ctx->todata,scat->count,scat->displx);
230:   return(0);
231: }

233: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
234: {

238:   PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
239:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);;
240:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);;
241:   PetscFree2(ctx->todata,ctx->fromdata);
242:   return(0);
243: }

245: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
246: {

250:   PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
251:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);;
252:   PetscFree2(ctx->todata,ctx->fromdata);
253:   return(0);
254: }

256: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
257: {

261:   PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
262:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);;
263:   PetscFree2(ctx->todata,ctx->fromdata);
264:   return(0);
265: }

267: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
268: {

272:   PetscFree2(ctx->todata,ctx->fromdata);
273:   return(0);
274: }

276: /* -------------------------------------------------------------------------*/
277: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
278: {
279:   VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
280:   PetscErrorCode       ierr;
281:   PetscMPIInt          size,*count,*displx;
282:   PetscInt             i;

285:   out->ops->begin   = in->ops->begin;
286:   out->ops->end     = in->ops->end;
287:   out->ops->copy    = in->ops->copy;
288:   out->ops->destroy = in->ops->destroy;
289:   out->ops->view    = in->ops->view;

291:   MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
292:   PetscMalloc3(1,&sto,size,&count,size,&displx);
293:   sto->format = in_to->format;
294:   sto->count  = count;
295:   sto->displx = displx;
296:   for (i=0; i<size; i++) {
297:     sto->count[i]  = in_to->count[i];
298:     sto->displx[i] = in_to->displx[i];
299:   }
300:   sto->work1    = 0;
301:   sto->work2    = 0;
302:   out->todata   = (void*)sto;
303:   out->fromdata = (void*)0;
304:   return(0);
305: }

307: /* --------------------------------------------------------------------------------------*/
308: /*
309:         Scatter: sequential general to sequential general
310: */
311: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
312: {
313:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
314:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
315:   PetscErrorCode         ierr;
316:   PetscInt               i,n = gen_from->n,*fslots,*tslots;
317:   PetscScalar            *xv,*yv;
318: #if defined(PETSC_HAVE_VECCUDA)
319:   PetscBool              is_veccuda;
320: #endif

323: #if defined(PETSC_HAVE_VECCUDA)
324:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
325:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
326:     /* create the scatter indices if not done already */
327:     if (!ctx->spptr) {
328:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
329:       fslots = gen_from->vslots;
330:       tslots = gen_to->vslots;
331:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
332:     }
333:     /* next do the scatter */
334:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
335:     return(0);
336:   }
337: #endif

339:   VecGetArrayPair(x,y,&xv,&yv);
340:   if (mode & SCATTER_REVERSE) {
341:     gen_to   = (VecScatter_Seq_General*)ctx->fromdata;
342:     gen_from = (VecScatter_Seq_General*)ctx->todata;
343:   }
344:   fslots = gen_from->vslots;
345:   tslots = gen_to->vslots;

347:   if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&gen_from->memcpy_plan,yv,&gen_to->memcpy_plan,addv); }
348:   else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]]  = xv[fslots[i]]; }
349:   else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]]; }
350: #if !defined(PETSC_USE_COMPLEX)
351:   else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]]  = PetscMax(yv[tslots[i]],xv[fslots[i]]); }
352: #endif
353:   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
354:   VecRestoreArrayPair(x,y,&xv,&yv);
355:   return(0);
356: }

358: /*
359:     Scatter: sequential general to sequential stride 1
360: */
361: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
362: {
363:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
364:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
365:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
366:   PetscErrorCode         ierr;
367:   PetscInt               first = gen_to->first;
368:   PetscScalar            *xv,*yv;
369: #if defined(PETSC_HAVE_VECCUDA)
370:   PetscBool              is_veccuda;
371: #endif

374: #if defined(PETSC_HAVE_VECCUDA)
375:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
376:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
377:     /* create the scatter indices if not done already */
378:     if (!ctx->spptr) {
379:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
380:       PetscInt *tslots = 0;
381:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
382:     }
383:     /* next do the scatter */
384:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
385:     return(0);
386:   }
387: #endif

389:   VecGetArrayPair(x,y,&xv,&yv);
390:   if (mode & SCATTER_REVERSE) {
391:     PetscScalar *xxv = xv + first;
392:     if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_from->memcpy_plan,addv,1); }
393:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[fslots[i]]  = xxv[i]; }
394:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[fslots[i]] += xxv[i]; }
395: #if !defined(PETSC_USE_COMPLEX)
396:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[fslots[i]]  = PetscMax(yv[fslots[i]],xxv[i]); }
397: #endif
398:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
399:   } else {
400:     PetscScalar *yyv = yv + first;
401:     if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_from->memcpy_plan,yyv,addv,1); }
402:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i]  = xv[fslots[i]]; }
403:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yyv[i] += xv[fslots[i]]; }
404: #if !defined(PETSC_USE_COMPLEX)
405:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yyv[i]  = PetscMax(yyv[i],xv[fslots[i]]); }
406: #endif
407:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
408:   }
409:   VecRestoreArrayPair(x,y,&xv,&yv);
410:   return(0);
411: }

413: /*
414:    Scatter: sequential general to sequential stride
415: */
416: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
417: {
418:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
419:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
420:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
421:   PetscErrorCode         ierr;
422:   PetscInt               first = gen_to->first,step = gen_to->step;
423:   PetscScalar            *xv,*yv;
424: #if defined(PETSC_HAVE_VECCUDA)
425:   PetscBool              is_veccuda;
426: #endif

429: #if defined(PETSC_HAVE_VECCUDA)
430:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
431:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
432:     /* create the scatter indices if not done already */
433:     if (!ctx->spptr) {
434:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
435:       PetscInt * tslots = 0;
436:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
437:     }
438:     /* next do the scatter */
439:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
440:     return(0);
441:   }
442: #endif

444:   VecGetArrayPair(x,y,&xv,&yv);
445:   if (mode & SCATTER_REVERSE) {
446:     if (addv == INSERT_VALUES) {
447:       for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
448:     } else if (addv == ADD_VALUES) {
449:       for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
450: #if !defined(PETSC_USE_COMPLEX)
451:     } else if (addv == MAX_VALUES) {
452:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
453: #endif
454:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
455:   } else {
456:     if (addv == INSERT_VALUES) {
457:       for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
458:     } else if (addv == ADD_VALUES) {
459:       for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
460: #if !defined(PETSC_USE_COMPLEX)
461:     } else if (addv == MAX_VALUES) {
462:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
463: #endif
464:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
465:   }
466:   VecRestoreArrayPair(x,y,&xv,&yv);
467:   return(0);
468: }

470: /*
471:     Scatter: sequential stride 1 to sequential general
472: */
473: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
474: {
475:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
476:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
477:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
478:   PetscErrorCode         ierr;
479:   PetscInt               first = gen_from->first;
480:   PetscScalar            *xv,*yv;
481: #if defined(PETSC_HAVE_VECCUDA)
482:   PetscBool              is_veccuda;
483: #endif

486: #if defined(PETSC_HAVE_VECCUDA)
487:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
488:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
489:     /* create the scatter indices if not done already */
490:     if (!ctx->spptr) {
491:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
492:       PetscInt *fslots = 0;
493:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
494:     }
495:     /* next do the scatter */
496:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
497:     return(0);
498:   }
499: #endif

501:   VecGetArrayPair(x,y,&xv,&yv);
502:   if (mode & SCATTER_REVERSE) {
503:     PetscScalar *yyv = yv + first;
504:     if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_to->memcpy_plan,yyv,addv,1); }
505:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i]  = xv[tslots[i]]; }
506:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yyv[i] += xv[tslots[i]]; }
507: #if !defined(PETSC_USE_COMPLEX)
508:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yyv[i]  = PetscMax(yyv[i],xv[tslots[i]]); }
509: #endif
510:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
511:   } else {
512:     PetscScalar *xxv = xv + first;
513:     if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_to->memcpy_plan,addv,1); }
514:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]]  = xxv[i]; }
515:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]] += xxv[i]; }
516: #if !defined(PETSC_USE_COMPLEX)
517:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]]  = PetscMax(yv[tslots[i]],xxv[i]); }
518: #endif
519:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
520:   }
521:   VecRestoreArrayPair(x,y,&xv,&yv);
522:   return(0);
523: }

525: /*
526:    Scatter: sequential stride to sequential general
527: */
528: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
529: {
530:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
531:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
532:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
533:   PetscErrorCode         ierr;
534:   PetscInt               first = gen_from->first,step = gen_from->step;
535:   PetscScalar            *xv,*yv;
536: #if defined(PETSC_HAVE_VECCUDA)
537:   PetscBool              is_veccuda;
538: #endif

541: #if defined(PETSC_HAVE_VECCUDA)
542:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
543:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
544:     /* create the scatter indices if not done already */
545:     if (!ctx->spptr) {
546:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
547:       PetscInt *fslots = 0;
548:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
549:     }
550:     /* next do the scatter */
551:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
552:     return(0);
553:   }
554: #endif

556:   VecGetArrayPair(x,y,&xv,&yv);
557:   if (mode & SCATTER_REVERSE) {
558:     if (addv == INSERT_VALUES) {
559:       for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
560:     } else if (addv == ADD_VALUES) {
561:       for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
562: #if !defined(PETSC_USE_COMPLEX)
563:     } else if (addv == MAX_VALUES) {
564:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
565: #endif
566:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
567:   } else {
568:     if (addv == INSERT_VALUES) {
569:       for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
570:     } else if (addv == ADD_VALUES) {
571:       for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
572: #if !defined(PETSC_USE_COMPLEX)
573:     } else if (addv == MAX_VALUES) {
574:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
575: #endif
576:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
577:   }
578:   VecRestoreArrayPair(x,y,&xv,&yv);
579:   return(0);
580: }

582: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
583: {
584:   PetscErrorCode         ierr;
585:   VecScatter_Seq_Stride  *in_from = (VecScatter_Seq_Stride*)in->fromdata;
586:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
587:   PetscInt               i;
588:   PetscBool              isascii;

591:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
592:   if (isascii) {
593:     PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
594:     for (i=0; i<in_to->n; i++) {
595:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
596:     }
597:     if (in_to->memcpy_plan.optimized[0]) {
598:       PetscViewerASCIIPrintf(viewer,"This stride1 to general scatter is made of %D copies\n",in_to->memcpy_plan.copy_offsets[1]);
599:     }
600:   }
601:   return(0);
602: }
603: /*
604:      Scatter: sequential stride to sequential stride
605: */
606: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
607: {
608:   VecScatter_Seq_Stride *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
609:   VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
610:   PetscInt              i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
611:   PetscErrorCode        ierr;
612:   PetscInt              from_first = gen_from->first,from_step = gen_from->step;
613:   PetscScalar           *xv,*yv;
614: #if defined(PETSC_HAVE_VECCUDA)
615:   PetscBool              is_veccuda;
616: #endif

619: #if defined(PETSC_HAVE_VECCUDA)
620:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
621:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
622:     /* create the scatter indices if not done already */
623:     if (!ctx->spptr) {
624:       PetscInt *tslots = 0,*fslots = 0;
625:       VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
626:     }
627:     /* next do the scatter */
628:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
629:     return(0);
630:   }
631: #endif

633:   VecGetArrayPair(x,y,&xv,&yv);
634:   if (mode & SCATTER_REVERSE) {
635:     from_first = gen_to->first;
636:     to_first   = gen_from->first;
637:     from_step  = gen_to->step;
638:     to_step    = gen_from->step;
639:   }

641:   if (addv == INSERT_VALUES) {
642:     if (to_step == 1 && from_step == 1) {
643:       PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
644:     } else  {
645:       for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
646:     }
647:   } else if (addv == ADD_VALUES) {
648:     if (to_step == 1 && from_step == 1) {
649:       PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
650:       for (i=0; i<n; i++) yyv[i] += xxv[i];
651:     } else {
652:       for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
653:     }
654: #if !defined(PETSC_USE_COMPLEX)
655:   } else if (addv == MAX_VALUES) {
656:     if (to_step == 1 && from_step == 1) {
657:       PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
658:       for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xxv[i]);
659:     } else {
660:       for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
661:     }
662: #endif
663:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
664:   VecRestoreArrayPair(x,y,&xv,&yv);
665:   return(0);
666: }

668: /* --------------------------------------------------------------------------*/


671: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
672: {
673:   PetscErrorCode         ierr;
674:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
675:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

678:   out->ops->begin   = in->ops->begin;
679:   out->ops->end     = in->ops->end;
680:   out->ops->copy    = in->ops->copy;
681:   out->ops->destroy = in->ops->destroy;
682:   out->ops->view    = in->ops->view;

684:   PetscMalloc2(1,&out_to,1,&out_from);
685:   PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
686:   out_to->n                    = in_to->n;
687:   out_to->format               = in_to->format;
688:   out_to->nonmatching_computed = PETSC_FALSE;
689:   out_to->slots_nonmatching    = 0;
690:   PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));
691:   VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);

693:   out_from->n                    = in_from->n;
694:   out_from->format               = in_from->format;
695:   out_from->nonmatching_computed = PETSC_FALSE;
696:   out_from->slots_nonmatching    = 0;
697:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
698:   VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);

700:   out->todata   = (void*)out_to;
701:   out->fromdata = (void*)out_from;
702:   return(0);
703: }

705: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
706: {
707:   PetscErrorCode         ierr;
708:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
709:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
710:   PetscInt               i;
711:   PetscBool              isascii;

714:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
715:   if (isascii) {
716:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
717:     for (i=0; i<in_to->n; i++) {
718:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
719:     }
720:     if (in_from->memcpy_plan.optimized[0]) {
721:       PetscViewerASCIIPrintf(viewer,"This general to general scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
722:     }
723:   }
724:   return(0);
725: }


728: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
729: {
730:   PetscErrorCode         ierr;
731:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
732:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

735:   out->ops->begin   = in->ops->begin;
736:   out->ops->end     = in->ops->end;
737:   out->ops->copy    = in->ops->copy;
738:   out->ops->destroy = in->ops->destroy;
739:   out->ops->view    = in->ops->view;

741:   PetscMalloc2(1,&out_to,1,&out_from);
742:   PetscMalloc1(in_from->n,&out_from->vslots);
743:   out_to->n      = in_to->n;
744:   out_to->format = in_to->format;
745:   out_to->first  = in_to->first;
746:   out_to->step   = in_to->step;
747:   out_to->format = in_to->format;

749:   out_from->n                    = in_from->n;
750:   out_from->format               = in_from->format;
751:   out_from->nonmatching_computed = PETSC_FALSE;
752:   out_from->slots_nonmatching    = 0;
753:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
754:   VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);

756:   out->todata   = (void*)out_to;
757:   out->fromdata = (void*)out_from;
758:   return(0);
759: }

761: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
762: {
763:   PetscErrorCode         ierr;
764:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata;
765:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
766:   PetscInt               i;
767:   PetscBool              isascii;

770:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
771:   if (isascii) {
772:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
773:     for (i=0; i<in_to->n; i++) {
774:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
775:     }
776:     if (in_from->memcpy_plan.optimized[0]) {
777:       PetscViewerASCIIPrintf(viewer,"This general to stride1 scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
778:     }
779:   }
780:   return(0);
781: }

783: /* --------------------------------------------------------------------------*/
784: /*
785:     Scatter: parallel to sequential vector, sequential strides for both.
786: */
787: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
788: {
789:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
790:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
791:   PetscErrorCode        ierr;

794:   out->ops->begin   = in->ops->begin;
795:   out->ops->end     = in->ops->end;
796:   out->ops->copy    = in->ops->copy;
797:   out->ops->destroy = in->ops->destroy;
798:   out->ops->view    = in->ops->view;

800:   PetscMalloc2(1,&out_to,1,&out_from);
801:   out_to->n       = in_to->n;
802:   out_to->format  = in_to->format;
803:   out_to->first   = in_to->first;
804:   out_to->step    = in_to->step;
805:   out_to->format  = in_to->format;
806:   out_from->n     = in_from->n;
807:   out_from->format= in_from->format;
808:   out_from->first = in_from->first;
809:   out_from->step  = in_from->step;
810:   out_from->format= in_from->format;
811:   out->todata     = (void*)out_to;
812:   out->fromdata   = (void*)out_from;
813:   return(0);
814: }

816: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
817: {
818:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata;
819:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
820:   PetscErrorCode        ierr;
821:   PetscBool             isascii;

824:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
825:   if (isascii) {
826:     PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
827:   }
828:   return(0);
829: }

831: /*
832:   Create memcpy optimization plans based on indices of vector entries we want to scatter

834:    Input Parameters:
835:   +  n       - number of target processors
836:   .  starts  - [n+1] for the i-th processor, its associated indices are indices[starts[i], starts[i+1])
837:   .  indices - [] array storing indices. Its length is starts[n+1]
838:   -  bs      - block size

840:    Output Parameters:
841:   +  plan    - the memcpy plan
842: */
843: PetscErrorCode VecScatterMemcpyPlanCreate_Index(PetscInt n,const PetscInt *starts,const PetscInt *indices,PetscInt bs,VecScatterMemcpyPlan *plan)
844: {
846:   PetscInt       i,j,k,my_copies,n_copies=0,step;
847:   PetscBool      strided,has_strided;

850:   PetscMemzero(plan,sizeof(VecScatterMemcpyPlan));
851:   plan->n = n;
852:   PetscMalloc2(n,&plan->optimized,n+1,&plan->copy_offsets);

854:   /* check if each remote part of the scatter is made of copies, and count total_copies */
855:   for (i=0; i<n; i++) { /* for each target processor procs[i] */
856:     my_copies = 1; /* count num. of copies for procs[i] */
857:     for (j=starts[i]; j<starts[i+1]-1; j++) { /* go through indices from the first to the second to last */
858:       if (indices[j]+bs != indices[j+1]) my_copies++;
859:     }
860:     if (bs*(starts[i+1]-starts[i])*sizeof(PetscScalar)/my_copies >= 256) { /* worth using memcpy? */
861:       plan->optimized[i] = PETSC_TRUE;
862:       n_copies += my_copies;
863:     } else {
864:       plan->optimized[i] = PETSC_FALSE;
865:     }
866:   }

868:   /* do malloc with the recently known n_copies */
869:   PetscMalloc2(n_copies,&plan->copy_starts,n_copies,&plan->copy_lengths);

871:   /* analyze the total_copies one by one */
872:   k                     = 0; /* k-th copy */
873:   plan->copy_offsets[0] = 0;
874:   for (i=0; i<n; i++) { /* for each target processor procs[i] */
875:     if (plan->optimized[i]) {
876:       my_copies            = 1;
877:       plan->copy_starts[k] = indices[starts[i]];
878:       for (j=starts[i]; j<starts[i+1]-1; j++) {
879:         if (indices[j]+bs != indices[j+1]) { /* meet end of a copy (and next copy must exist) */
880:           my_copies++;
881:           plan->copy_starts[k+1] = indices[j+1];
882:           plan->copy_lengths[k]  = sizeof(PetscScalar)*(indices[j]+bs-plan->copy_starts[k]);
883:           k++;
884:         }
885:       }
886:       /* set copy length of the last copy for this remote proc */
887:       plan->copy_lengths[k] = sizeof(PetscScalar)*(indices[j]+bs-plan->copy_starts[k]);
888:       k++;
889:     }

891:     /* set offset for next proc. When optimized[i] is false, copy_offsets[i] = copy_offsets[i+1] */
892:     plan->copy_offsets[i+1] = k;
893:   }

895:   /* try the last chance to optimize. If a scatter is not memory copies, then is it strided? */
896:   has_strided = PETSC_FALSE;
897:   PetscMalloc3(n,&plan->stride_first,n,&plan->stride_step,n,&plan->stride_n);
898:   for (i=0; i<n; i++) { /* for each target processor procs[i] */
899:     if (!plan->optimized[i] && starts[i+1] - starts[i] >= 16) { /* few indices (<16) are not worth striding */
900:       strided = PETSC_TRUE;
901:       step    = indices[starts[i]+1] - indices[starts[i]];
902:       for (j=starts[i]; j<starts[i+1]-1; j++) {
903:         if (indices[j]+step != indices[j+1]) { strided = PETSC_FALSE; break; }
904:       }
905:       if (strided) {
906:         plan->optimized[i]    = PETSC_TRUE;
907:         plan->stride_first[i] = indices[starts[i]];
908:         plan->stride_step[i]  = step;
909:         plan->stride_n[i]     = starts[i+1] - starts[i];
910:         has_strided           = PETSC_TRUE;
911:       }
912:     }
913:   }
914:   /* if none is strided, free the arrays to save memory here and also in plan copying */
915:   if (!has_strided) { PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n); }
916:   return(0);
917: }

919: /* --------------------------------------------------------------------------------------*/
920: /* Create a memcpy plan for a sequential general (SG) to SG scatter */
921: PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
922: {
923:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
924:   PetscInt       j,n_copies;
926:   PetscBool      same_copy_starts;

929:   PetscMemzero(&to->memcpy_plan,sizeof(VecScatterMemcpyPlan));
930:   PetscMemzero(&from->memcpy_plan,sizeof(VecScatterMemcpyPlan));
931:   to->memcpy_plan.n   = 1;
932:   from->memcpy_plan.n = 1;

934:   /* malloc and init the two fields to false and zero */
935:   PetscCalloc2(1,&to->memcpy_plan.optimized,2,&to->memcpy_plan.copy_offsets);
936:   PetscCalloc2(1,&from->memcpy_plan.optimized,2,&from->memcpy_plan.copy_offsets);

938:   /* count number of copies, which runs from 1 to n */
939:   n_copies = 1;
940:   for (i=0; i<n-1; i++) {
941:     if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) n_copies++;
942:   }

944:   /* if average copy size >= 256 bytes, use memcpy instead of load/store */
945:   if (bs*n*sizeof(PetscScalar)/n_copies >= 256) {
946:     PetscMalloc2(n_copies,&to->memcpy_plan.copy_starts,n_copies,&to->memcpy_plan.copy_lengths);
947:     PetscMalloc2(n_copies,&from->memcpy_plan.copy_starts,n_copies,&from->memcpy_plan.copy_lengths);

949:     /* set up copy_starts[] & copy_lenghts[] of to and from */
950:     to->memcpy_plan.copy_starts[0]   = to_slots[0];
951:     from->memcpy_plan.copy_starts[0] = from_slots[0];

953:     if (n_copies != 1) { /* one copy is trival and we can save some work */
954:       j = 0;  /* j-th copy */
955:       for (i=0; i<n-1; i++) {
956:         if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) {
957:           to->memcpy_plan.copy_lengths[j]    = sizeof(PetscScalar)*(to_slots[i]+bs-to->memcpy_plan.copy_starts[j]);
958:           from->memcpy_plan.copy_lengths[j]  = sizeof(PetscScalar)*(from_slots[i]+bs-from->memcpy_plan.copy_starts[j]);
959:           to->memcpy_plan.copy_starts[j+1]   = to_slots[i+1];
960:           from->memcpy_plan.copy_starts[j+1] = from_slots[i+1];
961:           j++;
962:         }
963:       }
964:     }

966:     /* set up copy_lengths[] of the last copy */
967:     to->memcpy_plan.copy_lengths[n_copies-1]   = sizeof(PetscScalar)*(to_slots[n-1]+bs-to->memcpy_plan.copy_starts[n_copies-1]);
968:     from->memcpy_plan.copy_lengths[n_copies-1] = sizeof(PetscScalar)*(from_slots[n-1]+bs-from->memcpy_plan.copy_starts[n_copies-1]);

970:     /* check if to and from have the same copy_starts[] values */
971:     same_copy_starts = PETSC_TRUE;
972:     for (i=0; i<n_copies; i++) {
973:       if (to->memcpy_plan.copy_starts[i] != from->memcpy_plan.copy_starts[i]) { same_copy_starts = PETSC_FALSE; break; }
974:     }

976:     to->memcpy_plan.optimized[0]        = PETSC_TRUE;
977:     from->memcpy_plan.optimized[0]      = PETSC_TRUE;
978:     to->memcpy_plan.copy_offsets[1]     = n_copies;
979:     from->memcpy_plan.copy_offsets[1]   = n_copies;
980:     to->memcpy_plan.same_copy_starts    = same_copy_starts;
981:     from->memcpy_plan.same_copy_starts  = same_copy_starts;
982:   }

984:   /* we do not do stride optimzation for this kind of scatter since the chance is rare. All related fields are zeroed out */
985:   return(0);
986: }

988: /* Copy the memcpy plan from in to out */
989: PetscErrorCode VecScatterMemcpyPlanCopy(const VecScatterMemcpyPlan *in,VecScatterMemcpyPlan *out)
990: {

994:   PetscMemzero(out,sizeof(VecScatterMemcpyPlan));
995:   out->n = in->n;
996:   PetscMalloc2(in->n,&out->optimized,in->n+1,&out->copy_offsets);
997:   PetscMalloc2(in->copy_offsets[in->n],&out->copy_starts,in->copy_offsets[in->n],&out->copy_lengths);
998:   PetscMemcpy(out->optimized,in->optimized,sizeof(PetscBool)*in->n);
999:   PetscMemcpy(out->copy_offsets,in->copy_offsets,sizeof(PetscInt)*(in->n+1));
1000:   PetscMemcpy(out->copy_starts,in->copy_starts,sizeof(PetscInt)*in->copy_offsets[in->n]);
1001:   PetscMemcpy(out->copy_lengths,in->copy_lengths,sizeof(PetscInt)*in->copy_offsets[in->n]);
1002:   if (in->stride_first) {
1003:     PetscMalloc3(in->n,&out->stride_first,in->n,&out->stride_step,in->n,&out->stride_n);
1004:     PetscMemcpy(out->stride_first,in->stride_first,sizeof(PetscInt)*in->n);
1005:     PetscMemcpy(out->stride_step,in->stride_step,sizeof(PetscInt)*in->n);
1006:     PetscMemcpy(out->stride_n,in->stride_n,sizeof(PetscInt)*in->n);
1007:   }
1008:   return(0);
1009: }

1011: /* Destroy the vecscatter memcpy plan */
1012: PetscErrorCode VecScatterMemcpyPlanDestroy(VecScatterMemcpyPlan *plan)
1013: {

1017:   PetscFree2(plan->optimized,plan->copy_offsets);
1018:   PetscFree2(plan->copy_starts,plan->copy_lengths);
1019:   PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n);
1020:   return(0);
1021: }

1023: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1024: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1025: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1026: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1027: #endif

1029: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1030: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1031: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);

1033: static PetscErrorCode GetInputISType_private(VecScatter,PetscInt,PetscInt,PetscInt*,IS*,PetscInt*,IS*);

1035: /* =======================================================================*/
1036: #define VEC_SEQ_ID 0
1037: #define VEC_MPI_ID 1
1038: #define IS_GENERAL_ID 0
1039: #define IS_STRIDE_ID  1
1040: #define IS_BLOCK_ID   2

1042: /*
1043:    Blocksizes we have optimized scatters for
1044: */

1046: #define VecScatterOptimizedBS(mbs) (2 <= mbs)


1049: PetscErrorCode  VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
1050: {
1051:   VecScatter     ctx;

1055:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
1056:   ctx->inuse               = PETSC_FALSE;
1057:   ctx->beginandendtogether = PETSC_FALSE;
1058:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
1059:   if (ctx->beginandendtogether) {
1060:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
1061:   }
1062:   *newctx = ctx;
1063:   return(0);
1064: }

1066: /* -------------------------------------------------- */
1067: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
1068: {
1069:   PetscErrorCode    ierr;
1070:   PetscInt          ix_type=-1,iy_type=-1;
1071:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1072:   Vec               xin=ctx->from_v;

1075:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
1076:   GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1077:   if (tix) ix = tix;
1078:   if (tiy) iy = tiy;

1080:   if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
1081:     PetscInt               nx,ny;
1082:     const PetscInt         *idx,*idy;
1083:     VecScatter_Seq_General *to = NULL,*from = NULL;

1085:     ISGetLocalSize(ix,&nx);
1086:     ISGetLocalSize(iy,&ny);
1087:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1088:     ISGetIndices(ix,&idx);
1089:     ISGetIndices(iy,&idy);
1090:     PetscMalloc2(1,&to,1,&from);
1091:     PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
1092:     to->n = nx;
1093: #if defined(PETSC_USE_DEBUG)
1094:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1095: #endif
1096:     PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
1097:     from->n = nx;
1098: #if defined(PETSC_USE_DEBUG)
1099:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1100: #endif
1101:      PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
1102:     to->format          = VEC_SCATTER_SEQ_GENERAL;
1103:     from->format        = VEC_SCATTER_SEQ_GENERAL;
1104:     ctx->todata       = (void*)to;
1105:     ctx->fromdata     = (void*)from;
1106:     VecScatterMemcpyPlanCreate_SGToSG(1,to,from);
1107:     ctx->ops->begin   = VecScatterBegin_SGToSG;
1108:     ctx->ops->end     = 0;
1109:     ctx->ops->destroy = VecScatterDestroy_SGToSG;
1110:     ctx->ops->copy    = VecScatterCopy_SGToSG;
1111:     ctx->ops->view    = VecScatterView_SGToSG;
1112:     PetscInfo(xin,"Special case: sequential vector general scatter\n");
1113:     goto functionend;
1114:   } else if (ix_type == IS_STRIDE_ID &&  iy_type == IS_STRIDE_ID) {
1115:     PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1116:     VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;

1118:     ISGetLocalSize(ix,&nx);
1119:     ISGetLocalSize(iy,&ny);
1120:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1121:     ISStrideGetInfo(iy,&to_first,&to_step);
1122:     ISStrideGetInfo(ix,&from_first,&from_step);
1123:     PetscMalloc2(1,&to8,1,&from8);
1124:     to8->n            = nx;
1125:     to8->first        = to_first;
1126:     to8->step         = to_step;
1127:     from8->n          = nx;
1128:     from8->first      = from_first;
1129:     from8->step       = from_step;
1130:     to8->format         = VEC_SCATTER_SEQ_STRIDE;
1131:     from8->format       = VEC_SCATTER_SEQ_STRIDE;
1132:     ctx->todata       = (void*)to8;
1133:     ctx->fromdata     = (void*)from8;
1134:     ctx->ops->begin   = VecScatterBegin_SSToSS;
1135:     ctx->ops->end     = 0;
1136:     ctx->ops->destroy = VecScatterDestroy_SSToSS;
1137:     ctx->ops->copy    = VecScatterCopy_SSToSS;
1138:     ctx->ops->view    = VecScatterView_SSToSS;
1139:     PetscInfo(xin,"Special case: sequential vector stride to stride\n");
1140:     goto functionend;
1141:   } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
1142:     PetscInt               nx,ny,first,step;
1143:     const PetscInt         *idx;
1144:     VecScatter_Seq_General *from9 = NULL;
1145:     VecScatter_Seq_Stride  *to9   = NULL;

1147:     ISGetLocalSize(ix,&nx);
1148:     ISGetIndices(ix,&idx);
1149:     ISGetLocalSize(iy,&ny);
1150:     ISStrideGetInfo(iy,&first,&step);
1151:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1152:     PetscMalloc2(1,&to9,1,&from9);
1153:     PetscMemzero(&from9->memcpy_plan,sizeof(VecScatterMemcpyPlan));
1154:     PetscMalloc1(nx,&from9->vslots);
1155:     to9->n     = nx;
1156:     to9->first = first;
1157:     to9->step  = step;
1158:     from9->n   = nx;
1159: #if defined(PETSC_USE_DEBUG)
1160:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1161: #endif
1162:     PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
1163:     ctx->todata    = (void*)to9; ctx->fromdata = (void*)from9;
1164:     if (step == 1) {
1165:       PetscInt tmp[2];
1166:       tmp[0] = 0; tmp[1] = nx;
1167:       VecScatterMemcpyPlanCreate_Index(1,tmp,from9->vslots,1,&from9->memcpy_plan);
1168:       ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
1169:     } else {
1170:       ctx->ops->begin = VecScatterBegin_SGToSS;
1171:     }
1172:     ctx->ops->destroy   = VecScatterDestroy_SGToSS;
1173:     ctx->ops->end       = 0;
1174:     ctx->ops->copy      = VecScatterCopy_SGToSS;
1175:     ctx->ops->view      = VecScatterView_SGToSS;
1176:     to9->format      = VEC_SCATTER_SEQ_STRIDE;
1177:     from9->format    = VEC_SCATTER_SEQ_GENERAL;
1178:     PetscInfo(xin,"Special case: sequential vector general to stride\n");
1179:     goto functionend;
1180:   } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
1181:     PetscInt               nx,ny,first,step;
1182:     const PetscInt         *idy;
1183:     VecScatter_Seq_General *to10 = NULL;
1184:     VecScatter_Seq_Stride  *from10 = NULL;

1186:     ISGetLocalSize(ix,&nx);
1187:     ISGetIndices(iy,&idy);
1188:     ISGetLocalSize(iy,&ny);
1189:     ISStrideGetInfo(ix,&first,&step);
1190:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1191:     PetscMalloc2(1,&to10,1,&from10);
1192:     PetscMemzero(&to10->memcpy_plan,sizeof(VecScatterMemcpyPlan));
1193:     PetscMalloc1(nx,&to10->vslots);
1194:     from10->n     = nx;
1195:     from10->first = first;
1196:     from10->step  = step;
1197:     to10->n       = nx;
1198: #if defined(PETSC_USE_DEBUG)
1199:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1200: #endif
1201:     PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1202:     ctx->todata   = (void*)to10;
1203:     ctx->fromdata = (void*)from10;
1204:     if (step == 1) {
1205:       PetscInt tmp[2];
1206:       tmp[0] = 0; tmp[1] = nx;
1207:       VecScatterMemcpyPlanCreate_Index(1,tmp,to10->vslots,1,&to10->memcpy_plan);
1208:       ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1209:     } else {
1210:       ctx->ops->begin = VecScatterBegin_SSToSG;
1211:     }
1212:     ctx->ops->destroy = VecScatterDestroy_SSToSG;
1213:     ctx->ops->end     = 0;
1214:     ctx->ops->copy    = 0;
1215:     ctx->ops->view    = VecScatterView_SSToSG;
1216:     to10->format   = VEC_SCATTER_SEQ_GENERAL;
1217:     from10->format = VEC_SCATTER_SEQ_STRIDE;
1218:     PetscInfo(xin,"Special case: sequential vector stride to general\n");
1219:     goto functionend;
1220:   } else {
1221:     PetscInt               nx,ny;
1222:     const PetscInt         *idx,*idy;
1223:     VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1224:     PetscBool              idnx,idny;

1226:     ISGetLocalSize(ix,&nx);
1227:     ISGetLocalSize(iy,&ny);
1228:     if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);

1230:     ISIdentity(ix,&idnx);
1231:     ISIdentity(iy,&idny);
1232:     if (idnx && idny) {
1233:       VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1234:       PetscMalloc2(1,&to13,1,&from13);
1235:       to13->n       = nx;
1236:       to13->first   = 0;
1237:       to13->step    = 1;
1238:       from13->n     = nx;
1239:       from13->first = 0;
1240:       from13->step  = 1;
1241:       to13->format    = VEC_SCATTER_SEQ_STRIDE;
1242:       from13->format  = VEC_SCATTER_SEQ_STRIDE;
1243:       ctx->todata   = (void*)to13;
1244:       ctx->fromdata = (void*)from13;
1245:       ctx->ops->begin    = VecScatterBegin_SSToSS;
1246:       ctx->ops->end      = 0;
1247:       ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1248:       ctx->ops->copy     = VecScatterCopy_SSToSS;
1249:       ctx->ops->view     = VecScatterView_SSToSS;
1250:       PetscInfo(xin,"Special case: sequential copy\n");
1251:       goto functionend;
1252:     }

1254:     ISGetIndices(iy,&idy);
1255:     ISGetIndices(ix,&idx);
1256:     PetscMalloc2(1,&to11,1,&from11);
1257:     PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1258:     to11->n = nx;
1259: #if defined(PETSC_USE_DEBUG)
1260:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1261: #endif
1262:     PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1263:     from11->n = nx;
1264: #if defined(PETSC_USE_DEBUG)
1265:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1266: #endif
1267:     PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1268:     to11->format        = VEC_SCATTER_SEQ_GENERAL;
1269:     from11->format      = VEC_SCATTER_SEQ_GENERAL;
1270:     ctx->todata       = (void*)to11;
1271:     ctx->fromdata     = (void*)from11;
1272:     ctx->ops->begin   = VecScatterBegin_SGToSG;
1273:     ctx->ops->end     = 0;
1274:     ctx->ops->destroy = VecScatterDestroy_SGToSG;
1275:     ctx->ops->copy    = VecScatterCopy_SGToSG;
1276:     ctx->ops->view    = VecScatterView_SGToSG;
1277:     VecScatterMemcpyPlanCreate_SGToSG(1,to11,from11);
1278:     ISRestoreIndices(ix,&idx);
1279:     ISRestoreIndices(iy,&idy);
1280:     PetscInfo(xin,"Sequential vector scatter with block indices\n");
1281:     goto functionend;
1282:   }
1283:   functionend:
1284:   ISDestroy(&tix);
1285:   ISDestroy(&tiy);
1286:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1287:   return(0);
1288: }

1290: static PetscErrorCode VecScatterCreate_PtoS(VecScatter ctx)
1291: {
1292:   PetscErrorCode    ierr;
1293:   PetscMPIInt       size;
1294:   PetscInt          ix_type=-1,iy_type=-1,*range;
1295:   MPI_Comm          comm;
1296:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1297:   Vec               xin=ctx->from_v,yin=ctx->to_v;
1298:   VecScatterType    type;
1299:   PetscBool         vec_mpi1_flg;
1300:   PetscBool         totalv,ixblock,iyblock,iystride,islocal,cando;

1303:   PetscObjectGetComm((PetscObject)ctx,&comm);
1304:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1305:   if (tix) ix = tix;
1306:   if (tiy) iy = tiy;

1308:   VecScatterGetType(ctx,&type);
1309:   PetscStrcmp(type,"mpi1",&vec_mpi1_flg);

1311:   islocal = PETSC_FALSE;
1312:   /* special case extracting (subset of) local portion */
1313:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1314:     /* Case (2-a) */
1315:     PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1316:     PetscInt              start,end,min,max;
1317:     VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;

1319:     VecGetOwnershipRange(xin,&start,&end);
1320:     ISGetLocalSize(ix,&nx);
1321:     ISStrideGetInfo(ix,&from_first,&from_step);
1322:     ISGetLocalSize(iy,&ny);
1323:     ISStrideGetInfo(iy,&to_first,&to_step);
1324:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1325:     ISGetMinMax(ix,&min,&max);
1326:     if (min >= start && max < end) islocal = PETSC_TRUE;
1327:     else islocal = PETSC_FALSE;
1328:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1329:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1330:     if (cando) {
1331:       PetscMalloc2(1,&to12,1,&from12);
1332:       to12->n            = nx;
1333:       to12->first        = to_first;
1334:       to12->step         = to_step;
1335:       from12->n          = nx;
1336:       from12->first      = from_first-start;
1337:       from12->step       = from_step;
1338:       to12->format         = VEC_SCATTER_SEQ_STRIDE;
1339:       from12->format       = VEC_SCATTER_SEQ_STRIDE;
1340:       ctx->todata        = (void*)to12;
1341:       ctx->fromdata      = (void*)from12;
1342:       ctx->ops->begin    = VecScatterBegin_SSToSS;
1343:       ctx->ops->end      = 0;
1344:       ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1345:       ctx->ops->copy     = VecScatterCopy_SSToSS;
1346:       ctx->ops->view     = VecScatterView_SSToSS;
1347:       PetscInfo(xin,"Special case: processors only getting local values\n");
1348:       goto functionend;
1349:     }
1350:   } else {
1351:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1352:   }

1354:   /* test for special case of all processors getting entire vector */
1355:   /* contains check that PetscMPIInt can handle the sizes needed */
1356:   totalv = PETSC_FALSE;
1357:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1358:     /* Case (2-b) */
1359:     PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1360:     PetscMPIInt          *count = NULL,*displx;
1361:     VecScatter_MPI_ToAll *sto   = NULL;

1363:     ISGetLocalSize(ix,&nx);
1364:     ISStrideGetInfo(ix,&from_first,&from_step);
1365:     ISGetLocalSize(iy,&ny);
1366:     ISStrideGetInfo(iy,&to_first,&to_step);
1367:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1368:     VecGetSize(xin,&N);
1369:     if (nx != N) totalv = PETSC_FALSE;
1370:     else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1371:     else totalv = PETSC_FALSE;
1372:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1373:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1375: #if defined(PETSC_USE_64BIT_INDICES)
1376:     if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1377: #else
1378:     if (cando) {
1379: #endif
1380:       MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1381:       PetscMalloc3(1,&sto,size,&count,size,&displx);
1382:       range = xin->map->range;
1383:       for (i=0; i<size; i++) {
1384:         PetscMPIIntCast(range[i+1] - range[i],count+i);
1385:         PetscMPIIntCast(range[i],displx+i);
1386:       }
1387:       sto->count        = count;
1388:       sto->displx       = displx;
1389:       sto->work1        = 0;
1390:       sto->work2        = 0;
1391:       sto->format         = VEC_SCATTER_MPI_TOALL;
1392:       ctx->todata       = (void*)sto;
1393:       ctx->fromdata     = 0;
1394:       ctx->ops->begin   = VecScatterBegin_MPI_ToAll;
1395:       ctx->ops->end     = 0;
1396:       ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1397:       ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1398:       ctx->ops->view    = VecScatterView_MPI_ToAll;
1399:       PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1400:       goto functionend;
1401:     }
1402:   } else {
1403:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1404:   }

1406:   /* test for special case of processor 0 getting entire vector */
1407:   /* contains check that PetscMPIInt can handle the sizes needed */
1408:   totalv = PETSC_FALSE;
1409:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1410:     /* Case (2-c) */
1411:     PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1412:     PetscMPIInt          rank,*count = NULL,*displx;
1413:     VecScatter_MPI_ToAll *sto = NULL;

1415:     PetscObjectGetComm((PetscObject)xin,&comm);
1416:     MPI_Comm_rank(comm,&rank);
1417:     ISGetLocalSize(ix,&nx);
1418:     ISStrideGetInfo(ix,&from_first,&from_step);
1419:     ISGetLocalSize(iy,&ny);
1420:     ISStrideGetInfo(iy,&to_first,&to_step);
1421:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1422:     if (!rank) {
1423:       VecGetSize(xin,&N);
1424:       if (nx != N) totalv = PETSC_FALSE;
1425:       else if (from_first == 0        && from_step == 1 &&
1426:                from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1427:       else totalv = PETSC_FALSE;
1428:     } else {
1429:       if (!nx) totalv = PETSC_TRUE;
1430:       else     totalv = PETSC_FALSE;
1431:     }
1432:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1433:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1435: #if defined(PETSC_USE_64BIT_INDICES)
1436:     if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1437: #else
1438:     if (cando) {
1439: #endif
1440:       MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1441:       PetscMalloc3(1,&sto,size,&count,size,&displx);
1442:       range = xin->map->range;
1443:       for (i=0; i<size; i++) {
1444:         PetscMPIIntCast(range[i+1] - range[i],count+i);
1445:         PetscMPIIntCast(range[i],displx+i);
1446:       }
1447:       sto->count        = count;
1448:       sto->displx       = displx;
1449:       sto->work1        = 0;
1450:       sto->work2        = 0;
1451:       sto->format         = VEC_SCATTER_MPI_TOONE;
1452:       ctx->todata       = (void*)sto;
1453:       ctx->fromdata     = 0;
1454:       ctx->ops->begin   = VecScatterBegin_MPI_ToOne;
1455:       ctx->ops->end     = 0;
1456:       ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1457:       ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1458:       ctx->ops->view    = VecScatterView_MPI_ToAll;
1459:       PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1460:       goto functionend;
1461:     }
1462:   } else {
1463:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1464:   }

1466:   /* Case 2-d */
1467:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1468:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1469:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1470:   if (ixblock) {
1471:     /* special case block to block */
1472:     if (iyblock) {
1473:       PetscInt       nx,ny,bsx,bsy;
1474:       const PetscInt *idx,*idy;
1475:       ISGetBlockSize(iy,&bsy);
1476:       ISGetBlockSize(ix,&bsx);
1477:       if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1478:         ISBlockGetLocalSize(ix,&nx);
1479:         ISBlockGetIndices(ix,&idx);
1480:         ISBlockGetLocalSize(iy,&ny);
1481:         ISBlockGetIndices(iy,&idy);
1482:         if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1483: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1484:         if (vec_mpi1_flg) {
1485:           VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1486:         } else {
1487:           VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,bsx,ctx);
1488:         }
1489: #else
1490:         VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1491: #endif
1492:         ISBlockRestoreIndices(ix,&idx);
1493:         ISBlockRestoreIndices(iy,&idy);
1494:         PetscInfo(xin,"Special case: blocked indices\n");
1495:         goto functionend;
1496:       }
1497:       /* special case block to stride */
1498:     } else if (iystride) {
1499:       /* Case (2-e) */
1500:       PetscInt ystart,ystride,ysize,bsx;
1501:       ISStrideGetInfo(iy,&ystart,&ystride);
1502:       ISGetLocalSize(iy,&ysize);
1503:       ISGetBlockSize(ix,&bsx);
1504:       /* see if stride index set is equivalent to block index set */
1505:       if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1506:         PetscInt       nx,il,*idy;
1507:         const PetscInt *idx;
1508:         ISBlockGetLocalSize(ix,&nx);
1509:         ISBlockGetIndices(ix,&idx);
1510:         if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1511:         PetscMalloc1(nx,&idy);
1512:         if (nx) {
1513:           idy[0] = ystart/bsx;
1514:           for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1515:         }
1516: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1517:         if (vec_mpi1_flg) {
1518:           VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1519:         } else {
1520:           VecScatterCreateLocal_PtoS_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1521:         }
1522: #else
1523:         VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1524: #endif
1525:         PetscFree(idy);
1526:         ISBlockRestoreIndices(ix,&idx);
1527:         PetscInfo(xin,"Special case: blocked indices to stride\n");
1528:         goto functionend;
1529:       }
1530:     }
1531:   }

1533:   /* left over general case (2-f) */
1534:   {
1535:     PetscInt       nx,ny;
1536:     const PetscInt *idx,*idy;
1537:     ISGetLocalSize(ix,&nx);
1538:     ISGetIndices(ix,&idx);
1539:     ISGetLocalSize(iy,&ny);
1540:     ISGetIndices(iy,&idy);
1541:     if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%D %D)",nx,ny);
1542: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1543:     if (vec_mpi1_flg) {
1544:       VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1545:     } else {
1546:       VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1547:     }
1548: #else
1549:     VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1550: #endif
1551:     ISRestoreIndices(ix,&idx);
1552:     ISRestoreIndices(iy,&idy);
1553:     PetscInfo(xin,"General case: MPI to Seq\n");
1554:     goto functionend;
1555:   }
1556:   functionend:
1557:   ISDestroy(&tix);
1558:   ISDestroy(&tiy);
1559:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1560:   return(0);
1561: }

1563: static PetscErrorCode VecScatterCreate_StoP(VecScatter ctx)
1564: {
1565:   PetscErrorCode    ierr;
1566:   PetscInt          ix_type=-1,iy_type=-1;
1567:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1568:   Vec               xin=ctx->from_v,yin=ctx->to_v;
1569:   VecScatterType    type;
1570:   PetscBool         vscat_mpi1;
1571:   PetscBool         islocal,cando;

1574:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1575:   if (tix) ix = tix;
1576:   if (tiy) iy = tiy;

1578:   VecScatterGetType(ctx,&type);
1579:   PetscStrcmp(type,"mpi1",&vscat_mpi1);

1581:   /* special case local copy portion */
1582:   islocal = PETSC_FALSE;
1583:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1584:     PetscInt              nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1585:     VecScatter_Seq_Stride *from = NULL,*to = NULL;

1587:     VecGetOwnershipRange(yin,&start,&end);
1588:     ISGetLocalSize(ix,&nx);
1589:     ISStrideGetInfo(ix,&from_first,&from_step);
1590:     ISGetLocalSize(iy,&ny);
1591:     ISStrideGetInfo(iy,&to_first,&to_step);
1592:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1593:     ISGetMinMax(iy,&min,&max);
1594:     if (min >= start && max < end) islocal = PETSC_TRUE;
1595:     else islocal = PETSC_FALSE;
1596:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1597:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1598:     if (cando) {
1599:       PetscMalloc2(1,&to,1,&from);
1600:       to->n             = nx;
1601:       to->first         = to_first-start;
1602:       to->step          = to_step;
1603:       from->n           = nx;
1604:       from->first       = from_first;
1605:       from->step        = from_step;
1606:       to->format          = VEC_SCATTER_SEQ_STRIDE;
1607:       from->format        = VEC_SCATTER_SEQ_STRIDE;
1608:       ctx->todata       = (void*)to;
1609:       ctx->fromdata     = (void*)from;
1610:       ctx->ops->begin   = VecScatterBegin_SSToSS;
1611:       ctx->ops->end     = 0;
1612:       ctx->ops->destroy = VecScatterDestroy_SSToSS;
1613:       ctx->ops->copy    = VecScatterCopy_SSToSS;
1614:       ctx->ops->view    = VecScatterView_SSToSS;
1615:       PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1616:       goto functionend;
1617:     }
1618:   } else {
1619:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1620:   }
1621:   /* special case block to stride */
1622:   if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1623:     PetscInt ystart,ystride,ysize,bsx;
1624:     ISStrideGetInfo(iy,&ystart,&ystride);
1625:     ISGetLocalSize(iy,&ysize);
1626:     ISGetBlockSize(ix,&bsx);
1627:     /* see if stride index set is equivalent to block index set */
1628:     if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1629:       PetscInt       nx,il,*idy;
1630:       const PetscInt *idx;
1631:       ISBlockGetLocalSize(ix,&nx);
1632:       ISBlockGetIndices(ix,&idx);
1633:       if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1634:       PetscMalloc1(nx,&idy);
1635:       if (nx) {
1636:         idy[0] = ystart/bsx;
1637:         for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1638:       }
1639:       if (vscat_mpi1) {
1640:         VecScatterCreateLocal_StoP_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1641:       }
1642: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1643:       else {
1644:         VecScatterCreateLocal_StoP_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1645:       }
1646: #endif
1647:       PetscFree(idy);
1648:       ISBlockRestoreIndices(ix,&idx);
1649:       PetscInfo(xin,"Special case: Blocked indices to stride\n");
1650:       goto functionend;
1651:     }
1652:   }

1654:   /* general case */
1655:   {
1656:     PetscInt       nx,ny;
1657:     const PetscInt *idx,*idy;
1658:     ISGetLocalSize(ix,&nx);
1659:     ISGetIndices(ix,&idx);
1660:     ISGetLocalSize(iy,&ny);
1661:     ISGetIndices(iy,&idy);
1662:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1663:     if (vscat_mpi1) {
1664:       VecScatterCreateLocal_StoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1665:     }
1666: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1667:     else {
1668:       VecScatterCreateLocal_StoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1669:     }
1670: #endif
1671:     ISRestoreIndices(ix,&idx);
1672:     ISRestoreIndices(iy,&idy);
1673:     PetscInfo(xin,"General case: Seq to MPI\n");
1674:     goto functionend;
1675:   }
1676:   functionend:
1677:   ISDestroy(&tix);
1678:   ISDestroy(&tiy);
1679:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1680:   return(0);
1681: }

1683: static PetscErrorCode VecScatterCreate_PtoP(VecScatter ctx)
1684: {
1685:   PetscErrorCode    ierr;
1686:   PetscInt          ix_type=-1,iy_type=-1;
1687:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1688:   Vec               xin=ctx->from_v,yin=ctx->to_v;
1689:   VecScatterType    type;
1690:   PetscBool         vscat_mpi1;
1691:   PetscInt          nx,ny;
1692:   const PetscInt    *idx,*idy;

1695:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_MPI_ID,&ix_type,&tix,&iy_type,&tiy);
1696:   if (tix) ix = tix;
1697:   if (tiy) iy = tiy;

1699:   VecScatterGetType(ctx,&type);
1700:   PetscStrcmp(type,"mpi1",&vscat_mpi1);

1702:   /* no special cases for now */
1703:   ISGetLocalSize(ix,&nx);
1704:   ISGetIndices(ix,&idx);
1705:   ISGetLocalSize(iy,&ny);
1706:   ISGetIndices(iy,&idy);
1707:   if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1708:   if (vscat_mpi1) {
1709:     VecScatterCreateLocal_PtoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1710:   }
1711: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1712:   else {
1713:     VecScatterCreateLocal_PtoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1714:   }
1715: #endif
1716:   ISRestoreIndices(ix,&idx);
1717:   ISRestoreIndices(iy,&idy);
1718:   PetscInfo(xin,"General case: MPI to MPI\n");

1720:   ISDestroy(&tix);
1721:   ISDestroy(&tiy);
1722:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1723:   return(0);
1724: }

1726: static PetscErrorCode VecScatterGetInputVecType_private(VecScatter ctx,PetscInt *xin_type1,PetscInt *yin_type1)
1727: {
1728:   PetscErrorCode    ierr;
1729:   MPI_Comm          comm,ycomm;
1730:   PetscMPIInt       size;
1731:   Vec               xin = ctx->from_v,yin = ctx->to_v;
1732:   PetscInt          xin_type,yin_type;

1735:   /*
1736:       Determine if the vectors are "parallel", ie. it shares a comm with other processors, or
1737:       sequential (it does not share a comm). The difference is that parallel vectors treat the
1738:       index set as providing indices in the global parallel numbering of the vector, with
1739:       sequential vectors treat the index set as providing indices in the local sequential
1740:       numbering
1741:   */
1742:   PetscObjectGetComm((PetscObject)xin,&comm);
1743:   MPI_Comm_size(comm,&size);
1744:   if (size > 1) {
1745:     xin_type = VEC_MPI_ID;
1746:   } else xin_type = VEC_SEQ_ID;
1747:   *xin_type1 = xin_type;

1749:   PetscObjectGetComm((PetscObject)yin,&ycomm);
1750:   MPI_Comm_size(ycomm,&size);
1751:   if (size > 1) {
1752:     yin_type = VEC_MPI_ID;
1753:   } else yin_type = VEC_SEQ_ID;
1754:   *yin_type1 = yin_type;
1755:   return(0);
1756: }

1758: static PetscErrorCode GetInputISType_private(VecScatter ctx,PetscInt xin_type,PetscInt yin_type,PetscInt *ix_type1,IS *tix1,PetscInt *iy_type1,IS *tiy1)
1759: {
1760:   PetscErrorCode    ierr;
1761:   MPI_Comm          comm;
1762:   Vec               xin = ctx->from_v,yin = ctx->to_v;
1763:   IS                tix = 0,tiy = 0,ix = ctx->from_is, iy = ctx->to_is;
1764:   PetscInt          ix_type  = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1765:   PetscBool         flag;

1768:   PetscObjectGetComm((PetscObject)ctx,&comm);

1770:   /* if ix or iy is not included; assume just grabbing entire vector */
1771:   if (!ix && xin_type == VEC_SEQ_ID) {
1772:     ISCreateStride(comm,ctx->from_n,0,1,&ix);
1773:     tix  = ix;
1774:   } else if (!ix && xin_type == VEC_MPI_ID) {
1775:     if (yin_type == VEC_MPI_ID) {
1776:       PetscInt ntmp, low;
1777:       VecGetLocalSize(xin,&ntmp);
1778:       VecGetOwnershipRange(xin,&low,NULL);
1779:       ISCreateStride(comm,ntmp,low,1,&ix);
1780:     } else {
1781:       PetscInt Ntmp;
1782:       VecGetSize(xin,&Ntmp);
1783:       ISCreateStride(comm,Ntmp,0,1,&ix);
1784:     }
1785:     tix = ix;
1786:   } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");

1788:   if (!iy && yin_type == VEC_SEQ_ID) {
1789:     ISCreateStride(comm,ctx->to_n,0,1,&iy);
1790:     tiy  = iy;
1791:   } else if (!iy && yin_type == VEC_MPI_ID) {
1792:     if (xin_type == VEC_MPI_ID) {
1793:       PetscInt ntmp, low;
1794:       VecGetLocalSize(yin,&ntmp);
1795:       VecGetOwnershipRange(yin,&low,NULL);
1796:       ISCreateStride(comm,ntmp,low,1,&iy);
1797:     } else {
1798:       PetscInt Ntmp;
1799:       VecGetSize(yin,&Ntmp);
1800:       ISCreateStride(comm,Ntmp,0,1,&iy);
1801:     }
1802:     tiy = iy;
1803:   } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");

1805:   /* Determine types of index sets */
1806:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1807:   if (flag) ix_type = IS_BLOCK_ID;
1808:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1809:   if (flag) iy_type = IS_BLOCK_ID;
1810:   PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1811:   if (flag) ix_type = IS_STRIDE_ID;
1812:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1813:   if (flag) iy_type = IS_STRIDE_ID;

1815:   if (ix_type1) *ix_type1 = ix_type;
1816:   if (iy_type1) *iy_type1 = iy_type;
1817:   if (tix1)     *tix1     = tix;
1818:   if (tiy1)     *tiy1     = tiy;
1819:   return(0);
1820: }

1822: static PetscErrorCode VecScatterCreate_vectype_private(VecScatter ctx)
1823: {
1824:   PetscErrorCode    ierr;
1825:   PetscInt          xin_type=-1,yin_type=-1;

1828:   VecScatterGetInputVecType_private(ctx,&xin_type,&yin_type);
1829:   if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
1830:     VecScatterCreate_PtoS(ctx);
1831:   } else if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1832:     VecScatterCreate_StoP(ctx);
1833:   } else if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1834:     VecScatterCreate_PtoP(ctx);
1835:   }
1836:   return(0);
1837: }

1839: PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
1840: {
1841:   PetscErrorCode    ierr;

1844:   /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1845:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);
1846:   PetscInfo(ctx,"Using MPI1 for vector scatter\n");
1847:   VecScatterCreate_vectype_private(ctx);
1848:   return(0);
1849: }

1851: PetscErrorCode VecScatterCreate_MPI3(VecScatter ctx)
1852: {
1853:   PetscErrorCode    ierr;

1856:   /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1857:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3);
1858:   PetscInfo(ctx,"Using MPI3 for vector scatter\n");
1859:   VecScatterCreate_vectype_private(ctx);
1860:   return(0);
1861: }

1863: PetscErrorCode VecScatterCreate_MPI3Node(VecScatter ctx)
1864: {
1865:   PetscErrorCode    ierr;

1868:   /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1869:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3NODE);
1870:   PetscInfo(ctx,"Using MPI3NODE for vector scatter\n");
1871:   VecScatterCreate_vectype_private(ctx);
1872:   return(0);
1873: }