Actual source code: vpscat.c

petsc-3.5.2 2014-09-08
Report Typos and Errors
  2: /*
  3:     Defines parallel vector scatters.
  4: */

  6: #include <../src/vec/vec/impls/dvecimpl.h>         /*I "petscvec.h" I*/
  7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>

 11: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 12: {
 13:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 14:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 15:   PetscErrorCode         ierr;
 16:   PetscInt               i;
 17:   PetscMPIInt            rank;
 18:   PetscViewerFormat      format;
 19:   PetscBool              iascii;

 22:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 23:   if (iascii) {
 24:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 25:     PetscViewerGetFormat(viewer,&format);
 26:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 27:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 29:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 30:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 31:       itmp = to->starts[to->n+1];
 32:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 33:       itmp = from->starts[from->n+1];
 34:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 35:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 37:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 38:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 39:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 41:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 42:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 44:     } else {
 45:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
 46:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 47:       if (to->n) {
 48:         for (i=0; i<to->n; i++) {
 49:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 50:         }
 51:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 52:         for (i=0; i<to->starts[to->n]; i++) {
 53:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 54:         }
 55:       }

 57:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 58:       if (from->n) {
 59:         for (i=0; i<from->n; i++) {
 60:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 61:         }

 63:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 64:         for (i=0; i<from->starts[from->n]; i++) {
 65:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 66:         }
 67:       }
 68:       if (to->local.n) {
 69:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 70:         for (i=0; i<to->local.n; i++) {
 71:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
 72:         }
 73:       }

 75:       PetscViewerFlush(viewer);
 76:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
 77:     }
 78:   }
 79:   return(0);
 80: }

 82: /* -----------------------------------------------------------------------------------*/
 83: /*
 84:       The next routine determines what part of  the local part of the scatter is an
 85:   exact copy of values into their current location. We check this here and
 86:   then know that we need not perform that portion of the scatter when the vector is
 87:   scattering to itself with INSERT_VALUES.

 89:      This is currently not used but would speed up, for example DMLocalToLocalBegin/End()

 91: */
 94: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
 95: {
 96:   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
 98:   PetscInt       *nto_slots,*nfrom_slots,j = 0;

101:   for (i=0; i<n; i++) {
102:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
103:   }

105:   if (!n_nonmatching) {
106:     to->nonmatching_computed = PETSC_TRUE;
107:     to->n_nonmatching        = from->n_nonmatching = 0;

109:     PetscInfo1(scatter,"Reduced %D to 0\n", n);
110:   } else if (n_nonmatching == n) {
111:     to->nonmatching_computed = PETSC_FALSE;

113:     PetscInfo(scatter,"All values non-matching\n");
114:   } else {
115:     to->nonmatching_computed= PETSC_TRUE;
116:     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;

118:     PetscMalloc1(n_nonmatching,&nto_slots);
119:     PetscMalloc1(n_nonmatching,&nfrom_slots);

121:     to->slots_nonmatching   = nto_slots;
122:     from->slots_nonmatching = nfrom_slots;
123:     for (i=0; i<n; i++) {
124:       if (to_slots[i] != from_slots[i]) {
125:         nto_slots[j]   = to_slots[i];
126:         nfrom_slots[j] = from_slots[i];
127:         j++;
128:       }
129:     }
130:     PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
131:   }
132:   return(0);
133: }

135: /* --------------------------------------------------------------------------------------*/

137: /* -------------------------------------------------------------------------------------*/
140: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
141: {
142:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
143:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
144:   PetscErrorCode         ierr;
145:   PetscInt               i;

148:   if (to->use_readyreceiver) {
149:     /*
150:        Since we have already posted sends we must cancel them before freeing
151:        the requests
152:     */
153:     for (i=0; i<from->n; i++) {
154:       MPI_Cancel(from->requests+i);
155:     }
156:     for (i=0; i<to->n; i++) {
157:       MPI_Cancel(to->rev_requests+i);
158:     }
159:     MPI_Waitall(from->n,from->requests,to->rstatus);
160:     MPI_Waitall(to->n,to->rev_requests,to->rstatus);
161:   }

163: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
164:   if (to->use_alltoallw) {
165:     PetscFree3(to->wcounts,to->wdispls,to->types);
166:     PetscFree3(from->wcounts,from->wdispls,from->types);
167:   }
168: #endif

170: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
171:   if (to->use_window) {
172:     MPI_Win_free(&from->window);
173:     MPI_Win_free(&to->window);
174:     PetscFree(from->winstarts);
175:     PetscFree(to->winstarts);
176:   }
177: #endif

179:   if (to->use_alltoallv) {
180:     PetscFree2(to->counts,to->displs);
181:     PetscFree2(from->counts,from->displs);
182:   }

184:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
185:   /*
186:      IBM's PE version of MPI has a bug where freeing these guys will screw up later
187:      message passing.
188:   */
189: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
190:   if (!to->use_alltoallv && !to->use_window) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
191:     if (to->requests) {
192:       for (i=0; i<to->n; i++) {
193:         MPI_Request_free(to->requests + i);
194:       }
195:     }
196:     if (to->rev_requests) {
197:       for (i=0; i<to->n; i++) {
198:         MPI_Request_free(to->rev_requests + i);
199:       }
200:     }
201:   }
202:   /*
203:       MPICH could not properly cancel requests thus with ready receiver mode we
204:     cannot free the requests. It may be fixed now, if not then put the following
205:     code inside a if (!to->use_readyreceiver) {
206:   */
207:   if (!to->use_alltoallv && !to->use_window) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
208:     if (from->requests) {
209:       for (i=0; i<from->n; i++) {
210:         MPI_Request_free(from->requests + i);
211:       }
212:     }

214:     if (from->rev_requests) {
215:       for (i=0; i<from->n; i++) {
216:         MPI_Request_free(from->rev_requests + i);
217:       }
218:     }
219:   }
220: #endif

222:   PetscFree(to->local.vslots);
223:   PetscFree(from->local.vslots);
224:   PetscFree2(to->counts,to->displs);
225:   PetscFree2(from->counts,from->displs);
226:   PetscFree(to->local.slots_nonmatching);
227:   PetscFree(from->local.slots_nonmatching);
228:   PetscFree(to->rev_requests);
229:   PetscFree(from->rev_requests);
230:   PetscFree(to->requests);
231:   PetscFree(from->requests);
232:   PetscFree4(to->values,to->indices,to->starts,to->procs);
233:   PetscFree2(to->sstatus,to->rstatus);
234:   PetscFree4(from->values,from->indices,from->starts,from->procs);
235:   PetscFree(from);
236:   PetscFree(to);
237:   return(0);
238: }



242: /* --------------------------------------------------------------------------------------*/
243: /*
244:     Special optimization to see if the local part of the scatter is actually
245:     a copy. The scatter routines call PetscMemcpy() instead.

247: */
250: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
251: {
252:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
253:   PetscInt       to_start,from_start;

257:   to_start   = to_slots[0];
258:   from_start = from_slots[0];

260:   for (i=1; i<n; i++) {
261:     to_start   += bs;
262:     from_start += bs;
263:     if (to_slots[i]   != to_start)   return(0);
264:     if (from_slots[i] != from_start) return(0);
265:   }
266:   to->is_copy       = PETSC_TRUE;
267:   to->copy_start    = to_slots[0];
268:   to->copy_length   = bs*sizeof(PetscScalar)*n;
269:   from->is_copy     = PETSC_TRUE;
270:   from->copy_start  = from_slots[0];
271:   from->copy_length = bs*sizeof(PetscScalar)*n;
272:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
273:   return(0);
274: }

276: /* --------------------------------------------------------------------------------------*/

280: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
281: {
282:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
283:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
284:   PetscErrorCode         ierr;
285:   PetscInt               ny,bs = in_from->bs;

288:   out->begin   = in->begin;
289:   out->end     = in->end;
290:   out->copy    = in->copy;
291:   out->destroy = in->destroy;
292:   out->view    = in->view;

294:   /* allocate entire send scatter context */
295:   PetscNewLog(out,&out_to);
296:   PetscNewLog(out,&out_from);

298:   ny                = in_to->starts[in_to->n];
299:   out_to->n         = in_to->n;
300:   out_to->type      = in_to->type;
301:   out_to->sendfirst = in_to->sendfirst;

303:   PetscMalloc1(out_to->n,&out_to->requests);
304:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
305:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
306:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
307:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
308:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

310:   out->todata                        = (void*)out_to;
311:   out_to->local.n                    = in_to->local.n;
312:   out_to->local.nonmatching_computed = PETSC_FALSE;
313:   out_to->local.n_nonmatching        = 0;
314:   out_to->local.slots_nonmatching    = 0;
315:   if (in_to->local.n) {
316:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
317:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
318:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
319:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
320:   } else {
321:     out_to->local.vslots   = 0;
322:     out_from->local.vslots = 0;
323:   }

325:   /* allocate entire receive context */
326:   out_from->type      = in_from->type;
327:   ny                  = in_from->starts[in_from->n];
328:   out_from->n         = in_from->n;
329:   out_from->sendfirst = in_from->sendfirst;

331:   PetscMalloc1(out_from->n,&out_from->requests);
332:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
333:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
334:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
335:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

337:   out->fromdata                        = (void*)out_from;
338:   out_from->local.n                    = in_from->local.n;
339:   out_from->local.nonmatching_computed = PETSC_FALSE;
340:   out_from->local.n_nonmatching        = 0;
341:   out_from->local.slots_nonmatching    = 0;

343:   /*
344:       set up the request arrays for use with isend_init() and irecv_init()
345:   */
346:   {
347:     PetscMPIInt tag;
348:     MPI_Comm    comm;
349:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
350:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
351:     PetscInt    i;
352:     PetscBool   flg;
353:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
354:     MPI_Request *rev_swaits,*rev_rwaits;
355:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

357:     PetscMalloc1(in_to->n,&out_to->rev_requests);
358:     PetscMalloc1(in_from->n,&out_from->rev_requests);

360:     rev_rwaits = out_to->rev_requests;
361:     rev_swaits = out_from->rev_requests;

363:     out_from->bs = out_to->bs = bs;
364:     tag          = ((PetscObject)out)->tag;
365:     PetscObjectGetComm((PetscObject)out,&comm);

367:     /* Register the receives that you will use later (sends for scatter reverse) */
368:     for (i=0; i<out_from->n; i++) {
369:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
370:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
371:     }

373:     flg  = PETSC_FALSE;
374:     PetscOptionsGetBool(NULL,"-vecscatter_rsend",&flg,NULL);
375:     if (flg) {
376:       out_to->use_readyreceiver   = PETSC_TRUE;
377:       out_from->use_readyreceiver = PETSC_TRUE;
378:       for (i=0; i<out_to->n; i++) {
379:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
380:       }
381:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
382:       MPI_Barrier(comm);
383:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
384:     } else {
385:       out_to->use_readyreceiver   = PETSC_FALSE;
386:       out_from->use_readyreceiver = PETSC_FALSE;
387:       flg                         = PETSC_FALSE;
388:       PetscOptionsGetBool(NULL,"-vecscatter_ssend",&flg,NULL);
389:       if (flg) {
390:         PetscInfo(in,"Using VecScatter Ssend mode\n");
391:       }
392:       for (i=0; i<out_to->n; i++) {
393:         if (!flg) {
394:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
395:         } else {
396:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
397:         }
398:       }
399:     }
400:     /* Register receives for scatter reverse */
401:     for (i=0; i<out_to->n; i++) {
402:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
403:     }
404:   }
405:   return(0);
406: }

410: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
411: {
412:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
413:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
414:   PetscErrorCode         ierr;
415:   PetscInt               ny,bs = in_from->bs;
416:   PetscMPIInt            size;

419:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

421:   out->begin     = in->begin;
422:   out->end       = in->end;
423:   out->copy      = in->copy;
424:   out->destroy   = in->destroy;
425:   out->view      = in->view;

427:   /* allocate entire send scatter context */
428:   PetscNewLog(out,&out_to);
429:   PetscNewLog(out,&out_from);

431:   ny                = in_to->starts[in_to->n];
432:   out_to->n         = in_to->n;
433:   out_to->type      = in_to->type;
434:   out_to->sendfirst = in_to->sendfirst;

436:   PetscMalloc1(out_to->n,&out_to->requests);
437:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
438:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
439:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
440:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
441:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

443:   out->todata                        = (void*)out_to;
444:   out_to->local.n                    = in_to->local.n;
445:   out_to->local.nonmatching_computed = PETSC_FALSE;
446:   out_to->local.n_nonmatching        = 0;
447:   out_to->local.slots_nonmatching    = 0;
448:   if (in_to->local.n) {
449:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
450:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
451:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
452:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
453:   } else {
454:     out_to->local.vslots   = 0;
455:     out_from->local.vslots = 0;
456:   }

458:   /* allocate entire receive context */
459:   out_from->type      = in_from->type;
460:   ny                  = in_from->starts[in_from->n];
461:   out_from->n         = in_from->n;
462:   out_from->sendfirst = in_from->sendfirst;

464:   PetscMalloc1(out_from->n,&out_from->requests);
465:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
466:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
467:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
468:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

470:   out->fromdata                        = (void*)out_from;
471:   out_from->local.n                    = in_from->local.n;
472:   out_from->local.nonmatching_computed = PETSC_FALSE;
473:   out_from->local.n_nonmatching        = 0;
474:   out_from->local.slots_nonmatching    = 0;

476:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

478:   PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
479:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
480:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

482:   PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
483:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
484:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
485:   return(0);
486: }
487: /* --------------------------------------------------------------------------------------------------
488:     Packs and unpacks the message data into send or from receive buffers.

490:     These could be generated automatically.

492:     Fortran kernels etc. could be used.
493: */
494: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
495: {
496:   PetscInt i;
497:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
498: }

502: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
503: {
504:   PetscInt i;

507:   switch (addv) {
508:   case INSERT_VALUES:
509:   case INSERT_ALL_VALUES:
510:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
511:     break;
512:   case ADD_VALUES:
513:   case ADD_ALL_VALUES:
514:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
515:     break;
516: #if !defined(PETSC_USE_COMPLEX)
517:   case MAX_VALUES:
518:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
519: #else
520:   case MAX_VALUES:
521: #endif
522:   case NOT_SET_VALUES:
523:     break;
524:   default:
525:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
526:   }
527:   return(0);
528: }

532: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
533: {
534:   PetscInt i;

537:   switch (addv) {
538:   case INSERT_VALUES:
539:   case INSERT_ALL_VALUES:
540:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
541:     break;
542:   case ADD_VALUES:
543:   case ADD_ALL_VALUES:
544:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
545:     break;
546: #if !defined(PETSC_USE_COMPLEX)
547:   case MAX_VALUES:
548:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
549: #else
550:   case MAX_VALUES:
551: #endif
552:   case NOT_SET_VALUES:
553:     break;
554:   default:
555:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
556:   }
557:   return(0);
558: }

560: /* ----------------------------------------------------------------------------------------------- */
561: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
562: {
563:   PetscInt i,idx;

565:   for (i=0; i<n; i++) {
566:     idx  = *indicesx++;
567:     y[0] = x[idx];
568:     y[1] = x[idx+1];
569:     y   += 2;
570:   }
571: }

575: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
576: {
577:   PetscInt i,idy;

580:   switch (addv) {
581:   case INSERT_VALUES:
582:   case INSERT_ALL_VALUES:
583:     for (i=0; i<n; i++) {
584:       idy      = *indicesy++;
585:       y[idy]   = x[0];
586:       y[idy+1] = x[1];
587:       x       += 2;
588:     }
589:     break;
590:   case ADD_VALUES:
591:   case ADD_ALL_VALUES:
592:     for (i=0; i<n; i++) {
593:       idy       = *indicesy++;
594:       y[idy]   += x[0];
595:       y[idy+1] += x[1];
596:       x        += 2;
597:     }
598:     break;
599: #if !defined(PETSC_USE_COMPLEX)
600:   case MAX_VALUES:
601:     for (i=0; i<n; i++) {
602:       idy      = *indicesy++;
603:       y[idy]   = PetscMax(y[idy],x[0]);
604:       y[idy+1] = PetscMax(y[idy+1],x[1]);
605:       x       += 2;
606:     }
607: #else
608:   case MAX_VALUES:
609: #endif
610:   case NOT_SET_VALUES:
611:     break;
612:   default:
613:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
614:   }
615:   return(0);
616: }

620: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
621: {
622:   PetscInt i,idx,idy;

625:   switch (addv) {
626:   case INSERT_VALUES:
627:   case INSERT_ALL_VALUES:
628:     for (i=0; i<n; i++) {
629:       idx      = *indicesx++;
630:       idy      = *indicesy++;
631:       y[idy]   = x[idx];
632:       y[idy+1] = x[idx+1];
633:     }
634:     break;
635:   case ADD_VALUES:
636:   case ADD_ALL_VALUES:
637:     for (i=0; i<n; i++) {
638:       idx       = *indicesx++;
639:       idy       = *indicesy++;
640:       y[idy]   += x[idx];
641:       y[idy+1] += x[idx+1];
642:     }
643:     break;
644: #if !defined(PETSC_USE_COMPLEX)
645:   case MAX_VALUES:
646:     for (i=0; i<n; i++) {
647:       idx      = *indicesx++;
648:       idy      = *indicesy++;
649:       y[idy]   = PetscMax(y[idy],x[idx]);
650:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
651:     }
652: #else
653:   case MAX_VALUES:
654: #endif
655:   case NOT_SET_VALUES:
656:     break;
657:   default:
658:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
659:   }
660:   return(0);
661: }
662: /* ----------------------------------------------------------------------------------------------- */
663: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
664: {
665:   PetscInt i,idx;

667:   for (i=0; i<n; i++) {
668:     idx  = *indicesx++;
669:     y[0] = x[idx];
670:     y[1] = x[idx+1];
671:     y[2] = x[idx+2];
672:     y   += 3;
673:   }
674: }
677: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
678: {
679:   PetscInt i,idy;

682:   switch (addv) {
683:   case INSERT_VALUES:
684:   case INSERT_ALL_VALUES:
685:     for (i=0; i<n; i++) {
686:       idy      = *indicesy++;
687:       y[idy]   = x[0];
688:       y[idy+1] = x[1];
689:       y[idy+2] = x[2];
690:       x       += 3;
691:     }
692:     break;
693:   case ADD_VALUES:
694:   case ADD_ALL_VALUES:
695:     for (i=0; i<n; i++) {
696:       idy       = *indicesy++;
697:       y[idy]   += x[0];
698:       y[idy+1] += x[1];
699:       y[idy+2] += x[2];
700:       x        += 3;
701:     }
702:     break;
703: #if !defined(PETSC_USE_COMPLEX)
704:   case MAX_VALUES:
705:     for (i=0; i<n; i++) {
706:       idy      = *indicesy++;
707:       y[idy]   = PetscMax(y[idy],x[0]);
708:       y[idy+1] = PetscMax(y[idy+1],x[1]);
709:       y[idy+2] = PetscMax(y[idy+2],x[2]);
710:       x       += 3;
711:     }
712: #else
713:   case MAX_VALUES:
714: #endif
715:   case NOT_SET_VALUES:
716:     break;
717:   default:
718:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
719:   }
720:   return(0);
721: }

725: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
726: {
727:   PetscInt i,idx,idy;

730:   switch (addv) {
731:   case INSERT_VALUES:
732:   case INSERT_ALL_VALUES:
733:     for (i=0; i<n; i++) {
734:       idx      = *indicesx++;
735:       idy      = *indicesy++;
736:       y[idy]   = x[idx];
737:       y[idy+1] = x[idx+1];
738:       y[idy+2] = x[idx+2];
739:     }
740:     break;
741:   case ADD_VALUES:
742:   case ADD_ALL_VALUES:
743:     for (i=0; i<n; i++) {
744:       idx       = *indicesx++;
745:       idy       = *indicesy++;
746:       y[idy]   += x[idx];
747:       y[idy+1] += x[idx+1];
748:       y[idy+2] += x[idx+2];
749:     }
750:     break;
751: #if !defined(PETSC_USE_COMPLEX)
752:   case MAX_VALUES:
753:     for (i=0; i<n; i++) {
754:       idx      = *indicesx++;
755:       idy      = *indicesy++;
756:       y[idy]   = PetscMax(y[idy],x[idx]);
757:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
758:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
759:     }
760: #else
761:   case MAX_VALUES:
762: #endif
763:   case NOT_SET_VALUES:
764:     break;
765:   default:
766:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
767:   }
768:   return(0);
769: }
770: /* ----------------------------------------------------------------------------------------------- */
771: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
772: {
773:   PetscInt i,idx;

775:   for (i=0; i<n; i++) {
776:     idx  = *indicesx++;
777:     y[0] = x[idx];
778:     y[1] = x[idx+1];
779:     y[2] = x[idx+2];
780:     y[3] = x[idx+3];
781:     y   += 4;
782:   }
783: }
786: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
787: {
788:   PetscInt i,idy;

791:   switch (addv) {
792:   case INSERT_VALUES:
793:   case INSERT_ALL_VALUES:
794:     for (i=0; i<n; i++) {
795:       idy      = *indicesy++;
796:       y[idy]   = x[0];
797:       y[idy+1] = x[1];
798:       y[idy+2] = x[2];
799:       y[idy+3] = x[3];
800:       x       += 4;
801:     }
802:     break;
803:   case ADD_VALUES:
804:   case ADD_ALL_VALUES:
805:     for (i=0; i<n; i++) {
806:       idy       = *indicesy++;
807:       y[idy]   += x[0];
808:       y[idy+1] += x[1];
809:       y[idy+2] += x[2];
810:       y[idy+3] += x[3];
811:       x        += 4;
812:     }
813:     break;
814: #if !defined(PETSC_USE_COMPLEX)
815:   case MAX_VALUES:
816:     for (i=0; i<n; i++) {
817:       idy      = *indicesy++;
818:       y[idy]   = PetscMax(y[idy],x[0]);
819:       y[idy+1] = PetscMax(y[idy+1],x[1]);
820:       y[idy+2] = PetscMax(y[idy+2],x[2]);
821:       y[idy+3] = PetscMax(y[idy+3],x[3]);
822:       x       += 4;
823:     }
824: #else
825:   case MAX_VALUES:
826: #endif
827:   case NOT_SET_VALUES:
828:     break;
829:   default:
830:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
831:   }
832:   return(0);
833: }

837: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
838: {
839:   PetscInt i,idx,idy;

842:   switch (addv) {
843:   case INSERT_VALUES:
844:   case INSERT_ALL_VALUES:
845:     for (i=0; i<n; i++) {
846:       idx      = *indicesx++;
847:       idy      = *indicesy++;
848:       y[idy]   = x[idx];
849:       y[idy+1] = x[idx+1];
850:       y[idy+2] = x[idx+2];
851:       y[idy+3] = x[idx+3];
852:     }
853:     break;
854:   case ADD_VALUES:
855:   case ADD_ALL_VALUES:
856:     for (i=0; i<n; i++) {
857:       idx       = *indicesx++;
858:       idy       = *indicesy++;
859:       y[idy]   += x[idx];
860:       y[idy+1] += x[idx+1];
861:       y[idy+2] += x[idx+2];
862:       y[idy+3] += x[idx+3];
863:     }
864:     break;
865: #if !defined(PETSC_USE_COMPLEX)
866:   case MAX_VALUES:
867:     for (i=0; i<n; i++) {
868:       idx      = *indicesx++;
869:       idy      = *indicesy++;
870:       y[idy]   = PetscMax(y[idy],x[idx]);
871:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
872:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
873:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
874:     }
875: #else
876:   case MAX_VALUES:
877: #endif
878:   case NOT_SET_VALUES:
879:     break;
880:   default:
881:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
882:   }
883:   return(0);
884: }
885: /* ----------------------------------------------------------------------------------------------- */
886: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
887: {
888:   PetscInt i,idx;

890:   for (i=0; i<n; i++) {
891:     idx  = *indicesx++;
892:     y[0] = x[idx];
893:     y[1] = x[idx+1];
894:     y[2] = x[idx+2];
895:     y[3] = x[idx+3];
896:     y[4] = x[idx+4];
897:     y   += 5;
898:   }
899: }

903: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
904: {
905:   PetscInt i,idy;

908:   switch (addv) {
909:   case INSERT_VALUES:
910:   case INSERT_ALL_VALUES:
911:     for (i=0; i<n; i++) {
912:       idy      = *indicesy++;
913:       y[idy]   = x[0];
914:       y[idy+1] = x[1];
915:       y[idy+2] = x[2];
916:       y[idy+3] = x[3];
917:       y[idy+4] = x[4];
918:       x       += 5;
919:     }
920:     break;
921:   case ADD_VALUES:
922:   case ADD_ALL_VALUES:
923:     for (i=0; i<n; i++) {
924:       idy       = *indicesy++;
925:       y[idy]   += x[0];
926:       y[idy+1] += x[1];
927:       y[idy+2] += x[2];
928:       y[idy+3] += x[3];
929:       y[idy+4] += x[4];
930:       x        += 5;
931:     }
932:     break;
933: #if !defined(PETSC_USE_COMPLEX)
934:   case MAX_VALUES:
935:     for (i=0; i<n; i++) {
936:       idy      = *indicesy++;
937:       y[idy]   = PetscMax(y[idy],x[0]);
938:       y[idy+1] = PetscMax(y[idy+1],x[1]);
939:       y[idy+2] = PetscMax(y[idy+2],x[2]);
940:       y[idy+3] = PetscMax(y[idy+3],x[3]);
941:       y[idy+4] = PetscMax(y[idy+4],x[4]);
942:       x       += 5;
943:     }
944: #else
945:   case MAX_VALUES:
946: #endif
947:   case NOT_SET_VALUES:
948:     break;
949:   default:
950:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
951:   }
952:   return(0);
953: }

957: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
958: {
959:   PetscInt i,idx,idy;

962:   switch (addv) {
963:   case INSERT_VALUES:
964:   case INSERT_ALL_VALUES:
965:     for (i=0; i<n; i++) {
966:       idx      = *indicesx++;
967:       idy      = *indicesy++;
968:       y[idy]   = x[idx];
969:       y[idy+1] = x[idx+1];
970:       y[idy+2] = x[idx+2];
971:       y[idy+3] = x[idx+3];
972:       y[idy+4] = x[idx+4];
973:     }
974:     break;
975:   case ADD_VALUES:
976:   case ADD_ALL_VALUES:
977:     for (i=0; i<n; i++) {
978:       idx       = *indicesx++;
979:       idy       = *indicesy++;
980:       y[idy]   += x[idx];
981:       y[idy+1] += x[idx+1];
982:       y[idy+2] += x[idx+2];
983:       y[idy+3] += x[idx+3];
984:       y[idy+4] += x[idx+4];
985:     }
986:     break;
987: #if !defined(PETSC_USE_COMPLEX)
988:   case MAX_VALUES:
989:     for (i=0; i<n; i++) {
990:       idx      = *indicesx++;
991:       idy      = *indicesy++;
992:       y[idy]   = PetscMax(y[idy],x[idx]);
993:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
994:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
995:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
996:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
997:     }
998: #else
999:   case MAX_VALUES:
1000: #endif
1001:   case NOT_SET_VALUES:
1002:     break;
1003:   default:
1004:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1005:   }
1006:   return(0);
1007: }
1008: /* ----------------------------------------------------------------------------------------------- */
1009: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1010: {
1011:   PetscInt i,idx;

1013:   for (i=0; i<n; i++) {
1014:     idx  = *indicesx++;
1015:     y[0] = x[idx];
1016:     y[1] = x[idx+1];
1017:     y[2] = x[idx+2];
1018:     y[3] = x[idx+3];
1019:     y[4] = x[idx+4];
1020:     y[5] = x[idx+5];
1021:     y   += 6;
1022:   }
1023: }

1027: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1028: {
1029:   PetscInt i,idy;

1032:   switch (addv) {
1033:   case INSERT_VALUES:
1034:   case INSERT_ALL_VALUES:
1035:     for (i=0; i<n; i++) {
1036:       idy      = *indicesy++;
1037:       y[idy]   = x[0];
1038:       y[idy+1] = x[1];
1039:       y[idy+2] = x[2];
1040:       y[idy+3] = x[3];
1041:       y[idy+4] = x[4];
1042:       y[idy+5] = x[5];
1043:       x       += 6;
1044:     }
1045:     break;
1046:   case ADD_VALUES:
1047:   case ADD_ALL_VALUES:
1048:     for (i=0; i<n; i++) {
1049:       idy       = *indicesy++;
1050:       y[idy]   += x[0];
1051:       y[idy+1] += x[1];
1052:       y[idy+2] += x[2];
1053:       y[idy+3] += x[3];
1054:       y[idy+4] += x[4];
1055:       y[idy+5] += x[5];
1056:       x        += 6;
1057:     }
1058:     break;
1059: #if !defined(PETSC_USE_COMPLEX)
1060:   case MAX_VALUES:
1061:     for (i=0; i<n; i++) {
1062:       idy      = *indicesy++;
1063:       y[idy]   = PetscMax(y[idy],x[0]);
1064:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1065:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1066:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1067:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1068:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1069:       x       += 6;
1070:     }
1071: #else
1072:   case MAX_VALUES:
1073: #endif
1074:   case NOT_SET_VALUES:
1075:     break;
1076:   default:
1077:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1078:   }
1079:   return(0);
1080: }

1084: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1085: {
1086:   PetscInt i,idx,idy;

1089:   switch (addv) {
1090:   case INSERT_VALUES:
1091:   case INSERT_ALL_VALUES:
1092:     for (i=0; i<n; i++) {
1093:       idx      = *indicesx++;
1094:       idy      = *indicesy++;
1095:       y[idy]   = x[idx];
1096:       y[idy+1] = x[idx+1];
1097:       y[idy+2] = x[idx+2];
1098:       y[idy+3] = x[idx+3];
1099:       y[idy+4] = x[idx+4];
1100:       y[idy+5] = x[idx+5];
1101:     }
1102:     break;
1103:   case ADD_VALUES:
1104:   case ADD_ALL_VALUES:
1105:     for (i=0; i<n; i++) {
1106:       idx       = *indicesx++;
1107:       idy       = *indicesy++;
1108:       y[idy]   += x[idx];
1109:       y[idy+1] += x[idx+1];
1110:       y[idy+2] += x[idx+2];
1111:       y[idy+3] += x[idx+3];
1112:       y[idy+4] += x[idx+4];
1113:       y[idy+5] += x[idx+5];
1114:     }
1115:     break;
1116: #if !defined(PETSC_USE_COMPLEX)
1117:   case MAX_VALUES:
1118:     for (i=0; i<n; i++) {
1119:       idx      = *indicesx++;
1120:       idy      = *indicesy++;
1121:       y[idy]   = PetscMax(y[idy],x[idx]);
1122:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1123:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1124:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1125:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1126:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1127:     }
1128: #else
1129:   case MAX_VALUES:
1130: #endif
1131:   case NOT_SET_VALUES:
1132:     break;
1133:   default:
1134:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1135:   }
1136:   return(0);
1137: }
1138: /* ----------------------------------------------------------------------------------------------- */
1139: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1140: {
1141:   PetscInt i,idx;

1143:   for (i=0; i<n; i++) {
1144:     idx  = *indicesx++;
1145:     y[0] = x[idx];
1146:     y[1] = x[idx+1];
1147:     y[2] = x[idx+2];
1148:     y[3] = x[idx+3];
1149:     y[4] = x[idx+4];
1150:     y[5] = x[idx+5];
1151:     y[6] = x[idx+6];
1152:     y   += 7;
1153:   }
1154: }

1158: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1159: {
1160:   PetscInt i,idy;

1163:   switch (addv) {
1164:   case INSERT_VALUES:
1165:   case INSERT_ALL_VALUES:
1166:     for (i=0; i<n; i++) {
1167:       idy      = *indicesy++;
1168:       y[idy]   = x[0];
1169:       y[idy+1] = x[1];
1170:       y[idy+2] = x[2];
1171:       y[idy+3] = x[3];
1172:       y[idy+4] = x[4];
1173:       y[idy+5] = x[5];
1174:       y[idy+6] = x[6];
1175:       x       += 7;
1176:     }
1177:     break;
1178:   case ADD_VALUES:
1179:   case ADD_ALL_VALUES:
1180:     for (i=0; i<n; i++) {
1181:       idy       = *indicesy++;
1182:       y[idy]   += x[0];
1183:       y[idy+1] += x[1];
1184:       y[idy+2] += x[2];
1185:       y[idy+3] += x[3];
1186:       y[idy+4] += x[4];
1187:       y[idy+5] += x[5];
1188:       y[idy+6] += x[6];
1189:       x        += 7;
1190:     }
1191:     break;
1192: #if !defined(PETSC_USE_COMPLEX)
1193:   case MAX_VALUES:
1194:     for (i=0; i<n; i++) {
1195:       idy      = *indicesy++;
1196:       y[idy]   = PetscMax(y[idy],x[0]);
1197:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1198:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1199:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1200:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1201:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1202:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1203:       x       += 7;
1204:     }
1205: #else
1206:   case MAX_VALUES:
1207: #endif
1208:   case NOT_SET_VALUES:
1209:     break;
1210:   default:
1211:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1212:   }
1213:   return(0);
1214: }

1218: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1219: {
1220:   PetscInt i,idx,idy;

1223:   switch (addv) {
1224:   case INSERT_VALUES:
1225:   case INSERT_ALL_VALUES:
1226:     for (i=0; i<n; i++) {
1227:       idx      = *indicesx++;
1228:       idy      = *indicesy++;
1229:       y[idy]   = x[idx];
1230:       y[idy+1] = x[idx+1];
1231:       y[idy+2] = x[idx+2];
1232:       y[idy+3] = x[idx+3];
1233:       y[idy+4] = x[idx+4];
1234:       y[idy+5] = x[idx+5];
1235:       y[idy+6] = x[idx+6];
1236:     }
1237:     break;
1238:   case ADD_VALUES:
1239:   case ADD_ALL_VALUES:
1240:     for (i=0; i<n; i++) {
1241:       idx       = *indicesx++;
1242:       idy       = *indicesy++;
1243:       y[idy]   += x[idx];
1244:       y[idy+1] += x[idx+1];
1245:       y[idy+2] += x[idx+2];
1246:       y[idy+3] += x[idx+3];
1247:       y[idy+4] += x[idx+4];
1248:       y[idy+5] += x[idx+5];
1249:       y[idy+6] += x[idx+6];
1250:     }
1251:     break;
1252: #if !defined(PETSC_USE_COMPLEX)
1253:   case MAX_VALUES:
1254:     for (i=0; i<n; i++) {
1255:       idx      = *indicesx++;
1256:       idy      = *indicesy++;
1257:       y[idy]   = PetscMax(y[idy],x[idx]);
1258:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1259:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1260:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1261:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1262:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1263:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1264:     }
1265: #else
1266:   case MAX_VALUES:
1267: #endif
1268:   case NOT_SET_VALUES:
1269:     break;
1270:   default:
1271:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1272:   }
1273:   return(0);
1274: }
1275: /* ----------------------------------------------------------------------------------------------- */
1276: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1277: {
1278:   PetscInt i,idx;

1280:   for (i=0; i<n; i++) {
1281:     idx  = *indicesx++;
1282:     y[0] = x[idx];
1283:     y[1] = x[idx+1];
1284:     y[2] = x[idx+2];
1285:     y[3] = x[idx+3];
1286:     y[4] = x[idx+4];
1287:     y[5] = x[idx+5];
1288:     y[6] = x[idx+6];
1289:     y[7] = x[idx+7];
1290:     y   += 8;
1291:   }
1292: }

1296: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1297: {
1298:   PetscInt i,idy;

1301:   switch (addv) {
1302:   case INSERT_VALUES:
1303:   case INSERT_ALL_VALUES:
1304:     for (i=0; i<n; i++) {
1305:       idy      = *indicesy++;
1306:       y[idy]   = x[0];
1307:       y[idy+1] = x[1];
1308:       y[idy+2] = x[2];
1309:       y[idy+3] = x[3];
1310:       y[idy+4] = x[4];
1311:       y[idy+5] = x[5];
1312:       y[idy+6] = x[6];
1313:       y[idy+7] = x[7];
1314:       x       += 8;
1315:     }
1316:     break;
1317:   case ADD_VALUES:
1318:   case ADD_ALL_VALUES:
1319:     for (i=0; i<n; i++) {
1320:       idy       = *indicesy++;
1321:       y[idy]   += x[0];
1322:       y[idy+1] += x[1];
1323:       y[idy+2] += x[2];
1324:       y[idy+3] += x[3];
1325:       y[idy+4] += x[4];
1326:       y[idy+5] += x[5];
1327:       y[idy+6] += x[6];
1328:       y[idy+7] += x[7];
1329:       x        += 8;
1330:     }
1331:     break;
1332: #if !defined(PETSC_USE_COMPLEX)
1333:   case MAX_VALUES:
1334:     for (i=0; i<n; i++) {
1335:       idy      = *indicesy++;
1336:       y[idy]   = PetscMax(y[idy],x[0]);
1337:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1338:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1339:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1340:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1341:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1342:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1343:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1344:       x       += 8;
1345:     }
1346: #else
1347:   case MAX_VALUES:
1348: #endif
1349:   case NOT_SET_VALUES:
1350:     break;
1351:   default:
1352:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1353:   }
1354:   return(0);
1355: }

1359: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1360: {
1361:   PetscInt i,idx,idy;

1364:   switch (addv) {
1365:   case INSERT_VALUES:
1366:   case INSERT_ALL_VALUES:
1367:     for (i=0; i<n; i++) {
1368:       idx      = *indicesx++;
1369:       idy      = *indicesy++;
1370:       y[idy]   = x[idx];
1371:       y[idy+1] = x[idx+1];
1372:       y[idy+2] = x[idx+2];
1373:       y[idy+3] = x[idx+3];
1374:       y[idy+4] = x[idx+4];
1375:       y[idy+5] = x[idx+5];
1376:       y[idy+6] = x[idx+6];
1377:       y[idy+7] = x[idx+7];
1378:     }
1379:     break;
1380:   case ADD_VALUES:
1381:   case ADD_ALL_VALUES:
1382:     for (i=0; i<n; i++) {
1383:       idx       = *indicesx++;
1384:       idy       = *indicesy++;
1385:       y[idy]   += x[idx];
1386:       y[idy+1] += x[idx+1];
1387:       y[idy+2] += x[idx+2];
1388:       y[idy+3] += x[idx+3];
1389:       y[idy+4] += x[idx+4];
1390:       y[idy+5] += x[idx+5];
1391:       y[idy+6] += x[idx+6];
1392:       y[idy+7] += x[idx+7];
1393:     }
1394:     break;
1395: #if !defined(PETSC_USE_COMPLEX)
1396:   case MAX_VALUES:
1397:     for (i=0; i<n; i++) {
1398:       idx      = *indicesx++;
1399:       idy      = *indicesy++;
1400:       y[idy]   = PetscMax(y[idy],x[idx]);
1401:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1402:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1403:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1404:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1405:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1406:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1407:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1408:     }
1409: #else
1410:   case MAX_VALUES:
1411: #endif
1412:   case NOT_SET_VALUES:
1413:     break;
1414:   default:
1415:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1416:   }
1417:   return(0);
1418: }

1420: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1421: {
1422:   PetscInt i,idx;

1424:   for (i=0; i<n; i++) {
1425:     idx   = *indicesx++;
1426:     y[0]  = x[idx];
1427:     y[1]  = x[idx+1];
1428:     y[2]  = x[idx+2];
1429:     y[3]  = x[idx+3];
1430:     y[4]  = x[idx+4];
1431:     y[5]  = x[idx+5];
1432:     y[6]  = x[idx+6];
1433:     y[7]  = x[idx+7];
1434:     y[8]  = x[idx+8];
1435:     y    += 9;
1436:   }
1437: }

1441: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1442: {
1443:   PetscInt i,idy;

1446:   switch (addv) {
1447:   case INSERT_VALUES:
1448:   case INSERT_ALL_VALUES:
1449:     for (i=0; i<n; i++) {
1450:       idy       = *indicesy++;
1451:       y[idy]    = x[0];
1452:       y[idy+1]  = x[1];
1453:       y[idy+2]  = x[2];
1454:       y[idy+3]  = x[3];
1455:       y[idy+4]  = x[4];
1456:       y[idy+5]  = x[5];
1457:       y[idy+6]  = x[6];
1458:       y[idy+7]  = x[7];
1459:       y[idy+8]  = x[8];
1460:       x        += 9;
1461:     }
1462:     break;
1463:   case ADD_VALUES:
1464:   case ADD_ALL_VALUES:
1465:     for (i=0; i<n; i++) {
1466:       idy        = *indicesy++;
1467:       y[idy]    += x[0];
1468:       y[idy+1]  += x[1];
1469:       y[idy+2]  += x[2];
1470:       y[idy+3]  += x[3];
1471:       y[idy+4]  += x[4];
1472:       y[idy+5]  += x[5];
1473:       y[idy+6]  += x[6];
1474:       y[idy+7]  += x[7];
1475:       y[idy+8]  += x[8];
1476:       x         += 9;
1477:     }
1478:     break;
1479: #if !defined(PETSC_USE_COMPLEX)
1480:   case MAX_VALUES:
1481:     for (i=0; i<n; i++) {
1482:       idy       = *indicesy++;
1483:       y[idy]    = PetscMax(y[idy],x[0]);
1484:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1485:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1486:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1487:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1488:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1489:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1490:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1491:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1492:       x        += 9;
1493:     }
1494: #else
1495:   case MAX_VALUES:
1496: #endif
1497:   case NOT_SET_VALUES:
1498:     break;
1499:   default:
1500:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1501:   }
1502:   return(0);
1503: }

1507: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1508: {
1509:   PetscInt i,idx,idy;

1512:   switch (addv) {
1513:   case INSERT_VALUES:
1514:   case INSERT_ALL_VALUES:
1515:     for (i=0; i<n; i++) {
1516:       idx       = *indicesx++;
1517:       idy       = *indicesy++;
1518:       y[idy]    = x[idx];
1519:       y[idy+1]  = x[idx+1];
1520:       y[idy+2]  = x[idx+2];
1521:       y[idy+3]  = x[idx+3];
1522:       y[idy+4]  = x[idx+4];
1523:       y[idy+5]  = x[idx+5];
1524:       y[idy+6]  = x[idx+6];
1525:       y[idy+7]  = x[idx+7];
1526:       y[idy+8]  = x[idx+8];
1527:     }
1528:     break;
1529:   case ADD_VALUES:
1530:   case ADD_ALL_VALUES:
1531:     for (i=0; i<n; i++) {
1532:       idx        = *indicesx++;
1533:       idy        = *indicesy++;
1534:       y[idy]    += x[idx];
1535:       y[idy+1]  += x[idx+1];
1536:       y[idy+2]  += x[idx+2];
1537:       y[idy+3]  += x[idx+3];
1538:       y[idy+4]  += x[idx+4];
1539:       y[idy+5]  += x[idx+5];
1540:       y[idy+6]  += x[idx+6];
1541:       y[idy+7]  += x[idx+7];
1542:       y[idy+8]  += x[idx+8];
1543:     }
1544:     break;
1545: #if !defined(PETSC_USE_COMPLEX)
1546:   case MAX_VALUES:
1547:     for (i=0; i<n; i++) {
1548:       idx       = *indicesx++;
1549:       idy       = *indicesy++;
1550:       y[idy]    = PetscMax(y[idy],x[idx]);
1551:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1552:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1553:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1554:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1555:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1556:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1557:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1558:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1559:     }
1560: #else
1561:   case MAX_VALUES:
1562: #endif
1563:   case NOT_SET_VALUES:
1564:     break;
1565:   default:
1566:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1567:   }
1568:   return(0);
1569: }

1571: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1572: {
1573:   PetscInt i,idx;

1575:   for (i=0; i<n; i++) {
1576:     idx   = *indicesx++;
1577:     y[0]  = x[idx];
1578:     y[1]  = x[idx+1];
1579:     y[2]  = x[idx+2];
1580:     y[3]  = x[idx+3];
1581:     y[4]  = x[idx+4];
1582:     y[5]  = x[idx+5];
1583:     y[6]  = x[idx+6];
1584:     y[7]  = x[idx+7];
1585:     y[8]  = x[idx+8];
1586:     y[9]  = x[idx+9];
1587:     y    += 10;
1588:   }
1589: }

1593: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1594: {
1595:   PetscInt i,idy;

1598:   switch (addv) {
1599:   case INSERT_VALUES:
1600:   case INSERT_ALL_VALUES:
1601:     for (i=0; i<n; i++) {
1602:       idy       = *indicesy++;
1603:       y[idy]    = x[0];
1604:       y[idy+1]  = x[1];
1605:       y[idy+2]  = x[2];
1606:       y[idy+3]  = x[3];
1607:       y[idy+4]  = x[4];
1608:       y[idy+5]  = x[5];
1609:       y[idy+6]  = x[6];
1610:       y[idy+7]  = x[7];
1611:       y[idy+8]  = x[8];
1612:       y[idy+9]  = x[9];
1613:       x        += 10;
1614:     }
1615:     break;
1616:   case ADD_VALUES:
1617:   case ADD_ALL_VALUES:
1618:     for (i=0; i<n; i++) {
1619:       idy        = *indicesy++;
1620:       y[idy]    += x[0];
1621:       y[idy+1]  += x[1];
1622:       y[idy+2]  += x[2];
1623:       y[idy+3]  += x[3];
1624:       y[idy+4]  += x[4];
1625:       y[idy+5]  += x[5];
1626:       y[idy+6]  += x[6];
1627:       y[idy+7]  += x[7];
1628:       y[idy+8]  += x[8];
1629:       y[idy+9]  += x[9];
1630:       x         += 10;
1631:     }
1632:     break;
1633: #if !defined(PETSC_USE_COMPLEX)
1634:   case MAX_VALUES:
1635:     for (i=0; i<n; i++) {
1636:       idy       = *indicesy++;
1637:       y[idy]    = PetscMax(y[idy],x[0]);
1638:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1639:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1640:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1641:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1642:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1643:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1644:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1645:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1646:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1647:       x        += 10;
1648:     }
1649: #else
1650:   case MAX_VALUES:
1651: #endif
1652:   case NOT_SET_VALUES:
1653:     break;
1654:   default:
1655:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1656:   }
1657:   return(0);
1658: }

1662: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1663: {
1664:   PetscInt i,idx,idy;

1667:   switch (addv) {
1668:   case INSERT_VALUES:
1669:   case INSERT_ALL_VALUES:
1670:     for (i=0; i<n; i++) {
1671:       idx       = *indicesx++;
1672:       idy       = *indicesy++;
1673:       y[idy]    = x[idx];
1674:       y[idy+1]  = x[idx+1];
1675:       y[idy+2]  = x[idx+2];
1676:       y[idy+3]  = x[idx+3];
1677:       y[idy+4]  = x[idx+4];
1678:       y[idy+5]  = x[idx+5];
1679:       y[idy+6]  = x[idx+6];
1680:       y[idy+7]  = x[idx+7];
1681:       y[idy+8]  = x[idx+8];
1682:       y[idy+9]  = x[idx+9];
1683:     }
1684:     break;
1685:   case ADD_VALUES:
1686:   case ADD_ALL_VALUES:
1687:     for (i=0; i<n; i++) {
1688:       idx        = *indicesx++;
1689:       idy        = *indicesy++;
1690:       y[idy]    += x[idx];
1691:       y[idy+1]  += x[idx+1];
1692:       y[idy+2]  += x[idx+2];
1693:       y[idy+3]  += x[idx+3];
1694:       y[idy+4]  += x[idx+4];
1695:       y[idy+5]  += x[idx+5];
1696:       y[idy+6]  += x[idx+6];
1697:       y[idy+7]  += x[idx+7];
1698:       y[idy+8]  += x[idx+8];
1699:       y[idy+9]  += x[idx+9];
1700:     }
1701:     break;
1702: #if !defined(PETSC_USE_COMPLEX)
1703:   case MAX_VALUES:
1704:     for (i=0; i<n; i++) {
1705:       idx       = *indicesx++;
1706:       idy       = *indicesy++;
1707:       y[idy]    = PetscMax(y[idy],x[idx]);
1708:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1709:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1710:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1711:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1712:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1713:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1714:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1715:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1716:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1717:     }
1718: #else
1719:   case MAX_VALUES:
1720: #endif
1721:   case NOT_SET_VALUES:
1722:     break;
1723:   default:
1724:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1725:   }
1726:   return(0);
1727: }

1729: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1730: {
1731:   PetscInt i,idx;

1733:   for (i=0; i<n; i++) {
1734:     idx   = *indicesx++;
1735:     y[0]  = x[idx];
1736:     y[1]  = x[idx+1];
1737:     y[2]  = x[idx+2];
1738:     y[3]  = x[idx+3];
1739:     y[4]  = x[idx+4];
1740:     y[5]  = x[idx+5];
1741:     y[6]  = x[idx+6];
1742:     y[7]  = x[idx+7];
1743:     y[8]  = x[idx+8];
1744:     y[9]  = x[idx+9];
1745:     y[10] = x[idx+10];
1746:     y    += 11;
1747:   }
1748: }

1752: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1753: {
1754:   PetscInt i,idy;

1757:   switch (addv) {
1758:   case INSERT_VALUES:
1759:   case INSERT_ALL_VALUES:
1760:     for (i=0; i<n; i++) {
1761:       idy       = *indicesy++;
1762:       y[idy]    = x[0];
1763:       y[idy+1]  = x[1];
1764:       y[idy+2]  = x[2];
1765:       y[idy+3]  = x[3];
1766:       y[idy+4]  = x[4];
1767:       y[idy+5]  = x[5];
1768:       y[idy+6]  = x[6];
1769:       y[idy+7]  = x[7];
1770:       y[idy+8]  = x[8];
1771:       y[idy+9]  = x[9];
1772:       y[idy+10] = x[10];
1773:       x        += 11;
1774:     }
1775:     break;
1776:   case ADD_VALUES:
1777:   case ADD_ALL_VALUES:
1778:     for (i=0; i<n; i++) {
1779:       idy        = *indicesy++;
1780:       y[idy]    += x[0];
1781:       y[idy+1]  += x[1];
1782:       y[idy+2]  += x[2];
1783:       y[idy+3]  += x[3];
1784:       y[idy+4]  += x[4];
1785:       y[idy+5]  += x[5];
1786:       y[idy+6]  += x[6];
1787:       y[idy+7]  += x[7];
1788:       y[idy+8]  += x[8];
1789:       y[idy+9]  += x[9];
1790:       y[idy+10] += x[10];
1791:       x         += 11;
1792:     }
1793:     break;
1794: #if !defined(PETSC_USE_COMPLEX)
1795:   case MAX_VALUES:
1796:     for (i=0; i<n; i++) {
1797:       idy       = *indicesy++;
1798:       y[idy]    = PetscMax(y[idy],x[0]);
1799:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1800:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1801:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1802:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1803:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1804:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1805:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1806:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1807:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1808:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1809:       x        += 11;
1810:     }
1811: #else
1812:   case MAX_VALUES:
1813: #endif
1814:   case NOT_SET_VALUES:
1815:     break;
1816:   default:
1817:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1818:   }
1819:   return(0);
1820: }

1824: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1825: {
1826:   PetscInt i,idx,idy;

1829:   switch (addv) {
1830:   case INSERT_VALUES:
1831:   case INSERT_ALL_VALUES:
1832:     for (i=0; i<n; i++) {
1833:       idx       = *indicesx++;
1834:       idy       = *indicesy++;
1835:       y[idy]    = x[idx];
1836:       y[idy+1]  = x[idx+1];
1837:       y[idy+2]  = x[idx+2];
1838:       y[idy+3]  = x[idx+3];
1839:       y[idy+4]  = x[idx+4];
1840:       y[idy+5]  = x[idx+5];
1841:       y[idy+6]  = x[idx+6];
1842:       y[idy+7]  = x[idx+7];
1843:       y[idy+8]  = x[idx+8];
1844:       y[idy+9]  = x[idx+9];
1845:       y[idy+10] = x[idx+10];
1846:     }
1847:     break;
1848:   case ADD_VALUES:
1849:   case ADD_ALL_VALUES:
1850:     for (i=0; i<n; i++) {
1851:       idx        = *indicesx++;
1852:       idy        = *indicesy++;
1853:       y[idy]    += x[idx];
1854:       y[idy+1]  += x[idx+1];
1855:       y[idy+2]  += x[idx+2];
1856:       y[idy+3]  += x[idx+3];
1857:       y[idy+4]  += x[idx+4];
1858:       y[idy+5]  += x[idx+5];
1859:       y[idy+6]  += x[idx+6];
1860:       y[idy+7]  += x[idx+7];
1861:       y[idy+8]  += x[idx+8];
1862:       y[idy+9]  += x[idx+9];
1863:       y[idy+10] += x[idx+10];
1864:     }
1865:     break;
1866: #if !defined(PETSC_USE_COMPLEX)
1867:   case MAX_VALUES:
1868:     for (i=0; i<n; i++) {
1869:       idx       = *indicesx++;
1870:       idy       = *indicesy++;
1871:       y[idy]    = PetscMax(y[idy],x[idx]);
1872:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1873:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1874:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1875:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1876:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1877:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1878:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1879:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1880:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1881:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1882:     }
1883: #else
1884:   case MAX_VALUES:
1885: #endif
1886:   case NOT_SET_VALUES:
1887:     break;
1888:   default:
1889:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1890:   }
1891:   return(0);
1892: }

1894: /* ----------------------------------------------------------------------------------------------- */
1895: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1896: {
1897:   PetscInt i,idx;

1899:   for (i=0; i<n; i++) {
1900:     idx   = *indicesx++;
1901:     y[0]  = x[idx];
1902:     y[1]  = x[idx+1];
1903:     y[2]  = x[idx+2];
1904:     y[3]  = x[idx+3];
1905:     y[4]  = x[idx+4];
1906:     y[5]  = x[idx+5];
1907:     y[6]  = x[idx+6];
1908:     y[7]  = x[idx+7];
1909:     y[8]  = x[idx+8];
1910:     y[9]  = x[idx+9];
1911:     y[10] = x[idx+10];
1912:     y[11] = x[idx+11];
1913:     y    += 12;
1914:   }
1915: }

1919: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1920: {
1921:   PetscInt i,idy;

1924:   switch (addv) {
1925:   case INSERT_VALUES:
1926:   case INSERT_ALL_VALUES:
1927:     for (i=0; i<n; i++) {
1928:       idy       = *indicesy++;
1929:       y[idy]    = x[0];
1930:       y[idy+1]  = x[1];
1931:       y[idy+2]  = x[2];
1932:       y[idy+3]  = x[3];
1933:       y[idy+4]  = x[4];
1934:       y[idy+5]  = x[5];
1935:       y[idy+6]  = x[6];
1936:       y[idy+7]  = x[7];
1937:       y[idy+8]  = x[8];
1938:       y[idy+9]  = x[9];
1939:       y[idy+10] = x[10];
1940:       y[idy+11] = x[11];
1941:       x        += 12;
1942:     }
1943:     break;
1944:   case ADD_VALUES:
1945:   case ADD_ALL_VALUES:
1946:     for (i=0; i<n; i++) {
1947:       idy        = *indicesy++;
1948:       y[idy]    += x[0];
1949:       y[idy+1]  += x[1];
1950:       y[idy+2]  += x[2];
1951:       y[idy+3]  += x[3];
1952:       y[idy+4]  += x[4];
1953:       y[idy+5]  += x[5];
1954:       y[idy+6]  += x[6];
1955:       y[idy+7]  += x[7];
1956:       y[idy+8]  += x[8];
1957:       y[idy+9]  += x[9];
1958:       y[idy+10] += x[10];
1959:       y[idy+11] += x[11];
1960:       x         += 12;
1961:     }
1962:     break;
1963: #if !defined(PETSC_USE_COMPLEX)
1964:   case MAX_VALUES:
1965:     for (i=0; i<n; i++) {
1966:       idy       = *indicesy++;
1967:       y[idy]    = PetscMax(y[idy],x[0]);
1968:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1969:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1970:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1971:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1972:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1973:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1974:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1975:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1976:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1977:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1978:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1979:       x        += 12;
1980:     }
1981: #else
1982:   case MAX_VALUES:
1983: #endif
1984:   case NOT_SET_VALUES:
1985:     break;
1986:   default:
1987:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1988:   }
1989:   return(0);
1990: }

1994: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1995: {
1996:   PetscInt i,idx,idy;

1999:   switch (addv) {
2000:   case INSERT_VALUES:
2001:   case INSERT_ALL_VALUES:
2002:     for (i=0; i<n; i++) {
2003:       idx       = *indicesx++;
2004:       idy       = *indicesy++;
2005:       y[idy]    = x[idx];
2006:       y[idy+1]  = x[idx+1];
2007:       y[idy+2]  = x[idx+2];
2008:       y[idy+3]  = x[idx+3];
2009:       y[idy+4]  = x[idx+4];
2010:       y[idy+5]  = x[idx+5];
2011:       y[idy+6]  = x[idx+6];
2012:       y[idy+7]  = x[idx+7];
2013:       y[idy+8]  = x[idx+8];
2014:       y[idy+9]  = x[idx+9];
2015:       y[idy+10] = x[idx+10];
2016:       y[idy+11] = x[idx+11];
2017:     }
2018:     break;
2019:   case ADD_VALUES:
2020:   case ADD_ALL_VALUES:
2021:     for (i=0; i<n; i++) {
2022:       idx        = *indicesx++;
2023:       idy        = *indicesy++;
2024:       y[idy]    += x[idx];
2025:       y[idy+1]  += x[idx+1];
2026:       y[idy+2]  += x[idx+2];
2027:       y[idy+3]  += x[idx+3];
2028:       y[idy+4]  += x[idx+4];
2029:       y[idy+5]  += x[idx+5];
2030:       y[idy+6]  += x[idx+6];
2031:       y[idy+7]  += x[idx+7];
2032:       y[idy+8]  += x[idx+8];
2033:       y[idy+9]  += x[idx+9];
2034:       y[idy+10] += x[idx+10];
2035:       y[idy+11] += x[idx+11];
2036:     }
2037:     break;
2038: #if !defined(PETSC_USE_COMPLEX)
2039:   case MAX_VALUES:
2040:     for (i=0; i<n; i++) {
2041:       idx       = *indicesx++;
2042:       idy       = *indicesy++;
2043:       y[idy]    = PetscMax(y[idy],x[idx]);
2044:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
2045:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
2046:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
2047:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
2048:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
2049:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
2050:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
2051:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
2052:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
2053:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
2054:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
2055:     }
2056: #else
2057:   case MAX_VALUES:
2058: #endif
2059:   case NOT_SET_VALUES:
2060:     break;
2061:   default:
2062:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2063:   }
2064:   return(0);
2065: }

2067: /* ----------------------------------------------------------------------------------------------- */
2068: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
2069: {
2070:   PetscInt       i,idx;

2072:   for (i=0; i<n; i++) {
2073:     idx   = *indicesx++;
2074:     PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
2075:     y    += bs;
2076:   }
2077: }

2081: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2082: {
2083:   PetscInt i,idy,j;

2086:   switch (addv) {
2087:   case INSERT_VALUES:
2088:   case INSERT_ALL_VALUES:
2089:     for (i=0; i<n; i++) {
2090:       idy       = *indicesy++;
2091:       PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
2092:       x        += bs;
2093:     }
2094:     break;
2095:   case ADD_VALUES:
2096:   case ADD_ALL_VALUES:
2097:     for (i=0; i<n; i++) {
2098:       idy        = *indicesy++;
2099:       for (j=0; j<bs; j++) y[idy+j] += x[j];
2100:       x         += bs;
2101:     }
2102:     break;
2103: #if !defined(PETSC_USE_COMPLEX)
2104:   case MAX_VALUES:
2105:     for (i=0; i<n; i++) {
2106:       idy = *indicesy++;
2107:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2108:       x  += bs;
2109:     }
2110: #else
2111:   case MAX_VALUES:
2112: #endif
2113:   case NOT_SET_VALUES:
2114:     break;
2115:   default:
2116:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2117:   }
2118:   return(0);
2119: }

2123: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2124: {
2125:   PetscInt i,idx,idy,j;

2128:   switch (addv) {
2129:   case INSERT_VALUES:
2130:   case INSERT_ALL_VALUES:
2131:     for (i=0; i<n; i++) {
2132:       idx       = *indicesx++;
2133:       idy       = *indicesy++;
2134:       PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2135:     }
2136:     break;
2137:   case ADD_VALUES:
2138:   case ADD_ALL_VALUES:
2139:     for (i=0; i<n; i++) {
2140:       idx        = *indicesx++;
2141:       idy        = *indicesy++;
2142:       for (j=0; j<bs; j++ )  y[idy+j] += x[idx+j];
2143:     }
2144:     break;
2145: #if !defined(PETSC_USE_COMPLEX)
2146:   case MAX_VALUES:
2147:     for (i=0; i<n; i++) {
2148:       idx       = *indicesx++;
2149:       idy       = *indicesy++;
2150:       for (j=0; j<bs; j++ )  y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2151:     }
2152: #else
2153:   case MAX_VALUES:
2154: #endif
2155:   case NOT_SET_VALUES:
2156:     break;
2157:   default:
2158:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2159:   }
2160:   return(0);
2161: }

2163: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2164: #define BS 1
2165: #include <../src/vec/vec/utils/vpscat.h>
2166: #define BS 2
2167: #include <../src/vec/vec/utils/vpscat.h>
2168: #define BS 3
2169: #include <../src/vec/vec/utils/vpscat.h>
2170: #define BS 4
2171: #include <../src/vec/vec/utils/vpscat.h>
2172: #define BS 5
2173: #include <../src/vec/vec/utils/vpscat.h>
2174: #define BS 6
2175: #include <../src/vec/vec/utils/vpscat.h>
2176: #define BS 7
2177: #include <../src/vec/vec/utils/vpscat.h>
2178: #define BS 8
2179: #include <../src/vec/vec/utils/vpscat.h>
2180: #define BS 9
2181: #include <../src/vec/vec/utils/vpscat.h>
2182: #define BS 10
2183: #include <../src/vec/vec/utils/vpscat.h>
2184: #define BS 11
2185: #include <../src/vec/vec/utils/vpscat.h>
2186: #define BS 12
2187: #include <../src/vec/vec/utils/vpscat.h>
2188: #define BS bs
2189: #include <../src/vec/vec/utils/vpscat.h>

2191: /* ==========================================================================================*/

2193: /*              create parallel to sequential scatter context                           */

2195: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

2199: /*@
2200:      VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.

2202:      Collective on VecScatter

2204:    Input Parameters:
2205: +     VecScatter - obtained with VecScatterCreateEmpty()
2206: .     nsends -
2207: .     sendSizes -
2208: .     sendProcs -
2209: .     sendIdx - indices where the sent entries are obtained from (in local, on process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
2210: .     nrecvs - number of receives to expect
2211: .     recvSizes -
2212: .     recvProcs - processes who are sending to me
2213: .     recvIdx - indices of where received entries are to be put, (in local, on process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
2214: -     bs - size of block

2216:      Notes:  sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
2217:       to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.

2219:        Probably does not handle sends to self properly. It should remove those from the counts that are used
2220:       in allocating space inside of the from struct

2222:   Level: intermediate

2224: @*/
2225: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
2226: {
2227:   VecScatter_MPI_General *from, *to;
2228:   PetscInt               sendSize, recvSize;
2229:   PetscInt               n, i;
2230:   PetscErrorCode         ierr;

2232:   /* allocate entire send scatter context */
2233:   PetscNewLog(ctx,&to);
2234:   to->n = nsends;
2235:   for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];

2237:   PetscMalloc1(to->n,&to->requests);
2238:   PetscMalloc4(bs*sendSize,&to->values,sendSize,&to->indices,to->n+1,&to->starts,to->n,&to->procs);
2239:   PetscMalloc2(PetscMax(to->n,nrecvs),&to->sstatus,PetscMax(to->n,nrecvs),&to->rstatus);

2241:   to->starts[0] = 0;
2242:   for (n = 0; n < to->n; n++) {
2243:     if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
2244:     to->starts[n+1] = to->starts[n] + sendSizes[n];
2245:     to->procs[n]    = sendProcs[n];
2246:     for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
2247:   }
2248:   ctx->todata = (void*) to;

2250:   /* allocate entire receive scatter context */
2251:   PetscNewLog(ctx,&from);
2252:   from->n = nrecvs;
2253:   for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];

2255:   PetscMalloc1(from->n,&from->requests);
2256:   PetscMalloc4(bs*recvSize,&from->values,recvSize,&from->indices,from->n+1,&from->starts,from->n,&from->procs);

2258:   from->starts[0] = 0;
2259:   for (n = 0; n < from->n; n++) {
2260:     if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
2261:     from->starts[n+1] = from->starts[n] + recvSizes[n];
2262:     from->procs[n]    = recvProcs[n];
2263:     for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
2264:   }
2265:   ctx->fromdata = (void*)from;

2267:   /* No local scatter optimization */
2268:   from->local.n                    = 0;
2269:   from->local.vslots               = 0;
2270:   to->local.n                      = 0;
2271:   to->local.vslots                 = 0;
2272:   from->local.nonmatching_computed = PETSC_FALSE;
2273:   from->local.n_nonmatching        = 0;
2274:   from->local.slots_nonmatching    = 0;
2275:   to->local.nonmatching_computed   = PETSC_FALSE;
2276:   to->local.n_nonmatching          = 0;
2277:   to->local.slots_nonmatching      = 0;

2279:   from->type = VEC_SCATTER_MPI_GENERAL;
2280:   to->type   = VEC_SCATTER_MPI_GENERAL;
2281:   from->bs   = bs;
2282:   to->bs     = bs;
2283:   VecScatterCreateCommon_PtoS(from, to, ctx);

2285:   /* mark lengths as negative so it won't check local vector lengths */
2286:   ctx->from_n = ctx->to_n = -1;
2287:   return(0);
2288: }

2290: /*
2291:    bs indicates how many elements there are in each block. Normally this would be 1.

2293:    contains check that PetscMPIInt can handle the sizes needed
2294: */
2297: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2298: {
2299:   VecScatter_MPI_General *from,*to;
2300:   PetscMPIInt            size,rank,imdex,tag,n;
2301:   PetscInt               *source = NULL,*owners = NULL,nxr;
2302:   PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
2303:   PetscMPIInt            *nprocs = NULL,nrecvs;
2304:   PetscInt               i,j,idx,nsends;
2305:   PetscMPIInt            *owner = NULL;
2306:   PetscInt               *starts = NULL,count,slen;
2307:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2308:   PetscMPIInt            *onodes1,*olengths1;
2309:   MPI_Comm               comm;
2310:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2311:   MPI_Status             recv_status,*send_status;
2312:   PetscErrorCode         ierr;

2315:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2316:   PetscObjectGetComm((PetscObject)xin,&comm);
2317:   MPI_Comm_rank(comm,&rank);
2318:   MPI_Comm_size(comm,&size);
2319:   owners = xin->map->range;
2320:   VecGetSize(yin,&lengthy);
2321:   VecGetSize(xin,&lengthx);

2323:   /*  first count number of contributors to each processor */
2324:   PetscMalloc2(size,&nprocs,nx,&owner);
2325:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2327:   j      = 0;
2328:   nsends = 0;
2329:   for (i=0; i<nx; i++) {
2330:     idx = bs*inidx[i];
2331:     if (idx < owners[j]) j = 0;
2332:     for (; j<size; j++) {
2333:       if (idx < owners[j+1]) {
2334:         if (!nprocs[j]++) nsends++;
2335:         owner[i] = j;
2336:         break;
2337:       }
2338:     }
2339:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2340:   }

2342:   nprocslocal  = nprocs[rank];
2343:   nprocs[rank] = 0;
2344:   if (nprocslocal) nsends--;
2345:   /* inform other processors of number of messages and max length*/
2346:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2347:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2348:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2349:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2351:   /* post receives:   */
2352:   PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2353:   count = 0;
2354:   for (i=0; i<nrecvs; i++) {
2355:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2356:     count += olengths1[i];
2357:   }

2359:   /* do sends:
2360:      1) starts[i] gives the starting index in svalues for stuff going to
2361:      the ith processor
2362:   */
2363:   nxr = 0;
2364:   for (i=0; i<nx; i++) {
2365:     if (owner[i] != rank) nxr++;
2366:   }
2367:   PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);

2369:   starts[0]  = 0;
2370:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2371:   for (i=0; i<nx; i++) {
2372:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2373:   }
2374:   starts[0] = 0;
2375:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2376:   count = 0;
2377:   for (i=0; i<size; i++) {
2378:     if (nprocs[i]) {
2379:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2380:     }
2381:   }

2383:   /*  wait on receives */
2384:   count = nrecvs;
2385:   slen  = 0;
2386:   while (count) {
2387:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2388:     /* unpack receives into our local space */
2389:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2390:     slen += n;
2391:     count--;
2392:   }

2394:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);

2396:   /* allocate entire send scatter context */
2397:   PetscNewLog(ctx,&to);
2398:   to->n = nrecvs;

2400:   PetscMalloc1(nrecvs,&to->requests);
2401:   PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2402:   PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);

2404:   ctx->todata   = (void*)to;
2405:   to->starts[0] = 0;

2407:   if (nrecvs) {
2408:     /* move the data into the send scatter */
2409:     base     = owners[rank];
2410:     rsvalues = rvalues;
2411:     for (i=0; i<nrecvs; i++) {
2412:       to->starts[i+1] = to->starts[i] + olengths1[i];
2413:       to->procs[i]    = onodes1[i];
2414:       values = rsvalues;
2415:       rsvalues += olengths1[i];
2416:       for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2417:     }
2418:   }
2419:   PetscFree(olengths1);
2420:   PetscFree(onodes1);
2421:   PetscFree3(rvalues,source,recv_waits);

2423:   /* allocate entire receive scatter context */
2424:   PetscNewLog(ctx,&from);
2425:   from->n = nsends;

2427:   PetscMalloc1(nsends,&from->requests);
2428:   PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2429:   ctx->fromdata = (void*)from;

2431:   /* move data into receive scatter */
2432:   PetscMalloc2(size,&lowner,nsends+1,&start);
2433:   count = 0; from->starts[0] = start[0] = 0;
2434:   for (i=0; i<size; i++) {
2435:     if (nprocs[i]) {
2436:       lowner[i]            = count;
2437:       from->procs[count++] = i;
2438:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
2439:     }
2440:   }

2442:   for (i=0; i<nx; i++) {
2443:     if (owner[i] != rank) {
2444:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2445:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2446:     }
2447:   }
2448:   PetscFree2(lowner,start);
2449:   PetscFree2(nprocs,owner);

2451:   /* wait on sends */
2452:   if (nsends) {
2453:     PetscMalloc1(nsends,&send_status);
2454:     MPI_Waitall(nsends,send_waits,send_status);
2455:     PetscFree(send_status);
2456:   }
2457:   PetscFree3(svalues,send_waits,starts);

2459:   if (nprocslocal) {
2460:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2461:     /* we have a scatter to ourselves */
2462:     PetscMalloc1(nt,&to->local.vslots);
2463:     PetscMalloc1(nt,&from->local.vslots);
2464:     nt   = 0;
2465:     for (i=0; i<nx; i++) {
2466:       idx = bs*inidx[i];
2467:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2468:         to->local.vslots[nt]     = idx - owners[rank];
2469:         from->local.vslots[nt++] = bs*inidy[i];
2470:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2471:       }
2472:     }
2473:     PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2474:   } else {
2475:     from->local.n      = 0;
2476:     from->local.vslots = 0;
2477:     to->local.n        = 0;
2478:     to->local.vslots   = 0;
2479:   }

2481:   from->local.nonmatching_computed = PETSC_FALSE;
2482:   from->local.n_nonmatching        = 0;
2483:   from->local.slots_nonmatching    = 0;
2484:   to->local.nonmatching_computed   = PETSC_FALSE;
2485:   to->local.n_nonmatching          = 0;
2486:   to->local.slots_nonmatching      = 0;

2488:   from->type = VEC_SCATTER_MPI_GENERAL;
2489:   to->type   = VEC_SCATTER_MPI_GENERAL;
2490:   from->bs   = bs;
2491:   to->bs     = bs;

2493:   VecScatterCreateCommon_PtoS(from,to,ctx);
2494:   return(0);
2495: }

2497: /*
2498:    bs indicates how many elements there are in each block. Normally this would be 1.
2499: */
2502: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2503: {
2504:   MPI_Comm       comm;
2505:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2506:   PetscInt       bs   = to->bs;
2507:   PetscMPIInt    size;
2508:   PetscInt       i, n;

2512:   PetscObjectGetComm((PetscObject)ctx,&comm);
2513:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2514:   ctx->destroy = VecScatterDestroy_PtoP;

2516:   ctx->reproduce = PETSC_FALSE;
2517:   to->sendfirst  = PETSC_FALSE;
2518:   PetscOptionsGetBool(NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2519:   PetscOptionsGetBool(NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2520:   from->sendfirst = to->sendfirst;

2522:   MPI_Comm_size(comm,&size);
2523:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2524:   to->contiq = PETSC_FALSE;
2525:   n = from->starts[from->n];
2526:   from->contiq = PETSC_TRUE;
2527:   for (i=1; i<n; i++) {
2528:     if (from->indices[i] != from->indices[i-1] + bs) {
2529:       from->contiq = PETSC_FALSE;
2530:       break;
2531:     }
2532:   }

2534:   to->use_alltoallv = PETSC_FALSE;
2535:   PetscOptionsGetBool(NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2536:   from->use_alltoallv = to->use_alltoallv;
2537:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2538: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
2539:   if (to->use_alltoallv) {
2540:     to->use_alltoallw = PETSC_FALSE;
2541:     PetscOptionsGetBool(NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2542:   }
2543:   from->use_alltoallw = to->use_alltoallw;
2544:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2545: #endif

2547: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2548:   to->use_window = PETSC_FALSE;
2549:   PetscOptionsGetBool(NULL,"-vecscatter_window",&to->use_window,NULL);
2550:   from->use_window = to->use_window;
2551: #endif

2553:   if (to->use_alltoallv) {

2555:     PetscMalloc2(size,&to->counts,size,&to->displs);
2556:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2557:     for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);

2559:     to->displs[0] = 0;
2560:     for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];

2562:     PetscMalloc2(size,&from->counts,size,&from->displs);
2563:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2564:     for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2565:     from->displs[0] = 0;
2566:     for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];

2568: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2569:     if (to->use_alltoallw) {
2570:       PetscMPIInt mpibs, mpilen;

2572:       ctx->packtogether = PETSC_FALSE;
2573:       PetscMPIIntCast(bs,&mpibs);
2574:       PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2575:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2576:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2577:       for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;

2579:       for (i=0; i<to->n; i++) {
2580:         to->wcounts[to->procs[i]] = 1;
2581:         PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2582:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2583:         MPI_Type_commit(to->types+to->procs[i]);
2584:       }
2585:       PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2586:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2587:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2588:       for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;

2590:       if (from->contiq) {
2591:         PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
2592:         for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);

2594:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2595:         for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];

2597:       } else {
2598:         for (i=0; i<from->n; i++) {
2599:           from->wcounts[from->procs[i]] = 1;
2600:           PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2601:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2602:           MPI_Type_commit(from->types+from->procs[i]);
2603:         }
2604:       }
2605:     } else ctx->copy = VecScatterCopy_PtoP_AllToAll;

2607: #else
2608:     to->use_alltoallw   = PETSC_FALSE;
2609:     from->use_alltoallw = PETSC_FALSE;
2610:     ctx->copy           = VecScatterCopy_PtoP_AllToAll;
2611: #endif
2612: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2613:   } else if (to->use_window) {
2614:     PetscMPIInt temptag,winsize;
2615:     MPI_Request *request;
2616:     MPI_Status  *status;

2618:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2619:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2620:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2621:     PetscMalloc1(to->n,&to->winstarts);
2622:     PetscMalloc2(to->n,&request,to->n,&status);
2623:     for (i=0; i<to->n; i++) {
2624:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2625:     }
2626:     for (i=0; i<from->n; i++) {
2627:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2628:     }
2629:     MPI_Waitall(to->n,request,status);
2630:     PetscFree2(request,status);

2632:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2633:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2634:     PetscMalloc1(from->n,&from->winstarts);
2635:     PetscMalloc2(from->n,&request,from->n,&status);
2636:     for (i=0; i<from->n; i++) {
2637:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2638:     }
2639:     for (i=0; i<to->n; i++) {
2640:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2641:     }
2642:     MPI_Waitall(from->n,request,status);
2643:     PetscFree2(request,status);
2644: #endif
2645:   } else {
2646:     PetscBool   use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2647:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2648:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2649:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2650:     MPI_Request *rev_swaits,*rev_rwaits;
2651:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2653:     /* allocate additional wait variables for the "reverse" scatter */
2654:     PetscMalloc1(to->n,&rev_rwaits);
2655:     PetscMalloc1(from->n,&rev_swaits);
2656:     to->rev_requests   = rev_rwaits;
2657:     from->rev_requests = rev_swaits;

2659:     /* Register the receives that you will use later (sends for scatter reverse) */
2660:     PetscOptionsGetBool(NULL,"-vecscatter_rsend",&use_rsend,NULL);
2661:     PetscOptionsGetBool(NULL,"-vecscatter_ssend",&use_ssend,NULL);
2662:     if (use_rsend) {
2663:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2664:       to->use_readyreceiver   = PETSC_TRUE;
2665:       from->use_readyreceiver = PETSC_TRUE;
2666:     } else {
2667:       to->use_readyreceiver   = PETSC_FALSE;
2668:       from->use_readyreceiver = PETSC_FALSE;
2669:     }
2670:     if (use_ssend) {
2671:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2672:     }

2674:     for (i=0; i<from->n; i++) {
2675:       if (use_rsend) {
2676:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2677:       } else if (use_ssend) {
2678:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2679:       } else {
2680:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2681:       }
2682:     }

2684:     for (i=0; i<to->n; i++) {
2685:       if (use_rsend) {
2686:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2687:       } else if (use_ssend) {
2688:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2689:       } else {
2690:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2691:       }
2692:     }
2693:     /* Register receives for scatter and reverse */
2694:     for (i=0; i<from->n; i++) {
2695:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2696:     }
2697:     for (i=0; i<to->n; i++) {
2698:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2699:     }
2700:     if (use_rsend) {
2701:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2702:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2703:       MPI_Barrier(comm);
2704:     }

2706:     ctx->copy = VecScatterCopy_PtoP_X;
2707:   }
2708:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2710: #if defined(PETSC_USE_DEBUG)
2711:   MPI_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2712:   MPI_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2713:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2714: #endif

2716:   switch (bs) {
2717:   case 12:
2718:     ctx->begin = VecScatterBegin_12;
2719:     ctx->end   = VecScatterEnd_12;
2720:     break;
2721:   case 11:
2722:     ctx->begin = VecScatterBegin_11;
2723:     ctx->end   = VecScatterEnd_11;
2724:     break;
2725:   case 10:
2726:     ctx->begin = VecScatterBegin_10;
2727:     ctx->end   = VecScatterEnd_10;
2728:     break;
2729:   case 9:
2730:     ctx->begin = VecScatterBegin_9;
2731:     ctx->end   = VecScatterEnd_9;
2732:     break;
2733:   case 8:
2734:     ctx->begin = VecScatterBegin_8;
2735:     ctx->end   = VecScatterEnd_8;
2736:     break;
2737:   case 7:
2738:     ctx->begin = VecScatterBegin_7;
2739:     ctx->end   = VecScatterEnd_7;
2740:     break;
2741:   case 6:
2742:     ctx->begin = VecScatterBegin_6;
2743:     ctx->end   = VecScatterEnd_6;
2744:     break;
2745:   case 5:
2746:     ctx->begin = VecScatterBegin_5;
2747:     ctx->end   = VecScatterEnd_5;
2748:     break;
2749:   case 4:
2750:     ctx->begin = VecScatterBegin_4;
2751:     ctx->end   = VecScatterEnd_4;
2752:     break;
2753:   case 3:
2754:     ctx->begin = VecScatterBegin_3;
2755:     ctx->end   = VecScatterEnd_3;
2756:     break;
2757:   case 2:
2758:     ctx->begin = VecScatterBegin_2;
2759:     ctx->end   = VecScatterEnd_2;
2760:     break;
2761:   case 1:
2762:     ctx->begin = VecScatterBegin_1;
2763:     ctx->end   = VecScatterEnd_1;
2764:     break;
2765:   default:
2766:     ctx->begin = VecScatterBegin_bs;
2767:     ctx->end   = VecScatterEnd_bs;

2769:   }
2770:   ctx->view = VecScatterView_MPI;
2771:   /* Check if the local scatter is actually a copy; important special case */
2772:   if (to->local.n) {
2773:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2774:   }
2775:   return(0);
2776: }



2780: /* ------------------------------------------------------------------------------------*/
2781: /*
2782:          Scatter from local Seq vectors to a parallel vector.
2783:          Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2784:          reverses the result.
2785: */
2788: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2789: {
2790:   PetscErrorCode         ierr;
2791:   MPI_Request            *waits;
2792:   VecScatter_MPI_General *to,*from;

2795:   VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2796:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2797:   from          = (VecScatter_MPI_General*)ctx->todata;
2798:   ctx->todata   = (void*)to;
2799:   ctx->fromdata = (void*)from;
2800:   /* these two are special, they are ALWAYS stored in to struct */
2801:   to->sstatus   = from->sstatus;
2802:   to->rstatus   = from->rstatus;

2804:   from->sstatus = 0;
2805:   from->rstatus = 0;

2807:   waits              = from->rev_requests;
2808:   from->rev_requests = from->requests;
2809:   from->requests     = waits;
2810:   waits              = to->rev_requests;
2811:   to->rev_requests   = to->requests;
2812:   to->requests       = waits;
2813:   return(0);
2814: }

2816: /* ---------------------------------------------------------------------------------*/
2819: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2820: {
2822:   PetscMPIInt    size,rank,tag,imdex,n;
2823:   PetscInt       *owners = xin->map->range;
2824:   PetscMPIInt    *nprocs = NULL;
2825:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2826:   PetscMPIInt    *owner   = NULL;
2827:   PetscInt       *starts  = NULL,count,slen;
2828:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2829:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2830:   MPI_Comm       comm;
2831:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2832:   MPI_Status     recv_status,*send_status = NULL;
2833:   PetscBool      duplicate = PETSC_FALSE;
2834: #if defined(PETSC_USE_DEBUG)
2835:   PetscBool      found = PETSC_FALSE;
2836: #endif

2839:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2840:   PetscObjectGetComm((PetscObject)xin,&comm);
2841:   MPI_Comm_size(comm,&size);
2842:   MPI_Comm_rank(comm,&rank);
2843:   if (size == 1) {
2844:     VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2845:     return(0);
2846:   }

2848:   /*
2849:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2850:      They then call the StoPScatterCreate()
2851:   */
2852:   /*  first count number of contributors to each processor */
2853:   PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2854:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2856:   lastidx = -1;
2857:   j       = 0;
2858:   for (i=0; i<nx; i++) {
2859:     /* if indices are NOT locally sorted, need to start search at the beginning */
2860:     if (lastidx > (idx = bs*inidx[i])) j = 0;
2861:     lastidx = idx;
2862:     for (; j<size; j++) {
2863:       if (idx >= owners[j] && idx < owners[j+1]) {
2864:         nprocs[j]++;
2865:         owner[i] = j;
2866: #if defined(PETSC_USE_DEBUG)
2867:         found = PETSC_TRUE;
2868: #endif
2869:         break;
2870:       }
2871:     }
2872: #if defined(PETSC_USE_DEBUG)
2873:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2874:     found = PETSC_FALSE;
2875: #endif
2876:   }
2877:   nsends = 0;
2878:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

2880:   /* inform other processors of number of messages and max length*/
2881:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2882:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2883:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2884:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2886:   /* post receives:   */
2887:   PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);

2889:   count = 0;
2890:   for (i=0; i<nrecvs; i++) {
2891:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2892:     count += olengths1[i];
2893:   }
2894:   PetscFree(onodes1);

2896:   /* do sends:
2897:       1) starts[i] gives the starting index in svalues for stuff going to
2898:          the ith processor
2899:   */
2900:   starts[0]= 0;
2901:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2902:   for (i=0; i<nx; i++) {
2903:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2904:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2905:   }

2907:   starts[0] = 0;
2908:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2909:   count = 0;
2910:   for (i=0; i<size; i++) {
2911:     if (nprocs[i]) {
2912:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2913:       count++;
2914:     }
2915:   }
2916:   PetscFree3(nprocs,owner,starts);

2918:   /*  wait on receives */
2919:   count = nrecvs;
2920:   slen  = 0;
2921:   while (count) {
2922:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2923:     /* unpack receives into our local space */
2924:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2925:     slen += n/2;
2926:     count--;
2927:   }
2928:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2930:   PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2931:   base     = owners[rank];
2932:   count    = 0;
2933:   rsvalues = rvalues;
2934:   for (i=0; i<nrecvs; i++) {
2935:     values    = rsvalues;
2936:     rsvalues += 2*olengths1[i];
2937:     for (j=0; j<olengths1[i]; j++) {
2938:       local_inidx[count]   = values[2*j] - base;
2939:       local_inidy[count++] = values[2*j+1];
2940:     }
2941:   }
2942:   PetscFree(olengths1);

2944:   /* wait on sends */
2945:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2946:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2948:   /*
2949:      should sort and remove duplicates from local_inidx,local_inidy
2950:   */

2952: #if defined(do_it_slow)
2953:   /* sort on the from index */
2954:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2955:   start = 0;
2956:   while (start < slen) {
2957:     count = start+1;
2958:     last  = local_inidx[start];
2959:     while (count < slen && last == local_inidx[count]) count++;
2960:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2961:       /* sort on to index */
2962:       PetscSortInt(count-start,local_inidy+start);
2963:     }
2964:     /* remove duplicates; not most efficient way, but probably good enough */
2965:     i = start;
2966:     while (i < count-1) {
2967:       if (local_inidy[i] != local_inidy[i+1]) i++;
2968:       else { /* found a duplicate */
2969:         duplicate = PETSC_TRUE;
2970:         for (j=i; j<slen-1; j++) {
2971:           local_inidx[j] = local_inidx[j+1];
2972:           local_inidy[j] = local_inidy[j+1];
2973:         }
2974:         slen--;
2975:         count--;
2976:       }
2977:     }
2978:     start = count;
2979:   }
2980: #endif
2981:   if (duplicate) {
2982:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2983:   }
2984:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2985:   PetscFree2(local_inidx,local_inidy);
2986:   return(0);
2987: }