Actual source code: vpscat.c

petsc-3.7.0 2016-04-25
Report Typos and Errors
  2: /*
  3:     Defines parallel vector scatters.
  4: */

  6: #include <../src/vec/vec/impls/dvecimpl.h>         /*I "petscvec.h" I*/
  7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  8: #include <petscsf.h>

 12: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 13: {
 14:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 15:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 16:   PetscErrorCode         ierr;
 17:   PetscInt               i;
 18:   PetscMPIInt            rank;
 19:   PetscViewerFormat      format;
 20:   PetscBool              iascii;

 23:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 24:   if (iascii) {
 25:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 26:     PetscViewerGetFormat(viewer,&format);
 27:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 28:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 30:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 31:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 32:       itmp = to->starts[to->n+1];
 33:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 34:       itmp = from->starts[from->n+1];
 35:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 36:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 38:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 39:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 41:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 42:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 43:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 45:     } else {
 46:       PetscViewerASCIIPushSynchronized(viewer);
 47:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 48:       if (to->n) {
 49:         for (i=0; i<to->n; i++) {
 50:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 51:         }
 52:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 53:         for (i=0; i<to->starts[to->n]; i++) {
 54:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 55:         }
 56:       }

 58:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 59:       if (from->n) {
 60:         for (i=0; i<from->n; i++) {
 61:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 62:         }

 64:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 65:         for (i=0; i<from->starts[from->n]; i++) {
 66:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 67:         }
 68:       }
 69:       if (to->local.n) {
 70:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 71:         for (i=0; i<to->local.n; i++) {
 72:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
 73:         }
 74:       }

 76:       PetscViewerFlush(viewer);
 77:       PetscViewerASCIIPopSynchronized(viewer);
 78:     }
 79:   }
 80:   return(0);
 81: }

 83: /* -----------------------------------------------------------------------------------*/
 84: /*
 85:       The next routine determines what part of  the local part of the scatter is an
 86:   exact copy of values into their current location. We check this here and
 87:   then know that we need not perform that portion of the scatter when the vector is
 88:   scattering to itself with INSERT_VALUES.

 90:      This is currently not used but would speed up, for example DMLocalToLocalBegin/End()

 92: */
 95: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
 96: {
 97:   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
 99:   PetscInt       *nto_slots,*nfrom_slots,j = 0;

102:   for (i=0; i<n; i++) {
103:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
104:   }

106:   if (!n_nonmatching) {
107:     to->nonmatching_computed = PETSC_TRUE;
108:     to->n_nonmatching        = from->n_nonmatching = 0;

110:     PetscInfo1(scatter,"Reduced %D to 0\n", n);
111:   } else if (n_nonmatching == n) {
112:     to->nonmatching_computed = PETSC_FALSE;

114:     PetscInfo(scatter,"All values non-matching\n");
115:   } else {
116:     to->nonmatching_computed= PETSC_TRUE;
117:     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;

119:     PetscMalloc1(n_nonmatching,&nto_slots);
120:     PetscMalloc1(n_nonmatching,&nfrom_slots);

122:     to->slots_nonmatching   = nto_slots;
123:     from->slots_nonmatching = nfrom_slots;
124:     for (i=0; i<n; i++) {
125:       if (to_slots[i] != from_slots[i]) {
126:         nto_slots[j]   = to_slots[i];
127:         nfrom_slots[j] = from_slots[i];
128:         j++;
129:       }
130:     }
131:     PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
132:   }
133:   return(0);
134: }

136: /* --------------------------------------------------------------------------------------*/

138: /* -------------------------------------------------------------------------------------*/
141: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
142: {
143:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
144:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
145:   PetscErrorCode         ierr;
146:   PetscInt               i;

149:   if (to->use_readyreceiver) {
150:     /*
151:        Since we have already posted sends we must cancel them before freeing
152:        the requests
153:     */
154:     for (i=0; i<from->n; i++) {
155:       MPI_Cancel(from->requests+i);
156:     }
157:     for (i=0; i<to->n; i++) {
158:       MPI_Cancel(to->rev_requests+i);
159:     }
160:     MPI_Waitall(from->n,from->requests,to->rstatus);
161:     MPI_Waitall(to->n,to->rev_requests,to->rstatus);
162:   }

164: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
165:   if (to->use_alltoallw) {
166:     for (i=0; i<to->n; i++) {
167:       MPI_Type_free(to->types+to->procs[i]);
168:     }
169:     PetscFree3(to->wcounts,to->wdispls,to->types);
170:     if (!from->contiq) {
171:       for (i=0; i<from->n; i++) {
172:         MPI_Type_free(from->types+from->procs[i]);
173:       }
174:     }
175:     PetscFree3(from->wcounts,from->wdispls,from->types);
176:   }
177: #endif

179: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
180:   if (to->use_window) {
181:     MPI_Win_free(&from->window);
182:     MPI_Win_free(&to->window);
183:     PetscFree(from->winstarts);
184:     PetscFree(to->winstarts);
185:   }
186: #endif

188:   if (to->use_alltoallv) {
189:     PetscFree2(to->counts,to->displs);
190:     PetscFree2(from->counts,from->displs);
191:   }

193:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
194:   /*
195:      IBM's PE version of MPI has a bug where freeing these guys will screw up later
196:      message passing.
197:   */
198: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
199:   if (!to->use_alltoallv && !to->use_window) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
200:     if (to->requests) {
201:       for (i=0; i<to->n; i++) {
202:         MPI_Request_free(to->requests + i);
203:       }
204:     }
205:     if (to->rev_requests) {
206:       for (i=0; i<to->n; i++) {
207:         MPI_Request_free(to->rev_requests + i);
208:       }
209:     }
210:   }
211:   /*
212:       MPICH could not properly cancel requests thus with ready receiver mode we
213:     cannot free the requests. It may be fixed now, if not then put the following
214:     code inside a if (!to->use_readyreceiver) {
215:   */
216:   if (!to->use_alltoallv && !to->use_window) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
217:     if (from->requests) {
218:       for (i=0; i<from->n; i++) {
219:         MPI_Request_free(from->requests + i);
220:       }
221:     }

223:     if (from->rev_requests) {
224:       for (i=0; i<from->n; i++) {
225:         MPI_Request_free(from->rev_requests + i);
226:       }
227:     }
228:   }
229: #endif

231:   PetscFree(to->local.vslots);
232:   PetscFree(from->local.vslots);
233:   PetscFree2(to->counts,to->displs);
234:   PetscFree2(from->counts,from->displs);
235:   PetscFree(to->local.slots_nonmatching);
236:   PetscFree(from->local.slots_nonmatching);
237:   PetscFree(to->rev_requests);
238:   PetscFree(from->rev_requests);
239:   PetscFree(to->requests);
240:   PetscFree(from->requests);
241:   PetscFree4(to->values,to->indices,to->starts,to->procs);
242:   PetscFree2(to->sstatus,to->rstatus);
243:   PetscFree4(from->values,from->indices,from->starts,from->procs);
244:   PetscFree(from);
245:   PetscFree(to);
246:   return(0);
247: }



251: /* --------------------------------------------------------------------------------------*/
252: /*
253:     Special optimization to see if the local part of the scatter is actually
254:     a copy. The scatter routines call PetscMemcpy() instead.

256: */
259: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
260: {
261:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
262:   PetscInt       to_start,from_start;

266:   to_start   = to_slots[0];
267:   from_start = from_slots[0];

269:   for (i=1; i<n; i++) {
270:     to_start   += bs;
271:     from_start += bs;
272:     if (to_slots[i]   != to_start)   return(0);
273:     if (from_slots[i] != from_start) return(0);
274:   }
275:   to->is_copy       = PETSC_TRUE;
276:   to->copy_start    = to_slots[0];
277:   to->copy_length   = bs*sizeof(PetscScalar)*n;
278:   from->is_copy     = PETSC_TRUE;
279:   from->copy_start  = from_slots[0];
280:   from->copy_length = bs*sizeof(PetscScalar)*n;
281:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
282:   return(0);
283: }

285: /* --------------------------------------------------------------------------------------*/

289: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
290: {
291:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
292:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
293:   PetscErrorCode         ierr;
294:   PetscInt               ny,bs = in_from->bs;

297:   out->ops->begin   = in->ops->begin;
298:   out->ops->end     = in->ops->end;
299:   out->ops->copy    = in->ops->copy;
300:   out->ops->destroy = in->ops->destroy;
301:   out->ops->view    = in->ops->view;

303:   /* allocate entire send scatter context */
304:   PetscNewLog(out,&out_to);
305:   PetscNewLog(out,&out_from);

307:   ny                = in_to->starts[in_to->n];
308:   out_to->n         = in_to->n;
309:   out_to->type      = in_to->type;
310:   out_to->sendfirst = in_to->sendfirst;

312:   PetscMalloc1(out_to->n,&out_to->requests);
313:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
314:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
315:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
316:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
317:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

319:   out->todata                        = (void*)out_to;
320:   out_to->local.n                    = in_to->local.n;
321:   out_to->local.nonmatching_computed = PETSC_FALSE;
322:   out_to->local.n_nonmatching        = 0;
323:   out_to->local.slots_nonmatching    = 0;
324:   if (in_to->local.n) {
325:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
326:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
327:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
328:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
329:   } else {
330:     out_to->local.vslots   = 0;
331:     out_from->local.vslots = 0;
332:   }

334:   /* allocate entire receive context */
335:   out_from->type      = in_from->type;
336:   ny                  = in_from->starts[in_from->n];
337:   out_from->n         = in_from->n;
338:   out_from->sendfirst = in_from->sendfirst;

340:   PetscMalloc1(out_from->n,&out_from->requests);
341:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
342:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
343:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
344:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

346:   out->fromdata                        = (void*)out_from;
347:   out_from->local.n                    = in_from->local.n;
348:   out_from->local.nonmatching_computed = PETSC_FALSE;
349:   out_from->local.n_nonmatching        = 0;
350:   out_from->local.slots_nonmatching    = 0;

352:   /*
353:       set up the request arrays for use with isend_init() and irecv_init()
354:   */
355:   {
356:     PetscMPIInt tag;
357:     MPI_Comm    comm;
358:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
359:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
360:     PetscInt    i;
361:     PetscBool   flg;
362:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
363:     MPI_Request *rev_swaits,*rev_rwaits;
364:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

366:     PetscMalloc1(in_to->n,&out_to->rev_requests);
367:     PetscMalloc1(in_from->n,&out_from->rev_requests);

369:     rev_rwaits = out_to->rev_requests;
370:     rev_swaits = out_from->rev_requests;

372:     out_from->bs = out_to->bs = bs;
373:     tag          = ((PetscObject)out)->tag;
374:     PetscObjectGetComm((PetscObject)out,&comm);

376:     /* Register the receives that you will use later (sends for scatter reverse) */
377:     for (i=0; i<out_from->n; i++) {
378:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
379:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
380:     }

382:     flg  = PETSC_FALSE;
383:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&flg,NULL);
384:     if (flg) {
385:       out_to->use_readyreceiver   = PETSC_TRUE;
386:       out_from->use_readyreceiver = PETSC_TRUE;
387:       for (i=0; i<out_to->n; i++) {
388:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
389:       }
390:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
391:       MPI_Barrier(comm);
392:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
393:     } else {
394:       out_to->use_readyreceiver   = PETSC_FALSE;
395:       out_from->use_readyreceiver = PETSC_FALSE;
396:       flg                         = PETSC_FALSE;
397:       PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&flg,NULL);
398:       if (flg) {
399:         PetscInfo(in,"Using VecScatter Ssend mode\n");
400:       }
401:       for (i=0; i<out_to->n; i++) {
402:         if (!flg) {
403:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
404:         } else {
405:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
406:         }
407:       }
408:     }
409:     /* Register receives for scatter reverse */
410:     for (i=0; i<out_to->n; i++) {
411:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
412:     }
413:   }
414:   return(0);
415: }

419: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
420: {
421:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
422:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
423:   PetscErrorCode         ierr;
424:   PetscInt               ny,bs = in_from->bs;
425:   PetscMPIInt            size;

428:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

430:   out->ops->begin     = in->ops->begin;
431:   out->ops->end       = in->ops->end;
432:   out->ops->copy      = in->ops->copy;
433:   out->ops->destroy   = in->ops->destroy;
434:   out->ops->view      = in->ops->view;

436:   /* allocate entire send scatter context */
437:   PetscNewLog(out,&out_to);
438:   PetscNewLog(out,&out_from);

440:   ny                = in_to->starts[in_to->n];
441:   out_to->n         = in_to->n;
442:   out_to->type      = in_to->type;
443:   out_to->sendfirst = in_to->sendfirst;

445:   PetscMalloc1(out_to->n,&out_to->requests);
446:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
447:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
448:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
449:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
450:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

452:   out->todata                        = (void*)out_to;
453:   out_to->local.n                    = in_to->local.n;
454:   out_to->local.nonmatching_computed = PETSC_FALSE;
455:   out_to->local.n_nonmatching        = 0;
456:   out_to->local.slots_nonmatching    = 0;
457:   if (in_to->local.n) {
458:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
459:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
460:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
461:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
462:   } else {
463:     out_to->local.vslots   = 0;
464:     out_from->local.vslots = 0;
465:   }

467:   /* allocate entire receive context */
468:   out_from->type      = in_from->type;
469:   ny                  = in_from->starts[in_from->n];
470:   out_from->n         = in_from->n;
471:   out_from->sendfirst = in_from->sendfirst;

473:   PetscMalloc1(out_from->n,&out_from->requests);
474:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
475:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
476:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
477:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

479:   out->fromdata                        = (void*)out_from;
480:   out_from->local.n                    = in_from->local.n;
481:   out_from->local.nonmatching_computed = PETSC_FALSE;
482:   out_from->local.n_nonmatching        = 0;
483:   out_from->local.slots_nonmatching    = 0;

485:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

487:   PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
488:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
489:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

491:   PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
492:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
493:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
494:   return(0);
495: }
496: /* --------------------------------------------------------------------------------------------------
497:     Packs and unpacks the message data into send or from receive buffers.

499:     These could be generated automatically.

501:     Fortran kernels etc. could be used.
502: */
503: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
504: {
505:   PetscInt i;
506:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
507: }

511: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
512: {
513:   PetscInt i;

516:   switch (addv) {
517:   case INSERT_VALUES:
518:   case INSERT_ALL_VALUES:
519:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
520:     break;
521:   case ADD_VALUES:
522:   case ADD_ALL_VALUES:
523:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
524:     break;
525: #if !defined(PETSC_USE_COMPLEX)
526:   case MAX_VALUES:
527:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
528: #else
529:   case MAX_VALUES:
530: #endif
531:   case NOT_SET_VALUES:
532:     break;
533:   default:
534:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
535:   }
536:   return(0);
537: }

541: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
542: {
543:   PetscInt i;

546:   switch (addv) {
547:   case INSERT_VALUES:
548:   case INSERT_ALL_VALUES:
549:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
550:     break;
551:   case ADD_VALUES:
552:   case ADD_ALL_VALUES:
553:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
554:     break;
555: #if !defined(PETSC_USE_COMPLEX)
556:   case MAX_VALUES:
557:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
558: #else
559:   case MAX_VALUES:
560: #endif
561:   case NOT_SET_VALUES:
562:     break;
563:   default:
564:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
565:   }
566:   return(0);
567: }

569: /* ----------------------------------------------------------------------------------------------- */
570: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
571: {
572:   PetscInt i,idx;

574:   for (i=0; i<n; i++) {
575:     idx  = *indicesx++;
576:     y[0] = x[idx];
577:     y[1] = x[idx+1];
578:     y   += 2;
579:   }
580: }

584: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
585: {
586:   PetscInt i,idy;

589:   switch (addv) {
590:   case INSERT_VALUES:
591:   case INSERT_ALL_VALUES:
592:     for (i=0; i<n; i++) {
593:       idy      = *indicesy++;
594:       y[idy]   = x[0];
595:       y[idy+1] = x[1];
596:       x       += 2;
597:     }
598:     break;
599:   case ADD_VALUES:
600:   case ADD_ALL_VALUES:
601:     for (i=0; i<n; i++) {
602:       idy       = *indicesy++;
603:       y[idy]   += x[0];
604:       y[idy+1] += x[1];
605:       x        += 2;
606:     }
607:     break;
608: #if !defined(PETSC_USE_COMPLEX)
609:   case MAX_VALUES:
610:     for (i=0; i<n; i++) {
611:       idy      = *indicesy++;
612:       y[idy]   = PetscMax(y[idy],x[0]);
613:       y[idy+1] = PetscMax(y[idy+1],x[1]);
614:       x       += 2;
615:     }
616: #else
617:   case MAX_VALUES:
618: #endif
619:   case NOT_SET_VALUES:
620:     break;
621:   default:
622:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
623:   }
624:   return(0);
625: }

629: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
630: {
631:   PetscInt i,idx,idy;

634:   switch (addv) {
635:   case INSERT_VALUES:
636:   case INSERT_ALL_VALUES:
637:     for (i=0; i<n; i++) {
638:       idx      = *indicesx++;
639:       idy      = *indicesy++;
640:       y[idy]   = x[idx];
641:       y[idy+1] = x[idx+1];
642:     }
643:     break;
644:   case ADD_VALUES:
645:   case ADD_ALL_VALUES:
646:     for (i=0; i<n; i++) {
647:       idx       = *indicesx++;
648:       idy       = *indicesy++;
649:       y[idy]   += x[idx];
650:       y[idy+1] += x[idx+1];
651:     }
652:     break;
653: #if !defined(PETSC_USE_COMPLEX)
654:   case MAX_VALUES:
655:     for (i=0; i<n; i++) {
656:       idx      = *indicesx++;
657:       idy      = *indicesy++;
658:       y[idy]   = PetscMax(y[idy],x[idx]);
659:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
660:     }
661: #else
662:   case MAX_VALUES:
663: #endif
664:   case NOT_SET_VALUES:
665:     break;
666:   default:
667:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
668:   }
669:   return(0);
670: }
671: /* ----------------------------------------------------------------------------------------------- */
672: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
673: {
674:   PetscInt i,idx;

676:   for (i=0; i<n; i++) {
677:     idx  = *indicesx++;
678:     y[0] = x[idx];
679:     y[1] = x[idx+1];
680:     y[2] = x[idx+2];
681:     y   += 3;
682:   }
683: }
686: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
687: {
688:   PetscInt i,idy;

691:   switch (addv) {
692:   case INSERT_VALUES:
693:   case INSERT_ALL_VALUES:
694:     for (i=0; i<n; i++) {
695:       idy      = *indicesy++;
696:       y[idy]   = x[0];
697:       y[idy+1] = x[1];
698:       y[idy+2] = x[2];
699:       x       += 3;
700:     }
701:     break;
702:   case ADD_VALUES:
703:   case ADD_ALL_VALUES:
704:     for (i=0; i<n; i++) {
705:       idy       = *indicesy++;
706:       y[idy]   += x[0];
707:       y[idy+1] += x[1];
708:       y[idy+2] += x[2];
709:       x        += 3;
710:     }
711:     break;
712: #if !defined(PETSC_USE_COMPLEX)
713:   case MAX_VALUES:
714:     for (i=0; i<n; i++) {
715:       idy      = *indicesy++;
716:       y[idy]   = PetscMax(y[idy],x[0]);
717:       y[idy+1] = PetscMax(y[idy+1],x[1]);
718:       y[idy+2] = PetscMax(y[idy+2],x[2]);
719:       x       += 3;
720:     }
721: #else
722:   case MAX_VALUES:
723: #endif
724:   case NOT_SET_VALUES:
725:     break;
726:   default:
727:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
728:   }
729:   return(0);
730: }

734: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
735: {
736:   PetscInt i,idx,idy;

739:   switch (addv) {
740:   case INSERT_VALUES:
741:   case INSERT_ALL_VALUES:
742:     for (i=0; i<n; i++) {
743:       idx      = *indicesx++;
744:       idy      = *indicesy++;
745:       y[idy]   = x[idx];
746:       y[idy+1] = x[idx+1];
747:       y[idy+2] = x[idx+2];
748:     }
749:     break;
750:   case ADD_VALUES:
751:   case ADD_ALL_VALUES:
752:     for (i=0; i<n; i++) {
753:       idx       = *indicesx++;
754:       idy       = *indicesy++;
755:       y[idy]   += x[idx];
756:       y[idy+1] += x[idx+1];
757:       y[idy+2] += x[idx+2];
758:     }
759:     break;
760: #if !defined(PETSC_USE_COMPLEX)
761:   case MAX_VALUES:
762:     for (i=0; i<n; i++) {
763:       idx      = *indicesx++;
764:       idy      = *indicesy++;
765:       y[idy]   = PetscMax(y[idy],x[idx]);
766:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
767:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
768:     }
769: #else
770:   case MAX_VALUES:
771: #endif
772:   case NOT_SET_VALUES:
773:     break;
774:   default:
775:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
776:   }
777:   return(0);
778: }
779: /* ----------------------------------------------------------------------------------------------- */
780: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
781: {
782:   PetscInt i,idx;

784:   for (i=0; i<n; i++) {
785:     idx  = *indicesx++;
786:     y[0] = x[idx];
787:     y[1] = x[idx+1];
788:     y[2] = x[idx+2];
789:     y[3] = x[idx+3];
790:     y   += 4;
791:   }
792: }
795: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
796: {
797:   PetscInt i,idy;

800:   switch (addv) {
801:   case INSERT_VALUES:
802:   case INSERT_ALL_VALUES:
803:     for (i=0; i<n; i++) {
804:       idy      = *indicesy++;
805:       y[idy]   = x[0];
806:       y[idy+1] = x[1];
807:       y[idy+2] = x[2];
808:       y[idy+3] = x[3];
809:       x       += 4;
810:     }
811:     break;
812:   case ADD_VALUES:
813:   case ADD_ALL_VALUES:
814:     for (i=0; i<n; i++) {
815:       idy       = *indicesy++;
816:       y[idy]   += x[0];
817:       y[idy+1] += x[1];
818:       y[idy+2] += x[2];
819:       y[idy+3] += x[3];
820:       x        += 4;
821:     }
822:     break;
823: #if !defined(PETSC_USE_COMPLEX)
824:   case MAX_VALUES:
825:     for (i=0; i<n; i++) {
826:       idy      = *indicesy++;
827:       y[idy]   = PetscMax(y[idy],x[0]);
828:       y[idy+1] = PetscMax(y[idy+1],x[1]);
829:       y[idy+2] = PetscMax(y[idy+2],x[2]);
830:       y[idy+3] = PetscMax(y[idy+3],x[3]);
831:       x       += 4;
832:     }
833: #else
834:   case MAX_VALUES:
835: #endif
836:   case NOT_SET_VALUES:
837:     break;
838:   default:
839:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
840:   }
841:   return(0);
842: }

846: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
847: {
848:   PetscInt i,idx,idy;

851:   switch (addv) {
852:   case INSERT_VALUES:
853:   case INSERT_ALL_VALUES:
854:     for (i=0; i<n; i++) {
855:       idx      = *indicesx++;
856:       idy      = *indicesy++;
857:       y[idy]   = x[idx];
858:       y[idy+1] = x[idx+1];
859:       y[idy+2] = x[idx+2];
860:       y[idy+3] = x[idx+3];
861:     }
862:     break;
863:   case ADD_VALUES:
864:   case ADD_ALL_VALUES:
865:     for (i=0; i<n; i++) {
866:       idx       = *indicesx++;
867:       idy       = *indicesy++;
868:       y[idy]   += x[idx];
869:       y[idy+1] += x[idx+1];
870:       y[idy+2] += x[idx+2];
871:       y[idy+3] += x[idx+3];
872:     }
873:     break;
874: #if !defined(PETSC_USE_COMPLEX)
875:   case MAX_VALUES:
876:     for (i=0; i<n; i++) {
877:       idx      = *indicesx++;
878:       idy      = *indicesy++;
879:       y[idy]   = PetscMax(y[idy],x[idx]);
880:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
881:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
882:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
883:     }
884: #else
885:   case MAX_VALUES:
886: #endif
887:   case NOT_SET_VALUES:
888:     break;
889:   default:
890:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
891:   }
892:   return(0);
893: }
894: /* ----------------------------------------------------------------------------------------------- */
895: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
896: {
897:   PetscInt i,idx;

899:   for (i=0; i<n; i++) {
900:     idx  = *indicesx++;
901:     y[0] = x[idx];
902:     y[1] = x[idx+1];
903:     y[2] = x[idx+2];
904:     y[3] = x[idx+3];
905:     y[4] = x[idx+4];
906:     y   += 5;
907:   }
908: }

912: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
913: {
914:   PetscInt i,idy;

917:   switch (addv) {
918:   case INSERT_VALUES:
919:   case INSERT_ALL_VALUES:
920:     for (i=0; i<n; i++) {
921:       idy      = *indicesy++;
922:       y[idy]   = x[0];
923:       y[idy+1] = x[1];
924:       y[idy+2] = x[2];
925:       y[idy+3] = x[3];
926:       y[idy+4] = x[4];
927:       x       += 5;
928:     }
929:     break;
930:   case ADD_VALUES:
931:   case ADD_ALL_VALUES:
932:     for (i=0; i<n; i++) {
933:       idy       = *indicesy++;
934:       y[idy]   += x[0];
935:       y[idy+1] += x[1];
936:       y[idy+2] += x[2];
937:       y[idy+3] += x[3];
938:       y[idy+4] += x[4];
939:       x        += 5;
940:     }
941:     break;
942: #if !defined(PETSC_USE_COMPLEX)
943:   case MAX_VALUES:
944:     for (i=0; i<n; i++) {
945:       idy      = *indicesy++;
946:       y[idy]   = PetscMax(y[idy],x[0]);
947:       y[idy+1] = PetscMax(y[idy+1],x[1]);
948:       y[idy+2] = PetscMax(y[idy+2],x[2]);
949:       y[idy+3] = PetscMax(y[idy+3],x[3]);
950:       y[idy+4] = PetscMax(y[idy+4],x[4]);
951:       x       += 5;
952:     }
953: #else
954:   case MAX_VALUES:
955: #endif
956:   case NOT_SET_VALUES:
957:     break;
958:   default:
959:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
960:   }
961:   return(0);
962: }

966: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
967: {
968:   PetscInt i,idx,idy;

971:   switch (addv) {
972:   case INSERT_VALUES:
973:   case INSERT_ALL_VALUES:
974:     for (i=0; i<n; i++) {
975:       idx      = *indicesx++;
976:       idy      = *indicesy++;
977:       y[idy]   = x[idx];
978:       y[idy+1] = x[idx+1];
979:       y[idy+2] = x[idx+2];
980:       y[idy+3] = x[idx+3];
981:       y[idy+4] = x[idx+4];
982:     }
983:     break;
984:   case ADD_VALUES:
985:   case ADD_ALL_VALUES:
986:     for (i=0; i<n; i++) {
987:       idx       = *indicesx++;
988:       idy       = *indicesy++;
989:       y[idy]   += x[idx];
990:       y[idy+1] += x[idx+1];
991:       y[idy+2] += x[idx+2];
992:       y[idy+3] += x[idx+3];
993:       y[idy+4] += x[idx+4];
994:     }
995:     break;
996: #if !defined(PETSC_USE_COMPLEX)
997:   case MAX_VALUES:
998:     for (i=0; i<n; i++) {
999:       idx      = *indicesx++;
1000:       idy      = *indicesy++;
1001:       y[idy]   = PetscMax(y[idy],x[idx]);
1002:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1003:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1004:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1005:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1006:     }
1007: #else
1008:   case MAX_VALUES:
1009: #endif
1010:   case NOT_SET_VALUES:
1011:     break;
1012:   default:
1013:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1014:   }
1015:   return(0);
1016: }
1017: /* ----------------------------------------------------------------------------------------------- */
1018: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1019: {
1020:   PetscInt i,idx;

1022:   for (i=0; i<n; i++) {
1023:     idx  = *indicesx++;
1024:     y[0] = x[idx];
1025:     y[1] = x[idx+1];
1026:     y[2] = x[idx+2];
1027:     y[3] = x[idx+3];
1028:     y[4] = x[idx+4];
1029:     y[5] = x[idx+5];
1030:     y   += 6;
1031:   }
1032: }

1036: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1037: {
1038:   PetscInt i,idy;

1041:   switch (addv) {
1042:   case INSERT_VALUES:
1043:   case INSERT_ALL_VALUES:
1044:     for (i=0; i<n; i++) {
1045:       idy      = *indicesy++;
1046:       y[idy]   = x[0];
1047:       y[idy+1] = x[1];
1048:       y[idy+2] = x[2];
1049:       y[idy+3] = x[3];
1050:       y[idy+4] = x[4];
1051:       y[idy+5] = x[5];
1052:       x       += 6;
1053:     }
1054:     break;
1055:   case ADD_VALUES:
1056:   case ADD_ALL_VALUES:
1057:     for (i=0; i<n; i++) {
1058:       idy       = *indicesy++;
1059:       y[idy]   += x[0];
1060:       y[idy+1] += x[1];
1061:       y[idy+2] += x[2];
1062:       y[idy+3] += x[3];
1063:       y[idy+4] += x[4];
1064:       y[idy+5] += x[5];
1065:       x        += 6;
1066:     }
1067:     break;
1068: #if !defined(PETSC_USE_COMPLEX)
1069:   case MAX_VALUES:
1070:     for (i=0; i<n; i++) {
1071:       idy      = *indicesy++;
1072:       y[idy]   = PetscMax(y[idy],x[0]);
1073:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1074:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1075:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1076:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1077:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1078:       x       += 6;
1079:     }
1080: #else
1081:   case MAX_VALUES:
1082: #endif
1083:   case NOT_SET_VALUES:
1084:     break;
1085:   default:
1086:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1087:   }
1088:   return(0);
1089: }

1093: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1094: {
1095:   PetscInt i,idx,idy;

1098:   switch (addv) {
1099:   case INSERT_VALUES:
1100:   case INSERT_ALL_VALUES:
1101:     for (i=0; i<n; i++) {
1102:       idx      = *indicesx++;
1103:       idy      = *indicesy++;
1104:       y[idy]   = x[idx];
1105:       y[idy+1] = x[idx+1];
1106:       y[idy+2] = x[idx+2];
1107:       y[idy+3] = x[idx+3];
1108:       y[idy+4] = x[idx+4];
1109:       y[idy+5] = x[idx+5];
1110:     }
1111:     break;
1112:   case ADD_VALUES:
1113:   case ADD_ALL_VALUES:
1114:     for (i=0; i<n; i++) {
1115:       idx       = *indicesx++;
1116:       idy       = *indicesy++;
1117:       y[idy]   += x[idx];
1118:       y[idy+1] += x[idx+1];
1119:       y[idy+2] += x[idx+2];
1120:       y[idy+3] += x[idx+3];
1121:       y[idy+4] += x[idx+4];
1122:       y[idy+5] += x[idx+5];
1123:     }
1124:     break;
1125: #if !defined(PETSC_USE_COMPLEX)
1126:   case MAX_VALUES:
1127:     for (i=0; i<n; i++) {
1128:       idx      = *indicesx++;
1129:       idy      = *indicesy++;
1130:       y[idy]   = PetscMax(y[idy],x[idx]);
1131:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1132:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1133:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1134:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1135:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1136:     }
1137: #else
1138:   case MAX_VALUES:
1139: #endif
1140:   case NOT_SET_VALUES:
1141:     break;
1142:   default:
1143:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1144:   }
1145:   return(0);
1146: }
1147: /* ----------------------------------------------------------------------------------------------- */
1148: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1149: {
1150:   PetscInt i,idx;

1152:   for (i=0; i<n; i++) {
1153:     idx  = *indicesx++;
1154:     y[0] = x[idx];
1155:     y[1] = x[idx+1];
1156:     y[2] = x[idx+2];
1157:     y[3] = x[idx+3];
1158:     y[4] = x[idx+4];
1159:     y[5] = x[idx+5];
1160:     y[6] = x[idx+6];
1161:     y   += 7;
1162:   }
1163: }

1167: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1168: {
1169:   PetscInt i,idy;

1172:   switch (addv) {
1173:   case INSERT_VALUES:
1174:   case INSERT_ALL_VALUES:
1175:     for (i=0; i<n; i++) {
1176:       idy      = *indicesy++;
1177:       y[idy]   = x[0];
1178:       y[idy+1] = x[1];
1179:       y[idy+2] = x[2];
1180:       y[idy+3] = x[3];
1181:       y[idy+4] = x[4];
1182:       y[idy+5] = x[5];
1183:       y[idy+6] = x[6];
1184:       x       += 7;
1185:     }
1186:     break;
1187:   case ADD_VALUES:
1188:   case ADD_ALL_VALUES:
1189:     for (i=0; i<n; i++) {
1190:       idy       = *indicesy++;
1191:       y[idy]   += x[0];
1192:       y[idy+1] += x[1];
1193:       y[idy+2] += x[2];
1194:       y[idy+3] += x[3];
1195:       y[idy+4] += x[4];
1196:       y[idy+5] += x[5];
1197:       y[idy+6] += x[6];
1198:       x        += 7;
1199:     }
1200:     break;
1201: #if !defined(PETSC_USE_COMPLEX)
1202:   case MAX_VALUES:
1203:     for (i=0; i<n; i++) {
1204:       idy      = *indicesy++;
1205:       y[idy]   = PetscMax(y[idy],x[0]);
1206:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1207:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1208:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1209:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1210:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1211:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1212:       x       += 7;
1213:     }
1214: #else
1215:   case MAX_VALUES:
1216: #endif
1217:   case NOT_SET_VALUES:
1218:     break;
1219:   default:
1220:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1221:   }
1222:   return(0);
1223: }

1227: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1228: {
1229:   PetscInt i,idx,idy;

1232:   switch (addv) {
1233:   case INSERT_VALUES:
1234:   case INSERT_ALL_VALUES:
1235:     for (i=0; i<n; i++) {
1236:       idx      = *indicesx++;
1237:       idy      = *indicesy++;
1238:       y[idy]   = x[idx];
1239:       y[idy+1] = x[idx+1];
1240:       y[idy+2] = x[idx+2];
1241:       y[idy+3] = x[idx+3];
1242:       y[idy+4] = x[idx+4];
1243:       y[idy+5] = x[idx+5];
1244:       y[idy+6] = x[idx+6];
1245:     }
1246:     break;
1247:   case ADD_VALUES:
1248:   case ADD_ALL_VALUES:
1249:     for (i=0; i<n; i++) {
1250:       idx       = *indicesx++;
1251:       idy       = *indicesy++;
1252:       y[idy]   += x[idx];
1253:       y[idy+1] += x[idx+1];
1254:       y[idy+2] += x[idx+2];
1255:       y[idy+3] += x[idx+3];
1256:       y[idy+4] += x[idx+4];
1257:       y[idy+5] += x[idx+5];
1258:       y[idy+6] += x[idx+6];
1259:     }
1260:     break;
1261: #if !defined(PETSC_USE_COMPLEX)
1262:   case MAX_VALUES:
1263:     for (i=0; i<n; i++) {
1264:       idx      = *indicesx++;
1265:       idy      = *indicesy++;
1266:       y[idy]   = PetscMax(y[idy],x[idx]);
1267:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1268:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1269:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1270:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1271:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1272:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1273:     }
1274: #else
1275:   case MAX_VALUES:
1276: #endif
1277:   case NOT_SET_VALUES:
1278:     break;
1279:   default:
1280:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1281:   }
1282:   return(0);
1283: }
1284: /* ----------------------------------------------------------------------------------------------- */
1285: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1286: {
1287:   PetscInt i,idx;

1289:   for (i=0; i<n; i++) {
1290:     idx  = *indicesx++;
1291:     y[0] = x[idx];
1292:     y[1] = x[idx+1];
1293:     y[2] = x[idx+2];
1294:     y[3] = x[idx+3];
1295:     y[4] = x[idx+4];
1296:     y[5] = x[idx+5];
1297:     y[6] = x[idx+6];
1298:     y[7] = x[idx+7];
1299:     y   += 8;
1300:   }
1301: }

1305: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1306: {
1307:   PetscInt i,idy;

1310:   switch (addv) {
1311:   case INSERT_VALUES:
1312:   case INSERT_ALL_VALUES:
1313:     for (i=0; i<n; i++) {
1314:       idy      = *indicesy++;
1315:       y[idy]   = x[0];
1316:       y[idy+1] = x[1];
1317:       y[idy+2] = x[2];
1318:       y[idy+3] = x[3];
1319:       y[idy+4] = x[4];
1320:       y[idy+5] = x[5];
1321:       y[idy+6] = x[6];
1322:       y[idy+7] = x[7];
1323:       x       += 8;
1324:     }
1325:     break;
1326:   case ADD_VALUES:
1327:   case ADD_ALL_VALUES:
1328:     for (i=0; i<n; i++) {
1329:       idy       = *indicesy++;
1330:       y[idy]   += x[0];
1331:       y[idy+1] += x[1];
1332:       y[idy+2] += x[2];
1333:       y[idy+3] += x[3];
1334:       y[idy+4] += x[4];
1335:       y[idy+5] += x[5];
1336:       y[idy+6] += x[6];
1337:       y[idy+7] += x[7];
1338:       x        += 8;
1339:     }
1340:     break;
1341: #if !defined(PETSC_USE_COMPLEX)
1342:   case MAX_VALUES:
1343:     for (i=0; i<n; i++) {
1344:       idy      = *indicesy++;
1345:       y[idy]   = PetscMax(y[idy],x[0]);
1346:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1347:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1348:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1349:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1350:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1351:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1352:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1353:       x       += 8;
1354:     }
1355: #else
1356:   case MAX_VALUES:
1357: #endif
1358:   case NOT_SET_VALUES:
1359:     break;
1360:   default:
1361:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1362:   }
1363:   return(0);
1364: }

1368: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1369: {
1370:   PetscInt i,idx,idy;

1373:   switch (addv) {
1374:   case INSERT_VALUES:
1375:   case INSERT_ALL_VALUES:
1376:     for (i=0; i<n; i++) {
1377:       idx      = *indicesx++;
1378:       idy      = *indicesy++;
1379:       y[idy]   = x[idx];
1380:       y[idy+1] = x[idx+1];
1381:       y[idy+2] = x[idx+2];
1382:       y[idy+3] = x[idx+3];
1383:       y[idy+4] = x[idx+4];
1384:       y[idy+5] = x[idx+5];
1385:       y[idy+6] = x[idx+6];
1386:       y[idy+7] = x[idx+7];
1387:     }
1388:     break;
1389:   case ADD_VALUES:
1390:   case ADD_ALL_VALUES:
1391:     for (i=0; i<n; i++) {
1392:       idx       = *indicesx++;
1393:       idy       = *indicesy++;
1394:       y[idy]   += x[idx];
1395:       y[idy+1] += x[idx+1];
1396:       y[idy+2] += x[idx+2];
1397:       y[idy+3] += x[idx+3];
1398:       y[idy+4] += x[idx+4];
1399:       y[idy+5] += x[idx+5];
1400:       y[idy+6] += x[idx+6];
1401:       y[idy+7] += x[idx+7];
1402:     }
1403:     break;
1404: #if !defined(PETSC_USE_COMPLEX)
1405:   case MAX_VALUES:
1406:     for (i=0; i<n; i++) {
1407:       idx      = *indicesx++;
1408:       idy      = *indicesy++;
1409:       y[idy]   = PetscMax(y[idy],x[idx]);
1410:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1411:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1412:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1413:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1414:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1415:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1416:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1417:     }
1418: #else
1419:   case MAX_VALUES:
1420: #endif
1421:   case NOT_SET_VALUES:
1422:     break;
1423:   default:
1424:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1425:   }
1426:   return(0);
1427: }

1429: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1430: {
1431:   PetscInt i,idx;

1433:   for (i=0; i<n; i++) {
1434:     idx   = *indicesx++;
1435:     y[0]  = x[idx];
1436:     y[1]  = x[idx+1];
1437:     y[2]  = x[idx+2];
1438:     y[3]  = x[idx+3];
1439:     y[4]  = x[idx+4];
1440:     y[5]  = x[idx+5];
1441:     y[6]  = x[idx+6];
1442:     y[7]  = x[idx+7];
1443:     y[8]  = x[idx+8];
1444:     y    += 9;
1445:   }
1446: }

1450: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1451: {
1452:   PetscInt i,idy;

1455:   switch (addv) {
1456:   case INSERT_VALUES:
1457:   case INSERT_ALL_VALUES:
1458:     for (i=0; i<n; i++) {
1459:       idy       = *indicesy++;
1460:       y[idy]    = x[0];
1461:       y[idy+1]  = x[1];
1462:       y[idy+2]  = x[2];
1463:       y[idy+3]  = x[3];
1464:       y[idy+4]  = x[4];
1465:       y[idy+5]  = x[5];
1466:       y[idy+6]  = x[6];
1467:       y[idy+7]  = x[7];
1468:       y[idy+8]  = x[8];
1469:       x        += 9;
1470:     }
1471:     break;
1472:   case ADD_VALUES:
1473:   case ADD_ALL_VALUES:
1474:     for (i=0; i<n; i++) {
1475:       idy        = *indicesy++;
1476:       y[idy]    += x[0];
1477:       y[idy+1]  += x[1];
1478:       y[idy+2]  += x[2];
1479:       y[idy+3]  += x[3];
1480:       y[idy+4]  += x[4];
1481:       y[idy+5]  += x[5];
1482:       y[idy+6]  += x[6];
1483:       y[idy+7]  += x[7];
1484:       y[idy+8]  += x[8];
1485:       x         += 9;
1486:     }
1487:     break;
1488: #if !defined(PETSC_USE_COMPLEX)
1489:   case MAX_VALUES:
1490:     for (i=0; i<n; i++) {
1491:       idy       = *indicesy++;
1492:       y[idy]    = PetscMax(y[idy],x[0]);
1493:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1494:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1495:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1496:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1497:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1498:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1499:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1500:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1501:       x        += 9;
1502:     }
1503: #else
1504:   case MAX_VALUES:
1505: #endif
1506:   case NOT_SET_VALUES:
1507:     break;
1508:   default:
1509:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1510:   }
1511:   return(0);
1512: }

1516: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1517: {
1518:   PetscInt i,idx,idy;

1521:   switch (addv) {
1522:   case INSERT_VALUES:
1523:   case INSERT_ALL_VALUES:
1524:     for (i=0; i<n; i++) {
1525:       idx       = *indicesx++;
1526:       idy       = *indicesy++;
1527:       y[idy]    = x[idx];
1528:       y[idy+1]  = x[idx+1];
1529:       y[idy+2]  = x[idx+2];
1530:       y[idy+3]  = x[idx+3];
1531:       y[idy+4]  = x[idx+4];
1532:       y[idy+5]  = x[idx+5];
1533:       y[idy+6]  = x[idx+6];
1534:       y[idy+7]  = x[idx+7];
1535:       y[idy+8]  = x[idx+8];
1536:     }
1537:     break;
1538:   case ADD_VALUES:
1539:   case ADD_ALL_VALUES:
1540:     for (i=0; i<n; i++) {
1541:       idx        = *indicesx++;
1542:       idy        = *indicesy++;
1543:       y[idy]    += x[idx];
1544:       y[idy+1]  += x[idx+1];
1545:       y[idy+2]  += x[idx+2];
1546:       y[idy+3]  += x[idx+3];
1547:       y[idy+4]  += x[idx+4];
1548:       y[idy+5]  += x[idx+5];
1549:       y[idy+6]  += x[idx+6];
1550:       y[idy+7]  += x[idx+7];
1551:       y[idy+8]  += x[idx+8];
1552:     }
1553:     break;
1554: #if !defined(PETSC_USE_COMPLEX)
1555:   case MAX_VALUES:
1556:     for (i=0; i<n; i++) {
1557:       idx       = *indicesx++;
1558:       idy       = *indicesy++;
1559:       y[idy]    = PetscMax(y[idy],x[idx]);
1560:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1561:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1562:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1563:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1564:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1565:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1566:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1567:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1568:     }
1569: #else
1570:   case MAX_VALUES:
1571: #endif
1572:   case NOT_SET_VALUES:
1573:     break;
1574:   default:
1575:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1576:   }
1577:   return(0);
1578: }

1580: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1581: {
1582:   PetscInt i,idx;

1584:   for (i=0; i<n; i++) {
1585:     idx   = *indicesx++;
1586:     y[0]  = x[idx];
1587:     y[1]  = x[idx+1];
1588:     y[2]  = x[idx+2];
1589:     y[3]  = x[idx+3];
1590:     y[4]  = x[idx+4];
1591:     y[5]  = x[idx+5];
1592:     y[6]  = x[idx+6];
1593:     y[7]  = x[idx+7];
1594:     y[8]  = x[idx+8];
1595:     y[9]  = x[idx+9];
1596:     y    += 10;
1597:   }
1598: }

1602: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1603: {
1604:   PetscInt i,idy;

1607:   switch (addv) {
1608:   case INSERT_VALUES:
1609:   case INSERT_ALL_VALUES:
1610:     for (i=0; i<n; i++) {
1611:       idy       = *indicesy++;
1612:       y[idy]    = x[0];
1613:       y[idy+1]  = x[1];
1614:       y[idy+2]  = x[2];
1615:       y[idy+3]  = x[3];
1616:       y[idy+4]  = x[4];
1617:       y[idy+5]  = x[5];
1618:       y[idy+6]  = x[6];
1619:       y[idy+7]  = x[7];
1620:       y[idy+8]  = x[8];
1621:       y[idy+9]  = x[9];
1622:       x        += 10;
1623:     }
1624:     break;
1625:   case ADD_VALUES:
1626:   case ADD_ALL_VALUES:
1627:     for (i=0; i<n; i++) {
1628:       idy        = *indicesy++;
1629:       y[idy]    += x[0];
1630:       y[idy+1]  += x[1];
1631:       y[idy+2]  += x[2];
1632:       y[idy+3]  += x[3];
1633:       y[idy+4]  += x[4];
1634:       y[idy+5]  += x[5];
1635:       y[idy+6]  += x[6];
1636:       y[idy+7]  += x[7];
1637:       y[idy+8]  += x[8];
1638:       y[idy+9]  += x[9];
1639:       x         += 10;
1640:     }
1641:     break;
1642: #if !defined(PETSC_USE_COMPLEX)
1643:   case MAX_VALUES:
1644:     for (i=0; i<n; i++) {
1645:       idy       = *indicesy++;
1646:       y[idy]    = PetscMax(y[idy],x[0]);
1647:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1648:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1649:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1650:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1651:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1652:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1653:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1654:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1655:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1656:       x        += 10;
1657:     }
1658: #else
1659:   case MAX_VALUES:
1660: #endif
1661:   case NOT_SET_VALUES:
1662:     break;
1663:   default:
1664:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1665:   }
1666:   return(0);
1667: }

1671: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1672: {
1673:   PetscInt i,idx,idy;

1676:   switch (addv) {
1677:   case INSERT_VALUES:
1678:   case INSERT_ALL_VALUES:
1679:     for (i=0; i<n; i++) {
1680:       idx       = *indicesx++;
1681:       idy       = *indicesy++;
1682:       y[idy]    = x[idx];
1683:       y[idy+1]  = x[idx+1];
1684:       y[idy+2]  = x[idx+2];
1685:       y[idy+3]  = x[idx+3];
1686:       y[idy+4]  = x[idx+4];
1687:       y[idy+5]  = x[idx+5];
1688:       y[idy+6]  = x[idx+6];
1689:       y[idy+7]  = x[idx+7];
1690:       y[idy+8]  = x[idx+8];
1691:       y[idy+9]  = x[idx+9];
1692:     }
1693:     break;
1694:   case ADD_VALUES:
1695:   case ADD_ALL_VALUES:
1696:     for (i=0; i<n; i++) {
1697:       idx        = *indicesx++;
1698:       idy        = *indicesy++;
1699:       y[idy]    += x[idx];
1700:       y[idy+1]  += x[idx+1];
1701:       y[idy+2]  += x[idx+2];
1702:       y[idy+3]  += x[idx+3];
1703:       y[idy+4]  += x[idx+4];
1704:       y[idy+5]  += x[idx+5];
1705:       y[idy+6]  += x[idx+6];
1706:       y[idy+7]  += x[idx+7];
1707:       y[idy+8]  += x[idx+8];
1708:       y[idy+9]  += x[idx+9];
1709:     }
1710:     break;
1711: #if !defined(PETSC_USE_COMPLEX)
1712:   case MAX_VALUES:
1713:     for (i=0; i<n; i++) {
1714:       idx       = *indicesx++;
1715:       idy       = *indicesy++;
1716:       y[idy]    = PetscMax(y[idy],x[idx]);
1717:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1718:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1719:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1720:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1721:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1722:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1723:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1724:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1725:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1726:     }
1727: #else
1728:   case MAX_VALUES:
1729: #endif
1730:   case NOT_SET_VALUES:
1731:     break;
1732:   default:
1733:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1734:   }
1735:   return(0);
1736: }

1738: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1739: {
1740:   PetscInt i,idx;

1742:   for (i=0; i<n; i++) {
1743:     idx   = *indicesx++;
1744:     y[0]  = x[idx];
1745:     y[1]  = x[idx+1];
1746:     y[2]  = x[idx+2];
1747:     y[3]  = x[idx+3];
1748:     y[4]  = x[idx+4];
1749:     y[5]  = x[idx+5];
1750:     y[6]  = x[idx+6];
1751:     y[7]  = x[idx+7];
1752:     y[8]  = x[idx+8];
1753:     y[9]  = x[idx+9];
1754:     y[10] = x[idx+10];
1755:     y    += 11;
1756:   }
1757: }

1761: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1762: {
1763:   PetscInt i,idy;

1766:   switch (addv) {
1767:   case INSERT_VALUES:
1768:   case INSERT_ALL_VALUES:
1769:     for (i=0; i<n; i++) {
1770:       idy       = *indicesy++;
1771:       y[idy]    = x[0];
1772:       y[idy+1]  = x[1];
1773:       y[idy+2]  = x[2];
1774:       y[idy+3]  = x[3];
1775:       y[idy+4]  = x[4];
1776:       y[idy+5]  = x[5];
1777:       y[idy+6]  = x[6];
1778:       y[idy+7]  = x[7];
1779:       y[idy+8]  = x[8];
1780:       y[idy+9]  = x[9];
1781:       y[idy+10] = x[10];
1782:       x        += 11;
1783:     }
1784:     break;
1785:   case ADD_VALUES:
1786:   case ADD_ALL_VALUES:
1787:     for (i=0; i<n; i++) {
1788:       idy        = *indicesy++;
1789:       y[idy]    += x[0];
1790:       y[idy+1]  += x[1];
1791:       y[idy+2]  += x[2];
1792:       y[idy+3]  += x[3];
1793:       y[idy+4]  += x[4];
1794:       y[idy+5]  += x[5];
1795:       y[idy+6]  += x[6];
1796:       y[idy+7]  += x[7];
1797:       y[idy+8]  += x[8];
1798:       y[idy+9]  += x[9];
1799:       y[idy+10] += x[10];
1800:       x         += 11;
1801:     }
1802:     break;
1803: #if !defined(PETSC_USE_COMPLEX)
1804:   case MAX_VALUES:
1805:     for (i=0; i<n; i++) {
1806:       idy       = *indicesy++;
1807:       y[idy]    = PetscMax(y[idy],x[0]);
1808:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1809:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1810:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1811:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1812:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1813:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1814:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1815:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1816:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1817:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1818:       x        += 11;
1819:     }
1820: #else
1821:   case MAX_VALUES:
1822: #endif
1823:   case NOT_SET_VALUES:
1824:     break;
1825:   default:
1826:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1827:   }
1828:   return(0);
1829: }

1833: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1834: {
1835:   PetscInt i,idx,idy;

1838:   switch (addv) {
1839:   case INSERT_VALUES:
1840:   case INSERT_ALL_VALUES:
1841:     for (i=0; i<n; i++) {
1842:       idx       = *indicesx++;
1843:       idy       = *indicesy++;
1844:       y[idy]    = x[idx];
1845:       y[idy+1]  = x[idx+1];
1846:       y[idy+2]  = x[idx+2];
1847:       y[idy+3]  = x[idx+3];
1848:       y[idy+4]  = x[idx+4];
1849:       y[idy+5]  = x[idx+5];
1850:       y[idy+6]  = x[idx+6];
1851:       y[idy+7]  = x[idx+7];
1852:       y[idy+8]  = x[idx+8];
1853:       y[idy+9]  = x[idx+9];
1854:       y[idy+10] = x[idx+10];
1855:     }
1856:     break;
1857:   case ADD_VALUES:
1858:   case ADD_ALL_VALUES:
1859:     for (i=0; i<n; i++) {
1860:       idx        = *indicesx++;
1861:       idy        = *indicesy++;
1862:       y[idy]    += x[idx];
1863:       y[idy+1]  += x[idx+1];
1864:       y[idy+2]  += x[idx+2];
1865:       y[idy+3]  += x[idx+3];
1866:       y[idy+4]  += x[idx+4];
1867:       y[idy+5]  += x[idx+5];
1868:       y[idy+6]  += x[idx+6];
1869:       y[idy+7]  += x[idx+7];
1870:       y[idy+8]  += x[idx+8];
1871:       y[idy+9]  += x[idx+9];
1872:       y[idy+10] += x[idx+10];
1873:     }
1874:     break;
1875: #if !defined(PETSC_USE_COMPLEX)
1876:   case MAX_VALUES:
1877:     for (i=0; i<n; i++) {
1878:       idx       = *indicesx++;
1879:       idy       = *indicesy++;
1880:       y[idy]    = PetscMax(y[idy],x[idx]);
1881:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1882:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1883:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1884:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1885:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1886:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1887:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1888:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1889:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1890:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1891:     }
1892: #else
1893:   case MAX_VALUES:
1894: #endif
1895:   case NOT_SET_VALUES:
1896:     break;
1897:   default:
1898:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1899:   }
1900:   return(0);
1901: }

1903: /* ----------------------------------------------------------------------------------------------- */
1904: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1905: {
1906:   PetscInt i,idx;

1908:   for (i=0; i<n; i++) {
1909:     idx   = *indicesx++;
1910:     y[0]  = x[idx];
1911:     y[1]  = x[idx+1];
1912:     y[2]  = x[idx+2];
1913:     y[3]  = x[idx+3];
1914:     y[4]  = x[idx+4];
1915:     y[5]  = x[idx+5];
1916:     y[6]  = x[idx+6];
1917:     y[7]  = x[idx+7];
1918:     y[8]  = x[idx+8];
1919:     y[9]  = x[idx+9];
1920:     y[10] = x[idx+10];
1921:     y[11] = x[idx+11];
1922:     y    += 12;
1923:   }
1924: }

1928: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1929: {
1930:   PetscInt i,idy;

1933:   switch (addv) {
1934:   case INSERT_VALUES:
1935:   case INSERT_ALL_VALUES:
1936:     for (i=0; i<n; i++) {
1937:       idy       = *indicesy++;
1938:       y[idy]    = x[0];
1939:       y[idy+1]  = x[1];
1940:       y[idy+2]  = x[2];
1941:       y[idy+3]  = x[3];
1942:       y[idy+4]  = x[4];
1943:       y[idy+5]  = x[5];
1944:       y[idy+6]  = x[6];
1945:       y[idy+7]  = x[7];
1946:       y[idy+8]  = x[8];
1947:       y[idy+9]  = x[9];
1948:       y[idy+10] = x[10];
1949:       y[idy+11] = x[11];
1950:       x        += 12;
1951:     }
1952:     break;
1953:   case ADD_VALUES:
1954:   case ADD_ALL_VALUES:
1955:     for (i=0; i<n; i++) {
1956:       idy        = *indicesy++;
1957:       y[idy]    += x[0];
1958:       y[idy+1]  += x[1];
1959:       y[idy+2]  += x[2];
1960:       y[idy+3]  += x[3];
1961:       y[idy+4]  += x[4];
1962:       y[idy+5]  += x[5];
1963:       y[idy+6]  += x[6];
1964:       y[idy+7]  += x[7];
1965:       y[idy+8]  += x[8];
1966:       y[idy+9]  += x[9];
1967:       y[idy+10] += x[10];
1968:       y[idy+11] += x[11];
1969:       x         += 12;
1970:     }
1971:     break;
1972: #if !defined(PETSC_USE_COMPLEX)
1973:   case MAX_VALUES:
1974:     for (i=0; i<n; i++) {
1975:       idy       = *indicesy++;
1976:       y[idy]    = PetscMax(y[idy],x[0]);
1977:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1978:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1979:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1980:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1981:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1982:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1983:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1984:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1985:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1986:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1987:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1988:       x        += 12;
1989:     }
1990: #else
1991:   case MAX_VALUES:
1992: #endif
1993:   case NOT_SET_VALUES:
1994:     break;
1995:   default:
1996:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1997:   }
1998:   return(0);
1999: }

2003: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2004: {
2005:   PetscInt i,idx,idy;

2008:   switch (addv) {
2009:   case INSERT_VALUES:
2010:   case INSERT_ALL_VALUES:
2011:     for (i=0; i<n; i++) {
2012:       idx       = *indicesx++;
2013:       idy       = *indicesy++;
2014:       y[idy]    = x[idx];
2015:       y[idy+1]  = x[idx+1];
2016:       y[idy+2]  = x[idx+2];
2017:       y[idy+3]  = x[idx+3];
2018:       y[idy+4]  = x[idx+4];
2019:       y[idy+5]  = x[idx+5];
2020:       y[idy+6]  = x[idx+6];
2021:       y[idy+7]  = x[idx+7];
2022:       y[idy+8]  = x[idx+8];
2023:       y[idy+9]  = x[idx+9];
2024:       y[idy+10] = x[idx+10];
2025:       y[idy+11] = x[idx+11];
2026:     }
2027:     break;
2028:   case ADD_VALUES:
2029:   case ADD_ALL_VALUES:
2030:     for (i=0; i<n; i++) {
2031:       idx        = *indicesx++;
2032:       idy        = *indicesy++;
2033:       y[idy]    += x[idx];
2034:       y[idy+1]  += x[idx+1];
2035:       y[idy+2]  += x[idx+2];
2036:       y[idy+3]  += x[idx+3];
2037:       y[idy+4]  += x[idx+4];
2038:       y[idy+5]  += x[idx+5];
2039:       y[idy+6]  += x[idx+6];
2040:       y[idy+7]  += x[idx+7];
2041:       y[idy+8]  += x[idx+8];
2042:       y[idy+9]  += x[idx+9];
2043:       y[idy+10] += x[idx+10];
2044:       y[idy+11] += x[idx+11];
2045:     }
2046:     break;
2047: #if !defined(PETSC_USE_COMPLEX)
2048:   case MAX_VALUES:
2049:     for (i=0; i<n; i++) {
2050:       idx       = *indicesx++;
2051:       idy       = *indicesy++;
2052:       y[idy]    = PetscMax(y[idy],x[idx]);
2053:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
2054:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
2055:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
2056:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
2057:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
2058:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
2059:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
2060:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
2061:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
2062:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
2063:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
2064:     }
2065: #else
2066:   case MAX_VALUES:
2067: #endif
2068:   case NOT_SET_VALUES:
2069:     break;
2070:   default:
2071:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2072:   }
2073:   return(0);
2074: }

2076: /* ----------------------------------------------------------------------------------------------- */
2077: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
2078: {
2079:   PetscInt       i,idx;

2081:   for (i=0; i<n; i++) {
2082:     idx   = *indicesx++;
2083:     PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
2084:     y    += bs;
2085:   }
2086: }

2090: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2091: {
2092:   PetscInt i,idy,j;

2095:   switch (addv) {
2096:   case INSERT_VALUES:
2097:   case INSERT_ALL_VALUES:
2098:     for (i=0; i<n; i++) {
2099:       idy       = *indicesy++;
2100:       PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
2101:       x        += bs;
2102:     }
2103:     break;
2104:   case ADD_VALUES:
2105:   case ADD_ALL_VALUES:
2106:     for (i=0; i<n; i++) {
2107:       idy        = *indicesy++;
2108:       for (j=0; j<bs; j++) y[idy+j] += x[j];
2109:       x         += bs;
2110:     }
2111:     break;
2112: #if !defined(PETSC_USE_COMPLEX)
2113:   case MAX_VALUES:
2114:     for (i=0; i<n; i++) {
2115:       idy = *indicesy++;
2116:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2117:       x  += bs;
2118:     }
2119: #else
2120:   case MAX_VALUES:
2121: #endif
2122:   case NOT_SET_VALUES:
2123:     break;
2124:   default:
2125:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2126:   }
2127:   return(0);
2128: }

2132: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2133: {
2134:   PetscInt i,idx,idy,j;

2137:   switch (addv) {
2138:   case INSERT_VALUES:
2139:   case INSERT_ALL_VALUES:
2140:     for (i=0; i<n; i++) {
2141:       idx       = *indicesx++;
2142:       idy       = *indicesy++;
2143:       PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2144:     }
2145:     break;
2146:   case ADD_VALUES:
2147:   case ADD_ALL_VALUES:
2148:     for (i=0; i<n; i++) {
2149:       idx        = *indicesx++;
2150:       idy        = *indicesy++;
2151:       for (j=0; j<bs; j++ )  y[idy+j] += x[idx+j];
2152:     }
2153:     break;
2154: #if !defined(PETSC_USE_COMPLEX)
2155:   case MAX_VALUES:
2156:     for (i=0; i<n; i++) {
2157:       idx       = *indicesx++;
2158:       idy       = *indicesy++;
2159:       for (j=0; j<bs; j++ )  y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2160:     }
2161: #else
2162:   case MAX_VALUES:
2163: #endif
2164:   case NOT_SET_VALUES:
2165:     break;
2166:   default:
2167:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2168:   }
2169:   return(0);
2170: }

2172: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2173: #define BS 1
2174: #include <../src/vec/vec/utils/vpscat.h>
2175: #define BS 2
2176: #include <../src/vec/vec/utils/vpscat.h>
2177: #define BS 3
2178: #include <../src/vec/vec/utils/vpscat.h>
2179: #define BS 4
2180: #include <../src/vec/vec/utils/vpscat.h>
2181: #define BS 5
2182: #include <../src/vec/vec/utils/vpscat.h>
2183: #define BS 6
2184: #include <../src/vec/vec/utils/vpscat.h>
2185: #define BS 7
2186: #include <../src/vec/vec/utils/vpscat.h>
2187: #define BS 8
2188: #include <../src/vec/vec/utils/vpscat.h>
2189: #define BS 9
2190: #include <../src/vec/vec/utils/vpscat.h>
2191: #define BS 10
2192: #include <../src/vec/vec/utils/vpscat.h>
2193: #define BS 11
2194: #include <../src/vec/vec/utils/vpscat.h>
2195: #define BS 12
2196: #include <../src/vec/vec/utils/vpscat.h>
2197: #define BS bs
2198: #include <../src/vec/vec/utils/vpscat.h>

2200: /* ==========================================================================================*/

2202: /*              create parallel to sequential scatter context                           */

2204: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

2208: /*@
2209:      VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.

2211:      Collective on VecScatter

2213:    Input Parameters:
2214: +     VecScatter - obtained with VecScatterCreateEmpty()
2215: .     nsends -
2216: .     sendSizes -
2217: .     sendProcs -
2218: .     sendIdx - indices where the sent entries are obtained from (in local, on process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
2219: .     nrecvs - number of receives to expect
2220: .     recvSizes -
2221: .     recvProcs - processes who are sending to me
2222: .     recvIdx - indices of where received entries are to be put, (in local, on process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
2223: -     bs - size of block

2225:      Notes:  sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
2226:       to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.

2228:        Probably does not handle sends to self properly. It should remove those from the counts that are used
2229:       in allocating space inside of the from struct

2231:   Level: intermediate

2233: @*/
2234: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
2235: {
2236:   VecScatter_MPI_General *from, *to;
2237:   PetscInt               sendSize, recvSize;
2238:   PetscInt               n, i;
2239:   PetscErrorCode         ierr;

2241:   /* allocate entire send scatter context */
2242:   PetscNewLog(ctx,&to);
2243:   to->n = nsends;
2244:   for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];

2246:   PetscMalloc1(to->n,&to->requests);
2247:   PetscMalloc4(bs*sendSize,&to->values,sendSize,&to->indices,to->n+1,&to->starts,to->n,&to->procs);
2248:   PetscMalloc2(PetscMax(to->n,nrecvs),&to->sstatus,PetscMax(to->n,nrecvs),&to->rstatus);

2250:   to->starts[0] = 0;
2251:   for (n = 0; n < to->n; n++) {
2252:     if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
2253:     to->starts[n+1] = to->starts[n] + sendSizes[n];
2254:     to->procs[n]    = sendProcs[n];
2255:     for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
2256:   }
2257:   ctx->todata = (void*) to;

2259:   /* allocate entire receive scatter context */
2260:   PetscNewLog(ctx,&from);
2261:   from->n = nrecvs;
2262:   for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];

2264:   PetscMalloc1(from->n,&from->requests);
2265:   PetscMalloc4(bs*recvSize,&from->values,recvSize,&from->indices,from->n+1,&from->starts,from->n,&from->procs);

2267:   from->starts[0] = 0;
2268:   for (n = 0; n < from->n; n++) {
2269:     if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
2270:     from->starts[n+1] = from->starts[n] + recvSizes[n];
2271:     from->procs[n]    = recvProcs[n];
2272:     for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
2273:   }
2274:   ctx->fromdata = (void*)from;

2276:   /* No local scatter optimization */
2277:   from->local.n                    = 0;
2278:   from->local.vslots               = 0;
2279:   to->local.n                      = 0;
2280:   to->local.vslots                 = 0;
2281:   from->local.nonmatching_computed = PETSC_FALSE;
2282:   from->local.n_nonmatching        = 0;
2283:   from->local.slots_nonmatching    = 0;
2284:   to->local.nonmatching_computed   = PETSC_FALSE;
2285:   to->local.n_nonmatching          = 0;
2286:   to->local.slots_nonmatching      = 0;

2288:   from->type = VEC_SCATTER_MPI_GENERAL;
2289:   to->type   = VEC_SCATTER_MPI_GENERAL;
2290:   from->bs   = bs;
2291:   to->bs     = bs;
2292:   VecScatterCreateCommon_PtoS(from, to, ctx);

2294:   /* mark lengths as negative so it won't check local vector lengths */
2295:   ctx->from_n = ctx->to_n = -1;
2296:   return(0);
2297: }

2299: /*
2300:    bs indicates how many elements there are in each block. Normally this would be 1.

2302:    contains check that PetscMPIInt can handle the sizes needed
2303: */
2306: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2307: {
2308:   VecScatter_MPI_General *from,*to;
2309:   PetscMPIInt            size,rank,imdex,tag,n;
2310:   PetscInt               *source = NULL,*owners = NULL,nxr;
2311:   PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
2312:   PetscMPIInt            *nprocs = NULL,nrecvs;
2313:   PetscInt               i,j,idx,nsends;
2314:   PetscMPIInt            *owner = NULL;
2315:   PetscInt               *starts = NULL,count,slen;
2316:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2317:   PetscMPIInt            *onodes1,*olengths1;
2318:   MPI_Comm               comm;
2319:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2320:   MPI_Status             recv_status,*send_status;
2321:   PetscErrorCode         ierr;

2324:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2325:   PetscObjectGetComm((PetscObject)xin,&comm);
2326:   MPI_Comm_rank(comm,&rank);
2327:   MPI_Comm_size(comm,&size);
2328:   owners = xin->map->range;
2329:   VecGetSize(yin,&lengthy);
2330:   VecGetSize(xin,&lengthx);

2332:   /*  first count number of contributors to each processor */
2333:   PetscMalloc2(size,&nprocs,nx,&owner);
2334:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2336:   j      = 0;
2337:   nsends = 0;
2338:   for (i=0; i<nx; i++) {
2339:     idx = bs*inidx[i];
2340:     if (idx < owners[j]) j = 0;
2341:     for (; j<size; j++) {
2342:       if (idx < owners[j+1]) {
2343:         if (!nprocs[j]++) nsends++;
2344:         owner[i] = j;
2345:         break;
2346:       }
2347:     }
2348:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2349:   }

2351:   nprocslocal  = nprocs[rank];
2352:   nprocs[rank] = 0;
2353:   if (nprocslocal) nsends--;
2354:   /* inform other processors of number of messages and max length*/
2355:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2356:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2357:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2358:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2360:   /* post receives:   */
2361:   PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2362:   count = 0;
2363:   for (i=0; i<nrecvs; i++) {
2364:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2365:     count += olengths1[i];
2366:   }

2368:   /* do sends:
2369:      1) starts[i] gives the starting index in svalues for stuff going to
2370:      the ith processor
2371:   */
2372:   nxr = 0;
2373:   for (i=0; i<nx; i++) {
2374:     if (owner[i] != rank) nxr++;
2375:   }
2376:   PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);

2378:   starts[0]  = 0;
2379:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2380:   for (i=0; i<nx; i++) {
2381:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2382:   }
2383:   starts[0] = 0;
2384:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2385:   count = 0;
2386:   for (i=0; i<size; i++) {
2387:     if (nprocs[i]) {
2388:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2389:     }
2390:   }

2392:   /*  wait on receives */
2393:   count = nrecvs;
2394:   slen  = 0;
2395:   while (count) {
2396:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2397:     /* unpack receives into our local space */
2398:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2399:     slen += n;
2400:     count--;
2401:   }

2403:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);

2405:   /* allocate entire send scatter context */
2406:   PetscNewLog(ctx,&to);
2407:   to->n = nrecvs;

2409:   PetscMalloc1(nrecvs,&to->requests);
2410:   PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2411:   PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);

2413:   ctx->todata   = (void*)to;
2414:   to->starts[0] = 0;

2416:   if (nrecvs) {
2417:     /* move the data into the send scatter */
2418:     base     = owners[rank];
2419:     rsvalues = rvalues;
2420:     for (i=0; i<nrecvs; i++) {
2421:       to->starts[i+1] = to->starts[i] + olengths1[i];
2422:       to->procs[i]    = onodes1[i];
2423:       values = rsvalues;
2424:       rsvalues += olengths1[i];
2425:       for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2426:     }
2427:   }
2428:   PetscFree(olengths1);
2429:   PetscFree(onodes1);
2430:   PetscFree3(rvalues,source,recv_waits);

2432:   /* allocate entire receive scatter context */
2433:   PetscNewLog(ctx,&from);
2434:   from->n = nsends;

2436:   PetscMalloc1(nsends,&from->requests);
2437:   PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2438:   ctx->fromdata = (void*)from;

2440:   /* move data into receive scatter */
2441:   PetscMalloc2(size,&lowner,nsends+1,&start);
2442:   count = 0; from->starts[0] = start[0] = 0;
2443:   for (i=0; i<size; i++) {
2444:     if (nprocs[i]) {
2445:       lowner[i]            = count;
2446:       from->procs[count++] = i;
2447:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
2448:     }
2449:   }

2451:   for (i=0; i<nx; i++) {
2452:     if (owner[i] != rank) {
2453:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2454:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2455:     }
2456:   }
2457:   PetscFree2(lowner,start);
2458:   PetscFree2(nprocs,owner);

2460:   /* wait on sends */
2461:   if (nsends) {
2462:     PetscMalloc1(nsends,&send_status);
2463:     MPI_Waitall(nsends,send_waits,send_status);
2464:     PetscFree(send_status);
2465:   }
2466:   PetscFree3(svalues,send_waits,starts);

2468:   if (nprocslocal) {
2469:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2470:     /* we have a scatter to ourselves */
2471:     PetscMalloc1(nt,&to->local.vslots);
2472:     PetscMalloc1(nt,&from->local.vslots);
2473:     nt   = 0;
2474:     for (i=0; i<nx; i++) {
2475:       idx = bs*inidx[i];
2476:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2477:         to->local.vslots[nt]     = idx - owners[rank];
2478:         from->local.vslots[nt++] = bs*inidy[i];
2479:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2480:       }
2481:     }
2482:     PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2483:   } else {
2484:     from->local.n      = 0;
2485:     from->local.vslots = 0;
2486:     to->local.n        = 0;
2487:     to->local.vslots   = 0;
2488:   }

2490:   from->local.nonmatching_computed = PETSC_FALSE;
2491:   from->local.n_nonmatching        = 0;
2492:   from->local.slots_nonmatching    = 0;
2493:   to->local.nonmatching_computed   = PETSC_FALSE;
2494:   to->local.n_nonmatching          = 0;
2495:   to->local.slots_nonmatching      = 0;

2497:   from->type = VEC_SCATTER_MPI_GENERAL;
2498:   to->type   = VEC_SCATTER_MPI_GENERAL;
2499:   from->bs   = bs;
2500:   to->bs     = bs;

2502:   VecScatterCreateCommon_PtoS(from,to,ctx);
2503:   return(0);
2504: }

2506: /*
2507:    bs indicates how many elements there are in each block. Normally this would be 1.
2508: */
2511: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2512: {
2513:   MPI_Comm       comm;
2514:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2515:   PetscInt       bs   = to->bs;
2516:   PetscMPIInt    size;
2517:   PetscInt       i, n;

2521:   PetscObjectGetComm((PetscObject)ctx,&comm);
2522:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2523:   ctx->ops->destroy = VecScatterDestroy_PtoP;

2525:   ctx->reproduce = PETSC_FALSE;
2526:   to->sendfirst  = PETSC_FALSE;
2527:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2528:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2529:   from->sendfirst = to->sendfirst;

2531:   MPI_Comm_size(comm,&size);
2532:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2533:   to->contiq = PETSC_FALSE;
2534:   n = from->starts[from->n];
2535:   from->contiq = PETSC_TRUE;
2536:   for (i=1; i<n; i++) {
2537:     if (from->indices[i] != from->indices[i-1] + bs) {
2538:       from->contiq = PETSC_FALSE;
2539:       break;
2540:     }
2541:   }

2543:   to->use_alltoallv = PETSC_FALSE;
2544:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2545:   from->use_alltoallv = to->use_alltoallv;
2546:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2547: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
2548:   if (to->use_alltoallv) {
2549:     to->use_alltoallw = PETSC_FALSE;
2550:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2551:   }
2552:   from->use_alltoallw = to->use_alltoallw;
2553:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2554: #endif

2556: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2557:   to->use_window = PETSC_FALSE;
2558:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_window",&to->use_window,NULL);
2559:   from->use_window = to->use_window;
2560: #endif

2562:   if (to->use_alltoallv) {

2564:     PetscMalloc2(size,&to->counts,size,&to->displs);
2565:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2566:     for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);

2568:     to->displs[0] = 0;
2569:     for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];

2571:     PetscMalloc2(size,&from->counts,size,&from->displs);
2572:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2573:     for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2574:     from->displs[0] = 0;
2575:     for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];

2577: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2578:     if (to->use_alltoallw) {
2579:       PetscMPIInt mpibs, mpilen;

2581:       ctx->packtogether = PETSC_FALSE;
2582:       PetscMPIIntCast(bs,&mpibs);
2583:       PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2584:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2585:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2586:       for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;

2588:       for (i=0; i<to->n; i++) {
2589:         to->wcounts[to->procs[i]] = 1;
2590:         PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2591:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2592:         MPI_Type_commit(to->types+to->procs[i]);
2593:       }
2594:       PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2595:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2596:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2597:       for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;

2599:       if (from->contiq) {
2600:         PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
2601:         for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);

2603:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2604:         for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];

2606:       } else {
2607:         for (i=0; i<from->n; i++) {
2608:           from->wcounts[from->procs[i]] = 1;
2609:           PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2610:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2611:           MPI_Type_commit(from->types+from->procs[i]);
2612:         }
2613:       }
2614:     } else ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;

2616: #else
2617:     to->use_alltoallw   = PETSC_FALSE;
2618:     from->use_alltoallw = PETSC_FALSE;
2619:     ctx->ops->copy      = VecScatterCopy_PtoP_AllToAll;
2620: #endif
2621: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2622:   } else if (to->use_window) {
2623:     PetscMPIInt temptag,winsize;
2624:     MPI_Request *request;
2625:     MPI_Status  *status;

2627:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2628:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2629:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2630:     PetscMalloc1(to->n,&to->winstarts);
2631:     PetscMalloc2(to->n,&request,to->n,&status);
2632:     for (i=0; i<to->n; i++) {
2633:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2634:     }
2635:     for (i=0; i<from->n; i++) {
2636:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2637:     }
2638:     MPI_Waitall(to->n,request,status);
2639:     PetscFree2(request,status);

2641:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2642:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2643:     PetscMalloc1(from->n,&from->winstarts);
2644:     PetscMalloc2(from->n,&request,from->n,&status);
2645:     for (i=0; i<from->n; i++) {
2646:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2647:     }
2648:     for (i=0; i<to->n; i++) {
2649:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2650:     }
2651:     MPI_Waitall(from->n,request,status);
2652:     PetscFree2(request,status);
2653: #endif
2654:   } else {
2655:     PetscBool   use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2656:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2657:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2658:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2659:     MPI_Request *rev_swaits,*rev_rwaits;
2660:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2662:     /* allocate additional wait variables for the "reverse" scatter */
2663:     PetscMalloc1(to->n,&rev_rwaits);
2664:     PetscMalloc1(from->n,&rev_swaits);
2665:     to->rev_requests   = rev_rwaits;
2666:     from->rev_requests = rev_swaits;

2668:     /* Register the receives that you will use later (sends for scatter reverse) */
2669:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&use_rsend,NULL);
2670:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&use_ssend,NULL);
2671:     if (use_rsend) {
2672:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2673:       to->use_readyreceiver   = PETSC_TRUE;
2674:       from->use_readyreceiver = PETSC_TRUE;
2675:     } else {
2676:       to->use_readyreceiver   = PETSC_FALSE;
2677:       from->use_readyreceiver = PETSC_FALSE;
2678:     }
2679:     if (use_ssend) {
2680:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2681:     }

2683:     for (i=0; i<from->n; i++) {
2684:       if (use_rsend) {
2685:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2686:       } else if (use_ssend) {
2687:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2688:       } else {
2689:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2690:       }
2691:     }

2693:     for (i=0; i<to->n; i++) {
2694:       if (use_rsend) {
2695:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2696:       } else if (use_ssend) {
2697:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2698:       } else {
2699:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2700:       }
2701:     }
2702:     /* Register receives for scatter and reverse */
2703:     for (i=0; i<from->n; i++) {
2704:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2705:     }
2706:     for (i=0; i<to->n; i++) {
2707:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2708:     }
2709:     if (use_rsend) {
2710:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2711:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2712:       MPI_Barrier(comm);
2713:     }

2715:     ctx->ops->copy = VecScatterCopy_PtoP_X;
2716:   }
2717:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2719: #if defined(PETSC_USE_DEBUG)
2720:   MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2721:   MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2722:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2723: #endif

2725:   switch (bs) {
2726:   case 12:
2727:     ctx->ops->begin = VecScatterBegin_12;
2728:     ctx->ops->end   = VecScatterEnd_12;
2729:     break;
2730:   case 11:
2731:     ctx->ops->begin = VecScatterBegin_11;
2732:     ctx->ops->end   = VecScatterEnd_11;
2733:     break;
2734:   case 10:
2735:     ctx->ops->begin = VecScatterBegin_10;
2736:     ctx->ops->end   = VecScatterEnd_10;
2737:     break;
2738:   case 9:
2739:     ctx->ops->begin = VecScatterBegin_9;
2740:     ctx->ops->end   = VecScatterEnd_9;
2741:     break;
2742:   case 8:
2743:     ctx->ops->begin = VecScatterBegin_8;
2744:     ctx->ops->end   = VecScatterEnd_8;
2745:     break;
2746:   case 7:
2747:     ctx->ops->begin = VecScatterBegin_7;
2748:     ctx->ops->end   = VecScatterEnd_7;
2749:     break;
2750:   case 6:
2751:     ctx->ops->begin = VecScatterBegin_6;
2752:     ctx->ops->end   = VecScatterEnd_6;
2753:     break;
2754:   case 5:
2755:     ctx->ops->begin = VecScatterBegin_5;
2756:     ctx->ops->end   = VecScatterEnd_5;
2757:     break;
2758:   case 4:
2759:     ctx->ops->begin = VecScatterBegin_4;
2760:     ctx->ops->end   = VecScatterEnd_4;
2761:     break;
2762:   case 3:
2763:     ctx->ops->begin = VecScatterBegin_3;
2764:     ctx->ops->end   = VecScatterEnd_3;
2765:     break;
2766:   case 2:
2767:     ctx->ops->begin = VecScatterBegin_2;
2768:     ctx->ops->end   = VecScatterEnd_2;
2769:     break;
2770:   case 1:
2771:     ctx->ops->begin = VecScatterBegin_1;
2772:     ctx->ops->end   = VecScatterEnd_1;
2773:     break;
2774:   default:
2775:     ctx->ops->begin = VecScatterBegin_bs;
2776:     ctx->ops->end   = VecScatterEnd_bs;

2778:   }
2779:   ctx->ops->view = VecScatterView_MPI;
2780:   /* Check if the local scatter is actually a copy; important special case */
2781:   if (to->local.n) {
2782:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2783:   }
2784:   return(0);
2785: }



2789: /* ------------------------------------------------------------------------------------*/
2790: /*
2791:          Scatter from local Seq vectors to a parallel vector.
2792:          Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2793:          reverses the result.
2794: */
2797: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2798: {
2799:   PetscErrorCode         ierr;
2800:   MPI_Request            *waits;
2801:   VecScatter_MPI_General *to,*from;

2804:   VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2805:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2806:   from          = (VecScatter_MPI_General*)ctx->todata;
2807:   ctx->todata   = (void*)to;
2808:   ctx->fromdata = (void*)from;
2809:   /* these two are special, they are ALWAYS stored in to struct */
2810:   to->sstatus   = from->sstatus;
2811:   to->rstatus   = from->rstatus;

2813:   from->sstatus = 0;
2814:   from->rstatus = 0;

2816:   waits              = from->rev_requests;
2817:   from->rev_requests = from->requests;
2818:   from->requests     = waits;
2819:   waits              = to->rev_requests;
2820:   to->rev_requests   = to->requests;
2821:   to->requests       = waits;
2822:   return(0);
2823: }

2825: /* ---------------------------------------------------------------------------------*/
2828: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2829: {
2831:   PetscMPIInt    size,rank,tag,imdex,n;
2832:   PetscInt       *owners = xin->map->range;
2833:   PetscMPIInt    *nprocs = NULL;
2834:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2835:   PetscMPIInt    *owner   = NULL;
2836:   PetscInt       *starts  = NULL,count,slen;
2837:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2838:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2839:   MPI_Comm       comm;
2840:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2841:   MPI_Status     recv_status,*send_status = NULL;
2842:   PetscBool      duplicate = PETSC_FALSE;
2843: #if defined(PETSC_USE_DEBUG)
2844:   PetscBool      found = PETSC_FALSE;
2845: #endif

2848:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2849:   PetscObjectGetComm((PetscObject)xin,&comm);
2850:   MPI_Comm_size(comm,&size);
2851:   MPI_Comm_rank(comm,&rank);
2852:   if (size == 1) {
2853:     VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2854:     return(0);
2855:   }

2857:   /*
2858:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2859:      They then call the StoPScatterCreate()
2860:   */
2861:   /*  first count number of contributors to each processor */
2862:   PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2863:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2865:   lastidx = -1;
2866:   j       = 0;
2867:   for (i=0; i<nx; i++) {
2868:     /* if indices are NOT locally sorted, need to start search at the beginning */
2869:     if (lastidx > (idx = bs*inidx[i])) j = 0;
2870:     lastidx = idx;
2871:     for (; j<size; j++) {
2872:       if (idx >= owners[j] && idx < owners[j+1]) {
2873:         nprocs[j]++;
2874:         owner[i] = j;
2875: #if defined(PETSC_USE_DEBUG)
2876:         found = PETSC_TRUE;
2877: #endif
2878:         break;
2879:       }
2880:     }
2881: #if defined(PETSC_USE_DEBUG)
2882:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2883:     found = PETSC_FALSE;
2884: #endif
2885:   }
2886:   nsends = 0;
2887:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

2889:   /* inform other processors of number of messages and max length*/
2890:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2891:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2892:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2893:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2895:   /* post receives:   */
2896:   PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);

2898:   count = 0;
2899:   for (i=0; i<nrecvs; i++) {
2900:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2901:     count += olengths1[i];
2902:   }
2903:   PetscFree(onodes1);

2905:   /* do sends:
2906:       1) starts[i] gives the starting index in svalues for stuff going to
2907:          the ith processor
2908:   */
2909:   starts[0]= 0;
2910:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2911:   for (i=0; i<nx; i++) {
2912:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2913:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2914:   }

2916:   starts[0] = 0;
2917:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2918:   count = 0;
2919:   for (i=0; i<size; i++) {
2920:     if (nprocs[i]) {
2921:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2922:       count++;
2923:     }
2924:   }
2925:   PetscFree3(nprocs,owner,starts);

2927:   /*  wait on receives */
2928:   count = nrecvs;
2929:   slen  = 0;
2930:   while (count) {
2931:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2932:     /* unpack receives into our local space */
2933:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2934:     slen += n/2;
2935:     count--;
2936:   }
2937:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2939:   PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2940:   base     = owners[rank];
2941:   count    = 0;
2942:   rsvalues = rvalues;
2943:   for (i=0; i<nrecvs; i++) {
2944:     values    = rsvalues;
2945:     rsvalues += 2*olengths1[i];
2946:     for (j=0; j<olengths1[i]; j++) {
2947:       local_inidx[count]   = values[2*j] - base;
2948:       local_inidy[count++] = values[2*j+1];
2949:     }
2950:   }
2951:   PetscFree(olengths1);

2953:   /* wait on sends */
2954:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2955:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2957:   /*
2958:      should sort and remove duplicates from local_inidx,local_inidy
2959:   */

2961: #if defined(do_it_slow)
2962:   /* sort on the from index */
2963:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2964:   start = 0;
2965:   while (start < slen) {
2966:     count = start+1;
2967:     last  = local_inidx[start];
2968:     while (count < slen && last == local_inidx[count]) count++;
2969:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2970:       /* sort on to index */
2971:       PetscSortInt(count-start,local_inidy+start);
2972:     }
2973:     /* remove duplicates; not most efficient way, but probably good enough */
2974:     i = start;
2975:     while (i < count-1) {
2976:       if (local_inidy[i] != local_inidy[i+1]) i++;
2977:       else { /* found a duplicate */
2978:         duplicate = PETSC_TRUE;
2979:         for (j=i; j<slen-1; j++) {
2980:           local_inidx[j] = local_inidx[j+1];
2981:           local_inidy[j] = local_inidy[j+1];
2982:         }
2983:         slen--;
2984:         count--;
2985:       }
2986:     }
2987:     start = count;
2988:   }
2989: #endif
2990:   if (duplicate) {
2991:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2992:   }
2993:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2994:   PetscFree2(local_inidx,local_inidy);
2995:   return(0);
2996: }

3000: /*@
3001:   PetscSFCreateFromZero - Create a PetscSF that maps a Vec from sequential to distributed

3003:   Input Parameters:
3004: . gv - A distributed Vec

3006:   Output Parameters:
3007: . sf - The SF created mapping a sequential Vec to gv

3009:   Level: developer

3011: .seealso: DMPlexDistributedToSequential()
3012: @*/
3013: PetscErrorCode PetscSFCreateFromZero(MPI_Comm comm, Vec gv, PetscSF *sf)
3014: {
3015:   PetscSFNode   *remotenodes;
3016:   PetscInt      *localnodes;
3017:   PetscInt       N, n, start, numroots, l;
3018:   PetscMPIInt    rank;

3022:   PetscSFCreate(comm, sf);
3023:   VecGetSize(gv, &N);
3024:   VecGetLocalSize(gv, &n);
3025:   VecGetOwnershipRange(gv, &start, NULL);
3026:   MPI_Comm_rank(comm, &rank);
3027:   PetscMalloc1(n, &localnodes);
3028:   PetscMalloc1(n, &remotenodes);
3029:   if (!rank) numroots = N;
3030:   else       numroots = 0;
3031:   for (l = 0; l < n; ++l) {
3032:     localnodes[l]        = l;
3033:     remotenodes[l].rank  = 0;
3034:     remotenodes[l].index = l+start;
3035:   }
3036:   PetscSFSetGraph(*sf, numroots, n, localnodes, PETSC_OWN_POINTER, remotenodes, PETSC_OWN_POINTER);
3037:   return(0);
3038: }