Actual source code: vpscat.c

petsc-3.8.3 2017-12-09
Report Typos and Errors

  2: /*
  3:     Defines parallel vector scatters.
  4: */

  6:  #include <../src/vec/vec/impls/dvecimpl.h>
  7:  #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  8:  #include <petscsf.h>

 10: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 11: {
 12:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 13:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 14:   PetscErrorCode         ierr;
 15:   PetscInt               i;
 16:   PetscMPIInt            rank;
 17:   PetscViewerFormat      format;
 18:   PetscBool              iascii;

 21:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 22:   if (iascii) {
 23:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 24:     PetscViewerGetFormat(viewer,&format);
 25:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 26:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 28:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 29:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 30:       itmp = to->starts[to->n+1];
 31:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 32:       itmp = from->starts[from->n+1];
 33:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 34:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 36:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 37:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 38:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 39:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 41:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 43:     } else {
 44:       PetscViewerASCIIPushSynchronized(viewer);
 45:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 46:       if (to->n) {
 47:         for (i=0; i<to->n; i++) {
 48:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 49:         }
 50:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 51:         for (i=0; i<to->starts[to->n]; i++) {
 52:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 53:         }
 54:       }

 56:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 57:       if (from->n) {
 58:         for (i=0; i<from->n; i++) {
 59:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 60:         }

 62:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 63:         for (i=0; i<from->starts[from->n]; i++) {
 64:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 65:         }
 66:       }
 67:       if (to->local.n) {
 68:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 69:         for (i=0; i<to->local.n; i++) {  /* the to and from have the opposite meaning from what you would expect */
 70:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
 71:         }
 72:       }

 74:       PetscViewerFlush(viewer);
 75:       PetscViewerASCIIPopSynchronized(viewer);
 76:     }
 77:   }
 78:   return(0);
 79: }

 81: /* -----------------------------------------------------------------------------------*/
 82: /*
 83:       The next routine determines what part of  the local part of the scatter is an
 84:   exact copy of values into their current location. We check this here and
 85:   then know that we need not perform that portion of the scatter when the vector is
 86:   scattering to itself with INSERT_VALUES.

 88:      This is currently not used but would speed up, for example DMLocalToLocalBegin/End()

 90: */
 91: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
 92: {
 93:   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
 95:   PetscInt       *nto_slots,*nfrom_slots,j = 0;

 98:   for (i=0; i<n; i++) {
 99:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
100:   }

102:   if (!n_nonmatching) {
103:     to->nonmatching_computed = PETSC_TRUE;
104:     to->n_nonmatching        = from->n_nonmatching = 0;

106:     PetscInfo1(scatter,"Reduced %D to 0\n", n);
107:   } else if (n_nonmatching == n) {
108:     to->nonmatching_computed = PETSC_FALSE;

110:     PetscInfo(scatter,"All values non-matching\n");
111:   } else {
112:     to->nonmatching_computed= PETSC_TRUE;
113:     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;

115:     PetscMalloc1(n_nonmatching,&nto_slots);
116:     PetscMalloc1(n_nonmatching,&nfrom_slots);

118:     to->slots_nonmatching   = nto_slots;
119:     from->slots_nonmatching = nfrom_slots;
120:     for (i=0; i<n; i++) {
121:       if (to_slots[i] != from_slots[i]) {
122:         nto_slots[j]   = to_slots[i];
123:         nfrom_slots[j] = from_slots[i];
124:         j++;
125:       }
126:     }
127:     PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
128:   }
129:   return(0);
130: }

132: /* --------------------------------------------------------------------------------------*/

134: /* -------------------------------------------------------------------------------------*/
135: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
136: {
137:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
138:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
139:   PetscErrorCode         ierr;
140:   PetscInt               i;

143:   if (to->use_readyreceiver) {
144:     /*
145:        Since we have already posted sends we must cancel them before freeing
146:        the requests
147:     */
148:     for (i=0; i<from->n; i++) {
149:       MPI_Cancel(from->requests+i);
150:     }
151:     for (i=0; i<to->n; i++) {
152:       MPI_Cancel(to->rev_requests+i);
153:     }
154:     MPI_Waitall(from->n,from->requests,to->rstatus);
155:     MPI_Waitall(to->n,to->rev_requests,to->rstatus);
156:   }

158: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
159:   if (to->use_alltoallw) {
160:     for (i=0; i<to->n; i++) {
161:       MPI_Type_free(to->types+to->procs[i]);
162:     }
163:     PetscFree3(to->wcounts,to->wdispls,to->types);
164:     if (!from->contiq) {
165:       for (i=0; i<from->n; i++) {
166:         MPI_Type_free(from->types+from->procs[i]);
167:       }
168:     }
169:     PetscFree3(from->wcounts,from->wdispls,from->types);
170:   }
171: #endif

173: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
174:   if (to->use_window) {
175:     MPI_Win_free(&from->window);
176:     MPI_Win_free(&to->window);
177:     PetscFree(from->winstarts);
178:     PetscFree(to->winstarts);
179:   }
180: #endif

182:   if (to->use_alltoallv) {
183:     PetscFree2(to->counts,to->displs);
184:     PetscFree2(from->counts,from->displs);
185:   }

187:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
188:   /*
189:      IBM's PE version of MPI has a bug where freeing these guys will screw up later
190:      message passing.
191:   */
192: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
193:   if (!to->use_alltoallv && !to->use_window) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
194:     if (to->requests) {
195:       for (i=0; i<to->n; i++) {
196:         MPI_Request_free(to->requests + i);
197:       }
198:     }
199:     if (to->rev_requests) {
200:       for (i=0; i<to->n; i++) {
201:         MPI_Request_free(to->rev_requests + i);
202:       }
203:     }
204:   }
205:   /*
206:       MPICH could not properly cancel requests thus with ready receiver mode we
207:     cannot free the requests. It may be fixed now, if not then put the following
208:     code inside a if (!to->use_readyreceiver) {
209:   */
210:   if (!to->use_alltoallv && !to->use_window) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
211:     if (from->requests) {
212:       for (i=0; i<from->n; i++) {
213:         MPI_Request_free(from->requests + i);
214:       }
215:     }

217:     if (from->rev_requests) {
218:       for (i=0; i<from->n; i++) {
219:         MPI_Request_free(from->rev_requests + i);
220:       }
221:     }
222:   }
223: #endif

225:   PetscFree(to->local.vslots);
226:   PetscFree(from->local.vslots);
227:   PetscFree2(to->counts,to->displs);
228:   PetscFree2(from->counts,from->displs);
229:   PetscFree(to->local.slots_nonmatching);
230:   PetscFree(from->local.slots_nonmatching);
231:   PetscFree(to->rev_requests);
232:   PetscFree(from->rev_requests);
233:   PetscFree(to->requests);
234:   PetscFree(from->requests);
235:   PetscFree4(to->values,to->indices,to->starts,to->procs);
236:   PetscFree2(to->sstatus,to->rstatus);
237:   PetscFree4(from->values,from->indices,from->starts,from->procs);
238:   PetscFree(from);
239:   PetscFree(to);
240:   return(0);
241: }



245: /* --------------------------------------------------------------------------------------*/
246: /*
247:     Special optimization to see if the local part of the scatter is actually
248:     a copy. The scatter routines call PetscMemcpy() instead.

250: */
251: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
252: {
253:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
254:   PetscInt       to_start,from_start;

258:   to_start   = to_slots[0];
259:   from_start = from_slots[0];

261:   for (i=1; i<n; i++) {
262:     to_start   += bs;
263:     from_start += bs;
264:     if (to_slots[i]   != to_start)   return(0);
265:     if (from_slots[i] != from_start) return(0);
266:   }
267:   to->is_copy       = PETSC_TRUE;
268:   to->copy_start    = to_slots[0];
269:   to->copy_length   = bs*sizeof(PetscScalar)*n;
270:   from->is_copy     = PETSC_TRUE;
271:   from->copy_start  = from_slots[0];
272:   from->copy_length = bs*sizeof(PetscScalar)*n;
273:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
274:   return(0);
275: }

277: /* --------------------------------------------------------------------------------------*/

279: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
280: {
281:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
282:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
283:   PetscErrorCode         ierr;
284:   PetscInt               ny,bs = in_from->bs;

287:   out->ops->begin   = in->ops->begin;
288:   out->ops->end     = in->ops->end;
289:   out->ops->copy    = in->ops->copy;
290:   out->ops->destroy = in->ops->destroy;
291:   out->ops->view    = in->ops->view;

293:   /* allocate entire send scatter context */
294:   PetscNewLog(out,&out_to);
295:   PetscNewLog(out,&out_from);

297:   ny                = in_to->starts[in_to->n];
298:   out_to->n         = in_to->n;
299:   out_to->type      = in_to->type;
300:   out_to->sendfirst = in_to->sendfirst;

302:   PetscMalloc1(out_to->n,&out_to->requests);
303:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
304:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
305:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
306:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
307:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

309:   out->todata                        = (void*)out_to;
310:   out_to->local.n                    = in_to->local.n;
311:   out_to->local.nonmatching_computed = PETSC_FALSE;
312:   out_to->local.n_nonmatching        = 0;
313:   out_to->local.slots_nonmatching    = 0;
314:   if (in_to->local.n) {
315:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
316:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
317:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
318:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
319:   } else {
320:     out_to->local.vslots   = 0;
321:     out_from->local.vslots = 0;
322:   }

324:   /* allocate entire receive context */
325:   out_from->type      = in_from->type;
326:   ny                  = in_from->starts[in_from->n];
327:   out_from->n         = in_from->n;
328:   out_from->sendfirst = in_from->sendfirst;

330:   PetscMalloc1(out_from->n,&out_from->requests);
331:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
332:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
333:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
334:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

336:   out->fromdata                        = (void*)out_from;
337:   out_from->local.n                    = in_from->local.n;
338:   out_from->local.nonmatching_computed = PETSC_FALSE;
339:   out_from->local.n_nonmatching        = 0;
340:   out_from->local.slots_nonmatching    = 0;

342:   /*
343:       set up the request arrays for use with isend_init() and irecv_init()
344:   */
345:   {
346:     PetscMPIInt tag;
347:     MPI_Comm    comm;
348:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
349:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
350:     PetscInt    i;
351:     PetscBool   flg;
352:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
353:     MPI_Request *rev_swaits,*rev_rwaits;
354:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

356:     PetscMalloc1(in_to->n,&out_to->rev_requests);
357:     PetscMalloc1(in_from->n,&out_from->rev_requests);

359:     rev_rwaits = out_to->rev_requests;
360:     rev_swaits = out_from->rev_requests;

362:     out_from->bs = out_to->bs = bs;
363:     tag          = ((PetscObject)out)->tag;
364:     PetscObjectGetComm((PetscObject)out,&comm);

366:     /* Register the receives that you will use later (sends for scatter reverse) */
367:     for (i=0; i<out_from->n; i++) {
368:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
369:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
370:     }

372:     flg  = PETSC_FALSE;
373:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&flg,NULL);
374:     if (flg) {
375:       out_to->use_readyreceiver   = PETSC_TRUE;
376:       out_from->use_readyreceiver = PETSC_TRUE;
377:       for (i=0; i<out_to->n; i++) {
378:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
379:       }
380:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
381:       MPI_Barrier(comm);
382:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
383:     } else {
384:       out_to->use_readyreceiver   = PETSC_FALSE;
385:       out_from->use_readyreceiver = PETSC_FALSE;
386:       flg                         = PETSC_FALSE;
387:       PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&flg,NULL);
388:       if (flg) {
389:         PetscInfo(in,"Using VecScatter Ssend mode\n");
390:       }
391:       for (i=0; i<out_to->n; i++) {
392:         if (!flg) {
393:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
394:         } else {
395:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
396:         }
397:       }
398:     }
399:     /* Register receives for scatter reverse */
400:     for (i=0; i<out_to->n; i++) {
401:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
402:     }
403:   }
404:   return(0);
405: }

407: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
408: {
409:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
410:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
411:   PetscErrorCode         ierr;
412:   PetscInt               ny,bs = in_from->bs;
413:   PetscMPIInt            size;

416:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

418:   out->ops->begin     = in->ops->begin;
419:   out->ops->end       = in->ops->end;
420:   out->ops->copy      = in->ops->copy;
421:   out->ops->destroy   = in->ops->destroy;
422:   out->ops->view      = in->ops->view;

424:   /* allocate entire send scatter context */
425:   PetscNewLog(out,&out_to);
426:   PetscNewLog(out,&out_from);

428:   ny                = in_to->starts[in_to->n];
429:   out_to->n         = in_to->n;
430:   out_to->type      = in_to->type;
431:   out_to->sendfirst = in_to->sendfirst;

433:   PetscMalloc1(out_to->n,&out_to->requests);
434:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
435:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
436:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
437:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
438:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

440:   out->todata                        = (void*)out_to;
441:   out_to->local.n                    = in_to->local.n;
442:   out_to->local.nonmatching_computed = PETSC_FALSE;
443:   out_to->local.n_nonmatching        = 0;
444:   out_to->local.slots_nonmatching    = 0;
445:   if (in_to->local.n) {
446:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
447:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
448:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
449:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
450:   } else {
451:     out_to->local.vslots   = 0;
452:     out_from->local.vslots = 0;
453:   }

455:   /* allocate entire receive context */
456:   out_from->type      = in_from->type;
457:   ny                  = in_from->starts[in_from->n];
458:   out_from->n         = in_from->n;
459:   out_from->sendfirst = in_from->sendfirst;

461:   PetscMalloc1(out_from->n,&out_from->requests);
462:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
463:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
464:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
465:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

467:   out->fromdata                        = (void*)out_from;
468:   out_from->local.n                    = in_from->local.n;
469:   out_from->local.nonmatching_computed = PETSC_FALSE;
470:   out_from->local.n_nonmatching        = 0;
471:   out_from->local.slots_nonmatching    = 0;

473:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

475:   PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
476:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
477:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

479:   PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
480:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
481:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
482:   return(0);
483: }
484: /* --------------------------------------------------------------------------------------------------
485:     Packs and unpacks the message data into send or from receive buffers.

487:     These could be generated automatically.

489:     Fortran kernels etc. could be used.
490: */
491: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
492: {
493:   PetscInt i;
494:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
495: }

497: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
498: {
499:   PetscInt i;

502:   switch (addv) {
503:   case INSERT_VALUES:
504:   case INSERT_ALL_VALUES:
505:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
506:     break;
507:   case ADD_VALUES:
508:   case ADD_ALL_VALUES:
509:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
510:     break;
511: #if !defined(PETSC_USE_COMPLEX)
512:   case MAX_VALUES:
513:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
514: #else
515:   case MAX_VALUES:
516: #endif
517:   case NOT_SET_VALUES:
518:     break;
519:   default:
520:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
521:   }
522:   return(0);
523: }

525: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
526: {
527:   PetscInt i;

530:   switch (addv) {
531:   case INSERT_VALUES:
532:   case INSERT_ALL_VALUES:
533:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
534:     break;
535:   case ADD_VALUES:
536:   case ADD_ALL_VALUES:
537:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
538:     break;
539: #if !defined(PETSC_USE_COMPLEX)
540:   case MAX_VALUES:
541:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
542: #else
543:   case MAX_VALUES:
544: #endif
545:   case NOT_SET_VALUES:
546:     break;
547:   default:
548:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
549:   }
550:   return(0);
551: }

553: /* ----------------------------------------------------------------------------------------------- */
554: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
555: {
556:   PetscInt i,idx;

558:   for (i=0; i<n; i++) {
559:     idx  = *indicesx++;
560:     y[0] = x[idx];
561:     y[1] = x[idx+1];
562:     y   += 2;
563:   }
564: }

566: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
567: {
568:   PetscInt i,idy;

571:   switch (addv) {
572:   case INSERT_VALUES:
573:   case INSERT_ALL_VALUES:
574:     for (i=0; i<n; i++) {
575:       idy      = *indicesy++;
576:       y[idy]   = x[0];
577:       y[idy+1] = x[1];
578:       x       += 2;
579:     }
580:     break;
581:   case ADD_VALUES:
582:   case ADD_ALL_VALUES:
583:     for (i=0; i<n; i++) {
584:       idy       = *indicesy++;
585:       y[idy]   += x[0];
586:       y[idy+1] += x[1];
587:       x        += 2;
588:     }
589:     break;
590: #if !defined(PETSC_USE_COMPLEX)
591:   case MAX_VALUES:
592:     for (i=0; i<n; i++) {
593:       idy      = *indicesy++;
594:       y[idy]   = PetscMax(y[idy],x[0]);
595:       y[idy+1] = PetscMax(y[idy+1],x[1]);
596:       x       += 2;
597:     }
598: #else
599:   case MAX_VALUES:
600: #endif
601:   case NOT_SET_VALUES:
602:     break;
603:   default:
604:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
605:   }
606:   return(0);
607: }

609: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
610: {
611:   PetscInt i,idx,idy;

614:   switch (addv) {
615:   case INSERT_VALUES:
616:   case INSERT_ALL_VALUES:
617:     for (i=0; i<n; i++) {
618:       idx      = *indicesx++;
619:       idy      = *indicesy++;
620:       y[idy]   = x[idx];
621:       y[idy+1] = x[idx+1];
622:     }
623:     break;
624:   case ADD_VALUES:
625:   case ADD_ALL_VALUES:
626:     for (i=0; i<n; i++) {
627:       idx       = *indicesx++;
628:       idy       = *indicesy++;
629:       y[idy]   += x[idx];
630:       y[idy+1] += x[idx+1];
631:     }
632:     break;
633: #if !defined(PETSC_USE_COMPLEX)
634:   case MAX_VALUES:
635:     for (i=0; i<n; i++) {
636:       idx      = *indicesx++;
637:       idy      = *indicesy++;
638:       y[idy]   = PetscMax(y[idy],x[idx]);
639:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
640:     }
641: #else
642:   case MAX_VALUES:
643: #endif
644:   case NOT_SET_VALUES:
645:     break;
646:   default:
647:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
648:   }
649:   return(0);
650: }
651: /* ----------------------------------------------------------------------------------------------- */
652: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
653: {
654:   PetscInt i,idx;

656:   for (i=0; i<n; i++) {
657:     idx  = *indicesx++;
658:     y[0] = x[idx];
659:     y[1] = x[idx+1];
660:     y[2] = x[idx+2];
661:     y   += 3;
662:   }
663: }
664: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
665: {
666:   PetscInt i,idy;

669:   switch (addv) {
670:   case INSERT_VALUES:
671:   case INSERT_ALL_VALUES:
672:     for (i=0; i<n; i++) {
673:       idy      = *indicesy++;
674:       y[idy]   = x[0];
675:       y[idy+1] = x[1];
676:       y[idy+2] = x[2];
677:       x       += 3;
678:     }
679:     break;
680:   case ADD_VALUES:
681:   case ADD_ALL_VALUES:
682:     for (i=0; i<n; i++) {
683:       idy       = *indicesy++;
684:       y[idy]   += x[0];
685:       y[idy+1] += x[1];
686:       y[idy+2] += x[2];
687:       x        += 3;
688:     }
689:     break;
690: #if !defined(PETSC_USE_COMPLEX)
691:   case MAX_VALUES:
692:     for (i=0; i<n; i++) {
693:       idy      = *indicesy++;
694:       y[idy]   = PetscMax(y[idy],x[0]);
695:       y[idy+1] = PetscMax(y[idy+1],x[1]);
696:       y[idy+2] = PetscMax(y[idy+2],x[2]);
697:       x       += 3;
698:     }
699: #else
700:   case MAX_VALUES:
701: #endif
702:   case NOT_SET_VALUES:
703:     break;
704:   default:
705:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
706:   }
707:   return(0);
708: }

710: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
711: {
712:   PetscInt i,idx,idy;

715:   switch (addv) {
716:   case INSERT_VALUES:
717:   case INSERT_ALL_VALUES:
718:     for (i=0; i<n; i++) {
719:       idx      = *indicesx++;
720:       idy      = *indicesy++;
721:       y[idy]   = x[idx];
722:       y[idy+1] = x[idx+1];
723:       y[idy+2] = x[idx+2];
724:     }
725:     break;
726:   case ADD_VALUES:
727:   case ADD_ALL_VALUES:
728:     for (i=0; i<n; i++) {
729:       idx       = *indicesx++;
730:       idy       = *indicesy++;
731:       y[idy]   += x[idx];
732:       y[idy+1] += x[idx+1];
733:       y[idy+2] += x[idx+2];
734:     }
735:     break;
736: #if !defined(PETSC_USE_COMPLEX)
737:   case MAX_VALUES:
738:     for (i=0; i<n; i++) {
739:       idx      = *indicesx++;
740:       idy      = *indicesy++;
741:       y[idy]   = PetscMax(y[idy],x[idx]);
742:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
743:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
744:     }
745: #else
746:   case MAX_VALUES:
747: #endif
748:   case NOT_SET_VALUES:
749:     break;
750:   default:
751:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
752:   }
753:   return(0);
754: }
755: /* ----------------------------------------------------------------------------------------------- */
756: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
757: {
758:   PetscInt i,idx;

760:   for (i=0; i<n; i++) {
761:     idx  = *indicesx++;
762:     y[0] = x[idx];
763:     y[1] = x[idx+1];
764:     y[2] = x[idx+2];
765:     y[3] = x[idx+3];
766:     y   += 4;
767:   }
768: }
769: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
770: {
771:   PetscInt i,idy;

774:   switch (addv) {
775:   case INSERT_VALUES:
776:   case INSERT_ALL_VALUES:
777:     for (i=0; i<n; i++) {
778:       idy      = *indicesy++;
779:       y[idy]   = x[0];
780:       y[idy+1] = x[1];
781:       y[idy+2] = x[2];
782:       y[idy+3] = x[3];
783:       x       += 4;
784:     }
785:     break;
786:   case ADD_VALUES:
787:   case ADD_ALL_VALUES:
788:     for (i=0; i<n; i++) {
789:       idy       = *indicesy++;
790:       y[idy]   += x[0];
791:       y[idy+1] += x[1];
792:       y[idy+2] += x[2];
793:       y[idy+3] += x[3];
794:       x        += 4;
795:     }
796:     break;
797: #if !defined(PETSC_USE_COMPLEX)
798:   case MAX_VALUES:
799:     for (i=0; i<n; i++) {
800:       idy      = *indicesy++;
801:       y[idy]   = PetscMax(y[idy],x[0]);
802:       y[idy+1] = PetscMax(y[idy+1],x[1]);
803:       y[idy+2] = PetscMax(y[idy+2],x[2]);
804:       y[idy+3] = PetscMax(y[idy+3],x[3]);
805:       x       += 4;
806:     }
807: #else
808:   case MAX_VALUES:
809: #endif
810:   case NOT_SET_VALUES:
811:     break;
812:   default:
813:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
814:   }
815:   return(0);
816: }

818: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
819: {
820:   PetscInt i,idx,idy;

823:   switch (addv) {
824:   case INSERT_VALUES:
825:   case INSERT_ALL_VALUES:
826:     for (i=0; i<n; i++) {
827:       idx      = *indicesx++;
828:       idy      = *indicesy++;
829:       y[idy]   = x[idx];
830:       y[idy+1] = x[idx+1];
831:       y[idy+2] = x[idx+2];
832:       y[idy+3] = x[idx+3];
833:     }
834:     break;
835:   case ADD_VALUES:
836:   case ADD_ALL_VALUES:
837:     for (i=0; i<n; i++) {
838:       idx       = *indicesx++;
839:       idy       = *indicesy++;
840:       y[idy]   += x[idx];
841:       y[idy+1] += x[idx+1];
842:       y[idy+2] += x[idx+2];
843:       y[idy+3] += x[idx+3];
844:     }
845:     break;
846: #if !defined(PETSC_USE_COMPLEX)
847:   case MAX_VALUES:
848:     for (i=0; i<n; i++) {
849:       idx      = *indicesx++;
850:       idy      = *indicesy++;
851:       y[idy]   = PetscMax(y[idy],x[idx]);
852:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
853:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
854:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
855:     }
856: #else
857:   case MAX_VALUES:
858: #endif
859:   case NOT_SET_VALUES:
860:     break;
861:   default:
862:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
863:   }
864:   return(0);
865: }
866: /* ----------------------------------------------------------------------------------------------- */
867: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
868: {
869:   PetscInt i,idx;

871:   for (i=0; i<n; i++) {
872:     idx  = *indicesx++;
873:     y[0] = x[idx];
874:     y[1] = x[idx+1];
875:     y[2] = x[idx+2];
876:     y[3] = x[idx+3];
877:     y[4] = x[idx+4];
878:     y   += 5;
879:   }
880: }

882: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
883: {
884:   PetscInt i,idy;

887:   switch (addv) {
888:   case INSERT_VALUES:
889:   case INSERT_ALL_VALUES:
890:     for (i=0; i<n; i++) {
891:       idy      = *indicesy++;
892:       y[idy]   = x[0];
893:       y[idy+1] = x[1];
894:       y[idy+2] = x[2];
895:       y[idy+3] = x[3];
896:       y[idy+4] = x[4];
897:       x       += 5;
898:     }
899:     break;
900:   case ADD_VALUES:
901:   case ADD_ALL_VALUES:
902:     for (i=0; i<n; i++) {
903:       idy       = *indicesy++;
904:       y[idy]   += x[0];
905:       y[idy+1] += x[1];
906:       y[idy+2] += x[2];
907:       y[idy+3] += x[3];
908:       y[idy+4] += x[4];
909:       x        += 5;
910:     }
911:     break;
912: #if !defined(PETSC_USE_COMPLEX)
913:   case MAX_VALUES:
914:     for (i=0; i<n; i++) {
915:       idy      = *indicesy++;
916:       y[idy]   = PetscMax(y[idy],x[0]);
917:       y[idy+1] = PetscMax(y[idy+1],x[1]);
918:       y[idy+2] = PetscMax(y[idy+2],x[2]);
919:       y[idy+3] = PetscMax(y[idy+3],x[3]);
920:       y[idy+4] = PetscMax(y[idy+4],x[4]);
921:       x       += 5;
922:     }
923: #else
924:   case MAX_VALUES:
925: #endif
926:   case NOT_SET_VALUES:
927:     break;
928:   default:
929:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
930:   }
931:   return(0);
932: }

934: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
935: {
936:   PetscInt i,idx,idy;

939:   switch (addv) {
940:   case INSERT_VALUES:
941:   case INSERT_ALL_VALUES:
942:     for (i=0; i<n; i++) {
943:       idx      = *indicesx++;
944:       idy      = *indicesy++;
945:       y[idy]   = x[idx];
946:       y[idy+1] = x[idx+1];
947:       y[idy+2] = x[idx+2];
948:       y[idy+3] = x[idx+3];
949:       y[idy+4] = x[idx+4];
950:     }
951:     break;
952:   case ADD_VALUES:
953:   case ADD_ALL_VALUES:
954:     for (i=0; i<n; i++) {
955:       idx       = *indicesx++;
956:       idy       = *indicesy++;
957:       y[idy]   += x[idx];
958:       y[idy+1] += x[idx+1];
959:       y[idy+2] += x[idx+2];
960:       y[idy+3] += x[idx+3];
961:       y[idy+4] += x[idx+4];
962:     }
963:     break;
964: #if !defined(PETSC_USE_COMPLEX)
965:   case MAX_VALUES:
966:     for (i=0; i<n; i++) {
967:       idx      = *indicesx++;
968:       idy      = *indicesy++;
969:       y[idy]   = PetscMax(y[idy],x[idx]);
970:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
971:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
972:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
973:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
974:     }
975: #else
976:   case MAX_VALUES:
977: #endif
978:   case NOT_SET_VALUES:
979:     break;
980:   default:
981:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
982:   }
983:   return(0);
984: }
985: /* ----------------------------------------------------------------------------------------------- */
986: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
987: {
988:   PetscInt i,idx;

990:   for (i=0; i<n; i++) {
991:     idx  = *indicesx++;
992:     y[0] = x[idx];
993:     y[1] = x[idx+1];
994:     y[2] = x[idx+2];
995:     y[3] = x[idx+3];
996:     y[4] = x[idx+4];
997:     y[5] = x[idx+5];
998:     y   += 6;
999:   }
1000: }

1002: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1003: {
1004:   PetscInt i,idy;

1007:   switch (addv) {
1008:   case INSERT_VALUES:
1009:   case INSERT_ALL_VALUES:
1010:     for (i=0; i<n; i++) {
1011:       idy      = *indicesy++;
1012:       y[idy]   = x[0];
1013:       y[idy+1] = x[1];
1014:       y[idy+2] = x[2];
1015:       y[idy+3] = x[3];
1016:       y[idy+4] = x[4];
1017:       y[idy+5] = x[5];
1018:       x       += 6;
1019:     }
1020:     break;
1021:   case ADD_VALUES:
1022:   case ADD_ALL_VALUES:
1023:     for (i=0; i<n; i++) {
1024:       idy       = *indicesy++;
1025:       y[idy]   += x[0];
1026:       y[idy+1] += x[1];
1027:       y[idy+2] += x[2];
1028:       y[idy+3] += x[3];
1029:       y[idy+4] += x[4];
1030:       y[idy+5] += x[5];
1031:       x        += 6;
1032:     }
1033:     break;
1034: #if !defined(PETSC_USE_COMPLEX)
1035:   case MAX_VALUES:
1036:     for (i=0; i<n; i++) {
1037:       idy      = *indicesy++;
1038:       y[idy]   = PetscMax(y[idy],x[0]);
1039:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1040:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1041:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1042:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1043:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1044:       x       += 6;
1045:     }
1046: #else
1047:   case MAX_VALUES:
1048: #endif
1049:   case NOT_SET_VALUES:
1050:     break;
1051:   default:
1052:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1053:   }
1054:   return(0);
1055: }

1057: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1058: {
1059:   PetscInt i,idx,idy;

1062:   switch (addv) {
1063:   case INSERT_VALUES:
1064:   case INSERT_ALL_VALUES:
1065:     for (i=0; i<n; i++) {
1066:       idx      = *indicesx++;
1067:       idy      = *indicesy++;
1068:       y[idy]   = x[idx];
1069:       y[idy+1] = x[idx+1];
1070:       y[idy+2] = x[idx+2];
1071:       y[idy+3] = x[idx+3];
1072:       y[idy+4] = x[idx+4];
1073:       y[idy+5] = x[idx+5];
1074:     }
1075:     break;
1076:   case ADD_VALUES:
1077:   case ADD_ALL_VALUES:
1078:     for (i=0; i<n; i++) {
1079:       idx       = *indicesx++;
1080:       idy       = *indicesy++;
1081:       y[idy]   += x[idx];
1082:       y[idy+1] += x[idx+1];
1083:       y[idy+2] += x[idx+2];
1084:       y[idy+3] += x[idx+3];
1085:       y[idy+4] += x[idx+4];
1086:       y[idy+5] += x[idx+5];
1087:     }
1088:     break;
1089: #if !defined(PETSC_USE_COMPLEX)
1090:   case MAX_VALUES:
1091:     for (i=0; i<n; i++) {
1092:       idx      = *indicesx++;
1093:       idy      = *indicesy++;
1094:       y[idy]   = PetscMax(y[idy],x[idx]);
1095:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1096:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1097:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1098:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1099:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1100:     }
1101: #else
1102:   case MAX_VALUES:
1103: #endif
1104:   case NOT_SET_VALUES:
1105:     break;
1106:   default:
1107:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1108:   }
1109:   return(0);
1110: }
1111: /* ----------------------------------------------------------------------------------------------- */
1112: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1113: {
1114:   PetscInt i,idx;

1116:   for (i=0; i<n; i++) {
1117:     idx  = *indicesx++;
1118:     y[0] = x[idx];
1119:     y[1] = x[idx+1];
1120:     y[2] = x[idx+2];
1121:     y[3] = x[idx+3];
1122:     y[4] = x[idx+4];
1123:     y[5] = x[idx+5];
1124:     y[6] = x[idx+6];
1125:     y   += 7;
1126:   }
1127: }

1129: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1130: {
1131:   PetscInt i,idy;

1134:   switch (addv) {
1135:   case INSERT_VALUES:
1136:   case INSERT_ALL_VALUES:
1137:     for (i=0; i<n; i++) {
1138:       idy      = *indicesy++;
1139:       y[idy]   = x[0];
1140:       y[idy+1] = x[1];
1141:       y[idy+2] = x[2];
1142:       y[idy+3] = x[3];
1143:       y[idy+4] = x[4];
1144:       y[idy+5] = x[5];
1145:       y[idy+6] = x[6];
1146:       x       += 7;
1147:     }
1148:     break;
1149:   case ADD_VALUES:
1150:   case ADD_ALL_VALUES:
1151:     for (i=0; i<n; i++) {
1152:       idy       = *indicesy++;
1153:       y[idy]   += x[0];
1154:       y[idy+1] += x[1];
1155:       y[idy+2] += x[2];
1156:       y[idy+3] += x[3];
1157:       y[idy+4] += x[4];
1158:       y[idy+5] += x[5];
1159:       y[idy+6] += x[6];
1160:       x        += 7;
1161:     }
1162:     break;
1163: #if !defined(PETSC_USE_COMPLEX)
1164:   case MAX_VALUES:
1165:     for (i=0; i<n; i++) {
1166:       idy      = *indicesy++;
1167:       y[idy]   = PetscMax(y[idy],x[0]);
1168:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1169:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1170:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1171:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1172:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1173:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1174:       x       += 7;
1175:     }
1176: #else
1177:   case MAX_VALUES:
1178: #endif
1179:   case NOT_SET_VALUES:
1180:     break;
1181:   default:
1182:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1183:   }
1184:   return(0);
1185: }

1187: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1188: {
1189:   PetscInt i,idx,idy;

1192:   switch (addv) {
1193:   case INSERT_VALUES:
1194:   case INSERT_ALL_VALUES:
1195:     for (i=0; i<n; i++) {
1196:       idx      = *indicesx++;
1197:       idy      = *indicesy++;
1198:       y[idy]   = x[idx];
1199:       y[idy+1] = x[idx+1];
1200:       y[idy+2] = x[idx+2];
1201:       y[idy+3] = x[idx+3];
1202:       y[idy+4] = x[idx+4];
1203:       y[idy+5] = x[idx+5];
1204:       y[idy+6] = x[idx+6];
1205:     }
1206:     break;
1207:   case ADD_VALUES:
1208:   case ADD_ALL_VALUES:
1209:     for (i=0; i<n; i++) {
1210:       idx       = *indicesx++;
1211:       idy       = *indicesy++;
1212:       y[idy]   += x[idx];
1213:       y[idy+1] += x[idx+1];
1214:       y[idy+2] += x[idx+2];
1215:       y[idy+3] += x[idx+3];
1216:       y[idy+4] += x[idx+4];
1217:       y[idy+5] += x[idx+5];
1218:       y[idy+6] += x[idx+6];
1219:     }
1220:     break;
1221: #if !defined(PETSC_USE_COMPLEX)
1222:   case MAX_VALUES:
1223:     for (i=0; i<n; i++) {
1224:       idx      = *indicesx++;
1225:       idy      = *indicesy++;
1226:       y[idy]   = PetscMax(y[idy],x[idx]);
1227:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1228:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1229:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1230:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1231:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1232:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1233:     }
1234: #else
1235:   case MAX_VALUES:
1236: #endif
1237:   case NOT_SET_VALUES:
1238:     break;
1239:   default:
1240:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1241:   }
1242:   return(0);
1243: }
1244: /* ----------------------------------------------------------------------------------------------- */
1245: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1246: {
1247:   PetscInt i,idx;

1249:   for (i=0; i<n; i++) {
1250:     idx  = *indicesx++;
1251:     y[0] = x[idx];
1252:     y[1] = x[idx+1];
1253:     y[2] = x[idx+2];
1254:     y[3] = x[idx+3];
1255:     y[4] = x[idx+4];
1256:     y[5] = x[idx+5];
1257:     y[6] = x[idx+6];
1258:     y[7] = x[idx+7];
1259:     y   += 8;
1260:   }
1261: }

1263: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1264: {
1265:   PetscInt i,idy;

1268:   switch (addv) {
1269:   case INSERT_VALUES:
1270:   case INSERT_ALL_VALUES:
1271:     for (i=0; i<n; i++) {
1272:       idy      = *indicesy++;
1273:       y[idy]   = x[0];
1274:       y[idy+1] = x[1];
1275:       y[idy+2] = x[2];
1276:       y[idy+3] = x[3];
1277:       y[idy+4] = x[4];
1278:       y[idy+5] = x[5];
1279:       y[idy+6] = x[6];
1280:       y[idy+7] = x[7];
1281:       x       += 8;
1282:     }
1283:     break;
1284:   case ADD_VALUES:
1285:   case ADD_ALL_VALUES:
1286:     for (i=0; i<n; i++) {
1287:       idy       = *indicesy++;
1288:       y[idy]   += x[0];
1289:       y[idy+1] += x[1];
1290:       y[idy+2] += x[2];
1291:       y[idy+3] += x[3];
1292:       y[idy+4] += x[4];
1293:       y[idy+5] += x[5];
1294:       y[idy+6] += x[6];
1295:       y[idy+7] += x[7];
1296:       x        += 8;
1297:     }
1298:     break;
1299: #if !defined(PETSC_USE_COMPLEX)
1300:   case MAX_VALUES:
1301:     for (i=0; i<n; i++) {
1302:       idy      = *indicesy++;
1303:       y[idy]   = PetscMax(y[idy],x[0]);
1304:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1305:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1306:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1307:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1308:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1309:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1310:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1311:       x       += 8;
1312:     }
1313: #else
1314:   case MAX_VALUES:
1315: #endif
1316:   case NOT_SET_VALUES:
1317:     break;
1318:   default:
1319:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1320:   }
1321:   return(0);
1322: }

1324: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1325: {
1326:   PetscInt i,idx,idy;

1329:   switch (addv) {
1330:   case INSERT_VALUES:
1331:   case INSERT_ALL_VALUES:
1332:     for (i=0; i<n; i++) {
1333:       idx      = *indicesx++;
1334:       idy      = *indicesy++;
1335:       y[idy]   = x[idx];
1336:       y[idy+1] = x[idx+1];
1337:       y[idy+2] = x[idx+2];
1338:       y[idy+3] = x[idx+3];
1339:       y[idy+4] = x[idx+4];
1340:       y[idy+5] = x[idx+5];
1341:       y[idy+6] = x[idx+6];
1342:       y[idy+7] = x[idx+7];
1343:     }
1344:     break;
1345:   case ADD_VALUES:
1346:   case ADD_ALL_VALUES:
1347:     for (i=0; i<n; i++) {
1348:       idx       = *indicesx++;
1349:       idy       = *indicesy++;
1350:       y[idy]   += x[idx];
1351:       y[idy+1] += x[idx+1];
1352:       y[idy+2] += x[idx+2];
1353:       y[idy+3] += x[idx+3];
1354:       y[idy+4] += x[idx+4];
1355:       y[idy+5] += x[idx+5];
1356:       y[idy+6] += x[idx+6];
1357:       y[idy+7] += x[idx+7];
1358:     }
1359:     break;
1360: #if !defined(PETSC_USE_COMPLEX)
1361:   case MAX_VALUES:
1362:     for (i=0; i<n; i++) {
1363:       idx      = *indicesx++;
1364:       idy      = *indicesy++;
1365:       y[idy]   = PetscMax(y[idy],x[idx]);
1366:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1367:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1368:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1369:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1370:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1371:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1372:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1373:     }
1374: #else
1375:   case MAX_VALUES:
1376: #endif
1377:   case NOT_SET_VALUES:
1378:     break;
1379:   default:
1380:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1381:   }
1382:   return(0);
1383: }

1385: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1386: {
1387:   PetscInt i,idx;

1389:   for (i=0; i<n; i++) {
1390:     idx   = *indicesx++;
1391:     y[0]  = x[idx];
1392:     y[1]  = x[idx+1];
1393:     y[2]  = x[idx+2];
1394:     y[3]  = x[idx+3];
1395:     y[4]  = x[idx+4];
1396:     y[5]  = x[idx+5];
1397:     y[6]  = x[idx+6];
1398:     y[7]  = x[idx+7];
1399:     y[8]  = x[idx+8];
1400:     y    += 9;
1401:   }
1402: }

1404: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1405: {
1406:   PetscInt i,idy;

1409:   switch (addv) {
1410:   case INSERT_VALUES:
1411:   case INSERT_ALL_VALUES:
1412:     for (i=0; i<n; i++) {
1413:       idy       = *indicesy++;
1414:       y[idy]    = x[0];
1415:       y[idy+1]  = x[1];
1416:       y[idy+2]  = x[2];
1417:       y[idy+3]  = x[3];
1418:       y[idy+4]  = x[4];
1419:       y[idy+5]  = x[5];
1420:       y[idy+6]  = x[6];
1421:       y[idy+7]  = x[7];
1422:       y[idy+8]  = x[8];
1423:       x        += 9;
1424:     }
1425:     break;
1426:   case ADD_VALUES:
1427:   case ADD_ALL_VALUES:
1428:     for (i=0; i<n; i++) {
1429:       idy        = *indicesy++;
1430:       y[idy]    += x[0];
1431:       y[idy+1]  += x[1];
1432:       y[idy+2]  += x[2];
1433:       y[idy+3]  += x[3];
1434:       y[idy+4]  += x[4];
1435:       y[idy+5]  += x[5];
1436:       y[idy+6]  += x[6];
1437:       y[idy+7]  += x[7];
1438:       y[idy+8]  += x[8];
1439:       x         += 9;
1440:     }
1441:     break;
1442: #if !defined(PETSC_USE_COMPLEX)
1443:   case MAX_VALUES:
1444:     for (i=0; i<n; i++) {
1445:       idy       = *indicesy++;
1446:       y[idy]    = PetscMax(y[idy],x[0]);
1447:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1448:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1449:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1450:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1451:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1452:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1453:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1454:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1455:       x        += 9;
1456:     }
1457: #else
1458:   case MAX_VALUES:
1459: #endif
1460:   case NOT_SET_VALUES:
1461:     break;
1462:   default:
1463:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1464:   }
1465:   return(0);
1466: }

1468: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1469: {
1470:   PetscInt i,idx,idy;

1473:   switch (addv) {
1474:   case INSERT_VALUES:
1475:   case INSERT_ALL_VALUES:
1476:     for (i=0; i<n; i++) {
1477:       idx       = *indicesx++;
1478:       idy       = *indicesy++;
1479:       y[idy]    = x[idx];
1480:       y[idy+1]  = x[idx+1];
1481:       y[idy+2]  = x[idx+2];
1482:       y[idy+3]  = x[idx+3];
1483:       y[idy+4]  = x[idx+4];
1484:       y[idy+5]  = x[idx+5];
1485:       y[idy+6]  = x[idx+6];
1486:       y[idy+7]  = x[idx+7];
1487:       y[idy+8]  = x[idx+8];
1488:     }
1489:     break;
1490:   case ADD_VALUES:
1491:   case ADD_ALL_VALUES:
1492:     for (i=0; i<n; i++) {
1493:       idx        = *indicesx++;
1494:       idy        = *indicesy++;
1495:       y[idy]    += x[idx];
1496:       y[idy+1]  += x[idx+1];
1497:       y[idy+2]  += x[idx+2];
1498:       y[idy+3]  += x[idx+3];
1499:       y[idy+4]  += x[idx+4];
1500:       y[idy+5]  += x[idx+5];
1501:       y[idy+6]  += x[idx+6];
1502:       y[idy+7]  += x[idx+7];
1503:       y[idy+8]  += x[idx+8];
1504:     }
1505:     break;
1506: #if !defined(PETSC_USE_COMPLEX)
1507:   case MAX_VALUES:
1508:     for (i=0; i<n; i++) {
1509:       idx       = *indicesx++;
1510:       idy       = *indicesy++;
1511:       y[idy]    = PetscMax(y[idy],x[idx]);
1512:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1513:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1514:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1515:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1516:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1517:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1518:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1519:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1520:     }
1521: #else
1522:   case MAX_VALUES:
1523: #endif
1524:   case NOT_SET_VALUES:
1525:     break;
1526:   default:
1527:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1528:   }
1529:   return(0);
1530: }

1532: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1533: {
1534:   PetscInt i,idx;

1536:   for (i=0; i<n; i++) {
1537:     idx   = *indicesx++;
1538:     y[0]  = x[idx];
1539:     y[1]  = x[idx+1];
1540:     y[2]  = x[idx+2];
1541:     y[3]  = x[idx+3];
1542:     y[4]  = x[idx+4];
1543:     y[5]  = x[idx+5];
1544:     y[6]  = x[idx+6];
1545:     y[7]  = x[idx+7];
1546:     y[8]  = x[idx+8];
1547:     y[9]  = x[idx+9];
1548:     y    += 10;
1549:   }
1550: }

1552: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1553: {
1554:   PetscInt i,idy;

1557:   switch (addv) {
1558:   case INSERT_VALUES:
1559:   case INSERT_ALL_VALUES:
1560:     for (i=0; i<n; i++) {
1561:       idy       = *indicesy++;
1562:       y[idy]    = x[0];
1563:       y[idy+1]  = x[1];
1564:       y[idy+2]  = x[2];
1565:       y[idy+3]  = x[3];
1566:       y[idy+4]  = x[4];
1567:       y[idy+5]  = x[5];
1568:       y[idy+6]  = x[6];
1569:       y[idy+7]  = x[7];
1570:       y[idy+8]  = x[8];
1571:       y[idy+9]  = x[9];
1572:       x        += 10;
1573:     }
1574:     break;
1575:   case ADD_VALUES:
1576:   case ADD_ALL_VALUES:
1577:     for (i=0; i<n; i++) {
1578:       idy        = *indicesy++;
1579:       y[idy]    += x[0];
1580:       y[idy+1]  += x[1];
1581:       y[idy+2]  += x[2];
1582:       y[idy+3]  += x[3];
1583:       y[idy+4]  += x[4];
1584:       y[idy+5]  += x[5];
1585:       y[idy+6]  += x[6];
1586:       y[idy+7]  += x[7];
1587:       y[idy+8]  += x[8];
1588:       y[idy+9]  += x[9];
1589:       x         += 10;
1590:     }
1591:     break;
1592: #if !defined(PETSC_USE_COMPLEX)
1593:   case MAX_VALUES:
1594:     for (i=0; i<n; i++) {
1595:       idy       = *indicesy++;
1596:       y[idy]    = PetscMax(y[idy],x[0]);
1597:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1598:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1599:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1600:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1601:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1602:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1603:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1604:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1605:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1606:       x        += 10;
1607:     }
1608: #else
1609:   case MAX_VALUES:
1610: #endif
1611:   case NOT_SET_VALUES:
1612:     break;
1613:   default:
1614:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1615:   }
1616:   return(0);
1617: }

1619: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1620: {
1621:   PetscInt i,idx,idy;

1624:   switch (addv) {
1625:   case INSERT_VALUES:
1626:   case INSERT_ALL_VALUES:
1627:     for (i=0; i<n; i++) {
1628:       idx       = *indicesx++;
1629:       idy       = *indicesy++;
1630:       y[idy]    = x[idx];
1631:       y[idy+1]  = x[idx+1];
1632:       y[idy+2]  = x[idx+2];
1633:       y[idy+3]  = x[idx+3];
1634:       y[idy+4]  = x[idx+4];
1635:       y[idy+5]  = x[idx+5];
1636:       y[idy+6]  = x[idx+6];
1637:       y[idy+7]  = x[idx+7];
1638:       y[idy+8]  = x[idx+8];
1639:       y[idy+9]  = x[idx+9];
1640:     }
1641:     break;
1642:   case ADD_VALUES:
1643:   case ADD_ALL_VALUES:
1644:     for (i=0; i<n; i++) {
1645:       idx        = *indicesx++;
1646:       idy        = *indicesy++;
1647:       y[idy]    += x[idx];
1648:       y[idy+1]  += x[idx+1];
1649:       y[idy+2]  += x[idx+2];
1650:       y[idy+3]  += x[idx+3];
1651:       y[idy+4]  += x[idx+4];
1652:       y[idy+5]  += x[idx+5];
1653:       y[idy+6]  += x[idx+6];
1654:       y[idy+7]  += x[idx+7];
1655:       y[idy+8]  += x[idx+8];
1656:       y[idy+9]  += x[idx+9];
1657:     }
1658:     break;
1659: #if !defined(PETSC_USE_COMPLEX)
1660:   case MAX_VALUES:
1661:     for (i=0; i<n; i++) {
1662:       idx       = *indicesx++;
1663:       idy       = *indicesy++;
1664:       y[idy]    = PetscMax(y[idy],x[idx]);
1665:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1666:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1667:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1668:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1669:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1670:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1671:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1672:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1673:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1674:     }
1675: #else
1676:   case MAX_VALUES:
1677: #endif
1678:   case NOT_SET_VALUES:
1679:     break;
1680:   default:
1681:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1682:   }
1683:   return(0);
1684: }

1686: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1687: {
1688:   PetscInt i,idx;

1690:   for (i=0; i<n; i++) {
1691:     idx   = *indicesx++;
1692:     y[0]  = x[idx];
1693:     y[1]  = x[idx+1];
1694:     y[2]  = x[idx+2];
1695:     y[3]  = x[idx+3];
1696:     y[4]  = x[idx+4];
1697:     y[5]  = x[idx+5];
1698:     y[6]  = x[idx+6];
1699:     y[7]  = x[idx+7];
1700:     y[8]  = x[idx+8];
1701:     y[9]  = x[idx+9];
1702:     y[10] = x[idx+10];
1703:     y    += 11;
1704:   }
1705: }

1707: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1708: {
1709:   PetscInt i,idy;

1712:   switch (addv) {
1713:   case INSERT_VALUES:
1714:   case INSERT_ALL_VALUES:
1715:     for (i=0; i<n; i++) {
1716:       idy       = *indicesy++;
1717:       y[idy]    = x[0];
1718:       y[idy+1]  = x[1];
1719:       y[idy+2]  = x[2];
1720:       y[idy+3]  = x[3];
1721:       y[idy+4]  = x[4];
1722:       y[idy+5]  = x[5];
1723:       y[idy+6]  = x[6];
1724:       y[idy+7]  = x[7];
1725:       y[idy+8]  = x[8];
1726:       y[idy+9]  = x[9];
1727:       y[idy+10] = x[10];
1728:       x        += 11;
1729:     }
1730:     break;
1731:   case ADD_VALUES:
1732:   case ADD_ALL_VALUES:
1733:     for (i=0; i<n; i++) {
1734:       idy        = *indicesy++;
1735:       y[idy]    += x[0];
1736:       y[idy+1]  += x[1];
1737:       y[idy+2]  += x[2];
1738:       y[idy+3]  += x[3];
1739:       y[idy+4]  += x[4];
1740:       y[idy+5]  += x[5];
1741:       y[idy+6]  += x[6];
1742:       y[idy+7]  += x[7];
1743:       y[idy+8]  += x[8];
1744:       y[idy+9]  += x[9];
1745:       y[idy+10] += x[10];
1746:       x         += 11;
1747:     }
1748:     break;
1749: #if !defined(PETSC_USE_COMPLEX)
1750:   case MAX_VALUES:
1751:     for (i=0; i<n; i++) {
1752:       idy       = *indicesy++;
1753:       y[idy]    = PetscMax(y[idy],x[0]);
1754:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1755:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1756:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1757:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1758:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1759:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1760:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1761:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1762:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1763:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1764:       x        += 11;
1765:     }
1766: #else
1767:   case MAX_VALUES:
1768: #endif
1769:   case NOT_SET_VALUES:
1770:     break;
1771:   default:
1772:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1773:   }
1774:   return(0);
1775: }

1777: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1778: {
1779:   PetscInt i,idx,idy;

1782:   switch (addv) {
1783:   case INSERT_VALUES:
1784:   case INSERT_ALL_VALUES:
1785:     for (i=0; i<n; i++) {
1786:       idx       = *indicesx++;
1787:       idy       = *indicesy++;
1788:       y[idy]    = x[idx];
1789:       y[idy+1]  = x[idx+1];
1790:       y[idy+2]  = x[idx+2];
1791:       y[idy+3]  = x[idx+3];
1792:       y[idy+4]  = x[idx+4];
1793:       y[idy+5]  = x[idx+5];
1794:       y[idy+6]  = x[idx+6];
1795:       y[idy+7]  = x[idx+7];
1796:       y[idy+8]  = x[idx+8];
1797:       y[idy+9]  = x[idx+9];
1798:       y[idy+10] = x[idx+10];
1799:     }
1800:     break;
1801:   case ADD_VALUES:
1802:   case ADD_ALL_VALUES:
1803:     for (i=0; i<n; i++) {
1804:       idx        = *indicesx++;
1805:       idy        = *indicesy++;
1806:       y[idy]    += x[idx];
1807:       y[idy+1]  += x[idx+1];
1808:       y[idy+2]  += x[idx+2];
1809:       y[idy+3]  += x[idx+3];
1810:       y[idy+4]  += x[idx+4];
1811:       y[idy+5]  += x[idx+5];
1812:       y[idy+6]  += x[idx+6];
1813:       y[idy+7]  += x[idx+7];
1814:       y[idy+8]  += x[idx+8];
1815:       y[idy+9]  += x[idx+9];
1816:       y[idy+10] += x[idx+10];
1817:     }
1818:     break;
1819: #if !defined(PETSC_USE_COMPLEX)
1820:   case MAX_VALUES:
1821:     for (i=0; i<n; i++) {
1822:       idx       = *indicesx++;
1823:       idy       = *indicesy++;
1824:       y[idy]    = PetscMax(y[idy],x[idx]);
1825:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1826:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1827:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1828:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1829:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1830:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1831:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1832:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1833:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1834:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1835:     }
1836: #else
1837:   case MAX_VALUES:
1838: #endif
1839:   case NOT_SET_VALUES:
1840:     break;
1841:   default:
1842:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1843:   }
1844:   return(0);
1845: }

1847: /* ----------------------------------------------------------------------------------------------- */
1848: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1849: {
1850:   PetscInt i,idx;

1852:   for (i=0; i<n; i++) {
1853:     idx   = *indicesx++;
1854:     y[0]  = x[idx];
1855:     y[1]  = x[idx+1];
1856:     y[2]  = x[idx+2];
1857:     y[3]  = x[idx+3];
1858:     y[4]  = x[idx+4];
1859:     y[5]  = x[idx+5];
1860:     y[6]  = x[idx+6];
1861:     y[7]  = x[idx+7];
1862:     y[8]  = x[idx+8];
1863:     y[9]  = x[idx+9];
1864:     y[10] = x[idx+10];
1865:     y[11] = x[idx+11];
1866:     y    += 12;
1867:   }
1868: }

1870: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1871: {
1872:   PetscInt i,idy;

1875:   switch (addv) {
1876:   case INSERT_VALUES:
1877:   case INSERT_ALL_VALUES:
1878:     for (i=0; i<n; i++) {
1879:       idy       = *indicesy++;
1880:       y[idy]    = x[0];
1881:       y[idy+1]  = x[1];
1882:       y[idy+2]  = x[2];
1883:       y[idy+3]  = x[3];
1884:       y[idy+4]  = x[4];
1885:       y[idy+5]  = x[5];
1886:       y[idy+6]  = x[6];
1887:       y[idy+7]  = x[7];
1888:       y[idy+8]  = x[8];
1889:       y[idy+9]  = x[9];
1890:       y[idy+10] = x[10];
1891:       y[idy+11] = x[11];
1892:       x        += 12;
1893:     }
1894:     break;
1895:   case ADD_VALUES:
1896:   case ADD_ALL_VALUES:
1897:     for (i=0; i<n; i++) {
1898:       idy        = *indicesy++;
1899:       y[idy]    += x[0];
1900:       y[idy+1]  += x[1];
1901:       y[idy+2]  += x[2];
1902:       y[idy+3]  += x[3];
1903:       y[idy+4]  += x[4];
1904:       y[idy+5]  += x[5];
1905:       y[idy+6]  += x[6];
1906:       y[idy+7]  += x[7];
1907:       y[idy+8]  += x[8];
1908:       y[idy+9]  += x[9];
1909:       y[idy+10] += x[10];
1910:       y[idy+11] += x[11];
1911:       x         += 12;
1912:     }
1913:     break;
1914: #if !defined(PETSC_USE_COMPLEX)
1915:   case MAX_VALUES:
1916:     for (i=0; i<n; i++) {
1917:       idy       = *indicesy++;
1918:       y[idy]    = PetscMax(y[idy],x[0]);
1919:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1920:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1921:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1922:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1923:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1924:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1925:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1926:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1927:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1928:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1929:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1930:       x        += 12;
1931:     }
1932: #else
1933:   case MAX_VALUES:
1934: #endif
1935:   case NOT_SET_VALUES:
1936:     break;
1937:   default:
1938:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1939:   }
1940:   return(0);
1941: }

1943: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1944: {
1945:   PetscInt i,idx,idy;

1948:   switch (addv) {
1949:   case INSERT_VALUES:
1950:   case INSERT_ALL_VALUES:
1951:     for (i=0; i<n; i++) {
1952:       idx       = *indicesx++;
1953:       idy       = *indicesy++;
1954:       y[idy]    = x[idx];
1955:       y[idy+1]  = x[idx+1];
1956:       y[idy+2]  = x[idx+2];
1957:       y[idy+3]  = x[idx+3];
1958:       y[idy+4]  = x[idx+4];
1959:       y[idy+5]  = x[idx+5];
1960:       y[idy+6]  = x[idx+6];
1961:       y[idy+7]  = x[idx+7];
1962:       y[idy+8]  = x[idx+8];
1963:       y[idy+9]  = x[idx+9];
1964:       y[idy+10] = x[idx+10];
1965:       y[idy+11] = x[idx+11];
1966:     }
1967:     break;
1968:   case ADD_VALUES:
1969:   case ADD_ALL_VALUES:
1970:     for (i=0; i<n; i++) {
1971:       idx        = *indicesx++;
1972:       idy        = *indicesy++;
1973:       y[idy]    += x[idx];
1974:       y[idy+1]  += x[idx+1];
1975:       y[idy+2]  += x[idx+2];
1976:       y[idy+3]  += x[idx+3];
1977:       y[idy+4]  += x[idx+4];
1978:       y[idy+5]  += x[idx+5];
1979:       y[idy+6]  += x[idx+6];
1980:       y[idy+7]  += x[idx+7];
1981:       y[idy+8]  += x[idx+8];
1982:       y[idy+9]  += x[idx+9];
1983:       y[idy+10] += x[idx+10];
1984:       y[idy+11] += x[idx+11];
1985:     }
1986:     break;
1987: #if !defined(PETSC_USE_COMPLEX)
1988:   case MAX_VALUES:
1989:     for (i=0; i<n; i++) {
1990:       idx       = *indicesx++;
1991:       idy       = *indicesy++;
1992:       y[idy]    = PetscMax(y[idy],x[idx]);
1993:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1994:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1995:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1996:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1997:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1998:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1999:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
2000:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
2001:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
2002:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
2003:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
2004:     }
2005: #else
2006:   case MAX_VALUES:
2007: #endif
2008:   case NOT_SET_VALUES:
2009:     break;
2010:   default:
2011:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2012:   }
2013:   return(0);
2014: }

2016: /* ----------------------------------------------------------------------------------------------- */
2017: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
2018: {
2019:   PetscInt       i,idx;

2021:   for (i=0; i<n; i++) {
2022:     idx   = *indicesx++;
2023:     PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
2024:     y    += bs;
2025:   }
2026: }

2028: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2029: {
2030:   PetscInt i,idy,j;

2033:   switch (addv) {
2034:   case INSERT_VALUES:
2035:   case INSERT_ALL_VALUES:
2036:     for (i=0; i<n; i++) {
2037:       idy       = *indicesy++;
2038:       PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
2039:       x        += bs;
2040:     }
2041:     break;
2042:   case ADD_VALUES:
2043:   case ADD_ALL_VALUES:
2044:     for (i=0; i<n; i++) {
2045:       idy        = *indicesy++;
2046:       for (j=0; j<bs; j++) y[idy+j] += x[j];
2047:       x         += bs;
2048:     }
2049:     break;
2050: #if !defined(PETSC_USE_COMPLEX)
2051:   case MAX_VALUES:
2052:     for (i=0; i<n; i++) {
2053:       idy = *indicesy++;
2054:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2055:       x  += bs;
2056:     }
2057: #else
2058:   case MAX_VALUES:
2059: #endif
2060:   case NOT_SET_VALUES:
2061:     break;
2062:   default:
2063:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2064:   }
2065:   return(0);
2066: }

2068: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2069: {
2070:   PetscInt i,idx,idy,j;

2073:   switch (addv) {
2074:   case INSERT_VALUES:
2075:   case INSERT_ALL_VALUES:
2076:     for (i=0; i<n; i++) {
2077:       idx       = *indicesx++;
2078:       idy       = *indicesy++;
2079:       PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2080:     }
2081:     break;
2082:   case ADD_VALUES:
2083:   case ADD_ALL_VALUES:
2084:     for (i=0; i<n; i++) {
2085:       idx        = *indicesx++;
2086:       idy        = *indicesy++;
2087:       for (j=0; j<bs; j++ )  y[idy+j] += x[idx+j];
2088:     }
2089:     break;
2090: #if !defined(PETSC_USE_COMPLEX)
2091:   case MAX_VALUES:
2092:     for (i=0; i<n; i++) {
2093:       idx       = *indicesx++;
2094:       idy       = *indicesy++;
2095:       for (j=0; j<bs; j++ )  y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2096:     }
2097: #else
2098:   case MAX_VALUES:
2099: #endif
2100:   case NOT_SET_VALUES:
2101:     break;
2102:   default:
2103:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2104:   }
2105:   return(0);
2106: }

2108: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2109: #define BS 1
2110:  #include <../src/vec/vec/utils/vpscat.h>
2111: #define BS 2
2112:  #include <../src/vec/vec/utils/vpscat.h>
2113: #define BS 3
2114:  #include <../src/vec/vec/utils/vpscat.h>
2115: #define BS 4
2116:  #include <../src/vec/vec/utils/vpscat.h>
2117: #define BS 5
2118:  #include <../src/vec/vec/utils/vpscat.h>
2119: #define BS 6
2120:  #include <../src/vec/vec/utils/vpscat.h>
2121: #define BS 7
2122:  #include <../src/vec/vec/utils/vpscat.h>
2123: #define BS 8
2124:  #include <../src/vec/vec/utils/vpscat.h>
2125: #define BS 9
2126:  #include <../src/vec/vec/utils/vpscat.h>
2127: #define BS 10
2128:  #include <../src/vec/vec/utils/vpscat.h>
2129: #define BS 11
2130:  #include <../src/vec/vec/utils/vpscat.h>
2131: #define BS 12
2132:  #include <../src/vec/vec/utils/vpscat.h>
2133: #define BS bs
2134:  #include <../src/vec/vec/utils/vpscat.h>

2136: /* ==========================================================================================*/

2138: /*              create parallel to sequential scatter context                           */

2140: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

2142: /*@
2143:      VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.

2145:      Collective on VecScatter

2147:    Input Parameters:
2148: +     VecScatter - obtained with VecScatterCreateEmpty()
2149: .     nsends -
2150: .     sendSizes -
2151: .     sendProcs -
2152: .     sendIdx - indices where the sent entries are obtained from (in local, on process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
2153: .     nrecvs - number of receives to expect
2154: .     recvSizes -
2155: .     recvProcs - processes who are sending to me
2156: .     recvIdx - indices of where received entries are to be put, (in local, on process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
2157: -     bs - size of block

2159:      Notes:  sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
2160:       to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.

2162:        Probably does not handle sends to self properly. It should remove those from the counts that are used
2163:       in allocating space inside of the from struct

2165:   Level: intermediate

2167: @*/
2168: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
2169: {
2170:   VecScatter_MPI_General *from, *to;
2171:   PetscInt               sendSize, recvSize;
2172:   PetscInt               n, i;
2173:   PetscErrorCode         ierr;

2175:   /* allocate entire send scatter context */
2176:   PetscNewLog(ctx,&to);
2177:   to->n = nsends;
2178:   for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];

2180:   PetscMalloc1(to->n,&to->requests);
2181:   PetscMalloc4(bs*sendSize,&to->values,sendSize,&to->indices,to->n+1,&to->starts,to->n,&to->procs);
2182:   PetscMalloc2(PetscMax(to->n,nrecvs),&to->sstatus,PetscMax(to->n,nrecvs),&to->rstatus);

2184:   to->starts[0] = 0;
2185:   for (n = 0; n < to->n; n++) {
2186:     if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
2187:     to->starts[n+1] = to->starts[n] + sendSizes[n];
2188:     to->procs[n]    = sendProcs[n];
2189:     for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
2190:   }
2191:   ctx->todata = (void*) to;

2193:   /* allocate entire receive scatter context */
2194:   PetscNewLog(ctx,&from);
2195:   from->n = nrecvs;
2196:   for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];

2198:   PetscMalloc1(from->n,&from->requests);
2199:   PetscMalloc4(bs*recvSize,&from->values,recvSize,&from->indices,from->n+1,&from->starts,from->n,&from->procs);

2201:   from->starts[0] = 0;
2202:   for (n = 0; n < from->n; n++) {
2203:     if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
2204:     from->starts[n+1] = from->starts[n] + recvSizes[n];
2205:     from->procs[n]    = recvProcs[n];
2206:     for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
2207:   }
2208:   ctx->fromdata = (void*)from;

2210:   /* No local scatter optimization */
2211:   from->local.n                    = 0;
2212:   from->local.vslots               = 0;
2213:   to->local.n                      = 0;
2214:   to->local.vslots                 = 0;
2215:   from->local.nonmatching_computed = PETSC_FALSE;
2216:   from->local.n_nonmatching        = 0;
2217:   from->local.slots_nonmatching    = 0;
2218:   to->local.nonmatching_computed   = PETSC_FALSE;
2219:   to->local.n_nonmatching          = 0;
2220:   to->local.slots_nonmatching      = 0;

2222:   from->type = VEC_SCATTER_MPI_GENERAL;
2223:   to->type   = VEC_SCATTER_MPI_GENERAL;
2224:   from->bs   = bs;
2225:   to->bs     = bs;
2226:   VecScatterCreateCommon_PtoS(from, to, ctx);

2228:   /* mark lengths as negative so it won't check local vector lengths */
2229:   ctx->from_n = ctx->to_n = -1;
2230:   return(0);
2231: }

2233: /*
2234:    bs indicates how many elements there are in each block. Normally this would be 1.

2236:    contains check that PetscMPIInt can handle the sizes needed
2237: */
2238: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2239: {
2240:   VecScatter_MPI_General *from,*to;
2241:   PetscMPIInt            size,rank,imdex,tag,n;
2242:   PetscInt               *source = NULL,*owners = NULL,nxr;
2243:   PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
2244:   PetscMPIInt            *nprocs = NULL,nrecvs;
2245:   PetscInt               i,j,idx,nsends;
2246:   PetscMPIInt            *owner = NULL;
2247:   PetscInt               *starts = NULL,count,slen;
2248:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2249:   PetscMPIInt            *onodes1,*olengths1;
2250:   MPI_Comm               comm;
2251:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2252:   MPI_Status             recv_status,*send_status;
2253:   PetscErrorCode         ierr;

2256:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2257:   PetscObjectGetComm((PetscObject)xin,&comm);
2258:   MPI_Comm_rank(comm,&rank);
2259:   MPI_Comm_size(comm,&size);
2260:   owners = xin->map->range;
2261:   VecGetSize(yin,&lengthy);
2262:   VecGetSize(xin,&lengthx);

2264:   /*  first count number of contributors to each processor */
2265:   PetscMalloc2(size,&nprocs,nx,&owner);
2266:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2268:   j      = 0;
2269:   nsends = 0;
2270:   for (i=0; i<nx; i++) {
2271:     idx = bs*inidx[i];
2272:     if (idx < owners[j]) j = 0;
2273:     for (; j<size; j++) {
2274:       if (idx < owners[j+1]) {
2275:         if (!nprocs[j]++) nsends++;
2276:         owner[i] = j;
2277:         break;
2278:       }
2279:     }
2280:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2281:   }

2283:   nprocslocal  = nprocs[rank];
2284:   nprocs[rank] = 0;
2285:   if (nprocslocal) nsends--;
2286:   /* inform other processors of number of messages and max length*/
2287:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2288:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2289:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2290:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2292:   /* post receives:   */
2293:   PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2294:   count = 0;
2295:   for (i=0; i<nrecvs; i++) {
2296:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2297:     count += olengths1[i];
2298:   }

2300:   /* do sends:
2301:      1) starts[i] gives the starting index in svalues for stuff going to
2302:      the ith processor
2303:   */
2304:   nxr = 0;
2305:   for (i=0; i<nx; i++) {
2306:     if (owner[i] != rank) nxr++;
2307:   }
2308:   PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);

2310:   starts[0]  = 0;
2311:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2312:   for (i=0; i<nx; i++) {
2313:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2314:   }
2315:   starts[0] = 0;
2316:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2317:   count = 0;
2318:   for (i=0; i<size; i++) {
2319:     if (nprocs[i]) {
2320:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2321:     }
2322:   }

2324:   /*  wait on receives */
2325:   count = nrecvs;
2326:   slen  = 0;
2327:   while (count) {
2328:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2329:     /* unpack receives into our local space */
2330:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2331:     slen += n;
2332:     count--;
2333:   }

2335:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);

2337:   /* allocate entire send scatter context */
2338:   PetscNewLog(ctx,&to);
2339:   to->n = nrecvs;

2341:   PetscMalloc1(nrecvs,&to->requests);
2342:   PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2343:   PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);

2345:   ctx->todata   = (void*)to;
2346:   to->starts[0] = 0;

2348:   if (nrecvs) {
2349:     /* move the data into the send scatter */
2350:     base     = owners[rank];
2351:     rsvalues = rvalues;
2352:     for (i=0; i<nrecvs; i++) {
2353:       to->starts[i+1] = to->starts[i] + olengths1[i];
2354:       to->procs[i]    = onodes1[i];
2355:       values = rsvalues;
2356:       rsvalues += olengths1[i];
2357:       for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2358:     }
2359:   }
2360:   PetscFree(olengths1);
2361:   PetscFree(onodes1);
2362:   PetscFree3(rvalues,source,recv_waits);

2364:   /* allocate entire receive scatter context */
2365:   PetscNewLog(ctx,&from);
2366:   from->n = nsends;

2368:   PetscMalloc1(nsends,&from->requests);
2369:   PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2370:   ctx->fromdata = (void*)from;

2372:   /* move data into receive scatter */
2373:   PetscMalloc2(size,&lowner,nsends+1,&start);
2374:   count = 0; from->starts[0] = start[0] = 0;
2375:   for (i=0; i<size; i++) {
2376:     if (nprocs[i]) {
2377:       lowner[i]            = count;
2378:       from->procs[count++] = i;
2379:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
2380:     }
2381:   }

2383:   for (i=0; i<nx; i++) {
2384:     if (owner[i] != rank) {
2385:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2386:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2387:     }
2388:   }
2389:   PetscFree2(lowner,start);
2390:   PetscFree2(nprocs,owner);

2392:   /* wait on sends */
2393:   if (nsends) {
2394:     PetscMalloc1(nsends,&send_status);
2395:     MPI_Waitall(nsends,send_waits,send_status);
2396:     PetscFree(send_status);
2397:   }
2398:   PetscFree3(svalues,send_waits,starts);

2400:   if (nprocslocal) {
2401:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2402:     /* we have a scatter to ourselves */
2403:     PetscMalloc1(nt,&to->local.vslots);
2404:     PetscMalloc1(nt,&from->local.vslots);
2405:     nt   = 0;
2406:     for (i=0; i<nx; i++) {
2407:       idx = bs*inidx[i];
2408:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2409:         to->local.vslots[nt]     = idx - owners[rank];
2410:         from->local.vslots[nt++] = bs*inidy[i];
2411:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2412:       }
2413:     }
2414:     PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2415:   } else {
2416:     from->local.n      = 0;
2417:     from->local.vslots = 0;
2418:     to->local.n        = 0;
2419:     to->local.vslots   = 0;
2420:   }

2422:   from->local.nonmatching_computed = PETSC_FALSE;
2423:   from->local.n_nonmatching        = 0;
2424:   from->local.slots_nonmatching    = 0;
2425:   to->local.nonmatching_computed   = PETSC_FALSE;
2426:   to->local.n_nonmatching          = 0;
2427:   to->local.slots_nonmatching      = 0;

2429:   from->type = VEC_SCATTER_MPI_GENERAL;
2430:   to->type   = VEC_SCATTER_MPI_GENERAL;
2431:   from->bs   = bs;
2432:   to->bs     = bs;

2434:   VecScatterCreateCommon_PtoS(from,to,ctx);
2435:   return(0);
2436: }

2438: /*
2439:    bs indicates how many elements there are in each block. Normally this would be 1.
2440: */
2441: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2442: {
2443:   MPI_Comm       comm;
2444:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2445:   PetscInt       bs   = to->bs;
2446:   PetscMPIInt    size;
2447:   PetscInt       i, n;

2451:   PetscObjectGetComm((PetscObject)ctx,&comm);
2452:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2453:   ctx->ops->destroy = VecScatterDestroy_PtoP;

2455:   ctx->reproduce = PETSC_FALSE;
2456:   to->sendfirst  = PETSC_FALSE;
2457:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2458:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2459:   from->sendfirst = to->sendfirst;

2461:   MPI_Comm_size(comm,&size);
2462:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2463:   to->contiq = PETSC_FALSE;
2464:   n = from->starts[from->n];
2465:   from->contiq = PETSC_TRUE;
2466:   for (i=1; i<n; i++) {
2467:     if (from->indices[i] != from->indices[i-1] + bs) {
2468:       from->contiq = PETSC_FALSE;
2469:       break;
2470:     }
2471:   }

2473:   to->use_alltoallv = PETSC_FALSE;
2474:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2475:   from->use_alltoallv = to->use_alltoallv;
2476:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2477: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
2478:   if (to->use_alltoallv) {
2479:     to->use_alltoallw = PETSC_FALSE;
2480:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2481:   }
2482:   from->use_alltoallw = to->use_alltoallw;
2483:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2484: #endif

2486: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2487:   to->use_window = PETSC_FALSE;
2488:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_window",&to->use_window,NULL);
2489:   from->use_window = to->use_window;
2490: #endif

2492:   if (to->use_alltoallv) {

2494:     PetscMalloc2(size,&to->counts,size,&to->displs);
2495:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2496:     for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);

2498:     to->displs[0] = 0;
2499:     for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];

2501:     PetscMalloc2(size,&from->counts,size,&from->displs);
2502:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2503:     for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2504:     from->displs[0] = 0;
2505:     for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];

2507: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2508:     if (to->use_alltoallw) {
2509:       PetscMPIInt mpibs, mpilen;

2511:       ctx->packtogether = PETSC_FALSE;
2512:       PetscMPIIntCast(bs,&mpibs);
2513:       PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2514:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2515:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2516:       for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;

2518:       for (i=0; i<to->n; i++) {
2519:         to->wcounts[to->procs[i]] = 1;
2520:         PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2521:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2522:         MPI_Type_commit(to->types+to->procs[i]);
2523:       }
2524:       PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2525:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2526:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2527:       for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;

2529:       if (from->contiq) {
2530:         PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
2531:         for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);

2533:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2534:         for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];

2536:       } else {
2537:         for (i=0; i<from->n; i++) {
2538:           from->wcounts[from->procs[i]] = 1;
2539:           PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2540:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2541:           MPI_Type_commit(from->types+from->procs[i]);
2542:         }
2543:       }
2544:     } else ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;

2546: #else
2547:     to->use_alltoallw   = PETSC_FALSE;
2548:     from->use_alltoallw = PETSC_FALSE;
2549:     ctx->ops->copy      = VecScatterCopy_PtoP_AllToAll;
2550: #endif
2551: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2552:   } else if (to->use_window) {
2553:     PetscMPIInt temptag,winsize;
2554:     MPI_Request *request;
2555:     MPI_Status  *status;

2557:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2558:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2559:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2560:     PetscMalloc1(to->n,&to->winstarts);
2561:     PetscMalloc2(to->n,&request,to->n,&status);
2562:     for (i=0; i<to->n; i++) {
2563:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2564:     }
2565:     for (i=0; i<from->n; i++) {
2566:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2567:     }
2568:     MPI_Waitall(to->n,request,status);
2569:     PetscFree2(request,status);

2571:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2572:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2573:     PetscMalloc1(from->n,&from->winstarts);
2574:     PetscMalloc2(from->n,&request,from->n,&status);
2575:     for (i=0; i<from->n; i++) {
2576:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2577:     }
2578:     for (i=0; i<to->n; i++) {
2579:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2580:     }
2581:     MPI_Waitall(from->n,request,status);
2582:     PetscFree2(request,status);
2583: #endif
2584:   } else {
2585:     PetscBool   use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2586:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2587:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2588:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2589:     MPI_Request *rev_swaits,*rev_rwaits;
2590:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2592:     /* allocate additional wait variables for the "reverse" scatter */
2593:     PetscMalloc1(to->n,&rev_rwaits);
2594:     PetscMalloc1(from->n,&rev_swaits);
2595:     to->rev_requests   = rev_rwaits;
2596:     from->rev_requests = rev_swaits;

2598:     /* Register the receives that you will use later (sends for scatter reverse) */
2599:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&use_rsend,NULL);
2600:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&use_ssend,NULL);
2601:     if (use_rsend) {
2602:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2603:       to->use_readyreceiver   = PETSC_TRUE;
2604:       from->use_readyreceiver = PETSC_TRUE;
2605:     } else {
2606:       to->use_readyreceiver   = PETSC_FALSE;
2607:       from->use_readyreceiver = PETSC_FALSE;
2608:     }
2609:     if (use_ssend) {
2610:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2611:     }

2613:     for (i=0; i<from->n; i++) {
2614:       if (use_rsend) {
2615:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2616:       } else if (use_ssend) {
2617:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2618:       } else {
2619:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2620:       }
2621:     }

2623:     for (i=0; i<to->n; i++) {
2624:       if (use_rsend) {
2625:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2626:       } else if (use_ssend) {
2627:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2628:       } else {
2629:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2630:       }
2631:     }
2632:     /* Register receives for scatter and reverse */
2633:     for (i=0; i<from->n; i++) {
2634:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2635:     }
2636:     for (i=0; i<to->n; i++) {
2637:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2638:     }
2639:     if (use_rsend) {
2640:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2641:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2642:       MPI_Barrier(comm);
2643:     }

2645:     ctx->ops->copy = VecScatterCopy_PtoP_X;
2646:   }
2647:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2649: #if defined(PETSC_USE_DEBUG)
2650:   MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2651:   MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2652:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2653: #endif

2655:   switch (bs) {
2656:   case 12:
2657:     ctx->ops->begin = VecScatterBegin_12;
2658:     ctx->ops->end   = VecScatterEnd_12;
2659:     break;
2660:   case 11:
2661:     ctx->ops->begin = VecScatterBegin_11;
2662:     ctx->ops->end   = VecScatterEnd_11;
2663:     break;
2664:   case 10:
2665:     ctx->ops->begin = VecScatterBegin_10;
2666:     ctx->ops->end   = VecScatterEnd_10;
2667:     break;
2668:   case 9:
2669:     ctx->ops->begin = VecScatterBegin_9;
2670:     ctx->ops->end   = VecScatterEnd_9;
2671:     break;
2672:   case 8:
2673:     ctx->ops->begin = VecScatterBegin_8;
2674:     ctx->ops->end   = VecScatterEnd_8;
2675:     break;
2676:   case 7:
2677:     ctx->ops->begin = VecScatterBegin_7;
2678:     ctx->ops->end   = VecScatterEnd_7;
2679:     break;
2680:   case 6:
2681:     ctx->ops->begin = VecScatterBegin_6;
2682:     ctx->ops->end   = VecScatterEnd_6;
2683:     break;
2684:   case 5:
2685:     ctx->ops->begin = VecScatterBegin_5;
2686:     ctx->ops->end   = VecScatterEnd_5;
2687:     break;
2688:   case 4:
2689:     ctx->ops->begin = VecScatterBegin_4;
2690:     ctx->ops->end   = VecScatterEnd_4;
2691:     break;
2692:   case 3:
2693:     ctx->ops->begin = VecScatterBegin_3;
2694:     ctx->ops->end   = VecScatterEnd_3;
2695:     break;
2696:   case 2:
2697:     ctx->ops->begin = VecScatterBegin_2;
2698:     ctx->ops->end   = VecScatterEnd_2;
2699:     break;
2700:   case 1:
2701:     ctx->ops->begin = VecScatterBegin_1;
2702:     ctx->ops->end   = VecScatterEnd_1;
2703:     break;
2704:   default:
2705:     ctx->ops->begin = VecScatterBegin_bs;
2706:     ctx->ops->end   = VecScatterEnd_bs;

2708:   }
2709:   ctx->ops->view = VecScatterView_MPI;
2710:   /* Check if the local scatter is actually a copy; important special case */
2711:   if (to->local.n) {
2712:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2713:   }
2714:   return(0);
2715: }



2719: /* ------------------------------------------------------------------------------------*/
2720: /*
2721:          Scatter from local Seq vectors to a parallel vector.
2722:          Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2723:          reverses the result.
2724: */
2725: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2726: {
2727:   PetscErrorCode         ierr;
2728:   MPI_Request            *waits;
2729:   VecScatter_MPI_General *to,*from;

2732:   VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2733:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2734:   from          = (VecScatter_MPI_General*)ctx->todata;
2735:   ctx->todata   = (void*)to;
2736:   ctx->fromdata = (void*)from;
2737:   /* these two are special, they are ALWAYS stored in to struct */
2738:   to->sstatus   = from->sstatus;
2739:   to->rstatus   = from->rstatus;

2741:   from->sstatus = 0;
2742:   from->rstatus = 0;

2744:   waits              = from->rev_requests;
2745:   from->rev_requests = from->requests;
2746:   from->requests     = waits;
2747:   waits              = to->rev_requests;
2748:   to->rev_requests   = to->requests;
2749:   to->requests       = waits;
2750:   return(0);
2751: }

2753: /* ---------------------------------------------------------------------------------*/
2754: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2755: {
2757:   PetscMPIInt    size,rank,tag,imdex,n;
2758:   PetscInt       *owners = xin->map->range;
2759:   PetscMPIInt    *nprocs = NULL;
2760:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2761:   PetscMPIInt    *owner   = NULL;
2762:   PetscInt       *starts  = NULL,count,slen;
2763:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2764:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2765:   MPI_Comm       comm;
2766:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2767:   MPI_Status     recv_status,*send_status = NULL;
2768:   PetscBool      duplicate = PETSC_FALSE;
2769: #if defined(PETSC_USE_DEBUG)
2770:   PetscBool      found = PETSC_FALSE;
2771: #endif

2774:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2775:   PetscObjectGetComm((PetscObject)xin,&comm);
2776:   MPI_Comm_size(comm,&size);
2777:   MPI_Comm_rank(comm,&rank);
2778:   if (size == 1) {
2779:     VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2780:     return(0);
2781:   }

2783:   /*
2784:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2785:      They then call the StoPScatterCreate()
2786:   */
2787:   /*  first count number of contributors to each processor */
2788:   PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2789:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2791:   lastidx = -1;
2792:   j       = 0;
2793:   for (i=0; i<nx; i++) {
2794:     /* if indices are NOT locally sorted, need to start search at the beginning */
2795:     if (lastidx > (idx = bs*inidx[i])) j = 0;
2796:     lastidx = idx;
2797:     for (; j<size; j++) {
2798:       if (idx >= owners[j] && idx < owners[j+1]) {
2799:         nprocs[j]++;
2800:         owner[i] = j;
2801: #if defined(PETSC_USE_DEBUG)
2802:         found = PETSC_TRUE;
2803: #endif
2804:         break;
2805:       }
2806:     }
2807: #if defined(PETSC_USE_DEBUG)
2808:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2809:     found = PETSC_FALSE;
2810: #endif
2811:   }
2812:   nsends = 0;
2813:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

2815:   /* inform other processors of number of messages and max length*/
2816:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2817:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2818:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2819:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2821:   /* post receives:   */
2822:   PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);

2824:   count = 0;
2825:   for (i=0; i<nrecvs; i++) {
2826:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2827:     count += olengths1[i];
2828:   }
2829:   PetscFree(onodes1);

2831:   /* do sends:
2832:       1) starts[i] gives the starting index in svalues for stuff going to
2833:          the ith processor
2834:   */
2835:   starts[0]= 0;
2836:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2837:   for (i=0; i<nx; i++) {
2838:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2839:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2840:   }

2842:   starts[0] = 0;
2843:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2844:   count = 0;
2845:   for (i=0; i<size; i++) {
2846:     if (nprocs[i]) {
2847:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2848:       count++;
2849:     }
2850:   }
2851:   PetscFree3(nprocs,owner,starts);

2853:   /*  wait on receives */
2854:   count = nrecvs;
2855:   slen  = 0;
2856:   while (count) {
2857:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2858:     /* unpack receives into our local space */
2859:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2860:     slen += n/2;
2861:     count--;
2862:   }
2863:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2865:   PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2866:   base     = owners[rank];
2867:   count    = 0;
2868:   rsvalues = rvalues;
2869:   for (i=0; i<nrecvs; i++) {
2870:     values    = rsvalues;
2871:     rsvalues += 2*olengths1[i];
2872:     for (j=0; j<olengths1[i]; j++) {
2873:       local_inidx[count]   = values[2*j] - base;
2874:       local_inidy[count++] = values[2*j+1];
2875:     }
2876:   }
2877:   PetscFree(olengths1);

2879:   /* wait on sends */
2880:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2881:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2883:   /*
2884:      should sort and remove duplicates from local_inidx,local_inidy
2885:   */

2887: #if defined(do_it_slow)
2888:   /* sort on the from index */
2889:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2890:   start = 0;
2891:   while (start < slen) {
2892:     count = start+1;
2893:     last  = local_inidx[start];
2894:     while (count < slen && last == local_inidx[count]) count++;
2895:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2896:       /* sort on to index */
2897:       PetscSortInt(count-start,local_inidy+start);
2898:     }
2899:     /* remove duplicates; not most efficient way, but probably good enough */
2900:     i = start;
2901:     while (i < count-1) {
2902:       if (local_inidy[i] != local_inidy[i+1]) i++;
2903:       else { /* found a duplicate */
2904:         duplicate = PETSC_TRUE;
2905:         for (j=i; j<slen-1; j++) {
2906:           local_inidx[j] = local_inidx[j+1];
2907:           local_inidy[j] = local_inidy[j+1];
2908:         }
2909:         slen--;
2910:         count--;
2911:       }
2912:     }
2913:     start = count;
2914:   }
2915: #endif
2916:   if (duplicate) {
2917:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2918:   }
2919:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2920:   PetscFree2(local_inidx,local_inidy);
2921:   return(0);
2922: }

2924: /*@
2925:   PetscSFCreateFromZero - Create a PetscSF that maps a Vec from sequential to distributed

2927:   Input Parameters:
2928: . gv - A distributed Vec

2930:   Output Parameters:
2931: . sf - The SF created mapping a sequential Vec to gv

2933:   Level: developer

2935: .seealso: DMPlexDistributedToSequential()
2936: @*/
2937: PetscErrorCode PetscSFCreateFromZero(MPI_Comm comm, Vec gv, PetscSF *sf)
2938: {
2939:   PetscSFNode   *remotenodes;
2940:   PetscInt      *localnodes;
2941:   PetscInt       N, n, start, numroots, l;
2942:   PetscMPIInt    rank;

2946:   PetscSFCreate(comm, sf);
2947:   VecGetSize(gv, &N);
2948:   VecGetLocalSize(gv, &n);
2949:   VecGetOwnershipRange(gv, &start, NULL);
2950:   MPI_Comm_rank(comm, &rank);
2951:   PetscMalloc1(n, &localnodes);
2952:   PetscMalloc1(n, &remotenodes);
2953:   if (!rank) numroots = N;
2954:   else       numroots = 0;
2955:   for (l = 0; l < n; ++l) {
2956:     localnodes[l]        = l;
2957:     remotenodes[l].rank  = 0;
2958:     remotenodes[l].index = l+start;
2959:   }
2960:   PetscSFSetGraph(*sf, numroots, n, localnodes, PETSC_OWN_POINTER, remotenodes, PETSC_OWN_POINTER);
2961:   return(0);
2962: }