Actual source code: vpscat.c

  1: #define PETSCVEC_DLL

  3: /*
  4:     Defines parallel vector scatters.
  5: */

 7:  #include "private/isimpl.h"
 8:  #include "private/vecimpl.h"
 9:  #include "../src/vec/vec/impls/dvecimpl.h"
 10:  #include "../src/vec/vec/impls/mpi/pvecimpl.h"
 11:  #include "petscsys.h"

 15: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 16: {
 17:   VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
 18:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 19:   PetscErrorCode         ierr;
 20:   PetscInt               i;
 21:   PetscMPIInt            rank;
 22:   PetscViewerFormat      format;
 23:   PetscTruth             iascii;

 26:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
 27:   if (iascii) {
 28:     MPI_Comm_rank(((PetscObject)ctx)->comm,&rank);
 29:     PetscViewerGetFormat(viewer,&format);
 30:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 31:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 33:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 34:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 35:       itmp = to->starts[to->n];
 36:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 37:       itmp = from->starts[from->n];
 38:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 39:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,((PetscObject)ctx)->comm);

 41:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 42:       PetscViewerASCIIPrintf(viewer,"  Maximum number of sends %D\n",nsend_max);
 43:       PetscViewerASCIIPrintf(viewer,"  Maximum number of receives %D\n",nrecv_max);
 44:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %d\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 45:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %d\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 46:       PetscViewerASCIIPrintf(viewer,"  Total data sent %d\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 48:     } else {
 49:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 50:       if (to->n) {
 51:         for (i=0; i<to->n; i++){
 52:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 53:         }
 54:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 55:         for (i=0; i<to->starts[to->n]; i++){
 56:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
 57:         }
 58:       }

 60:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 61:       if (from->n) {
 62:         for (i=0; i<from->n; i++){
 63:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 64:         }

 66:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 67:         for (i=0; i<from->starts[from->n]; i++){
 68:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
 69:         }
 70:       }
 71:       if (to->local.n) {
 72:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
 73:         for (i=0; i<to->local.n; i++){
 74:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
 75:         }
 76:       }

 78:       PetscViewerFlush(viewer);
 79:     }
 80:   } else {
 81:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
 82:   }
 83:   return(0);
 84: }

 86: /* -----------------------------------------------------------------------------------*/
 87: /*
 88:       The next routine determines which part of the local part of the scatter is an
 89:   exact copy of values into their current location. We check this here and
 90:   then know that we need not perform that portion of the scatter when the vector is
 91:   scattering to itself with INSERT_VALUES.

 93:      This is currently not used, but it would speed up, for example, DALocalToLocalBegin/End().

 95: */
 98: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
 99: {
100:   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
102:   PetscInt       *nto_slots,*nfrom_slots,j = 0;
103: 
105:   for (i=0; i<n; i++) {
106:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
107:   }

109:   if (!n_nonmatching) {
110:     to->nonmatching_computed = PETSC_TRUE;
111:     to->n_nonmatching        = from->n_nonmatching = 0;
112:     PetscInfo1(scatter,"Reduced %D to 0\n", n);
113:   } else if (n_nonmatching == n) {
114:     to->nonmatching_computed = PETSC_FALSE;
115:     PetscInfo(scatter,"All values non-matching\n");
116:   } else {
117:     to->nonmatching_computed= PETSC_TRUE;
118:     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;
119:     PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
120:     PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
121:     to->slots_nonmatching   = nto_slots;
122:     from->slots_nonmatching = nfrom_slots;
123:     for (i=0; i<n; i++) {
124:       if (to_slots[i] != from_slots[i]) {
125:         nto_slots[j]   = to_slots[i];
126:         nfrom_slots[j] = from_slots[i];
127:         j++;
128:       }
129:     }
130:     PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
131:   }
132:   return(0);
133: }
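
For reference, here is a small illustration (the slot values are made up and are not taken from the library) of what the routine above computes: only entries whose source and target slots differ actually need to be moved.

/* Illustration only: hypothetical slot data showing what VecScatterLocalOptimize_Private
   records.  Entries with to_slots[i] == from_slots[i] are already in place and can be
   skipped when the vector scatters to itself with INSERT_VALUES. */
#include <stdio.h>

int main(void)
{
  int from_slots[] = {0, 1, 2, 7, 9};   /* where values come from */
  int to_slots[]   = {0, 1, 2, 5, 9};   /* where values go        */
  int n = 5, i;

  for (i = 0; i < n; i++) {
    if (to_slots[i] != from_slots[i]) printf("must move slot %d -> slot %d\n", from_slots[i], to_slots[i]);
  }
  return 0;   /* prints only "must move slot 7 -> slot 5"; the other four entries match */
}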

135: /* --------------------------------------------------------------------------------------*/

137: /* -------------------------------------------------------------------------------------*/
140: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
141: {
142:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
143:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
144:   PetscErrorCode         ierr;
145:   PetscInt               i;

148:   if (to->use_readyreceiver) {
 149:     /*
 150:        Since we have already posted the receives we must cancel them before freeing
 151:        the requests
 152:     */
153:     for (i=0; i<from->n; i++) {
154:       MPI_Cancel(from->requests+i);
155:     }
156:     for (i=0; i<to->n; i++) {
157:       MPI_Cancel(to->rev_requests+i);
158:     }
159:   }

161: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
162:   if (to->use_alltoallw) {
163:     PetscFree3(to->wcounts,to->wdispls,to->types);
164:     PetscFree3(from->wcounts,from->wdispls,from->types);
165:   }
166: #endif

168: #if defined(PETSC_HAVE_MPI_WINDOW)
169:   if (to->use_window) {
170:     MPI_Win_free(&from->window);
171:     MPI_Win_free(&to->window);
172:   }
173: #endif

175:   if (to->use_alltoallv) {
176:     PetscFree2(to->counts,to->displs);
177:     PetscFree2(from->counts,from->displs);
178:   }

180:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
181:   /* 
 182:      IBM's PE version of MPI has a bug where freeing these requests corrupts later
 183:      message passing.
184:   */
185: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
186:   if (!to->use_alltoallv) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
187:     if (to->requests) {
188:       for (i=0; i<to->n; i++) {
189:         MPI_Request_free(to->requests + i);
190:       }
191:     }
192:     if (to->rev_requests) {
193:       for (i=0; i<to->n; i++) {
194:         MPI_Request_free(to->rev_requests + i);
195:       }
196:     }
197:   }
 198:   /*
 199:       MPICH could not properly cancel requests, so with ready-receiver mode we
 200:     cannot free the requests. This may be fixed now; if not, put the following
 201:     code inside an if (!to->use_readyreceiver) { ... } block.
 202:   */
203:   if (!to->use_alltoallv) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
204:     if (from->requests) {
205:       for (i=0; i<from->n; i++) {
206:         MPI_Request_free(from->requests + i);
207:       }
208:     }

210:     if (from->rev_requests) {
211:       for (i=0; i<from->n; i++) {
212:         MPI_Request_free(from->rev_requests + i);
213:       }
214:     }
215:   }
216: #endif

218:   PetscFree(to->local.vslots);
219:   PetscFree(from->local.vslots);
220:   PetscFree2(to->counts,to->displs);
221:   PetscFree2(from->counts,from->displs);
222:   PetscFree(to->local.slots_nonmatching);
223:   PetscFree(from->local.slots_nonmatching);
224:   PetscFree(to->rev_requests);
225:   PetscFree(from->rev_requests);
226:   PetscFree(to->requests);
227:   PetscFree(from->requests);
228:   PetscFree4(to->values,to->indices,to->starts,to->procs);
229:   PetscFree2(to->sstatus,to->rstatus);
230:   PetscFree4(from->values,from->indices,from->starts,from->procs);
231:   PetscFree(from);
232:   PetscFree(to);
233:   PetscHeaderDestroy(ctx);
234:   return(0);
235: }



239: /* --------------------------------------------------------------------------------------*/
 240: /*
 241:     Special optimization that checks whether the local part of the scatter is actually
 242:     a contiguous copy; if so, the scatter routines call PetscMemcpy() instead of the
 243:     general gather/scatter loops.
 244: */
247: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
248: {
249:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
250:   PetscInt       to_start,from_start;

254:   to_start   = to_slots[0];
255:   from_start = from_slots[0];

257:   for (i=1; i<n; i++) {
258:     to_start   += bs;
259:     from_start += bs;
260:     if (to_slots[i]   != to_start)   return(0);
261:     if (from_slots[i] != from_start) return(0);
262:   }
263:   to->is_copy       = PETSC_TRUE;
264:   to->copy_start    = to_slots[0];
265:   to->copy_length   = bs*sizeof(PetscScalar)*n;
266:   from->is_copy     = PETSC_TRUE;
267:   from->copy_start  = from_slots[0];
268:   from->copy_length = bs*sizeof(PetscScalar)*n;
269:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
270:   return(0);
271: }
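
As a small illustration (made-up data, not part of the library), the contiguity test above succeeds whenever both slot lists advance by exactly bs at every step:

/* Illustration only: with bs = 2, both slot lists are strided by bs, so the whole
   local scatter collapses into one PetscMemcpy of 3*bs scalars starting at slot 4. */
#include <stdio.h>

int main(void)
{
  int bs = 2, n = 3, i, is_copy = 1;
  int to_slots[]   = {4, 6, 8};
  int from_slots[] = {10, 12, 14};
  int to_start = to_slots[0], from_start = from_slots[0];

  for (i = 1; i < n; i++) {
    to_start += bs; from_start += bs;
    if (to_slots[i] != to_start || from_slots[i] != from_start) { is_copy = 0; break; }
  }
  printf("is_copy = %d, copy_start = %d, copy_length = %d scalars\n", is_copy, to_slots[0], bs*n);
  return 0;   /* prints: is_copy = 1, copy_start = 4, copy_length = 6 scalars */
}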

273: /* --------------------------------------------------------------------------------------*/

277: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
278: {
279:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
280:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
281:   PetscErrorCode         ierr;
282:   PetscInt               ny,bs = in_from->bs;

285:   out->begin     = in->begin;
286:   out->end       = in->end;
287:   out->copy      = in->copy;
288:   out->destroy   = in->destroy;
289:   out->view      = in->view;

291:   /* allocate entire send scatter context */
292:   PetscNewLog(out,VecScatter_MPI_General,&out_to);
293:   PetscNewLog(out,VecScatter_MPI_General,&out_from);

295:   ny                = in_to->starts[in_to->n];
296:   out_to->n         = in_to->n;
297:   out_to->type      = in_to->type;
298:   out_to->sendfirst = in_to->sendfirst;

300:   PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
301:   PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
302:   PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
303:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
304:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
305:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
306: 
307:   out->todata       = (void*)out_to;
308:   out_to->local.n   = in_to->local.n;
309:   out_to->local.nonmatching_computed = PETSC_FALSE;
310:   out_to->local.n_nonmatching        = 0;
311:   out_to->local.slots_nonmatching    = 0;
312:   if (in_to->local.n) {
313:     PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
314:     PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
315:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
316:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
317:   } else {
318:     out_to->local.vslots   = 0;
319:     out_from->local.vslots = 0;
320:   }

322:   /* allocate entire receive context */
323:   out_from->type      = in_from->type;
324:   ny                  = in_from->starts[in_from->n];
325:   out_from->n         = in_from->n;
326:   out_from->sendfirst = in_from->sendfirst;

328:   PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
329:   PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
330:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
331:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
332:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
333:   out->fromdata       = (void*)out_from;
334:   out_from->local.n   = in_from->local.n;
335:   out_from->local.nonmatching_computed = PETSC_FALSE;
336:   out_from->local.n_nonmatching        = 0;
337:   out_from->local.slots_nonmatching    = 0;

 339:   /*
 340:       set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
 341:   */
342:   {
343:     PetscMPIInt tag;
344:     MPI_Comm    comm;
345:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
346:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
347:     PetscInt    i;
348:     PetscTruth  flg;
349:     MPI_Request *swaits  = out_to->requests,*rwaits  = out_from->requests;
350:     MPI_Request *rev_swaits,*rev_rwaits;
351:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

353:     PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
354:     PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);

356:     rev_rwaits = out_to->rev_requests;
357:     rev_swaits = out_from->rev_requests;

359:     out_from->bs = out_to->bs = bs;
360:     tag     = ((PetscObject)out)->tag;
361:     comm    = ((PetscObject)out)->comm;

363:     /* Register the receives that you will use later (sends for scatter reverse) */
364:     for (i=0; i<out_from->n; i++) {
365:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
366:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
367:     }

369:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_rsend",&flg);
370:     if (flg) {
371:       out_to->use_readyreceiver    = PETSC_TRUE;
372:       out_from->use_readyreceiver  = PETSC_TRUE;
373:       for (i=0; i<out_to->n; i++) {
374:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
375:       }
376:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
377:       MPI_Barrier(comm);
378:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
379:     } else {
380:       out_to->use_readyreceiver    = PETSC_FALSE;
381:       out_from->use_readyreceiver  = PETSC_FALSE;
382:       PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
383:       if (flg) {
384:         PetscInfo(in,"Using VecScatter Ssend mode\n");
385:       }
386:       for (i=0; i<out_to->n; i++) {
387:         if (!flg) {
388:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
389:         } else {
390:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
391:         }
392:       }
393:     }
394:     /* Register receives for scatter reverse */
395:     for (i=0; i<out_to->n; i++) {
396:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
397:     }
398:   }

400:   return(0);
401: }
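
The copy routine above recreates the persistent requests with MPI_Recv_init()/MPI_Send_init() rather than copying them, because persistent requests are bound to particular buffers. For readers unfamiliar with that MPI idiom, here is a minimal self-contained sketch (plain MPI, not PETSc code; it assumes exactly two ranks exchanging one value each):

/* Minimal sketch of the persistent-request idiom: create the requests once,
   then start and wait on them every time the same buffers are reused. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
  int         rank, size, iter;
  double      sendbuf = 0.0, recvbuf = 0.0;
  MPI_Request reqs[2];

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if (size == 2) {
    int other = 1 - rank;
    MPI_Send_init(&sendbuf, 1, MPI_DOUBLE, other, 0, MPI_COMM_WORLD, &reqs[0]);
    MPI_Recv_init(&recvbuf, 1, MPI_DOUBLE, other, 0, MPI_COMM_WORLD, &reqs[1]);
    for (iter = 0; iter < 3; iter++) {            /* reuse the same requests each iteration */
      sendbuf = rank + 10.0*iter;
      MPI_Startall(2, reqs);
      MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
      printf("[%d] iter %d received %g\n", rank, iter, recvbuf);
    }
    MPI_Request_free(&reqs[0]);
    MPI_Request_free(&reqs[1]);
  }
  MPI_Finalize();
  return 0;
}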

405: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
406: {
407:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
408:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
409:   PetscErrorCode         ierr;
410:   PetscInt               ny,bs = in_from->bs;
411:   PetscMPIInt            size;

414:   MPI_Comm_size(((PetscObject)in)->comm,&size);
415:   out->begin     = in->begin;
416:   out->end       = in->end;
417:   out->copy      = in->copy;
418:   out->destroy   = in->destroy;
419:   out->view      = in->view;

421:   /* allocate entire send scatter context */
422:   PetscNewLog(out,VecScatter_MPI_General,&out_to);
423:   PetscNewLog(out,VecScatter_MPI_General,&out_from);

425:   ny                = in_to->starts[in_to->n];
426:   out_to->n         = in_to->n;
427:   out_to->type      = in_to->type;
428:   out_to->sendfirst = in_to->sendfirst;

430:   PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
431:   PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
432:   PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
433:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
434:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
435:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
436: 
437:   out->todata       = (void*)out_to;
438:   out_to->local.n   = in_to->local.n;
439:   out_to->local.nonmatching_computed = PETSC_FALSE;
440:   out_to->local.n_nonmatching        = 0;
441:   out_to->local.slots_nonmatching    = 0;
442:   if (in_to->local.n) {
443:     PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
444:     PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
445:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
446:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
447:   } else {
448:     out_to->local.vslots   = 0;
449:     out_from->local.vslots = 0;
450:   }

452:   /* allocate entire receive context */
453:   out_from->type      = in_from->type;
454:   ny                  = in_from->starts[in_from->n];
455:   out_from->n         = in_from->n;
456:   out_from->sendfirst = in_from->sendfirst;

458:   PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
459:   PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
460:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
461:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
462:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
463:   out->fromdata       = (void*)out_from;
464:   out_from->local.n   = in_from->local.n;
465:   out_from->local.nonmatching_computed = PETSC_FALSE;
466:   out_from->local.n_nonmatching        = 0;
467:   out_from->local.slots_nonmatching    = 0;

469:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

471:   PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
472:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
473:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

475:   PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
476:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
477:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
478:   return(0);
479: }
 480: /* --------------------------------------------------------------------------------------------------
 481:     Pack message data into send buffers and unpack it from receive buffers.

 483:     These routines could be generated automatically.

 485:     Fortran kernels etc. could be used instead.
 486: */
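
Each of the unrolled kernels that follow (Pack_1, UnPack_1, ..., Scatter_12) specializes the same generic loop for one block size, presumably so the compiler can keep the block entries in registers. A generic, not unrolled form of the pack loop, shown only for illustration, would look like:

/* Illustration only: the generic shape that each Pack_<bs> below unrolls.
   indicesx[i] addresses the first scalar of block i in x. */
PETSC_STATIC_INLINE void Pack_bs(PetscInt n,PetscInt bs,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,k;
  for (i=0; i<n; i++) {
    for (k=0; k<bs; k++) y[k] = x[indicesx[i]+k];
    y += bs;
  }
}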
487: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
488: {
489:   PetscInt i;
490:   for (i=0; i<n; i++) {
491:     y[i]  = x[indicesx[i]];
492:   }
493: }
494: PETSC_STATIC_INLINE void UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
495: {
496:   PetscInt i;
497:   switch (addv) {
498:   case INSERT_VALUES:
499:     for (i=0; i<n; i++) {
500:       y[indicesy[i]] = x[i];
501:     }
502:     break;
503:   case ADD_VALUES:
504:     for (i=0; i<n; i++) {
505:       y[indicesy[i]] += x[i];
506:     }
507:     break;
508: #if !defined(PETSC_USE_COMPLEX)
509:   case MAX_VALUES:
510:     for (i=0; i<n; i++) {
511:       y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
512:     }
513: #else
514:   case MAX_VALUES:
515: #endif
516:   case NOT_SET_VALUES:
517:     break;
518:   }
519: }

521: PETSC_STATIC_INLINE void Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
522: {
523:   PetscInt i;
524:   switch (addv) {
525:   case INSERT_VALUES:
526:     for (i=0; i<n; i++) {
527:       y[indicesy[i]] = x[indicesx[i]];
528:     }
529:     break;
530:   case ADD_VALUES:
531:     for (i=0; i<n; i++) {
532:       y[indicesy[i]] += x[indicesx[i]];
533:     }
534:     break;
535: #if !defined(PETSC_USE_COMPLEX)
536:   case MAX_VALUES:
537:     for (i=0; i<n; i++) {
538:       y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
539:     }
540: #else
541:   case MAX_VALUES:
542: #endif
543:   case NOT_SET_VALUES:
544:     break;
545:   }
546: }
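
The InsertMode switch used in all of the UnPack_*/Scatter_* kernels has the same meaning everywhere; here is a tiny self-contained example of the three active cases (the values are made up for illustration):

/* Illustration only: effect of each InsertMode branch above on made-up data.
   x = {5,7} is unpacked into y = {1,1,1} at positions indicesy = {2,0}. */
#include <stdio.h>

int main(void)
{
  double x[2] = {5, 7}, y_ins[3] = {1,1,1}, y_add[3] = {1,1,1}, y_max[3] = {1,1,1};
  int    indicesy[2] = {2, 0}, i;

  for (i = 0; i < 2; i++) {
    y_ins[indicesy[i]]  = x[i];                               /* INSERT_VALUES */
    y_add[indicesy[i]] += x[i];                               /* ADD_VALUES    */
    if (x[i] > y_max[indicesy[i]]) y_max[indicesy[i]] = x[i]; /* MAX_VALUES (real case) */
  }
  printf("insert: %g %g %g\n", y_ins[0], y_ins[1], y_ins[2]); /* 7 1 5 */
  printf("add:    %g %g %g\n", y_add[0], y_add[1], y_add[2]); /* 8 1 6 */
  printf("max:    %g %g %g\n", y_max[0], y_max[1], y_max[2]); /* 7 1 5 */
  return 0;                                                   /* NOT_SET_VALUES leaves y unchanged */
}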

548:   /* ----------------------------------------------------------------------------------------------- */
549: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
550: {
551:   PetscInt i,idx;

553:   for (i=0; i<n; i++) {
554:     idx   = *indicesx++;
555:     y[0]  = x[idx];
556:     y[1]  = x[idx+1];
557:     y    += 2;
558:   }
559: }
560: PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
561: {
562:   PetscInt i,idy;

564:   switch (addv) {
565:   case INSERT_VALUES:
566:     for (i=0; i<n; i++) {
567:       idy       = *indicesy++;
568:       y[idy]    = x[0];
569:       y[idy+1]  = x[1];
570:       x    += 2;
571:     }
572:     break;
573:   case ADD_VALUES:
574:     for (i=0; i<n; i++) {
575:       idy       = *indicesy++;
576:       y[idy]    += x[0];
577:       y[idy+1]  += x[1];
578:       x    += 2;
579:     }
580:     break;
581: #if !defined(PETSC_USE_COMPLEX)
582:   case MAX_VALUES:
583:     for (i=0; i<n; i++) {
584:       idy       = *indicesy++;
585:       y[idy]    = PetscMax(y[idy],x[0]);
586:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
587:       x    += 2;
588:     }
589: #else
590:   case MAX_VALUES:
591: #endif
592:   case NOT_SET_VALUES:
593:     break;
594:   }
595: }

597: PETSC_STATIC_INLINE void Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
598: {
599:   PetscInt i,idx,idy;

601:   switch (addv) {
602:   case INSERT_VALUES:
603:     for (i=0; i<n; i++) {
604:       idx       = *indicesx++;
605:       idy       = *indicesy++;
606:       y[idy]    = x[idx];
607:       y[idy+1]  = x[idx+1];
608:     }
609:     break;
610:   case ADD_VALUES:
611:     for (i=0; i<n; i++) {
612:       idx       = *indicesx++;
613:       idy       = *indicesy++;
614:       y[idy]    += x[idx];
615:       y[idy+1]  += x[idx+1];
616:     }
617:     break;
618: #if !defined(PETSC_USE_COMPLEX)
619:   case MAX_VALUES:
620:     for (i=0; i<n; i++) {
621:       idx       = *indicesx++;
622:       idy       = *indicesy++;
623:       y[idy]    = PetscMax(y[idy],x[idx]);
624:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
625:     }
626: #else
627:   case MAX_VALUES:
628: #endif
629:   case NOT_SET_VALUES:
630:     break;
631:   }
632: }
633:   /* ----------------------------------------------------------------------------------------------- */
634: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
635: {
636:   PetscInt i,idx;

638:   for (i=0; i<n; i++) {
639:     idx   = *indicesx++;
640:     y[0]  = x[idx];
641:     y[1]  = x[idx+1];
642:     y[2]  = x[idx+2];
643:     y    += 3;
644:   }
645: }
646: PETSC_STATIC_INLINE void UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
647: {
648:   PetscInt i,idy;

650:   switch (addv) {
651:   case INSERT_VALUES:
652:     for (i=0; i<n; i++) {
653:       idy       = *indicesy++;
654:       y[idy]    = x[0];
655:       y[idy+1]  = x[1];
656:       y[idy+2]  = x[2];
657:       x    += 3;
658:     }
659:     break;
660:   case ADD_VALUES:
661:     for (i=0; i<n; i++) {
662:       idy       = *indicesy++;
663:       y[idy]    += x[0];
664:       y[idy+1]  += x[1];
665:       y[idy+2]  += x[2];
666:       x    += 3;
667:     }
668:     break;
669: #if !defined(PETSC_USE_COMPLEX)
670:   case MAX_VALUES:
671:     for (i=0; i<n; i++) {
672:       idy       = *indicesy++;
673:       y[idy]    = PetscMax(y[idy],x[0]);
674:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
675:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
676:       x    += 3;
677:     }
678: #else
679:   case MAX_VALUES:
680: #endif
681:   case NOT_SET_VALUES:
682:     break;
683:   }
684: }

686: PETSC_STATIC_INLINE void Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
687: {
688:   PetscInt i,idx,idy;

690:   switch (addv) {
691:   case INSERT_VALUES:
692:     for (i=0; i<n; i++) {
693:       idx       = *indicesx++;
694:       idy       = *indicesy++;
695:       y[idy]    = x[idx];
696:       y[idy+1]  = x[idx+1];
697:       y[idy+2]  = x[idx+2];
698:     }
699:     break;
700:   case ADD_VALUES:
701:     for (i=0; i<n; i++) {
702:       idx       = *indicesx++;
703:       idy       = *indicesy++;
704:       y[idy]    += x[idx];
705:       y[idy+1]  += x[idx+1];
706:       y[idy+2]  += x[idx+2];
707:     }
708:     break;
709: #if !defined(PETSC_USE_COMPLEX)
710:   case MAX_VALUES:
711:     for (i=0; i<n; i++) {
712:       idx       = *indicesx++;
713:       idy       = *indicesy++;
714:       y[idy]    = PetscMax(y[idy],x[idx]);
715:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
716:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
717:     }
718: #else
719:   case MAX_VALUES:
720: #endif
721:   case NOT_SET_VALUES:
722:     break;
723:   }
724: }
725:   /* ----------------------------------------------------------------------------------------------- */
726: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
727: {
728:   PetscInt i,idx;

730:   for (i=0; i<n; i++) {
731:     idx   = *indicesx++;
732:     y[0]  = x[idx];
733:     y[1]  = x[idx+1];
734:     y[2]  = x[idx+2];
735:     y[3]  = x[idx+3];
736:     y    += 4;
737:   }
738: }
739: PETSC_STATIC_INLINE void UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
740: {
741:   PetscInt i,idy;

743:   switch (addv) {
744:   case INSERT_VALUES:
745:     for (i=0; i<n; i++) {
746:       idy       = *indicesy++;
747:       y[idy]    = x[0];
748:       y[idy+1]  = x[1];
749:       y[idy+2]  = x[2];
750:       y[idy+3]  = x[3];
751:       x    += 4;
752:     }
753:     break;
754:   case ADD_VALUES:
755:     for (i=0; i<n; i++) {
756:       idy       = *indicesy++;
757:       y[idy]    += x[0];
758:       y[idy+1]  += x[1];
759:       y[idy+2]  += x[2];
760:       y[idy+3]  += x[3];
761:       x    += 4;
762:     }
763:     break;
764: #if !defined(PETSC_USE_COMPLEX)
765:   case MAX_VALUES:
766:     for (i=0; i<n; i++) {
767:       idy       = *indicesy++;
768:       y[idy]    = PetscMax(y[idy],x[0]);
769:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
770:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
771:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
772:       x    += 4;
773:     }
774: #else
775:   case MAX_VALUES:
776: #endif
777:   case NOT_SET_VALUES:
778:     break;
779:   }
780: }

782: PETSC_STATIC_INLINE void Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
783: {
784:   PetscInt i,idx,idy;

786:   switch (addv) {
787:   case INSERT_VALUES:
788:     for (i=0; i<n; i++) {
789:       idx       = *indicesx++;
790:       idy       = *indicesy++;
791:       y[idy]    = x[idx];
792:       y[idy+1]  = x[idx+1];
793:       y[idy+2]  = x[idx+2];
794:       y[idy+3]  = x[idx+3];
795:     }
796:     break;
797:   case ADD_VALUES:
798:     for (i=0; i<n; i++) {
799:       idx       = *indicesx++;
800:       idy       = *indicesy++;
801:       y[idy]    += x[idx];
802:       y[idy+1]  += x[idx+1];
803:       y[idy+2]  += x[idx+2];
804:       y[idy+3]  += x[idx+3];
805:     }
806:     break;
807: #if !defined(PETSC_USE_COMPLEX)
808:   case MAX_VALUES:
809:     for (i=0; i<n; i++) {
810:       idx       = *indicesx++;
811:       idy       = *indicesy++;
812:       y[idy]    = PetscMax(y[idy],x[idx]);
813:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
814:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
815:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
816:     }
817: #else
818:   case MAX_VALUES:
819: #endif
820:   case NOT_SET_VALUES:
821:     break;
822:   }
823: }
824:   /* ----------------------------------------------------------------------------------------------- */
825: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
826: {
827:   PetscInt i,idx;

829:   for (i=0; i<n; i++) {
830:     idx   = *indicesx++;
831:     y[0]  = x[idx];
832:     y[1]  = x[idx+1];
833:     y[2]  = x[idx+2];
834:     y[3]  = x[idx+3];
835:     y[4]  = x[idx+4];
836:     y    += 5;
837:   }
838: }
839: PETSC_STATIC_INLINE void UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
840: {
841:   PetscInt i,idy;

843:   switch (addv) {
844:   case INSERT_VALUES:
845:     for (i=0; i<n; i++) {
846:       idy       = *indicesy++;
847:       y[idy]    = x[0];
848:       y[idy+1]  = x[1];
849:       y[idy+2]  = x[2];
850:       y[idy+3]  = x[3];
851:       y[idy+4]  = x[4];
852:       x    += 5;
853:     }
854:     break;
855:   case ADD_VALUES:
856:     for (i=0; i<n; i++) {
857:       idy       = *indicesy++;
858:       y[idy]    += x[0];
859:       y[idy+1]  += x[1];
860:       y[idy+2]  += x[2];
861:       y[idy+3]  += x[3];
862:       y[idy+4]  += x[4];
863:       x    += 5;
864:     }
865:     break;
866: #if !defined(PETSC_USE_COMPLEX)
867:   case MAX_VALUES:
868:     for (i=0; i<n; i++) {
869:       idy       = *indicesy++;
870:       y[idy]    = PetscMax(y[idy],x[0]);
871:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
872:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
873:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
874:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
875:       x    += 5;
876:     }
877: #else
878:   case MAX_VALUES:
879: #endif
880:   case NOT_SET_VALUES:
881:     break;
882:   }
883: }

885: PETSC_STATIC_INLINE void Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
886: {
887:   PetscInt i,idx,idy;

889:   switch (addv) {
890:   case INSERT_VALUES:
891:     for (i=0; i<n; i++) {
892:       idx       = *indicesx++;
893:       idy       = *indicesy++;
894:       y[idy]    = x[idx];
895:       y[idy+1]  = x[idx+1];
896:       y[idy+2]  = x[idx+2];
897:       y[idy+3]  = x[idx+3];
898:       y[idy+4]  = x[idx+4];
899:     }
900:     break;
901:   case ADD_VALUES:
902:     for (i=0; i<n; i++) {
903:       idx       = *indicesx++;
904:       idy       = *indicesy++;
905:       y[idy]    += x[idx];
906:       y[idy+1]  += x[idx+1];
907:       y[idy+2]  += x[idx+2];
908:       y[idy+3]  += x[idx+3];
909:       y[idy+4]  += x[idx+4];
910:     }
911:     break;
912: #if !defined(PETSC_USE_COMPLEX)
913:   case MAX_VALUES:
914:     for (i=0; i<n; i++) {
915:       idx       = *indicesx++;
916:       idy       = *indicesy++;
917:       y[idy]    = PetscMax(y[idy],x[idx]);
918:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
919:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
920:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
921:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
922:     }
923: #else
924:   case MAX_VALUES:
925: #endif
926:   case NOT_SET_VALUES:
927:     break;
928:   }
929: }
930:   /* ----------------------------------------------------------------------------------------------- */
931: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
932: {
933:   PetscInt i,idx;

935:   for (i=0; i<n; i++) {
936:     idx   = *indicesx++;
937:     y[0]  = x[idx];
938:     y[1]  = x[idx+1];
939:     y[2]  = x[idx+2];
940:     y[3]  = x[idx+3];
941:     y[4]  = x[idx+4];
942:     y[5]  = x[idx+5];
943:     y    += 6;
944:   }
945: }
946: PETSC_STATIC_INLINE void UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
947: {
948:   PetscInt i,idy;

950:   switch (addv) {
951:   case INSERT_VALUES:
952:     for (i=0; i<n; i++) {
953:       idy       = *indicesy++;
954:       y[idy]    = x[0];
955:       y[idy+1]  = x[1];
956:       y[idy+2]  = x[2];
957:       y[idy+3]  = x[3];
958:       y[idy+4]  = x[4];
959:       y[idy+5]  = x[5];
960:       x    += 6;
961:     }
962:     break;
963:   case ADD_VALUES:
964:     for (i=0; i<n; i++) {
965:       idy       = *indicesy++;
966:       y[idy]    += x[0];
967:       y[idy+1]  += x[1];
968:       y[idy+2]  += x[2];
969:       y[idy+3]  += x[3];
970:       y[idy+4]  += x[4];
971:       y[idy+5]  += x[5];
972:       x    += 6;
973:     }
974:     break;
975: #if !defined(PETSC_USE_COMPLEX)
976:   case MAX_VALUES:
977:     for (i=0; i<n; i++) {
978:       idy       = *indicesy++;
979:       y[idy]    = PetscMax(y[idy],x[0]);
980:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
981:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
982:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
983:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
984:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
985:       x    += 6;
986:     }
987: #else
988:   case MAX_VALUES:
989: #endif
990:   case NOT_SET_VALUES:
991:     break;
992:   }
993: }

995: PETSC_STATIC_INLINE void Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
996: {
997:   PetscInt i,idx,idy;

999:   switch (addv) {
1000:   case INSERT_VALUES:
1001:     for (i=0; i<n; i++) {
1002:       idx       = *indicesx++;
1003:       idy       = *indicesy++;
1004:       y[idy]    = x[idx];
1005:       y[idy+1]  = x[idx+1];
1006:       y[idy+2]  = x[idx+2];
1007:       y[idy+3]  = x[idx+3];
1008:       y[idy+4]  = x[idx+4];
1009:       y[idy+5]  = x[idx+5];
1010:     }
1011:     break;
1012:   case ADD_VALUES:
1013:     for (i=0; i<n; i++) {
1014:       idx       = *indicesx++;
1015:       idy       = *indicesy++;
1016:       y[idy]    += x[idx];
1017:       y[idy+1]  += x[idx+1];
1018:       y[idy+2]  += x[idx+2];
1019:       y[idy+3]  += x[idx+3];
1020:       y[idy+4]  += x[idx+4];
1021:       y[idy+5]  += x[idx+5];
1022:     }
1023:     break;
1024: #if !defined(PETSC_USE_COMPLEX)
1025:   case MAX_VALUES:
1026:     for (i=0; i<n; i++) {
1027:       idx       = *indicesx++;
1028:       idy       = *indicesy++;
1029:       y[idy]    = PetscMax(y[idy],x[idx]);
1030:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1031:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1032:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1033:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1034:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1035:     }
1036: #else
1037:   case MAX_VALUES:
1038: #endif
1039:   case NOT_SET_VALUES:
1040:     break;
1041:   }
1042: }
1043:   /* ----------------------------------------------------------------------------------------------- */
1044: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1045: {
1046:   PetscInt i,idx;

1048:   for (i=0; i<n; i++) {
1049:     idx   = *indicesx++;
1050:     y[0]  = x[idx];
1051:     y[1]  = x[idx+1];
1052:     y[2]  = x[idx+2];
1053:     y[3]  = x[idx+3];
1054:     y[4]  = x[idx+4];
1055:     y[5]  = x[idx+5];
1056:     y[6]  = x[idx+6];
1057:     y    += 7;
1058:   }
1059: }
1060: PETSC_STATIC_INLINE void UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1061: {
1062:   PetscInt i,idy;

1064:   switch (addv) {
1065:   case INSERT_VALUES:
1066:     for (i=0; i<n; i++) {
1067:       idy       = *indicesy++;
1068:       y[idy]    = x[0];
1069:       y[idy+1]  = x[1];
1070:       y[idy+2]  = x[2];
1071:       y[idy+3]  = x[3];
1072:       y[idy+4]  = x[4];
1073:       y[idy+5]  = x[5];
1074:       y[idy+6]  = x[6];
1075:       x    += 7;
1076:     }
1077:     break;
1078:   case ADD_VALUES:
1079:     for (i=0; i<n; i++) {
1080:       idy       = *indicesy++;
1081:       y[idy]    += x[0];
1082:       y[idy+1]  += x[1];
1083:       y[idy+2]  += x[2];
1084:       y[idy+3]  += x[3];
1085:       y[idy+4]  += x[4];
1086:       y[idy+5]  += x[5];
1087:       y[idy+6]  += x[6];
1088:       x    += 7;
1089:     }
1090:     break;
1091: #if !defined(PETSC_USE_COMPLEX)
1092:   case MAX_VALUES:
1093:     for (i=0; i<n; i++) {
1094:       idy       = *indicesy++;
1095:       y[idy]    = PetscMax(y[idy],x[0]);
1096:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1097:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1098:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1099:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1100:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1101:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1102:       x    += 7;
1103:     }
1104: #else
1105:   case MAX_VALUES:
1106: #endif
1107:   case NOT_SET_VALUES:
1108:     break;
1109:   }
1110: }

1112: PETSC_STATIC_INLINE void Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1113: {
1114:   PetscInt i,idx,idy;

1116:   switch (addv) {
1117:   case INSERT_VALUES:
1118:     for (i=0; i<n; i++) {
1119:       idx       = *indicesx++;
1120:       idy       = *indicesy++;
1121:       y[idy]    = x[idx];
1122:       y[idy+1]  = x[idx+1];
1123:       y[idy+2]  = x[idx+2];
1124:       y[idy+3]  = x[idx+3];
1125:       y[idy+4]  = x[idx+4];
1126:       y[idy+5]  = x[idx+5];
1127:       y[idy+6]  = x[idx+6];
1128:     }
1129:     break;
1130:   case ADD_VALUES:
1131:     for (i=0; i<n; i++) {
1132:       idx       = *indicesx++;
1133:       idy       = *indicesy++;
1134:       y[idy]    += x[idx];
1135:       y[idy+1]  += x[idx+1];
1136:       y[idy+2]  += x[idx+2];
1137:       y[idy+3]  += x[idx+3];
1138:       y[idy+4]  += x[idx+4];
1139:       y[idy+5]  += x[idx+5];
1140:       y[idy+6]  += x[idx+6];
1141:     }
1142:     break;
1143: #if !defined(PETSC_USE_COMPLEX)
1144:   case MAX_VALUES:
1145:     for (i=0; i<n; i++) {
1146:       idx       = *indicesx++;
1147:       idy       = *indicesy++;
1148:       y[idy]    = PetscMax(y[idy],x[idx]);
1149:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1150:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1151:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1152:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1153:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1154:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1155:     }
1156: #else
1157:   case MAX_VALUES:
1158: #endif
1159:   case NOT_SET_VALUES:
1160:     break;
1161:   }
1162: }
1163:   /* ----------------------------------------------------------------------------------------------- */
1164: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1165: {
1166:   PetscInt i,idx;

1168:   for (i=0; i<n; i++) {
1169:     idx   = *indicesx++;
1170:     y[0]  = x[idx];
1171:     y[1]  = x[idx+1];
1172:     y[2]  = x[idx+2];
1173:     y[3]  = x[idx+3];
1174:     y[4]  = x[idx+4];
1175:     y[5]  = x[idx+5];
1176:     y[6]  = x[idx+6];
1177:     y[7]  = x[idx+7];
1178:     y    += 8;
1179:   }
1180: }
1181: PETSC_STATIC_INLINE void UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1182: {
1183:   PetscInt i,idy;

1185:   switch (addv) {
1186:   case INSERT_VALUES:
1187:     for (i=0; i<n; i++) {
1188:       idy       = *indicesy++;
1189:       y[idy]    = x[0];
1190:       y[idy+1]  = x[1];
1191:       y[idy+2]  = x[2];
1192:       y[idy+3]  = x[3];
1193:       y[idy+4]  = x[4];
1194:       y[idy+5]  = x[5];
1195:       y[idy+6]  = x[6];
1196:       y[idy+7]  = x[7];
1197:       x    += 8;
1198:     }
1199:     break;
1200:   case ADD_VALUES:
1201:     for (i=0; i<n; i++) {
1202:       idy       = *indicesy++;
1203:       y[idy]    += x[0];
1204:       y[idy+1]  += x[1];
1205:       y[idy+2]  += x[2];
1206:       y[idy+3]  += x[3];
1207:       y[idy+4]  += x[4];
1208:       y[idy+5]  += x[5];
1209:       y[idy+6]  += x[6];
1210:       y[idy+7]  += x[7];
1211:       x    += 8;
1212:     }
1213:     break;
1214: #if !defined(PETSC_USE_COMPLEX)
1215:   case MAX_VALUES:
1216:     for (i=0; i<n; i++) {
1217:       idy       = *indicesy++;
1218:       y[idy]    = PetscMax(y[idy],x[0]);
1219:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1220:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1221:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1222:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1223:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1224:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1225:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1226:       x    += 8;
1227:     }
1228: #else
1229:   case MAX_VALUES:
1230: #endif
1231:   case NOT_SET_VALUES:
1232:     break;
1233:   }
1234: }

1236: PETSC_STATIC_INLINE void Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1237: {
1238:   PetscInt i,idx,idy;

1240:   switch (addv) {
1241:   case INSERT_VALUES:
1242:     for (i=0; i<n; i++) {
1243:       idx       = *indicesx++;
1244:       idy       = *indicesy++;
1245:       y[idy]    = x[idx];
1246:       y[idy+1]  = x[idx+1];
1247:       y[idy+2]  = x[idx+2];
1248:       y[idy+3]  = x[idx+3];
1249:       y[idy+4]  = x[idx+4];
1250:       y[idy+5]  = x[idx+5];
1251:       y[idy+6]  = x[idx+6];
1252:       y[idy+7]  = x[idx+7];
1253:     }
1254:     break;
1255:   case ADD_VALUES:
1256:     for (i=0; i<n; i++) {
1257:       idx       = *indicesx++;
1258:       idy       = *indicesy++;
1259:       y[idy]    += x[idx];
1260:       y[idy+1]  += x[idx+1];
1261:       y[idy+2]  += x[idx+2];
1262:       y[idy+3]  += x[idx+3];
1263:       y[idy+4]  += x[idx+4];
1264:       y[idy+5]  += x[idx+5];
1265:       y[idy+6]  += x[idx+6];
1266:       y[idy+7]  += x[idx+7];
1267:     }
1268:     break;
1269: #if !defined(PETSC_USE_COMPLEX)
1270:   case MAX_VALUES:
1271:     for (i=0; i<n; i++) {
1272:       idx       = *indicesx++;
1273:       idy       = *indicesy++;
1274:       y[idy]    = PetscMax(y[idy],x[idx]);
1275:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1276:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1277:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1278:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1279:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1280:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1281:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1282:     }
1283: #else
1284:   case MAX_VALUES:
1285: #endif
1286:   case NOT_SET_VALUES:
1287:     break;
1288:   }
1289: }

1291:   /* ----------------------------------------------------------------------------------------------- */
1292: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1293: {
1294:   PetscInt i,idx;

1296:   for (i=0; i<n; i++) {
1297:     idx   = *indicesx++;
1298:     y[0]  = x[idx];
1299:     y[1]  = x[idx+1];
1300:     y[2]  = x[idx+2];
1301:     y[3]  = x[idx+3];
1302:     y[4]  = x[idx+4];
1303:     y[5]  = x[idx+5];
1304:     y[6]  = x[idx+6];
1305:     y[7]  = x[idx+7];
1306:     y[8]  = x[idx+8];
1307:     y[9]  = x[idx+9];
1308:     y[10] = x[idx+10];
1309:     y[11] = x[idx+11];
1310:     y    += 12;
1311:   }
1312: }
1313: PETSC_STATIC_INLINE void UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1314: {
1315:   PetscInt i,idy;

1317:   switch (addv) {
1318:   case INSERT_VALUES:
1319:     for (i=0; i<n; i++) {
1320:       idy       = *indicesy++;
1321:       y[idy]    = x[0];
1322:       y[idy+1]  = x[1];
1323:       y[idy+2]  = x[2];
1324:       y[idy+3]  = x[3];
1325:       y[idy+4]  = x[4];
1326:       y[idy+5]  = x[5];
1327:       y[idy+6]  = x[6];
1328:       y[idy+7]  = x[7];
1329:       y[idy+8]  = x[8];
1330:       y[idy+9]  = x[9];
1331:       y[idy+10] = x[10];
1332:       y[idy+11] = x[11];
1333:       x    += 12;
1334:     }
1335:     break;
1336:   case ADD_VALUES:
1337:     for (i=0; i<n; i++) {
1338:       idy       = *indicesy++;
1339:       y[idy]    += x[0];
1340:       y[idy+1]  += x[1];
1341:       y[idy+2]  += x[2];
1342:       y[idy+3]  += x[3];
1343:       y[idy+4]  += x[4];
1344:       y[idy+5]  += x[5];
1345:       y[idy+6]  += x[6];
1346:       y[idy+7]  += x[7];
1347:       y[idy+8]  += x[8];
1348:       y[idy+9]  += x[9];
1349:       y[idy+10] += x[10];
1350:       y[idy+11] += x[11];
1351:       x    += 12;
1352:     }
1353:     break;
1354: #if !defined(PETSC_USE_COMPLEX)
1355:   case MAX_VALUES:
1356:     for (i=0; i<n; i++) {
1357:       idy       = *indicesy++;
1358:       y[idy]    = PetscMax(y[idy],x[0]);
1359:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1360:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1361:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1362:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1363:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1364:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1365:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1366:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1367:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1368:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1369:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1370:       x    += 12;
1371:     }
1372: #else
1373:   case MAX_VALUES:
1374: #endif
1375:   case NOT_SET_VALUES:
1376:     break;
1377:   }
1378: }

1380: PETSC_STATIC_INLINE void Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1381: {
1382:   PetscInt i,idx,idy;

1384:   switch (addv) {
1385:   case INSERT_VALUES:
1386:     for (i=0; i<n; i++) {
1387:       idx       = *indicesx++;
1388:       idy       = *indicesy++;
1389:       y[idy]    = x[idx];
1390:       y[idy+1]  = x[idx+1];
1391:       y[idy+2]  = x[idx+2];
1392:       y[idy+3]  = x[idx+3];
1393:       y[idy+4]  = x[idx+4];
1394:       y[idy+5]  = x[idx+5];
1395:       y[idy+6]  = x[idx+6];
1396:       y[idy+7]  = x[idx+7];
1397:       y[idy+8]  = x[idx+8];
1398:       y[idy+9]  = x[idx+9];
1399:       y[idy+10] = x[idx+10];
1400:       y[idy+11] = x[idx+11];
1401:     }
1402:     break;
1403:   case ADD_VALUES:
1404:     for (i=0; i<n; i++) {
1405:       idx       = *indicesx++;
1406:       idy       = *indicesy++;
1407:       y[idy]    += x[idx];
1408:       y[idy+1]  += x[idx+1];
1409:       y[idy+2]  += x[idx+2];
1410:       y[idy+3]  += x[idx+3];
1411:       y[idy+4]  += x[idx+4];
1412:       y[idy+5]  += x[idx+5];
1413:       y[idy+6]  += x[idx+6];
1414:       y[idy+7]  += x[idx+7];
1415:       y[idy+8]  += x[idx+8];
1416:       y[idy+9]  += x[idx+9];
1417:       y[idy+10] += x[idx+10];
1418:       y[idy+11] += x[idx+11];
1419:     }
1420:     break;
1421: #if !defined(PETSC_USE_COMPLEX)
1422:   case MAX_VALUES:
1423:     for (i=0; i<n; i++) {
1424:       idx       = *indicesx++;
1425:       idy       = *indicesy++;
1426:       y[idy]    = PetscMax(y[idy],x[idx]);
1427:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1428:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1429:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1430:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1431:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1432:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1433:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1434:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1435:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1436:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1437:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1438:     }
1439: #else
1440:   case MAX_VALUES:
1441: #endif
1442:   case NOT_SET_VALUES:
1443:     break;
1444:   }
1445: }

 1447: /* Create the VecScatterBegin/End_<bs> routines for our chosen block sizes */
 1448: #define BS 1
 1449:  #include "../src/vec/vec/utils/vpscat.h"
 1450: #define BS 2
 1451:  #include "../src/vec/vec/utils/vpscat.h"
 1452: #define BS 3
 1453:  #include "../src/vec/vec/utils/vpscat.h"
 1454: #define BS 4
 1455:  #include "../src/vec/vec/utils/vpscat.h"
 1456: #define BS 5
 1457:  #include "../src/vec/vec/utils/vpscat.h"
 1458: #define BS 6
 1459:  #include "../src/vec/vec/utils/vpscat.h"
 1460: #define BS 7
 1461:  #include "../src/vec/vec/utils/vpscat.h"
 1462: #define BS 8
 1463:  #include "../src/vec/vec/utils/vpscat.h"
 1464: #define BS 12
 1465:  #include "../src/vec/vec/utils/vpscat.h"
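
The #define BS/#include pairs above instantiate one VecScatterBegin/End pair per block size from a single template header. The real contents of vpscat.h are not reproduced here; the sketch below only illustrates the include-as-template idiom it is assumed to use (token-pasting the block size into each function name and #undef-ing BS at the end so the header can be included again). The macro names and the function name VecScatterBegin_Example are hypothetical.

/* Hedged sketch of the include-as-template idiom, NOT the actual vpscat.h. */
#define PASTE2(a,b)   a ## _ ## b
#define PASTE1(a,b)   PASTE2(a,b)
#define WITH_BS(name) PASTE1(name,BS)

PetscErrorCode WITH_BS(VecScatterBegin_Example)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  /* ... pack the outgoing blocks with Pack_<BS>, start the persistent requests, handle the local part ... */
  return 0;
}

#undef WITH_BS
#undef PASTE1
#undef PASTE2
#undef BS    /* so the header can be included again with a new BS */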

1467: /* ==========================================================================================*/

1469: /*              create parallel to sequential scatter context                           */

1471: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *,VecScatter_MPI_General *,VecScatter);

1475: /*@
1476:      VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.

1478:      Collective on VecScatter

1480:    Input Parameters:
1481: +     VecScatter - obtained with VecScatterCreateEmpty()
 1482: .     nsends - number of messages this process will send
 1483: .     sendSizes - number of entries in each message sent
 1484: .     sendProcs - ranks of the processes the messages are sent to
 1485: .     sendIdx - indices from which the sent entries are gathered (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
 1486: .     nrecvs - number of receives to expect
 1487: .     recvSizes - number of entries in each message received
 1488: .     recvProcs - ranks of the processes that are sending to me
 1489: .     recvIdx - indices where the received entries are to be put (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
 1490: -     bs - size of each block

 1492:      Notes:  sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support 0 entries you need
 1493:       to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.

 1495:        This probably does not handle sends to self properly; those should be removed from the counts that are used
 1496:       in allocating space inside the from struct. A usage sketch follows the routine below.

1498:   Level: intermediate

1500: @*/
1501: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1502: {
1503:   VecScatter_MPI_General *from, *to;
1504:   PetscInt               sendSize, recvSize;
1505:   PetscInt               n, i;
1506:   PetscErrorCode         ierr;

1508:   /* allocate entire send scatter context */
1509:   PetscNewLog(ctx,VecScatter_MPI_General,&to);
1510:   to->n = nsends;
1511:   for(n = 0, sendSize = 0; n < to->n; n++) {sendSize += sendSizes[n];}
1512:   PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1513:   PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1514:   PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
1515:   to->starts[0] = 0;
1516:   for(n = 0; n < to->n; n++) {
1517:     if (sendSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1518:     to->starts[n+1] = to->starts[n] + sendSizes[n];
1519:     to->procs[n] = sendProcs[n];
1520:     for(i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) {
1521:       to->indices[i] = sendIdx[i];
1522:     }
1523:   }
1524:   ctx->todata = (void *) to;

1526:   /* allocate entire receive scatter context */
1527:   PetscNewLog(ctx,VecScatter_MPI_General,&from);
1528:   from->n = nrecvs;
1529:   for(n = 0, recvSize = 0; n < from->n; n++) {recvSize += recvSizes[n];}
1530:   PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1531:   PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1532:   from->starts[0] = 0;
1533:   for(n = 0; n < from->n; n++) {
1534:     if (recvSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1535:     from->starts[n+1] = from->starts[n] + recvSizes[n];
1536:     from->procs[n] = recvProcs[n];
1537:     for(i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) {
1538:       from->indices[i] = recvIdx[i];
1539:     }
1540:   }
1541:   ctx->fromdata = (void *)from;

1543:   /* No local scatter optimization */
1544:   from->local.n      = 0;
1545:   from->local.vslots = 0;
1546:   to->local.n        = 0;
1547:   to->local.vslots   = 0;
1548:   from->local.nonmatching_computed = PETSC_FALSE;
1549:   from->local.n_nonmatching        = 0;
1550:   from->local.slots_nonmatching    = 0;
1551:   to->local.nonmatching_computed   = PETSC_FALSE;
1552:   to->local.n_nonmatching          = 0;
1553:   to->local.slots_nonmatching      = 0;

1555:   from->type = VEC_SCATTER_MPI_GENERAL;
1556:   to->type   = VEC_SCATTER_MPI_GENERAL;
1557:   from->bs = bs;
1558:   to->bs   = bs;
1559:   VecScatterCreateCommon_PtoS(from, to, ctx);
1560:   return(0);
1561: }
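
A hedged usage sketch of the routine above: the message layout, the ranks, and the helper name BuildMyScatter are made up for illustration, and ctx is assumed to have been obtained with VecScatterCreateEmpty() as the man page states.

/* Hypothetical usage sketch for VecScatterCreateLocal(); not part of the library.
   One message of 2 entries goes to rank 1 and one message of 3 entries is
   expected from rank 2, with block size 1. */
PetscErrorCode BuildMyScatter(VecScatter ctx)   /* ctx obtained with VecScatterCreateEmpty() */
{
  PetscInt       sendSizes[1] = {2}, sendProcs[1] = {1}, sendIdx[2] = {0, 4};
  PetscInt       recvSizes[1] = {3}, recvProcs[1] = {2}, recvIdx[3] = {1, 2, 5};
  PetscErrorCode ierr;

  ierr = VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,
                                   1,recvSizes,recvProcs,recvIdx,1);CHKERRQ(ierr);
  return 0;
}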

 1563: /*
 1564:    bs indicates how many elements there are in each block. Normally this is 1.

 1566:    This routine contains a check that PetscMPIInt can handle the sizes needed.
 1567: */
1570: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1571: {
1572:   VecScatter_MPI_General *from,*to;
1573:   PetscMPIInt            size,rank,imdex,tag,n;
1574:   PetscInt               *source = PETSC_NULL,*owners = PETSC_NULL;
1575:   PetscInt               *lowner = PETSC_NULL,*start = PETSC_NULL,lengthy,lengthx;
1576:   PetscMPIInt            *nprocs = PETSC_NULL,nrecvs;
1577:   PetscInt               i,j,idx,nsends;
1578:   PetscInt               *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
1579:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1580:   PetscMPIInt            *onodes1,*olengths1;
1581:   MPI_Comm               comm;
1582:   MPI_Request            *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
1583:   MPI_Status             recv_status,*send_status;
1584:   PetscErrorCode         ierr;

1587:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
1588:   PetscObjectGetComm((PetscObject)xin,&comm);
1589:   MPI_Comm_rank(comm,&rank);
1590:   MPI_Comm_size(comm,&size);
1591:   owners = xin->map->range;
1592:   VecGetSize(yin,&lengthy);
1593:   VecGetSize(xin,&lengthx);

1595:   /*  first count number of contributors to each processor */
1596:   PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1597:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1598:   j      = 0;
1599:   nsends = 0;
1600:   for (i=0; i<nx; i++) {
1601:     idx = inidx[i];
1602:     if (idx < owners[j]) j = 0;
1603:     for (; j<size; j++) {
1604:       if (idx < owners[j+1]) {
1605:         if (!nprocs[j]++) nsends++;
1606:         owner[i] = j;
1607:         break;
1608:       }
1609:     }
1610:   }
1611:   nprocslocal  = nprocs[rank];
1612:   nprocs[rank] = 0;
1613:   if (nprocslocal) nsends--;
1614:   /* inform other processors of number of messages and max length */
1615:   PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
1616:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1617:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1618:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
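        /* PetscGatherNumberOfMessages() tells this process how many messages it will receive
           (nrecvs); PetscGatherMessageLengths() returns the sending ranks (onodes1) and the
           length of each incoming message (olengths1), here sorted by rank.  recvtotal is
           the total number of indices this process will receive. */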

1620:   /* post receives:   */
1621:   PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1622:   count  = 0;
1623:   for (i=0; i<nrecvs; i++) {
1624:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1625:     count += olengths1[i];
1626:   }

1628:   /* do sends:
1629:      1) starts[i] gives the starting index in svalues for the entries going to
1630:         the ith processor
1631:   */
1632:   PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
1633:   starts[0]  = 0;
1634:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1635:   for (i=0; i<nx; i++) {
1636:     if (owner[i] != rank) {
1637:       svalues[starts[owner[i]]++] = inidx[i];
1638:     }
1639:   }
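        /* The packing pass above advanced starts[], so it is rebuilt below before the sends
           are posted; entries owned by this process are skipped here and handled later as
           the local (self) part of the scatter. */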

1641:   starts[0] = 0;
1642:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1643:   count = 0;
1644:   for (i=0; i<size; i++) {
1645:     if (nprocs[i]) {
1646:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1647:     }
1648:   }

1650:   /*  wait on receives */
1651:   count  = nrecvs;
1652:   slen   = 0;
1653:   while (count) {
1654:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1655:     /* unpack receives into our local space */
1656:     MPI_Get_count(&recv_status,MPIU_INT,&n);
1657:     slen += n;
1658:     count--;
1659:   }
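        /* The receives were posted into rvalues at offsets known in advance (rank order), so
           this loop only waits for completion and accumulates the received lengths in slen,
           which is cross-checked against recvtotal below. */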

1661:   if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
1662: 
1663:   /* allocate entire send scatter context */
1664:   PetscNewLog(ctx,VecScatter_MPI_General,&to);
1665:   to->n = nrecvs;
1666:   PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1667:   PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1668:   PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1669:   ctx->todata   = (void*)to;
1670:   to->starts[0] = 0;

1672:   if (nrecvs) {

1674:     /* move the data into the send scatter */
1675:     base     = owners[rank];
1676:     rsvalues = rvalues;
1677:     for (i=0; i<nrecvs; i++) {
1678:       to->starts[i+1] = to->starts[i] + olengths1[i];
1679:       to->procs[i]    = onodes1[i];
1680:       values = rsvalues;
1681:       rsvalues += olengths1[i];
1682:       for (j=0; j<olengths1[i]; j++) {
1683:         to->indices[to->starts[i] + j] = values[j] - base;
1684:       }
1685:     }
1686:   }
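        /* to->starts/to->procs describe the outgoing messages neighbor by neighbor, and
           to->indices holds the send locations converted to this process's local numbering
           (global index minus base = owners[rank]). */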
1687:   PetscFree(olengths1);
1688:   PetscFree(onodes1);
1689:   PetscFree3(rvalues,source,recv_waits);

1691:   /* allocate entire receive scatter context */
1692:   PetscNewLog(ctx,VecScatter_MPI_General,&from);
1693:   from->n = nsends;

1695:   PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1696:   PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1697:   ctx->fromdata  = (void*)from;

1699:   /* move data into receive scatter */
1700:   PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1701:   count = 0; from->starts[0] = start[0] = 0;
1702:   for (i=0; i<size; i++) {
1703:     if (nprocs[i]) {
1704:       lowner[i]            = count;
1705:       from->procs[count++] = i;
1706:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
1707:     }
1708:   }
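        /* lowner[] maps a global rank to its slot in the compressed list of receive
           neighbors (from->procs); start[] is the insertion cursor used below to place
           each inidy[i] into the correct segment of from->indices. */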

1710:   for (i=0; i<nx; i++) {
1711:     if (owner[i] != rank) {
1712:       from->indices[start[lowner[owner[i]]]++] = inidy[i];
1713:       if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1714:     }
1715:   }
1716:   PetscFree2(lowner,start);
1717:   PetscFree2(nprocs,owner);
1718: 
1719:   /* wait on sends */
1720:   if (nsends) {
1721:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1722:     MPI_Waitall(nsends,send_waits,send_status);
1723:     PetscFree(send_status);
1724:   }
1725:   PetscFree3(svalues,send_waits,starts);

1727:   if (nprocslocal) {
1728:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
1729:     /* we have a scatter to ourselves */
1730:     PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1731:     PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1732:     nt   = 0;
1733:     for (i=0; i<nx; i++) {
1734:       idx = inidx[i];
1735:       if (idx >= owners[rank] && idx < owners[rank+1]) {
1736:         to->local.vslots[nt]     = idx - owners[rank];
1737:         from->local.vslots[nt++] = inidy[i];
1738:         if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1739:       }
1740:     }
1741:   } else {
1742:     from->local.n      = 0;
1743:     from->local.vslots = 0;
1744:     to->local.n        = 0;
1745:     to->local.vslots   = 0;
1746:   }
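        /* For the self part of the scatter, to->local.vslots holds the local positions in
           xin and from->local.vslots the corresponding positions in yin; this part is
           performed by a direct local copy and involves no MPI communication. */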

1748:   from->local.nonmatching_computed = PETSC_FALSE;
1749:   from->local.n_nonmatching        = 0;
1750:   from->local.slots_nonmatching    = 0;
1751:   to->local.nonmatching_computed   = PETSC_FALSE;
1752:   to->local.n_nonmatching          = 0;
1753:   to->local.slots_nonmatching      = 0;

1755:   from->type = VEC_SCATTER_MPI_GENERAL;
1756:   to->type   = VEC_SCATTER_MPI_GENERAL;
1757:   from->bs = bs;
1758:   to->bs   = bs;
1759:   VecScatterCreateCommon_PtoS(from,to,ctx);
1760:   return(0);
1761: }

1763: /*
1764:    bs indicates how many elements there are in each block. Normally this would be 1.
1765: */
1768: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1769: {
1770:   MPI_Comm       comm = ((PetscObject)ctx)->comm;
1771:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
1772:   PetscInt       bs   = to->bs;
1773:   PetscMPIInt    size;
1774:   PetscInt       i, n;
1776: 
1778:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1779:   ctx->destroy = VecScatterDestroy_PtoP;

1781:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_reproduce",&ctx->reproduce);
1782:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
1783:   from->sendfirst = to->sendfirst;

1785:   MPI_Comm_size(comm,&size);
1786:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
1787:   to->contiq = PETSC_FALSE;
1788:   n = from->starts[from->n];
1789:   from->contiq = PETSC_TRUE;
1790:   for (i=1; i<n; i++) {
1791:     if (from->indices[i] != from->indices[i-1] + bs) {
1792:       from->contiq = PETSC_FALSE;
1793:       break;
1794:     }
1795:   }
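        /* from->contiq stays PETSC_TRUE when consecutive receive indices differ by exactly
           bs, i.e. the incoming data lands in one contiguous run of the target vector; the
           alltoallw branch below exploits this with a single count/displacement per neighbor
           instead of an indexed datatype. */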

1797:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_alltoall",&to->use_alltoallv);
1798:   from->use_alltoallv = to->use_alltoallv;
1799:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1800: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
1801:   if (to->use_alltoallv) {
1802:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_nopack",&to->use_alltoallw);
1803:   }
1804:   from->use_alltoallw = to->use_alltoallw;
1805:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1806: #endif

1808: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1809:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_window",&to->use_window);
1810:   from->use_window = to->use_window;
1811: #endif

1813:   if (to->use_alltoallv) {

1815:     PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1816:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1817:     for (i=0; i<to->n; i++) {
1818:       to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1819:     }
1820:     to->displs[0] = 0;
1821:     for (i=1; i<size; i++) {
1822:       to->displs[i] = to->displs[i-1] + to->counts[i-1];
1823:     }

1825:     PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1826:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1827:     for (i=0; i<from->n; i++) {
1828:       from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1829:     }
1830:     from->displs[0] = 0;
1831:     for (i=1; i<size; i++) {
1832:       from->displs[i] = from->displs[i-1] + from->counts[i-1];
1833:     }
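          /* Illustrative example (hypothetical values): with bs = 1 on a 3-process
             communicator, if to->counts = {3,0,2} (3 entries for rank 0, 2 for rank 2),
             then to->displs = {0,3,3}, since displs[i] = displs[i-1] + counts[i-1]. */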
1834: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1835:     if (to->use_alltoallw) {
1836:       PetscMPIInt mpibs = PetscMPIIntCast(bs), mpilen;
1837:       ctx->packtogether = PETSC_FALSE;
1838:       PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1839:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1840:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1841:       for (i=0; i<size; i++) {
1842:         to->types[i] = MPIU_SCALAR;
1843:       }

1845:       for (i=0; i<to->n; i++) {
1846:         to->wcounts[to->procs[i]] = 1;
1847:         mpilen = PetscMPIIntCast(to->starts[i+1]-to->starts[i]);
1848:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1849:         MPI_Type_commit(to->types+to->procs[i]);
1850:       }
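            /* With -vecscatter_nopack each outgoing message is described by an MPI
               indexed-block datatype built directly over the vector's array, so
               MPI_Alltoallw() can communicate without a separate pack step; that is
               why each participating neighbor gets a wcount of 1. */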
1851:       PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
1852:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
1853:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
1854:       for (i=0; i<size; i++) {
1855:         from->types[i] = MPIU_SCALAR;
1856:       }
1857:       if (from->contiq) {
1858:         PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
1859:         for (i=0; i<from->n; i++) {
1860:           from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1861:         }
1862:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
1863:         for (i=1; i<from->n; i++) {
1864:           from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
1865:         }
1866:       } else {
1867:         for (i=0; i<from->n; i++) {
1868:           from->wcounts[from->procs[i]] = 1;
1869:           mpilen = PetscMPIIntCast(from->starts[i+1]-from->starts[i]);
1870:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
1871:           MPI_Type_commit(from->types+from->procs[i]);
1872:         }
1873:       }
1874:     } else {
1875:       ctx->copy = VecScatterCopy_PtoP_AllToAll;
1876:     }
1877: #else 
1878:     to->use_alltoallw   = PETSC_FALSE;
1879:     from->use_alltoallw = PETSC_FALSE;
1880:     ctx->copy           = VecScatterCopy_PtoP_AllToAll;
1881: #endif
1882: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1883:   } else if (to->use_window) {
1884:     PetscMPIInt temptag,winsize;
1885:     MPI_Request *request;
1886:     MPI_Status  *status;
1887: 
1888:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
1889:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
1890:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
1891:     PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
1892:     PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
1893:     for (i=0; i<to->n; i++) {
1894:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
1895:     }
1896:     for (i=0; i<from->n; i++) {
1897:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
1898:     }
1899:     MPI_Waitall(to->n,request,status);
1900:     PetscFree2(request,status);

1902:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
1903:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
1904:     PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
1905:     PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
1906:     for (i=0; i<from->n; i++) {
1907:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
1908:     }
1909:     for (i=0; i<to->n; i++) {
1910:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
1911:     }
1912:     MPI_Waitall(from->n,request,status);
1913:     PetscFree2(request,status);
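          /* to->winstarts[i] and from->winstarts[i] record, for each neighbor, the offset
             within that neighbor's exposed memory window at which the data exchanged with
             this process begins; the offsets are obtained here with a plain Irecv/Send
             handshake before the windows are used. */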
1914: #endif
1915:   } else {
1916:     PetscTruth  use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
1917:     PetscInt    *sstarts = to->starts,  *rstarts = from->starts;
1918:     PetscMPIInt *sprocs  = to->procs,   *rprocs  = from->procs;
1919:     MPI_Request *swaits  = to->requests,*rwaits  = from->requests;
1920:     MPI_Request *rev_swaits,*rev_rwaits;
1921:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

1923:     /* allocate additional wait variables for the "reverse" scatter */
1924:     PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
1925:     PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
1926:     to->rev_requests   = rev_rwaits;
1927:     from->rev_requests = rev_swaits;

1929:     /* Register the receives that you will use later (sends for scatter reverse) */
1930:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_rsend",&use_rsend);
1931:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&use_ssend);
1932:     if (use_rsend) {
1933:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
1934:       to->use_readyreceiver    = PETSC_TRUE;
1935:       from->use_readyreceiver  = PETSC_TRUE;
1936:     } else {
1937:       to->use_readyreceiver    = PETSC_FALSE;
1938:       from->use_readyreceiver  = PETSC_FALSE;
1939:     }
1940:     if (use_ssend) {
1941:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
1942:     }

1944:     for (i=0; i<from->n; i++) {
1945:       if (use_rsend) {
1946:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1947:       } else if (use_ssend) {
1948:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1949:       } else {
1950:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1951:       }
1952:     }

1954:     for (i=0; i<to->n; i++) {
1955:       if (use_rsend) {
1956:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1957:       } else if (use_ssend) {
1958:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1959:       } else {
1960:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1961:       }
1962:     }
1963:     /* Register receives for scatter and reverse */
1964:     for (i=0; i<from->n; i++) {
1965:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
1966:     }
1967:     for (i=0; i<to->n; i++) {
1968:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
1969:     }
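          /* All sends and receives use persistent requests (MPI_Send_init/MPI_Recv_init);
             VecScatterBegin() later only starts them.  The rev_* requests are the same
             communication with the roles of sender and receiver swapped, used for
             SCATTER_REVERSE. */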
1970:     if (use_rsend) {
1971:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
1972:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
1973:       MPI_Barrier(comm);
1974:     }

1976:     ctx->copy      = VecScatterCopy_PtoP_X;
1977:   }
1978:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
1979:   switch (bs) {
1980:   case 12:
1981:     ctx->begin     = VecScatterBegin_12;
1982:     ctx->end       = VecScatterEnd_12;
1983:     break;
1984:   case 8:
1985:     ctx->begin     = VecScatterBegin_8;
1986:     ctx->end       = VecScatterEnd_8;
1987:     break;
1988:   case 7:
1989:     ctx->begin     = VecScatterBegin_7;
1990:     ctx->end       = VecScatterEnd_7;
1991:     break;
1992:   case 6:
1993:     ctx->begin     = VecScatterBegin_6;
1994:     ctx->end       = VecScatterEnd_6;
1995:     break;
1996:   case 5:
1997:     ctx->begin     = VecScatterBegin_5;
1998:     ctx->end       = VecScatterEnd_5;
1999:     break;
2000:   case 4:
2001:     ctx->begin     = VecScatterBegin_4;
2002:     ctx->end       = VecScatterEnd_4;
2003:     break;
2004:   case 3:
2005:     ctx->begin     = VecScatterBegin_3;
2006:     ctx->end       = VecScatterEnd_3;
2007:     break;
2008:   case 2:
2009:     ctx->begin     = VecScatterBegin_2;
2010:     ctx->end       = VecScatterEnd_2;
2011:     break;
2012:   case 1:
2013:     ctx->begin     = VecScatterBegin_1;
2014:     ctx->end       = VecScatterEnd_1;
2015:     break;
2016:   default:
2017:     SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2018:   }
2019:   ctx->view      = VecScatterView_MPI;
2020:   /* Check if the local scatter is actually a copy; important special case */
2021:   if (to->local.n) {
2022:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2023:   }
2024:   return(0);
2025: }
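/*
   Illustrative sketch (not part of this file; n, xidx, and yidx are placeholders for the
   index count and index arrays): the creation routines above are normally reached through
   the public VecScatterCreate() interface, roughly as in

       IS         ix,iy;
       VecScatter scat;
       ISCreateGeneral(comm,n,xidx,&ix);
       ISCreateGeneral(comm,n,yidx,&iy);
       VecScatterCreate(x,ix,y,iy,&scat);
       VecScatterBegin(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterDestroy(scat);

   VecScatterCreate() dispatches to VecScatterCreate_PtoS(), _StoP(), or _PtoP() in this
   file depending on the parallel layouts of x and y and the index sets supplied.
*/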



2029: /* ------------------------------------------------------------------------------------*/
2030: /*
2031:          Scatter from local Seq vectors to a parallel vector.
2032:          Reverses the order of the arguments, calls VecScatterCreate_PtoS(), and then
2033:          reverses the result.
2034: */
2037: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2038: {
2039:   PetscErrorCode         ierr;
2040:   MPI_Request            *waits;
2041:   VecScatter_MPI_General *to,*from;

2044:   VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2045:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2046:   from          = (VecScatter_MPI_General*)ctx->todata;
2047:   ctx->todata   = (void*)to;
2048:   ctx->fromdata = (void*)from;
2049:   /* these two are special; they are ALWAYS stored in the to struct */
2050:   to->sstatus   = from->sstatus;
2051:   to->rstatus   = from->rstatus;

2053:   from->sstatus = 0;
2054:   from->rstatus = 0;

2056:   waits              = from->rev_requests;
2057:   from->rev_requests = from->requests;
2058:   from->requests     = waits;
2059:   waits              = to->rev_requests;
2060:   to->rev_requests   = to->requests;
2061:   to->requests       = waits;
2062:   return(0);
2063: }

2065: /* ---------------------------------------------------------------------------------*/
2068: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
2069: {
2071:   PetscMPIInt    size,rank,tag,imdex,n;
2072:   PetscInt       *owners = xin->map->range;
2073:   PetscMPIInt    *nprocs = PETSC_NULL;
2074:   PetscInt       i,j,idx,nsends,*local_inidx = PETSC_NULL,*local_inidy = PETSC_NULL;
2075:   PetscInt       *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
2076:   PetscInt       *rvalues = PETSC_NULL,*svalues = PETSC_NULL,base,*values = PETSC_NULL,*rsvalues,recvtotal,lastidx;
2077:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2078:   MPI_Comm       comm;
2079:   MPI_Request    *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
2080:   MPI_Status     recv_status,*send_status = PETSC_NULL;
2081:   PetscTruth     duplicate = PETSC_FALSE;
2082: #if defined(PETSC_USE_DEBUG)
2083:   PetscTruth     found = PETSC_FALSE;
2084: #endif

2087:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2088:   PetscObjectGetComm((PetscObject)xin,&comm);
2089:   MPI_Comm_size(comm,&size);
2090:   MPI_Comm_rank(comm,&rank);
2091:   if (size == 1) {
2092:     VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,1,ctx);
2093:     return(0);
2094:   }

2096:   /*
2097:      Each processor ships off its inidx[j] and inidy[j] to the processor that owns inidx[j].
2098:      The processors then call VecScatterCreate_StoP() on the redistributed indices.
2099:   */
2100:   /*  first count number of contributors to each processor */
2101:   PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2102:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2103:   lastidx = -1;
2104:   j       = 0;
2105:   for (i=0; i<nx; i++) {
2106:     /* if indices are NOT locally sorted, need to start search at the beginning */
2107:     if (lastidx > (idx = inidx[i])) j = 0;
2108:     lastidx = idx;
2109:     for (; j<size; j++) {
2110:       if (idx >= owners[j] && idx < owners[j+1]) {
2111:         nprocs[j]++;
2112:         owner[i] = j;
2113: #if defined(PETSC_USE_DEBUG)
2114:         found = PETSC_TRUE;
2115: #endif
2116:         break;
2117:       }
2118:     }
2119: #if defined(PETSC_USE_DEBUG)
2120:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2121:     found = PETSC_FALSE;
2122: #endif
2123:   }
2124:   nsends = 0;  for (i=0; i<size; i++) { nsends += (nprocs[i] > 0);}

2126:   /* inform other processors of number of messages and max length */
2127:   PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
2128:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2129:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2130:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2132:   /* post receives:   */
2133:   PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);

2135:   count = 0;
2136:   for (i=0; i<nrecvs; i++) {
2137:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2138:     count += olengths1[i];
2139:   }
2140:   PetscFree(onodes1);

2142:   /* do sends:
2143:       1) starts[i] gives the starting index in svalues for the entries going to
2144:          the ith processor
2145:   */
2146:   starts[0]= 0;
2147:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2148:   for (i=0; i<nx; i++) {
2149:     svalues[2*starts[owner[i]]]       = inidx[i];
2150:     svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2151:   }
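        /* The index pairs are packed interleaved, with (inidx[i],inidy[i]) adjacent in
           svalues, which is why the message lengths and buffer offsets above and below are
           doubled relative to nprocs[] and starts[]. */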

2153:   starts[0] = 0;
2154:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2155:   count = 0;
2156:   for (i=0; i<size; i++) {
2157:     if (nprocs[i]) {
2158:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2159:       count++;
2160:     }
2161:   }
2162:   PetscFree3(nprocs,owner,starts);

2164:   /*  wait on receives */
2165:   count = nrecvs;
2166:   slen  = 0;
2167:   while (count) {
2168:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2169:     /* unpack receives into our local space */
2170:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2171:     slen += n/2;
2172:     count--;
2173:   }
2174:   if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2175: 
2176:   PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2177:   base  = owners[rank];
2178:   count = 0;
2179:   rsvalues = rvalues;
2180:   for (i=0; i<nrecvs; i++) {
2181:     values = rsvalues;
2182:     rsvalues += 2*olengths1[i];
2183:     for (j=0; j<olengths1[i]; j++) {
2184:       local_inidx[count]   = values[2*j] - base;
2185:       local_inidy[count++] = values[2*j+1];
2186:     }
2187:   }
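        /* The received pairs are split back apart: local_inidx gets the "from" indices
           converted to this process's local numbering (minus base), while local_inidy keeps
           the indices into yin; both feed VecScatterCreate_StoP() below. */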
2188:   PetscFree(olengths1);

2190:   /* wait on sends */
2191:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2192:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2194:   /*
2195:      should sort and remove duplicates from local_inidx,local_inidy 
2196:   */

2198: #if defined(do_it_slow)
2199:   /* sort on the from index */
2200:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2201:   start = 0;
2202:   while (start < slen) {
2203:     count = start+1;
2204:     last  = local_inidx[start];
2205:     while (count < slen && last == local_inidx[count]) count++;
2206:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2207:       /* sort on to index */
2208:       PetscSortInt(count-start,local_inidy+start);
2209:     }
2210:     /* remove duplicates; not most efficient way, but probably good enough */
2211:     i = start;
2212:     while (i < count-1) {
2213:       if (local_inidy[i] != local_inidy[i+1]) {
2214:         i++;
2215:       } else { /* found a duplicate */
2216:         duplicate = PETSC_TRUE;
2217:         for (j=i; j<slen-1; j++) {
2218:           local_inidx[j] = local_inidx[j+1];
2219:           local_inidy[j] = local_inidy[j+1];
2220:         }
2221:         slen--;
2222:         count--;
2223:       }
2224:     }
2225:     start = count;
2226:   }
2227: #endif
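        /* The sort/duplicate-removal pass above is compiled only when do_it_slow is defined;
           otherwise duplicate stays PETSC_FALSE and the message below is never printed. */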
2228:   if (duplicate) {
2229:     PetscInfo(ctx,"Duplicate from-to indices passed in VecScatterCreate(); they are ignored\n");
2230:   }
2231:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,1,ctx);
2232:   PetscFree2(local_inidx,local_inidy);
2233:   return(0);
2234: }