Actual source code: vpscat.c
petsc-3.5.4 2015-05-23
2: /*
3: Defines parallel vector scatters.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
11: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
12: {
13: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
14: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
15: PetscErrorCode ierr;
16: PetscInt i;
17: PetscMPIInt rank;
18: PetscViewerFormat format;
19: PetscBool iascii;
22: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
23: if (iascii) {
24: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
25: PetscViewerGetFormat(viewer,&format);
26: if (format == PETSC_VIEWER_ASCII_INFO) {
27: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
29: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
31: itmp = to->starts[to->n+1];
32: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
33: itmp = from->starts[from->n+1];
34: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
35: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
37: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
38: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
39: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
40: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
42: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
44: } else {
45: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
46: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
47: if (to->n) {
48: for (i=0; i<to->n; i++) {
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
50: }
51: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
52: for (i=0; i<to->starts[to->n]; i++) {
53: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
54: }
55: }
57: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
58: if (from->n) {
59: for (i=0; i<from->n; i++) {
60: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
61: }
63: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
64: for (i=0; i<from->starts[from->n]; i++) {
65: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
66: }
67: }
68: if (to->local.n) {
69: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
70: for (i=0; i<to->local.n; i++) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
72: }
73: }
75: PetscViewerFlush(viewer);
76: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
77: }
78: }
79: return(0);
80: }
82: /* -----------------------------------------------------------------------------------*/
83: /*
84: The next routine determines what part of the local part of the scatter is an
85: exact copy of values into their current location. We check this here and
86: then know that we need not perform that portion of the scatter when the vector is
87: scattering to itself with INSERT_VALUES.
89: This is currently not used but would speed up, for example DMLocalToLocalBegin/End()
91: */
94: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
95: {
96: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
98: PetscInt *nto_slots,*nfrom_slots,j = 0;
101: for (i=0; i<n; i++) {
102: if (to_slots[i] != from_slots[i]) n_nonmatching++;
103: }
105: if (!n_nonmatching) {
106: to->nonmatching_computed = PETSC_TRUE;
107: to->n_nonmatching = from->n_nonmatching = 0;
109: PetscInfo1(scatter,"Reduced %D to 0\n", n);
110: } else if (n_nonmatching == n) {
111: to->nonmatching_computed = PETSC_FALSE;
113: PetscInfo(scatter,"All values non-matching\n");
114: } else {
115: to->nonmatching_computed= PETSC_TRUE;
116: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
118: PetscMalloc1(n_nonmatching,&nto_slots);
119: PetscMalloc1(n_nonmatching,&nfrom_slots);
121: to->slots_nonmatching = nto_slots;
122: from->slots_nonmatching = nfrom_slots;
123: for (i=0; i<n; i++) {
124: if (to_slots[i] != from_slots[i]) {
125: nto_slots[j] = to_slots[i];
126: nfrom_slots[j] = from_slots[i];
127: j++;
128: }
129: }
130: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
131: }
132: return(0);
133: }
135: /* --------------------------------------------------------------------------------------*/
137: /* -------------------------------------------------------------------------------------*/
140: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
141: {
142: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
143: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
144: PetscErrorCode ierr;
145: PetscInt i;
148: if (to->use_readyreceiver) {
149: /*
150: Since we have already posted sends we must cancel them before freeing
151: the requests
152: */
153: for (i=0; i<from->n; i++) {
154: MPI_Cancel(from->requests+i);
155: }
156: for (i=0; i<to->n; i++) {
157: MPI_Cancel(to->rev_requests+i);
158: }
159: MPI_Waitall(from->n,from->requests,to->rstatus);
160: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
161: }
163: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
164: if (to->use_alltoallw) {
165: PetscFree3(to->wcounts,to->wdispls,to->types);
166: PetscFree3(from->wcounts,from->wdispls,from->types);
167: }
168: #endif
170: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
171: if (to->use_window) {
172: MPI_Win_free(&from->window);
173: MPI_Win_free(&to->window);
174: PetscFree(from->winstarts);
175: PetscFree(to->winstarts);
176: }
177: #endif
179: if (to->use_alltoallv) {
180: PetscFree2(to->counts,to->displs);
181: PetscFree2(from->counts,from->displs);
182: }
184: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
185: /*
186: IBM's PE version of MPI has a bug where freeing these guys will screw up later
187: message passing.
188: */
189: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
190: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
191: if (to->requests) {
192: for (i=0; i<to->n; i++) {
193: MPI_Request_free(to->requests + i);
194: }
195: }
196: if (to->rev_requests) {
197: for (i=0; i<to->n; i++) {
198: MPI_Request_free(to->rev_requests + i);
199: }
200: }
201: }
202: /*
203: MPICH could not properly cancel requests thus with ready receiver mode we
204: cannot free the requests. It may be fixed now, if not then put the following
205: code inside a if (!to->use_readyreceiver) {
206: */
207: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
208: if (from->requests) {
209: for (i=0; i<from->n; i++) {
210: MPI_Request_free(from->requests + i);
211: }
212: }
214: if (from->rev_requests) {
215: for (i=0; i<from->n; i++) {
216: MPI_Request_free(from->rev_requests + i);
217: }
218: }
219: }
220: #endif
222: PetscFree(to->local.vslots);
223: PetscFree(from->local.vslots);
224: PetscFree2(to->counts,to->displs);
225: PetscFree2(from->counts,from->displs);
226: PetscFree(to->local.slots_nonmatching);
227: PetscFree(from->local.slots_nonmatching);
228: PetscFree(to->rev_requests);
229: PetscFree(from->rev_requests);
230: PetscFree(to->requests);
231: PetscFree(from->requests);
232: PetscFree4(to->values,to->indices,to->starts,to->procs);
233: PetscFree2(to->sstatus,to->rstatus);
234: PetscFree4(from->values,from->indices,from->starts,from->procs);
235: PetscFree(from);
236: PetscFree(to);
237: return(0);
238: }
242: /* --------------------------------------------------------------------------------------*/
243: /*
244: Special optimization to see if the local part of the scatter is actually
245: a copy. The scatter routines call PetscMemcpy() instead.
247: */
250: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
251: {
252: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
253: PetscInt to_start,from_start;
257: to_start = to_slots[0];
258: from_start = from_slots[0];
260: for (i=1; i<n; i++) {
261: to_start += bs;
262: from_start += bs;
263: if (to_slots[i] != to_start) return(0);
264: if (from_slots[i] != from_start) return(0);
265: }
266: to->is_copy = PETSC_TRUE;
267: to->copy_start = to_slots[0];
268: to->copy_length = bs*sizeof(PetscScalar)*n;
269: from->is_copy = PETSC_TRUE;
270: from->copy_start = from_slots[0];
271: from->copy_length = bs*sizeof(PetscScalar)*n;
272: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
273: return(0);
274: }
276: /* --------------------------------------------------------------------------------------*/
280: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
281: {
282: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
283: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
284: PetscErrorCode ierr;
285: PetscInt ny,bs = in_from->bs;
288: out->begin = in->begin;
289: out->end = in->end;
290: out->copy = in->copy;
291: out->destroy = in->destroy;
292: out->view = in->view;
294: /* allocate entire send scatter context */
295: PetscNewLog(out,&out_to);
296: PetscNewLog(out,&out_from);
298: ny = in_to->starts[in_to->n];
299: out_to->n = in_to->n;
300: out_to->type = in_to->type;
301: out_to->sendfirst = in_to->sendfirst;
303: PetscMalloc1(out_to->n,&out_to->requests);
304: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
305: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
306: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
307: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
308: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
310: out->todata = (void*)out_to;
311: out_to->local.n = in_to->local.n;
312: out_to->local.nonmatching_computed = PETSC_FALSE;
313: out_to->local.n_nonmatching = 0;
314: out_to->local.slots_nonmatching = 0;
315: if (in_to->local.n) {
316: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
317: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
318: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
319: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
320: } else {
321: out_to->local.vslots = 0;
322: out_from->local.vslots = 0;
323: }
325: /* allocate entire receive context */
326: out_from->type = in_from->type;
327: ny = in_from->starts[in_from->n];
328: out_from->n = in_from->n;
329: out_from->sendfirst = in_from->sendfirst;
331: PetscMalloc1(out_from->n,&out_from->requests);
332: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
333: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
334: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
335: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
337: out->fromdata = (void*)out_from;
338: out_from->local.n = in_from->local.n;
339: out_from->local.nonmatching_computed = PETSC_FALSE;
340: out_from->local.n_nonmatching = 0;
341: out_from->local.slots_nonmatching = 0;
343: /*
344: set up the request arrays for use with isend_init() and irecv_init()
345: */
346: {
347: PetscMPIInt tag;
348: MPI_Comm comm;
349: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
350: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
351: PetscInt i;
352: PetscBool flg;
353: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
354: MPI_Request *rev_swaits,*rev_rwaits;
355: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
357: PetscMalloc1(in_to->n,&out_to->rev_requests);
358: PetscMalloc1(in_from->n,&out_from->rev_requests);
360: rev_rwaits = out_to->rev_requests;
361: rev_swaits = out_from->rev_requests;
363: out_from->bs = out_to->bs = bs;
364: tag = ((PetscObject)out)->tag;
365: PetscObjectGetComm((PetscObject)out,&comm);
367: /* Register the receives that you will use later (sends for scatter reverse) */
368: for (i=0; i<out_from->n; i++) {
369: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
370: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
371: }
373: flg = PETSC_FALSE;
374: PetscOptionsGetBool(NULL,"-vecscatter_rsend",&flg,NULL);
375: if (flg) {
376: out_to->use_readyreceiver = PETSC_TRUE;
377: out_from->use_readyreceiver = PETSC_TRUE;
378: for (i=0; i<out_to->n; i++) {
379: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
380: }
381: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
382: MPI_Barrier(comm);
383: PetscInfo(in,"Using VecScatter ready receiver mode\n");
384: } else {
385: out_to->use_readyreceiver = PETSC_FALSE;
386: out_from->use_readyreceiver = PETSC_FALSE;
387: flg = PETSC_FALSE;
388: PetscOptionsGetBool(NULL,"-vecscatter_ssend",&flg,NULL);
389: if (flg) {
390: PetscInfo(in,"Using VecScatter Ssend mode\n");
391: }
392: for (i=0; i<out_to->n; i++) {
393: if (!flg) {
394: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
395: } else {
396: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
397: }
398: }
399: }
400: /* Register receives for scatter reverse */
401: for (i=0; i<out_to->n; i++) {
402: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
403: }
404: }
405: return(0);
406: }
410: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
411: {
412: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
413: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
414: PetscErrorCode ierr;
415: PetscInt ny,bs = in_from->bs;
416: PetscMPIInt size;
419: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
421: out->begin = in->begin;
422: out->end = in->end;
423: out->copy = in->copy;
424: out->destroy = in->destroy;
425: out->view = in->view;
427: /* allocate entire send scatter context */
428: PetscNewLog(out,&out_to);
429: PetscNewLog(out,&out_from);
431: ny = in_to->starts[in_to->n];
432: out_to->n = in_to->n;
433: out_to->type = in_to->type;
434: out_to->sendfirst = in_to->sendfirst;
436: PetscMalloc1(out_to->n,&out_to->requests);
437: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
438: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
439: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
440: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
441: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
443: out->todata = (void*)out_to;
444: out_to->local.n = in_to->local.n;
445: out_to->local.nonmatching_computed = PETSC_FALSE;
446: out_to->local.n_nonmatching = 0;
447: out_to->local.slots_nonmatching = 0;
448: if (in_to->local.n) {
449: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
450: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
451: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
452: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
453: } else {
454: out_to->local.vslots = 0;
455: out_from->local.vslots = 0;
456: }
458: /* allocate entire receive context */
459: out_from->type = in_from->type;
460: ny = in_from->starts[in_from->n];
461: out_from->n = in_from->n;
462: out_from->sendfirst = in_from->sendfirst;
464: PetscMalloc1(out_from->n,&out_from->requests);
465: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
466: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
467: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
468: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
470: out->fromdata = (void*)out_from;
471: out_from->local.n = in_from->local.n;
472: out_from->local.nonmatching_computed = PETSC_FALSE;
473: out_from->local.n_nonmatching = 0;
474: out_from->local.slots_nonmatching = 0;
476: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
478: PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
479: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
480: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
482: PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
483: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
484: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
485: return(0);
486: }
487: /* --------------------------------------------------------------------------------------------------
488: Packs and unpacks the message data into send or from receive buffers.
490: These could be generated automatically.
492: Fortran kernels etc. could be used.
493: */
494: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
495: {
496: PetscInt i;
497: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
498: }
502: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
503: {
504: PetscInt i;
507: switch (addv) {
508: case INSERT_VALUES:
509: case INSERT_ALL_VALUES:
510: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
511: break;
512: case ADD_VALUES:
513: case ADD_ALL_VALUES:
514: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
515: break;
516: #if !defined(PETSC_USE_COMPLEX)
517: case MAX_VALUES:
518: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
519: #else
520: case MAX_VALUES:
521: #endif
522: case NOT_SET_VALUES:
523: break;
524: default:
525: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
526: }
527: return(0);
528: }
532: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
533: {
534: PetscInt i;
537: switch (addv) {
538: case INSERT_VALUES:
539: case INSERT_ALL_VALUES:
540: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
541: break;
542: case ADD_VALUES:
543: case ADD_ALL_VALUES:
544: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
545: break;
546: #if !defined(PETSC_USE_COMPLEX)
547: case MAX_VALUES:
548: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
549: #else
550: case MAX_VALUES:
551: #endif
552: case NOT_SET_VALUES:
553: break;
554: default:
555: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
556: }
557: return(0);
558: }
560: /* ----------------------------------------------------------------------------------------------- */
561: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
562: {
563: PetscInt i,idx;
565: for (i=0; i<n; i++) {
566: idx = *indicesx++;
567: y[0] = x[idx];
568: y[1] = x[idx+1];
569: y += 2;
570: }
571: }
575: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
576: {
577: PetscInt i,idy;
580: switch (addv) {
581: case INSERT_VALUES:
582: case INSERT_ALL_VALUES:
583: for (i=0; i<n; i++) {
584: idy = *indicesy++;
585: y[idy] = x[0];
586: y[idy+1] = x[1];
587: x += 2;
588: }
589: break;
590: case ADD_VALUES:
591: case ADD_ALL_VALUES:
592: for (i=0; i<n; i++) {
593: idy = *indicesy++;
594: y[idy] += x[0];
595: y[idy+1] += x[1];
596: x += 2;
597: }
598: break;
599: #if !defined(PETSC_USE_COMPLEX)
600: case MAX_VALUES:
601: for (i=0; i<n; i++) {
602: idy = *indicesy++;
603: y[idy] = PetscMax(y[idy],x[0]);
604: y[idy+1] = PetscMax(y[idy+1],x[1]);
605: x += 2;
606: }
607: #else
608: case MAX_VALUES:
609: #endif
610: case NOT_SET_VALUES:
611: break;
612: default:
613: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
614: }
615: return(0);
616: }
620: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
621: {
622: PetscInt i,idx,idy;
625: switch (addv) {
626: case INSERT_VALUES:
627: case INSERT_ALL_VALUES:
628: for (i=0; i<n; i++) {
629: idx = *indicesx++;
630: idy = *indicesy++;
631: y[idy] = x[idx];
632: y[idy+1] = x[idx+1];
633: }
634: break;
635: case ADD_VALUES:
636: case ADD_ALL_VALUES:
637: for (i=0; i<n; i++) {
638: idx = *indicesx++;
639: idy = *indicesy++;
640: y[idy] += x[idx];
641: y[idy+1] += x[idx+1];
642: }
643: break;
644: #if !defined(PETSC_USE_COMPLEX)
645: case MAX_VALUES:
646: for (i=0; i<n; i++) {
647: idx = *indicesx++;
648: idy = *indicesy++;
649: y[idy] = PetscMax(y[idy],x[idx]);
650: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
651: }
652: #else
653: case MAX_VALUES:
654: #endif
655: case NOT_SET_VALUES:
656: break;
657: default:
658: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
659: }
660: return(0);
661: }
662: /* ----------------------------------------------------------------------------------------------- */
663: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
664: {
665: PetscInt i,idx;
667: for (i=0; i<n; i++) {
668: idx = *indicesx++;
669: y[0] = x[idx];
670: y[1] = x[idx+1];
671: y[2] = x[idx+2];
672: y += 3;
673: }
674: }
677: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
678: {
679: PetscInt i,idy;
682: switch (addv) {
683: case INSERT_VALUES:
684: case INSERT_ALL_VALUES:
685: for (i=0; i<n; i++) {
686: idy = *indicesy++;
687: y[idy] = x[0];
688: y[idy+1] = x[1];
689: y[idy+2] = x[2];
690: x += 3;
691: }
692: break;
693: case ADD_VALUES:
694: case ADD_ALL_VALUES:
695: for (i=0; i<n; i++) {
696: idy = *indicesy++;
697: y[idy] += x[0];
698: y[idy+1] += x[1];
699: y[idy+2] += x[2];
700: x += 3;
701: }
702: break;
703: #if !defined(PETSC_USE_COMPLEX)
704: case MAX_VALUES:
705: for (i=0; i<n; i++) {
706: idy = *indicesy++;
707: y[idy] = PetscMax(y[idy],x[0]);
708: y[idy+1] = PetscMax(y[idy+1],x[1]);
709: y[idy+2] = PetscMax(y[idy+2],x[2]);
710: x += 3;
711: }
712: #else
713: case MAX_VALUES:
714: #endif
715: case NOT_SET_VALUES:
716: break;
717: default:
718: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
719: }
720: return(0);
721: }
725: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
726: {
727: PetscInt i,idx,idy;
730: switch (addv) {
731: case INSERT_VALUES:
732: case INSERT_ALL_VALUES:
733: for (i=0; i<n; i++) {
734: idx = *indicesx++;
735: idy = *indicesy++;
736: y[idy] = x[idx];
737: y[idy+1] = x[idx+1];
738: y[idy+2] = x[idx+2];
739: }
740: break;
741: case ADD_VALUES:
742: case ADD_ALL_VALUES:
743: for (i=0; i<n; i++) {
744: idx = *indicesx++;
745: idy = *indicesy++;
746: y[idy] += x[idx];
747: y[idy+1] += x[idx+1];
748: y[idy+2] += x[idx+2];
749: }
750: break;
751: #if !defined(PETSC_USE_COMPLEX)
752: case MAX_VALUES:
753: for (i=0; i<n; i++) {
754: idx = *indicesx++;
755: idy = *indicesy++;
756: y[idy] = PetscMax(y[idy],x[idx]);
757: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
758: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
759: }
760: #else
761: case MAX_VALUES:
762: #endif
763: case NOT_SET_VALUES:
764: break;
765: default:
766: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
767: }
768: return(0);
769: }
770: /* ----------------------------------------------------------------------------------------------- */
771: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
772: {
773: PetscInt i,idx;
775: for (i=0; i<n; i++) {
776: idx = *indicesx++;
777: y[0] = x[idx];
778: y[1] = x[idx+1];
779: y[2] = x[idx+2];
780: y[3] = x[idx+3];
781: y += 4;
782: }
783: }
786: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
787: {
788: PetscInt i,idy;
791: switch (addv) {
792: case INSERT_VALUES:
793: case INSERT_ALL_VALUES:
794: for (i=0; i<n; i++) {
795: idy = *indicesy++;
796: y[idy] = x[0];
797: y[idy+1] = x[1];
798: y[idy+2] = x[2];
799: y[idy+3] = x[3];
800: x += 4;
801: }
802: break;
803: case ADD_VALUES:
804: case ADD_ALL_VALUES:
805: for (i=0; i<n; i++) {
806: idy = *indicesy++;
807: y[idy] += x[0];
808: y[idy+1] += x[1];
809: y[idy+2] += x[2];
810: y[idy+3] += x[3];
811: x += 4;
812: }
813: break;
814: #if !defined(PETSC_USE_COMPLEX)
815: case MAX_VALUES:
816: for (i=0; i<n; i++) {
817: idy = *indicesy++;
818: y[idy] = PetscMax(y[idy],x[0]);
819: y[idy+1] = PetscMax(y[idy+1],x[1]);
820: y[idy+2] = PetscMax(y[idy+2],x[2]);
821: y[idy+3] = PetscMax(y[idy+3],x[3]);
822: x += 4;
823: }
824: #else
825: case MAX_VALUES:
826: #endif
827: case NOT_SET_VALUES:
828: break;
829: default:
830: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
831: }
832: return(0);
833: }
837: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
838: {
839: PetscInt i,idx,idy;
842: switch (addv) {
843: case INSERT_VALUES:
844: case INSERT_ALL_VALUES:
845: for (i=0; i<n; i++) {
846: idx = *indicesx++;
847: idy = *indicesy++;
848: y[idy] = x[idx];
849: y[idy+1] = x[idx+1];
850: y[idy+2] = x[idx+2];
851: y[idy+3] = x[idx+3];
852: }
853: break;
854: case ADD_VALUES:
855: case ADD_ALL_VALUES:
856: for (i=0; i<n; i++) {
857: idx = *indicesx++;
858: idy = *indicesy++;
859: y[idy] += x[idx];
860: y[idy+1] += x[idx+1];
861: y[idy+2] += x[idx+2];
862: y[idy+3] += x[idx+3];
863: }
864: break;
865: #if !defined(PETSC_USE_COMPLEX)
866: case MAX_VALUES:
867: for (i=0; i<n; i++) {
868: idx = *indicesx++;
869: idy = *indicesy++;
870: y[idy] = PetscMax(y[idy],x[idx]);
871: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
872: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
873: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
874: }
875: #else
876: case MAX_VALUES:
877: #endif
878: case NOT_SET_VALUES:
879: break;
880: default:
881: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
882: }
883: return(0);
884: }
885: /* ----------------------------------------------------------------------------------------------- */
886: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
887: {
888: PetscInt i,idx;
890: for (i=0; i<n; i++) {
891: idx = *indicesx++;
892: y[0] = x[idx];
893: y[1] = x[idx+1];
894: y[2] = x[idx+2];
895: y[3] = x[idx+3];
896: y[4] = x[idx+4];
897: y += 5;
898: }
899: }
903: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
904: {
905: PetscInt i,idy;
908: switch (addv) {
909: case INSERT_VALUES:
910: case INSERT_ALL_VALUES:
911: for (i=0; i<n; i++) {
912: idy = *indicesy++;
913: y[idy] = x[0];
914: y[idy+1] = x[1];
915: y[idy+2] = x[2];
916: y[idy+3] = x[3];
917: y[idy+4] = x[4];
918: x += 5;
919: }
920: break;
921: case ADD_VALUES:
922: case ADD_ALL_VALUES:
923: for (i=0; i<n; i++) {
924: idy = *indicesy++;
925: y[idy] += x[0];
926: y[idy+1] += x[1];
927: y[idy+2] += x[2];
928: y[idy+3] += x[3];
929: y[idy+4] += x[4];
930: x += 5;
931: }
932: break;
933: #if !defined(PETSC_USE_COMPLEX)
934: case MAX_VALUES:
935: for (i=0; i<n; i++) {
936: idy = *indicesy++;
937: y[idy] = PetscMax(y[idy],x[0]);
938: y[idy+1] = PetscMax(y[idy+1],x[1]);
939: y[idy+2] = PetscMax(y[idy+2],x[2]);
940: y[idy+3] = PetscMax(y[idy+3],x[3]);
941: y[idy+4] = PetscMax(y[idy+4],x[4]);
942: x += 5;
943: }
944: #else
945: case MAX_VALUES:
946: #endif
947: case NOT_SET_VALUES:
948: break;
949: default:
950: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
951: }
952: return(0);
953: }
957: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
958: {
959: PetscInt i,idx,idy;
962: switch (addv) {
963: case INSERT_VALUES:
964: case INSERT_ALL_VALUES:
965: for (i=0; i<n; i++) {
966: idx = *indicesx++;
967: idy = *indicesy++;
968: y[idy] = x[idx];
969: y[idy+1] = x[idx+1];
970: y[idy+2] = x[idx+2];
971: y[idy+3] = x[idx+3];
972: y[idy+4] = x[idx+4];
973: }
974: break;
975: case ADD_VALUES:
976: case ADD_ALL_VALUES:
977: for (i=0; i<n; i++) {
978: idx = *indicesx++;
979: idy = *indicesy++;
980: y[idy] += x[idx];
981: y[idy+1] += x[idx+1];
982: y[idy+2] += x[idx+2];
983: y[idy+3] += x[idx+3];
984: y[idy+4] += x[idx+4];
985: }
986: break;
987: #if !defined(PETSC_USE_COMPLEX)
988: case MAX_VALUES:
989: for (i=0; i<n; i++) {
990: idx = *indicesx++;
991: idy = *indicesy++;
992: y[idy] = PetscMax(y[idy],x[idx]);
993: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
994: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
995: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
996: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
997: }
998: #else
999: case MAX_VALUES:
1000: #endif
1001: case NOT_SET_VALUES:
1002: break;
1003: default:
1004: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1005: }
1006: return(0);
1007: }
1008: /* ----------------------------------------------------------------------------------------------- */
1009: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1010: {
1011: PetscInt i,idx;
1013: for (i=0; i<n; i++) {
1014: idx = *indicesx++;
1015: y[0] = x[idx];
1016: y[1] = x[idx+1];
1017: y[2] = x[idx+2];
1018: y[3] = x[idx+3];
1019: y[4] = x[idx+4];
1020: y[5] = x[idx+5];
1021: y += 6;
1022: }
1023: }
1027: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1028: {
1029: PetscInt i,idy;
1032: switch (addv) {
1033: case INSERT_VALUES:
1034: case INSERT_ALL_VALUES:
1035: for (i=0; i<n; i++) {
1036: idy = *indicesy++;
1037: y[idy] = x[0];
1038: y[idy+1] = x[1];
1039: y[idy+2] = x[2];
1040: y[idy+3] = x[3];
1041: y[idy+4] = x[4];
1042: y[idy+5] = x[5];
1043: x += 6;
1044: }
1045: break;
1046: case ADD_VALUES:
1047: case ADD_ALL_VALUES:
1048: for (i=0; i<n; i++) {
1049: idy = *indicesy++;
1050: y[idy] += x[0];
1051: y[idy+1] += x[1];
1052: y[idy+2] += x[2];
1053: y[idy+3] += x[3];
1054: y[idy+4] += x[4];
1055: y[idy+5] += x[5];
1056: x += 6;
1057: }
1058: break;
1059: #if !defined(PETSC_USE_COMPLEX)
1060: case MAX_VALUES:
1061: for (i=0; i<n; i++) {
1062: idy = *indicesy++;
1063: y[idy] = PetscMax(y[idy],x[0]);
1064: y[idy+1] = PetscMax(y[idy+1],x[1]);
1065: y[idy+2] = PetscMax(y[idy+2],x[2]);
1066: y[idy+3] = PetscMax(y[idy+3],x[3]);
1067: y[idy+4] = PetscMax(y[idy+4],x[4]);
1068: y[idy+5] = PetscMax(y[idy+5],x[5]);
1069: x += 6;
1070: }
1071: #else
1072: case MAX_VALUES:
1073: #endif
1074: case NOT_SET_VALUES:
1075: break;
1076: default:
1077: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1078: }
1079: return(0);
1080: }
1084: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1085: {
1086: PetscInt i,idx,idy;
1089: switch (addv) {
1090: case INSERT_VALUES:
1091: case INSERT_ALL_VALUES:
1092: for (i=0; i<n; i++) {
1093: idx = *indicesx++;
1094: idy = *indicesy++;
1095: y[idy] = x[idx];
1096: y[idy+1] = x[idx+1];
1097: y[idy+2] = x[idx+2];
1098: y[idy+3] = x[idx+3];
1099: y[idy+4] = x[idx+4];
1100: y[idy+5] = x[idx+5];
1101: }
1102: break;
1103: case ADD_VALUES:
1104: case ADD_ALL_VALUES:
1105: for (i=0; i<n; i++) {
1106: idx = *indicesx++;
1107: idy = *indicesy++;
1108: y[idy] += x[idx];
1109: y[idy+1] += x[idx+1];
1110: y[idy+2] += x[idx+2];
1111: y[idy+3] += x[idx+3];
1112: y[idy+4] += x[idx+4];
1113: y[idy+5] += x[idx+5];
1114: }
1115: break;
1116: #if !defined(PETSC_USE_COMPLEX)
1117: case MAX_VALUES:
1118: for (i=0; i<n; i++) {
1119: idx = *indicesx++;
1120: idy = *indicesy++;
1121: y[idy] = PetscMax(y[idy],x[idx]);
1122: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1123: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1124: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1125: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1126: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1127: }
1128: #else
1129: case MAX_VALUES:
1130: #endif
1131: case NOT_SET_VALUES:
1132: break;
1133: default:
1134: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1135: }
1136: return(0);
1137: }
1138: /* ----------------------------------------------------------------------------------------------- */
1139: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1140: {
1141: PetscInt i,idx;
1143: for (i=0; i<n; i++) {
1144: idx = *indicesx++;
1145: y[0] = x[idx];
1146: y[1] = x[idx+1];
1147: y[2] = x[idx+2];
1148: y[3] = x[idx+3];
1149: y[4] = x[idx+4];
1150: y[5] = x[idx+5];
1151: y[6] = x[idx+6];
1152: y += 7;
1153: }
1154: }
1158: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1159: {
1160: PetscInt i,idy;
1163: switch (addv) {
1164: case INSERT_VALUES:
1165: case INSERT_ALL_VALUES:
1166: for (i=0; i<n; i++) {
1167: idy = *indicesy++;
1168: y[idy] = x[0];
1169: y[idy+1] = x[1];
1170: y[idy+2] = x[2];
1171: y[idy+3] = x[3];
1172: y[idy+4] = x[4];
1173: y[idy+5] = x[5];
1174: y[idy+6] = x[6];
1175: x += 7;
1176: }
1177: break;
1178: case ADD_VALUES:
1179: case ADD_ALL_VALUES:
1180: for (i=0; i<n; i++) {
1181: idy = *indicesy++;
1182: y[idy] += x[0];
1183: y[idy+1] += x[1];
1184: y[idy+2] += x[2];
1185: y[idy+3] += x[3];
1186: y[idy+4] += x[4];
1187: y[idy+5] += x[5];
1188: y[idy+6] += x[6];
1189: x += 7;
1190: }
1191: break;
1192: #if !defined(PETSC_USE_COMPLEX)
1193: case MAX_VALUES:
1194: for (i=0; i<n; i++) {
1195: idy = *indicesy++;
1196: y[idy] = PetscMax(y[idy],x[0]);
1197: y[idy+1] = PetscMax(y[idy+1],x[1]);
1198: y[idy+2] = PetscMax(y[idy+2],x[2]);
1199: y[idy+3] = PetscMax(y[idy+3],x[3]);
1200: y[idy+4] = PetscMax(y[idy+4],x[4]);
1201: y[idy+5] = PetscMax(y[idy+5],x[5]);
1202: y[idy+6] = PetscMax(y[idy+6],x[6]);
1203: x += 7;
1204: }
1205: #else
1206: case MAX_VALUES:
1207: #endif
1208: case NOT_SET_VALUES:
1209: break;
1210: default:
1211: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1212: }
1213: return(0);
1214: }
1218: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1219: {
1220: PetscInt i,idx,idy;
1223: switch (addv) {
1224: case INSERT_VALUES:
1225: case INSERT_ALL_VALUES:
1226: for (i=0; i<n; i++) {
1227: idx = *indicesx++;
1228: idy = *indicesy++;
1229: y[idy] = x[idx];
1230: y[idy+1] = x[idx+1];
1231: y[idy+2] = x[idx+2];
1232: y[idy+3] = x[idx+3];
1233: y[idy+4] = x[idx+4];
1234: y[idy+5] = x[idx+5];
1235: y[idy+6] = x[idx+6];
1236: }
1237: break;
1238: case ADD_VALUES:
1239: case ADD_ALL_VALUES:
1240: for (i=0; i<n; i++) {
1241: idx = *indicesx++;
1242: idy = *indicesy++;
1243: y[idy] += x[idx];
1244: y[idy+1] += x[idx+1];
1245: y[idy+2] += x[idx+2];
1246: y[idy+3] += x[idx+3];
1247: y[idy+4] += x[idx+4];
1248: y[idy+5] += x[idx+5];
1249: y[idy+6] += x[idx+6];
1250: }
1251: break;
1252: #if !defined(PETSC_USE_COMPLEX)
1253: case MAX_VALUES:
1254: for (i=0; i<n; i++) {
1255: idx = *indicesx++;
1256: idy = *indicesy++;
1257: y[idy] = PetscMax(y[idy],x[idx]);
1258: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1259: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1260: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1261: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1262: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1263: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1264: }
1265: #else
1266: case MAX_VALUES:
1267: #endif
1268: case NOT_SET_VALUES:
1269: break;
1270: default:
1271: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1272: }
1273: return(0);
1274: }
1275: /* ----------------------------------------------------------------------------------------------- */
1276: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1277: {
1278: PetscInt i,idx;
1280: for (i=0; i<n; i++) {
1281: idx = *indicesx++;
1282: y[0] = x[idx];
1283: y[1] = x[idx+1];
1284: y[2] = x[idx+2];
1285: y[3] = x[idx+3];
1286: y[4] = x[idx+4];
1287: y[5] = x[idx+5];
1288: y[6] = x[idx+6];
1289: y[7] = x[idx+7];
1290: y += 8;
1291: }
1292: }
1296: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1297: {
1298: PetscInt i,idy;
1301: switch (addv) {
1302: case INSERT_VALUES:
1303: case INSERT_ALL_VALUES:
1304: for (i=0; i<n; i++) {
1305: idy = *indicesy++;
1306: y[idy] = x[0];
1307: y[idy+1] = x[1];
1308: y[idy+2] = x[2];
1309: y[idy+3] = x[3];
1310: y[idy+4] = x[4];
1311: y[idy+5] = x[5];
1312: y[idy+6] = x[6];
1313: y[idy+7] = x[7];
1314: x += 8;
1315: }
1316: break;
1317: case ADD_VALUES:
1318: case ADD_ALL_VALUES:
1319: for (i=0; i<n; i++) {
1320: idy = *indicesy++;
1321: y[idy] += x[0];
1322: y[idy+1] += x[1];
1323: y[idy+2] += x[2];
1324: y[idy+3] += x[3];
1325: y[idy+4] += x[4];
1326: y[idy+5] += x[5];
1327: y[idy+6] += x[6];
1328: y[idy+7] += x[7];
1329: x += 8;
1330: }
1331: break;
1332: #if !defined(PETSC_USE_COMPLEX)
1333: case MAX_VALUES:
1334: for (i=0; i<n; i++) {
1335: idy = *indicesy++;
1336: y[idy] = PetscMax(y[idy],x[0]);
1337: y[idy+1] = PetscMax(y[idy+1],x[1]);
1338: y[idy+2] = PetscMax(y[idy+2],x[2]);
1339: y[idy+3] = PetscMax(y[idy+3],x[3]);
1340: y[idy+4] = PetscMax(y[idy+4],x[4]);
1341: y[idy+5] = PetscMax(y[idy+5],x[5]);
1342: y[idy+6] = PetscMax(y[idy+6],x[6]);
1343: y[idy+7] = PetscMax(y[idy+7],x[7]);
1344: x += 8;
1345: }
1346: #else
1347: case MAX_VALUES:
1348: #endif
1349: case NOT_SET_VALUES:
1350: break;
1351: default:
1352: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1353: }
1354: return(0);
1355: }
1359: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1360: {
1361: PetscInt i,idx,idy;
1364: switch (addv) {
1365: case INSERT_VALUES:
1366: case INSERT_ALL_VALUES:
1367: for (i=0; i<n; i++) {
1368: idx = *indicesx++;
1369: idy = *indicesy++;
1370: y[idy] = x[idx];
1371: y[idy+1] = x[idx+1];
1372: y[idy+2] = x[idx+2];
1373: y[idy+3] = x[idx+3];
1374: y[idy+4] = x[idx+4];
1375: y[idy+5] = x[idx+5];
1376: y[idy+6] = x[idx+6];
1377: y[idy+7] = x[idx+7];
1378: }
1379: break;
1380: case ADD_VALUES:
1381: case ADD_ALL_VALUES:
1382: for (i=0; i<n; i++) {
1383: idx = *indicesx++;
1384: idy = *indicesy++;
1385: y[idy] += x[idx];
1386: y[idy+1] += x[idx+1];
1387: y[idy+2] += x[idx+2];
1388: y[idy+3] += x[idx+3];
1389: y[idy+4] += x[idx+4];
1390: y[idy+5] += x[idx+5];
1391: y[idy+6] += x[idx+6];
1392: y[idy+7] += x[idx+7];
1393: }
1394: break;
1395: #if !defined(PETSC_USE_COMPLEX)
1396: case MAX_VALUES:
1397: for (i=0; i<n; i++) {
1398: idx = *indicesx++;
1399: idy = *indicesy++;
1400: y[idy] = PetscMax(y[idy],x[idx]);
1401: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1402: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1403: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1404: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1405: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1406: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1407: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1408: }
1409: #else
1410: case MAX_VALUES:
1411: #endif
1412: case NOT_SET_VALUES:
1413: break;
1414: default:
1415: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1416: }
1417: return(0);
1418: }
1420: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1421: {
1422: PetscInt i,idx;
1424: for (i=0; i<n; i++) {
1425: idx = *indicesx++;
1426: y[0] = x[idx];
1427: y[1] = x[idx+1];
1428: y[2] = x[idx+2];
1429: y[3] = x[idx+3];
1430: y[4] = x[idx+4];
1431: y[5] = x[idx+5];
1432: y[6] = x[idx+6];
1433: y[7] = x[idx+7];
1434: y[8] = x[idx+8];
1435: y += 9;
1436: }
1437: }
1441: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1442: {
1443: PetscInt i,idy;
1446: switch (addv) {
1447: case INSERT_VALUES:
1448: case INSERT_ALL_VALUES:
1449: for (i=0; i<n; i++) {
1450: idy = *indicesy++;
1451: y[idy] = x[0];
1452: y[idy+1] = x[1];
1453: y[idy+2] = x[2];
1454: y[idy+3] = x[3];
1455: y[idy+4] = x[4];
1456: y[idy+5] = x[5];
1457: y[idy+6] = x[6];
1458: y[idy+7] = x[7];
1459: y[idy+8] = x[8];
1460: x += 9;
1461: }
1462: break;
1463: case ADD_VALUES:
1464: case ADD_ALL_VALUES:
1465: for (i=0; i<n; i++) {
1466: idy = *indicesy++;
1467: y[idy] += x[0];
1468: y[idy+1] += x[1];
1469: y[idy+2] += x[2];
1470: y[idy+3] += x[3];
1471: y[idy+4] += x[4];
1472: y[idy+5] += x[5];
1473: y[idy+6] += x[6];
1474: y[idy+7] += x[7];
1475: y[idy+8] += x[8];
1476: x += 9;
1477: }
1478: break;
1479: #if !defined(PETSC_USE_COMPLEX)
1480: case MAX_VALUES:
1481: for (i=0; i<n; i++) {
1482: idy = *indicesy++;
1483: y[idy] = PetscMax(y[idy],x[0]);
1484: y[idy+1] = PetscMax(y[idy+1],x[1]);
1485: y[idy+2] = PetscMax(y[idy+2],x[2]);
1486: y[idy+3] = PetscMax(y[idy+3],x[3]);
1487: y[idy+4] = PetscMax(y[idy+4],x[4]);
1488: y[idy+5] = PetscMax(y[idy+5],x[5]);
1489: y[idy+6] = PetscMax(y[idy+6],x[6]);
1490: y[idy+7] = PetscMax(y[idy+7],x[7]);
1491: y[idy+8] = PetscMax(y[idy+8],x[8]);
1492: x += 9;
1493: }
1494: #else
1495: case MAX_VALUES:
1496: #endif
1497: case NOT_SET_VALUES:
1498: break;
1499: default:
1500: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1501: }
1502: return(0);
1503: }
1507: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1508: {
1509: PetscInt i,idx,idy;
1512: switch (addv) {
1513: case INSERT_VALUES:
1514: case INSERT_ALL_VALUES:
1515: for (i=0; i<n; i++) {
1516: idx = *indicesx++;
1517: idy = *indicesy++;
1518: y[idy] = x[idx];
1519: y[idy+1] = x[idx+1];
1520: y[idy+2] = x[idx+2];
1521: y[idy+3] = x[idx+3];
1522: y[idy+4] = x[idx+4];
1523: y[idy+5] = x[idx+5];
1524: y[idy+6] = x[idx+6];
1525: y[idy+7] = x[idx+7];
1526: y[idy+8] = x[idx+8];
1527: }
1528: break;
1529: case ADD_VALUES:
1530: case ADD_ALL_VALUES:
1531: for (i=0; i<n; i++) {
1532: idx = *indicesx++;
1533: idy = *indicesy++;
1534: y[idy] += x[idx];
1535: y[idy+1] += x[idx+1];
1536: y[idy+2] += x[idx+2];
1537: y[idy+3] += x[idx+3];
1538: y[idy+4] += x[idx+4];
1539: y[idy+5] += x[idx+5];
1540: y[idy+6] += x[idx+6];
1541: y[idy+7] += x[idx+7];
1542: y[idy+8] += x[idx+8];
1543: }
1544: break;
1545: #if !defined(PETSC_USE_COMPLEX)
1546: case MAX_VALUES:
1547: for (i=0; i<n; i++) {
1548: idx = *indicesx++;
1549: idy = *indicesy++;
1550: y[idy] = PetscMax(y[idy],x[idx]);
1551: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1552: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1553: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1554: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1555: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1556: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1557: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1558: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1559: }
1560: #else
1561: case MAX_VALUES:
1562: #endif
1563: case NOT_SET_VALUES:
1564: break;
1565: default:
1566: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1567: }
1568: return(0);
1569: }
1571: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1572: {
1573: PetscInt i,idx;
1575: for (i=0; i<n; i++) {
1576: idx = *indicesx++;
1577: y[0] = x[idx];
1578: y[1] = x[idx+1];
1579: y[2] = x[idx+2];
1580: y[3] = x[idx+3];
1581: y[4] = x[idx+4];
1582: y[5] = x[idx+5];
1583: y[6] = x[idx+6];
1584: y[7] = x[idx+7];
1585: y[8] = x[idx+8];
1586: y[9] = x[idx+9];
1587: y += 10;
1588: }
1589: }
1593: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1594: {
1595: PetscInt i,idy;
1598: switch (addv) {
1599: case INSERT_VALUES:
1600: case INSERT_ALL_VALUES:
1601: for (i=0; i<n; i++) {
1602: idy = *indicesy++;
1603: y[idy] = x[0];
1604: y[idy+1] = x[1];
1605: y[idy+2] = x[2];
1606: y[idy+3] = x[3];
1607: y[idy+4] = x[4];
1608: y[idy+5] = x[5];
1609: y[idy+6] = x[6];
1610: y[idy+7] = x[7];
1611: y[idy+8] = x[8];
1612: y[idy+9] = x[9];
1613: x += 10;
1614: }
1615: break;
1616: case ADD_VALUES:
1617: case ADD_ALL_VALUES:
1618: for (i=0; i<n; i++) {
1619: idy = *indicesy++;
1620: y[idy] += x[0];
1621: y[idy+1] += x[1];
1622: y[idy+2] += x[2];
1623: y[idy+3] += x[3];
1624: y[idy+4] += x[4];
1625: y[idy+5] += x[5];
1626: y[idy+6] += x[6];
1627: y[idy+7] += x[7];
1628: y[idy+8] += x[8];
1629: y[idy+9] += x[9];
1630: x += 10;
1631: }
1632: break;
1633: #if !defined(PETSC_USE_COMPLEX)
1634: case MAX_VALUES:
1635: for (i=0; i<n; i++) {
1636: idy = *indicesy++;
1637: y[idy] = PetscMax(y[idy],x[0]);
1638: y[idy+1] = PetscMax(y[idy+1],x[1]);
1639: y[idy+2] = PetscMax(y[idy+2],x[2]);
1640: y[idy+3] = PetscMax(y[idy+3],x[3]);
1641: y[idy+4] = PetscMax(y[idy+4],x[4]);
1642: y[idy+5] = PetscMax(y[idy+5],x[5]);
1643: y[idy+6] = PetscMax(y[idy+6],x[6]);
1644: y[idy+7] = PetscMax(y[idy+7],x[7]);
1645: y[idy+8] = PetscMax(y[idy+8],x[8]);
1646: y[idy+9] = PetscMax(y[idy+9],x[9]);
1647: x += 10;
1648: }
1649: #else
1650: case MAX_VALUES:
1651: #endif
1652: case NOT_SET_VALUES:
1653: break;
1654: default:
1655: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1656: }
1657: return(0);
1658: }
1662: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1663: {
1664: PetscInt i,idx,idy;
1667: switch (addv) {
1668: case INSERT_VALUES:
1669: case INSERT_ALL_VALUES:
1670: for (i=0; i<n; i++) {
1671: idx = *indicesx++;
1672: idy = *indicesy++;
1673: y[idy] = x[idx];
1674: y[idy+1] = x[idx+1];
1675: y[idy+2] = x[idx+2];
1676: y[idy+3] = x[idx+3];
1677: y[idy+4] = x[idx+4];
1678: y[idy+5] = x[idx+5];
1679: y[idy+6] = x[idx+6];
1680: y[idy+7] = x[idx+7];
1681: y[idy+8] = x[idx+8];
1682: y[idy+9] = x[idx+9];
1683: }
1684: break;
1685: case ADD_VALUES:
1686: case ADD_ALL_VALUES:
1687: for (i=0; i<n; i++) {
1688: idx = *indicesx++;
1689: idy = *indicesy++;
1690: y[idy] += x[idx];
1691: y[idy+1] += x[idx+1];
1692: y[idy+2] += x[idx+2];
1693: y[idy+3] += x[idx+3];
1694: y[idy+4] += x[idx+4];
1695: y[idy+5] += x[idx+5];
1696: y[idy+6] += x[idx+6];
1697: y[idy+7] += x[idx+7];
1698: y[idy+8] += x[idx+8];
1699: y[idy+9] += x[idx+9];
1700: }
1701: break;
1702: #if !defined(PETSC_USE_COMPLEX)
1703: case MAX_VALUES:
1704: for (i=0; i<n; i++) {
1705: idx = *indicesx++;
1706: idy = *indicesy++;
1707: y[idy] = PetscMax(y[idy],x[idx]);
1708: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1709: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1710: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1711: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1712: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1713: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1714: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1715: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1716: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1717: }
1718: #else
1719: case MAX_VALUES:
1720: #endif
1721: case NOT_SET_VALUES:
1722: break;
1723: default:
1724: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1725: }
1726: return(0);
1727: }
1729: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1730: {
1731: PetscInt i,idx;
1733: for (i=0; i<n; i++) {
1734: idx = *indicesx++;
1735: y[0] = x[idx];
1736: y[1] = x[idx+1];
1737: y[2] = x[idx+2];
1738: y[3] = x[idx+3];
1739: y[4] = x[idx+4];
1740: y[5] = x[idx+5];
1741: y[6] = x[idx+6];
1742: y[7] = x[idx+7];
1743: y[8] = x[idx+8];
1744: y[9] = x[idx+9];
1745: y[10] = x[idx+10];
1746: y += 11;
1747: }
1748: }
1752: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1753: {
1754: PetscInt i,idy;
1757: switch (addv) {
1758: case INSERT_VALUES:
1759: case INSERT_ALL_VALUES:
1760: for (i=0; i<n; i++) {
1761: idy = *indicesy++;
1762: y[idy] = x[0];
1763: y[idy+1] = x[1];
1764: y[idy+2] = x[2];
1765: y[idy+3] = x[3];
1766: y[idy+4] = x[4];
1767: y[idy+5] = x[5];
1768: y[idy+6] = x[6];
1769: y[idy+7] = x[7];
1770: y[idy+8] = x[8];
1771: y[idy+9] = x[9];
1772: y[idy+10] = x[10];
1773: x += 11;
1774: }
1775: break;
1776: case ADD_VALUES:
1777: case ADD_ALL_VALUES:
1778: for (i=0; i<n; i++) {
1779: idy = *indicesy++;
1780: y[idy] += x[0];
1781: y[idy+1] += x[1];
1782: y[idy+2] += x[2];
1783: y[idy+3] += x[3];
1784: y[idy+4] += x[4];
1785: y[idy+5] += x[5];
1786: y[idy+6] += x[6];
1787: y[idy+7] += x[7];
1788: y[idy+8] += x[8];
1789: y[idy+9] += x[9];
1790: y[idy+10] += x[10];
1791: x += 11;
1792: }
1793: break;
1794: #if !defined(PETSC_USE_COMPLEX)
1795: case MAX_VALUES:
1796: for (i=0; i<n; i++) {
1797: idy = *indicesy++;
1798: y[idy] = PetscMax(y[idy],x[0]);
1799: y[idy+1] = PetscMax(y[idy+1],x[1]);
1800: y[idy+2] = PetscMax(y[idy+2],x[2]);
1801: y[idy+3] = PetscMax(y[idy+3],x[3]);
1802: y[idy+4] = PetscMax(y[idy+4],x[4]);
1803: y[idy+5] = PetscMax(y[idy+5],x[5]);
1804: y[idy+6] = PetscMax(y[idy+6],x[6]);
1805: y[idy+7] = PetscMax(y[idy+7],x[7]);
1806: y[idy+8] = PetscMax(y[idy+8],x[8]);
1807: y[idy+9] = PetscMax(y[idy+9],x[9]);
1808: y[idy+10] = PetscMax(y[idy+10],x[10]);
1809: x += 11;
1810: }
1811: #else
1812: case MAX_VALUES:
1813: #endif
1814: case NOT_SET_VALUES:
1815: break;
1816: default:
1817: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1818: }
1819: return(0);
1820: }
1824: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1825: {
1826: PetscInt i,idx,idy;
1829: switch (addv) {
1830: case INSERT_VALUES:
1831: case INSERT_ALL_VALUES:
1832: for (i=0; i<n; i++) {
1833: idx = *indicesx++;
1834: idy = *indicesy++;
1835: y[idy] = x[idx];
1836: y[idy+1] = x[idx+1];
1837: y[idy+2] = x[idx+2];
1838: y[idy+3] = x[idx+3];
1839: y[idy+4] = x[idx+4];
1840: y[idy+5] = x[idx+5];
1841: y[idy+6] = x[idx+6];
1842: y[idy+7] = x[idx+7];
1843: y[idy+8] = x[idx+8];
1844: y[idy+9] = x[idx+9];
1845: y[idy+10] = x[idx+10];
1846: }
1847: break;
1848: case ADD_VALUES:
1849: case ADD_ALL_VALUES:
1850: for (i=0; i<n; i++) {
1851: idx = *indicesx++;
1852: idy = *indicesy++;
1853: y[idy] += x[idx];
1854: y[idy+1] += x[idx+1];
1855: y[idy+2] += x[idx+2];
1856: y[idy+3] += x[idx+3];
1857: y[idy+4] += x[idx+4];
1858: y[idy+5] += x[idx+5];
1859: y[idy+6] += x[idx+6];
1860: y[idy+7] += x[idx+7];
1861: y[idy+8] += x[idx+8];
1862: y[idy+9] += x[idx+9];
1863: y[idy+10] += x[idx+10];
1864: }
1865: break;
1866: #if !defined(PETSC_USE_COMPLEX)
1867: case MAX_VALUES:
1868: for (i=0; i<n; i++) {
1869: idx = *indicesx++;
1870: idy = *indicesy++;
1871: y[idy] = PetscMax(y[idy],x[idx]);
1872: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1873: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1874: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1875: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1876: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1877: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1878: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1879: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1880: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1881: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1882: }
1883: #else
1884: case MAX_VALUES:
1885: #endif
1886: case NOT_SET_VALUES:
1887: break;
1888: default:
1889: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1890: }
1891: return(0);
1892: }
1894: /* ----------------------------------------------------------------------------------------------- */
1895: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1896: {
1897: PetscInt i,idx;
1899: for (i=0; i<n; i++) {
1900: idx = *indicesx++;
1901: y[0] = x[idx];
1902: y[1] = x[idx+1];
1903: y[2] = x[idx+2];
1904: y[3] = x[idx+3];
1905: y[4] = x[idx+4];
1906: y[5] = x[idx+5];
1907: y[6] = x[idx+6];
1908: y[7] = x[idx+7];
1909: y[8] = x[idx+8];
1910: y[9] = x[idx+9];
1911: y[10] = x[idx+10];
1912: y[11] = x[idx+11];
1913: y += 12;
1914: }
1915: }
1919: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1920: {
1921: PetscInt i,idy;
1924: switch (addv) {
1925: case INSERT_VALUES:
1926: case INSERT_ALL_VALUES:
1927: for (i=0; i<n; i++) {
1928: idy = *indicesy++;
1929: y[idy] = x[0];
1930: y[idy+1] = x[1];
1931: y[idy+2] = x[2];
1932: y[idy+3] = x[3];
1933: y[idy+4] = x[4];
1934: y[idy+5] = x[5];
1935: y[idy+6] = x[6];
1936: y[idy+7] = x[7];
1937: y[idy+8] = x[8];
1938: y[idy+9] = x[9];
1939: y[idy+10] = x[10];
1940: y[idy+11] = x[11];
1941: x += 12;
1942: }
1943: break;
1944: case ADD_VALUES:
1945: case ADD_ALL_VALUES:
1946: for (i=0; i<n; i++) {
1947: idy = *indicesy++;
1948: y[idy] += x[0];
1949: y[idy+1] += x[1];
1950: y[idy+2] += x[2];
1951: y[idy+3] += x[3];
1952: y[idy+4] += x[4];
1953: y[idy+5] += x[5];
1954: y[idy+6] += x[6];
1955: y[idy+7] += x[7];
1956: y[idy+8] += x[8];
1957: y[idy+9] += x[9];
1958: y[idy+10] += x[10];
1959: y[idy+11] += x[11];
1960: x += 12;
1961: }
1962: break;
1963: #if !defined(PETSC_USE_COMPLEX)
1964: case MAX_VALUES:
1965: for (i=0; i<n; i++) {
1966: idy = *indicesy++;
1967: y[idy] = PetscMax(y[idy],x[0]);
1968: y[idy+1] = PetscMax(y[idy+1],x[1]);
1969: y[idy+2] = PetscMax(y[idy+2],x[2]);
1970: y[idy+3] = PetscMax(y[idy+3],x[3]);
1971: y[idy+4] = PetscMax(y[idy+4],x[4]);
1972: y[idy+5] = PetscMax(y[idy+5],x[5]);
1973: y[idy+6] = PetscMax(y[idy+6],x[6]);
1974: y[idy+7] = PetscMax(y[idy+7],x[7]);
1975: y[idy+8] = PetscMax(y[idy+8],x[8]);
1976: y[idy+9] = PetscMax(y[idy+9],x[9]);
1977: y[idy+10] = PetscMax(y[idy+10],x[10]);
1978: y[idy+11] = PetscMax(y[idy+11],x[11]);
1979: x += 12;
1980: }
1981: #else
1982: case MAX_VALUES:
1983: #endif
1984: case NOT_SET_VALUES:
1985: break;
1986: default:
1987: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1988: }
1989: return(0);
1990: }
1994: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1995: {
1996: PetscInt i,idx,idy;
1999: switch (addv) {
2000: case INSERT_VALUES:
2001: case INSERT_ALL_VALUES:
2002: for (i=0; i<n; i++) {
2003: idx = *indicesx++;
2004: idy = *indicesy++;
2005: y[idy] = x[idx];
2006: y[idy+1] = x[idx+1];
2007: y[idy+2] = x[idx+2];
2008: y[idy+3] = x[idx+3];
2009: y[idy+4] = x[idx+4];
2010: y[idy+5] = x[idx+5];
2011: y[idy+6] = x[idx+6];
2012: y[idy+7] = x[idx+7];
2013: y[idy+8] = x[idx+8];
2014: y[idy+9] = x[idx+9];
2015: y[idy+10] = x[idx+10];
2016: y[idy+11] = x[idx+11];
2017: }
2018: break;
2019: case ADD_VALUES:
2020: case ADD_ALL_VALUES:
2021: for (i=0; i<n; i++) {
2022: idx = *indicesx++;
2023: idy = *indicesy++;
2024: y[idy] += x[idx];
2025: y[idy+1] += x[idx+1];
2026: y[idy+2] += x[idx+2];
2027: y[idy+3] += x[idx+3];
2028: y[idy+4] += x[idx+4];
2029: y[idy+5] += x[idx+5];
2030: y[idy+6] += x[idx+6];
2031: y[idy+7] += x[idx+7];
2032: y[idy+8] += x[idx+8];
2033: y[idy+9] += x[idx+9];
2034: y[idy+10] += x[idx+10];
2035: y[idy+11] += x[idx+11];
2036: }
2037: break;
2038: #if !defined(PETSC_USE_COMPLEX)
2039: case MAX_VALUES:
2040: for (i=0; i<n; i++) {
2041: idx = *indicesx++;
2042: idy = *indicesy++;
2043: y[idy] = PetscMax(y[idy],x[idx]);
2044: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
2045: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
2046: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
2047: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
2048: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
2049: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
2050: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
2051: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
2052: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
2053: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
2054: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
2055: }
2056: #else
2057: case MAX_VALUES:
2058: #endif
2059: case NOT_SET_VALUES:
2060: break;
2061: default:
2062: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2063: }
2064: return(0);
2065: }
2067: /* ----------------------------------------------------------------------------------------------- */
2068: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
2069: {
2070: PetscInt i,idx;
2072: for (i=0; i<n; i++) {
2073: idx = *indicesx++;
2074: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
2075: y += bs;
2076: }
2077: }
2081: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2082: {
2083: PetscInt i,idy,j;
2086: switch (addv) {
2087: case INSERT_VALUES:
2088: case INSERT_ALL_VALUES:
2089: for (i=0; i<n; i++) {
2090: idy = *indicesy++;
2091: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
2092: x += bs;
2093: }
2094: break;
2095: case ADD_VALUES:
2096: case ADD_ALL_VALUES:
2097: for (i=0; i<n; i++) {
2098: idy = *indicesy++;
2099: for (j=0; j<bs; j++) y[idy+j] += x[j];
2100: x += bs;
2101: }
2102: break;
2103: #if !defined(PETSC_USE_COMPLEX)
2104: case MAX_VALUES:
2105: for (i=0; i<n; i++) {
2106: idy = *indicesy++;
2107: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2108: x += bs;
2109: }
2110: #else
2111: case MAX_VALUES:
2112: #endif
2113: case NOT_SET_VALUES:
2114: break;
2115: default:
2116: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2117: }
2118: return(0);
2119: }
2123: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2124: {
2125: PetscInt i,idx,idy,j;
2128: switch (addv) {
2129: case INSERT_VALUES:
2130: case INSERT_ALL_VALUES:
2131: for (i=0; i<n; i++) {
2132: idx = *indicesx++;
2133: idy = *indicesy++;
2134: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2135: }
2136: break;
2137: case ADD_VALUES:
2138: case ADD_ALL_VALUES:
2139: for (i=0; i<n; i++) {
2140: idx = *indicesx++;
2141: idy = *indicesy++;
2142: for (j=0; j<bs; j++ ) y[idy+j] += x[idx+j];
2143: }
2144: break;
2145: #if !defined(PETSC_USE_COMPLEX)
2146: case MAX_VALUES:
2147: for (i=0; i<n; i++) {
2148: idx = *indicesx++;
2149: idy = *indicesy++;
2150: for (j=0; j<bs; j++ ) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2151: }
2152: #else
2153: case MAX_VALUES:
2154: #endif
2155: case NOT_SET_VALUES:
2156: break;
2157: default:
2158: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2159: }
2160: return(0);
2161: }
2163: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2164: #define BS 1
2165: #include <../src/vec/vec/utils/vpscat.h>
2166: #define BS 2
2167: #include <../src/vec/vec/utils/vpscat.h>
2168: #define BS 3
2169: #include <../src/vec/vec/utils/vpscat.h>
2170: #define BS 4
2171: #include <../src/vec/vec/utils/vpscat.h>
2172: #define BS 5
2173: #include <../src/vec/vec/utils/vpscat.h>
2174: #define BS 6
2175: #include <../src/vec/vec/utils/vpscat.h>
2176: #define BS 7
2177: #include <../src/vec/vec/utils/vpscat.h>
2178: #define BS 8
2179: #include <../src/vec/vec/utils/vpscat.h>
2180: #define BS 9
2181: #include <../src/vec/vec/utils/vpscat.h>
2182: #define BS 10
2183: #include <../src/vec/vec/utils/vpscat.h>
2184: #define BS 11
2185: #include <../src/vec/vec/utils/vpscat.h>
2186: #define BS 12
2187: #include <../src/vec/vec/utils/vpscat.h>
2188: #define BS bs
2189: #include <../src/vec/vec/utils/vpscat.h>
2191: /* ==========================================================================================*/
2193: /* create parallel to sequential scatter context */
2195: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2199: /*@
2200: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
2202: Collective on VecScatter
2204: Input Parameters:
2205: + VecScatter - obtained with VecScatterCreateEmpty()
2206: . nsends -
2207: . sendSizes -
2208: . sendProcs -
2209: . sendIdx - indices where the sent entries are obtained from (in local, on process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
2210: . nrecvs - number of receives to expect
2211: . recvSizes -
2212: . recvProcs - processes who are sending to me
2213: . recvIdx - indices of where received entries are to be put, (in local, on process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
2214: - bs - size of block
2216: Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
2217: to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
2219: Probably does not handle sends to self properly. It should remove those from the counts that are used
2220: in allocating space inside of the from struct
2222: Level: intermediate
2224: @*/
2225: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
2226: {
2227: VecScatter_MPI_General *from, *to;
2228: PetscInt sendSize, recvSize;
2229: PetscInt n, i;
2230: PetscErrorCode ierr;
2232: /* allocate entire send scatter context */
2233: PetscNewLog(ctx,&to);
2234: to->n = nsends;
2235: for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];
2237: PetscMalloc1(to->n,&to->requests);
2238: PetscMalloc4(bs*sendSize,&to->values,sendSize,&to->indices,to->n+1,&to->starts,to->n,&to->procs);
2239: PetscMalloc2(PetscMax(to->n,nrecvs),&to->sstatus,PetscMax(to->n,nrecvs),&to->rstatus);
2241: to->starts[0] = 0;
2242: for (n = 0; n < to->n; n++) {
2243: if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
2244: to->starts[n+1] = to->starts[n] + sendSizes[n];
2245: to->procs[n] = sendProcs[n];
2246: for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
2247: }
2248: ctx->todata = (void*) to;
2250: /* allocate entire receive scatter context */
2251: PetscNewLog(ctx,&from);
2252: from->n = nrecvs;
2253: for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];
2255: PetscMalloc1(from->n,&from->requests);
2256: PetscMalloc4(bs*recvSize,&from->values,recvSize,&from->indices,from->n+1,&from->starts,from->n,&from->procs);
2258: from->starts[0] = 0;
2259: for (n = 0; n < from->n; n++) {
2260: if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
2261: from->starts[n+1] = from->starts[n] + recvSizes[n];
2262: from->procs[n] = recvProcs[n];
2263: for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
2264: }
2265: ctx->fromdata = (void*)from;
2267: /* No local scatter optimization */
2268: from->local.n = 0;
2269: from->local.vslots = 0;
2270: to->local.n = 0;
2271: to->local.vslots = 0;
2272: from->local.nonmatching_computed = PETSC_FALSE;
2273: from->local.n_nonmatching = 0;
2274: from->local.slots_nonmatching = 0;
2275: to->local.nonmatching_computed = PETSC_FALSE;
2276: to->local.n_nonmatching = 0;
2277: to->local.slots_nonmatching = 0;
2279: from->type = VEC_SCATTER_MPI_GENERAL;
2280: to->type = VEC_SCATTER_MPI_GENERAL;
2281: from->bs = bs;
2282: to->bs = bs;
2283: VecScatterCreateCommon_PtoS(from, to, ctx);
2285: /* mark lengths as negative so it won't check local vector lengths */
2286: ctx->from_n = ctx->to_n = -1;
2287: return(0);
2288: }
2290: /*
2291: bs indicates how many elements there are in each block. Normally this would be 1.
2293: contains check that PetscMPIInt can handle the sizes needed
2294: */
2297: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2298: {
2299: VecScatter_MPI_General *from,*to;
2300: PetscMPIInt size,rank,imdex,tag,n;
2301: PetscInt *source = NULL,*owners = NULL,nxr;
2302: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
2303: PetscMPIInt *nprocs = NULL,nrecvs;
2304: PetscInt i,j,idx,nsends;
2305: PetscMPIInt *owner = NULL;
2306: PetscInt *starts = NULL,count,slen;
2307: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2308: PetscMPIInt *onodes1,*olengths1;
2309: MPI_Comm comm;
2310: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2311: MPI_Status recv_status,*send_status;
2312: PetscErrorCode ierr;
2315: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2316: PetscObjectGetComm((PetscObject)xin,&comm);
2317: MPI_Comm_rank(comm,&rank);
2318: MPI_Comm_size(comm,&size);
2319: owners = xin->map->range;
2320: VecGetSize(yin,&lengthy);
2321: VecGetSize(xin,&lengthx);
2323: /* first count number of contributors to each processor */
2324: PetscMalloc2(size,&nprocs,nx,&owner);
2325: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2327: j = 0;
2328: nsends = 0;
2329: for (i=0; i<nx; i++) {
2330: idx = bs*inidx[i];
2331: if (idx < owners[j]) j = 0;
2332: for (; j<size; j++) {
2333: if (idx < owners[j+1]) {
2334: if (!nprocs[j]++) nsends++;
2335: owner[i] = j;
2336: break;
2337: }
2338: }
2339: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2340: }
2342: nprocslocal = nprocs[rank];
2343: nprocs[rank] = 0;
2344: if (nprocslocal) nsends--;
2345: /* inform other processors of number of messages and max length*/
2346: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2347: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2348: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2349: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2351: /* post receives: */
2352: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2353: count = 0;
2354: for (i=0; i<nrecvs; i++) {
2355: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2356: count += olengths1[i];
2357: }
2359: /* do sends:
2360: 1) starts[i] gives the starting index in svalues for stuff going to
2361: the ith processor
2362: */
2363: nxr = 0;
2364: for (i=0; i<nx; i++) {
2365: if (owner[i] != rank) nxr++;
2366: }
2367: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2369: starts[0] = 0;
2370: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2371: for (i=0; i<nx; i++) {
2372: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2373: }
2374: starts[0] = 0;
2375: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2376: count = 0;
2377: for (i=0; i<size; i++) {
2378: if (nprocs[i]) {
2379: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2380: }
2381: }
2383: /* wait on receives */
2384: count = nrecvs;
2385: slen = 0;
2386: while (count) {
2387: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2388: /* unpack receives into our local space */
2389: MPI_Get_count(&recv_status,MPIU_INT,&n);
2390: slen += n;
2391: count--;
2392: }
2394: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
2396: /* allocate entire send scatter context */
2397: PetscNewLog(ctx,&to);
2398: to->n = nrecvs;
2400: PetscMalloc1(nrecvs,&to->requests);
2401: PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2402: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2404: ctx->todata = (void*)to;
2405: to->starts[0] = 0;
2407: if (nrecvs) {
2408: /* move the data into the send scatter */
2409: base = owners[rank];
2410: rsvalues = rvalues;
2411: for (i=0; i<nrecvs; i++) {
2412: to->starts[i+1] = to->starts[i] + olengths1[i];
2413: to->procs[i] = onodes1[i];
2414: values = rsvalues;
2415: rsvalues += olengths1[i];
2416: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2417: }
2418: }
2419: PetscFree(olengths1);
2420: PetscFree(onodes1);
2421: PetscFree3(rvalues,source,recv_waits);
2423: /* allocate entire receive scatter context */
2424: PetscNewLog(ctx,&from);
2425: from->n = nsends;
2427: PetscMalloc1(nsends,&from->requests);
2428: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2429: ctx->fromdata = (void*)from;
2431: /* move data into receive scatter */
2432: PetscMalloc2(size,&lowner,nsends+1,&start);
2433: count = 0; from->starts[0] = start[0] = 0;
2434: for (i=0; i<size; i++) {
2435: if (nprocs[i]) {
2436: lowner[i] = count;
2437: from->procs[count++] = i;
2438: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2439: }
2440: }
2442: for (i=0; i<nx; i++) {
2443: if (owner[i] != rank) {
2444: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2445: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2446: }
2447: }
2448: PetscFree2(lowner,start);
2449: PetscFree2(nprocs,owner);
2451: /* wait on sends */
2452: if (nsends) {
2453: PetscMalloc1(nsends,&send_status);
2454: MPI_Waitall(nsends,send_waits,send_status);
2455: PetscFree(send_status);
2456: }
2457: PetscFree3(svalues,send_waits,starts);
2459: if (nprocslocal) {
2460: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2461: /* we have a scatter to ourselves */
2462: PetscMalloc1(nt,&to->local.vslots);
2463: PetscMalloc1(nt,&from->local.vslots);
2464: nt = 0;
2465: for (i=0; i<nx; i++) {
2466: idx = bs*inidx[i];
2467: if (idx >= owners[rank] && idx < owners[rank+1]) {
2468: to->local.vslots[nt] = idx - owners[rank];
2469: from->local.vslots[nt++] = bs*inidy[i];
2470: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2471: }
2472: }
2473: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2474: } else {
2475: from->local.n = 0;
2476: from->local.vslots = 0;
2477: to->local.n = 0;
2478: to->local.vslots = 0;
2479: }
2481: from->local.nonmatching_computed = PETSC_FALSE;
2482: from->local.n_nonmatching = 0;
2483: from->local.slots_nonmatching = 0;
2484: to->local.nonmatching_computed = PETSC_FALSE;
2485: to->local.n_nonmatching = 0;
2486: to->local.slots_nonmatching = 0;
2488: from->type = VEC_SCATTER_MPI_GENERAL;
2489: to->type = VEC_SCATTER_MPI_GENERAL;
2490: from->bs = bs;
2491: to->bs = bs;
2493: VecScatterCreateCommon_PtoS(from,to,ctx);
2494: return(0);
2495: }
2497: /*
2498: bs indicates how many elements there are in each block. Normally this would be 1.
2499: */
2502: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2503: {
2504: MPI_Comm comm;
2505: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2506: PetscInt bs = to->bs;
2507: PetscMPIInt size;
2508: PetscInt i, n;
2512: PetscObjectGetComm((PetscObject)ctx,&comm);
2513: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2514: ctx->destroy = VecScatterDestroy_PtoP;
2516: ctx->reproduce = PETSC_FALSE;
2517: to->sendfirst = PETSC_FALSE;
2518: PetscOptionsGetBool(NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2519: PetscOptionsGetBool(NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2520: from->sendfirst = to->sendfirst;
2522: MPI_Comm_size(comm,&size);
2523: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2524: to->contiq = PETSC_FALSE;
2525: n = from->starts[from->n];
2526: from->contiq = PETSC_TRUE;
2527: for (i=1; i<n; i++) {
2528: if (from->indices[i] != from->indices[i-1] + bs) {
2529: from->contiq = PETSC_FALSE;
2530: break;
2531: }
2532: }
2534: to->use_alltoallv = PETSC_FALSE;
2535: PetscOptionsGetBool(NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2536: from->use_alltoallv = to->use_alltoallv;
2537: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2538: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2539: if (to->use_alltoallv) {
2540: to->use_alltoallw = PETSC_FALSE;
2541: PetscOptionsGetBool(NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2542: }
2543: from->use_alltoallw = to->use_alltoallw;
2544: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2545: #endif
2547: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2548: to->use_window = PETSC_FALSE;
2549: PetscOptionsGetBool(NULL,"-vecscatter_window",&to->use_window,NULL);
2550: from->use_window = to->use_window;
2551: #endif
2553: if (to->use_alltoallv) {
2555: PetscMalloc2(size,&to->counts,size,&to->displs);
2556: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2557: for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
2559: to->displs[0] = 0;
2560: for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];
2562: PetscMalloc2(size,&from->counts,size,&from->displs);
2563: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2564: for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2565: from->displs[0] = 0;
2566: for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];
2568: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2569: if (to->use_alltoallw) {
2570: PetscMPIInt mpibs, mpilen;
2572: ctx->packtogether = PETSC_FALSE;
2573: PetscMPIIntCast(bs,&mpibs);
2574: PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2575: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2576: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2577: for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;
2579: for (i=0; i<to->n; i++) {
2580: to->wcounts[to->procs[i]] = 1;
2581: PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2582: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2583: MPI_Type_commit(to->types+to->procs[i]);
2584: }
2585: PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2586: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2587: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2588: for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;
2590: if (from->contiq) {
2591: PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
2592: for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2594: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2595: for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
2597: } else {
2598: for (i=0; i<from->n; i++) {
2599: from->wcounts[from->procs[i]] = 1;
2600: PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2601: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2602: MPI_Type_commit(from->types+from->procs[i]);
2603: }
2604: }
2605: } else ctx->copy = VecScatterCopy_PtoP_AllToAll;
2607: #else
2608: to->use_alltoallw = PETSC_FALSE;
2609: from->use_alltoallw = PETSC_FALSE;
2610: ctx->copy = VecScatterCopy_PtoP_AllToAll;
2611: #endif
2612: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2613: } else if (to->use_window) {
2614: PetscMPIInt temptag,winsize;
2615: MPI_Request *request;
2616: MPI_Status *status;
2618: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2619: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2620: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2621: PetscMalloc1(to->n,&to->winstarts);
2622: PetscMalloc2(to->n,&request,to->n,&status);
2623: for (i=0; i<to->n; i++) {
2624: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2625: }
2626: for (i=0; i<from->n; i++) {
2627: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2628: }
2629: MPI_Waitall(to->n,request,status);
2630: PetscFree2(request,status);
2632: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2633: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2634: PetscMalloc1(from->n,&from->winstarts);
2635: PetscMalloc2(from->n,&request,from->n,&status);
2636: for (i=0; i<from->n; i++) {
2637: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2638: }
2639: for (i=0; i<to->n; i++) {
2640: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2641: }
2642: MPI_Waitall(from->n,request,status);
2643: PetscFree2(request,status);
2644: #endif
2645: } else {
2646: PetscBool use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2647: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2648: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2649: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2650: MPI_Request *rev_swaits,*rev_rwaits;
2651: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2653: /* allocate additional wait variables for the "reverse" scatter */
2654: PetscMalloc1(to->n,&rev_rwaits);
2655: PetscMalloc1(from->n,&rev_swaits);
2656: to->rev_requests = rev_rwaits;
2657: from->rev_requests = rev_swaits;
2659: /* Register the receives that you will use later (sends for scatter reverse) */
2660: PetscOptionsGetBool(NULL,"-vecscatter_rsend",&use_rsend,NULL);
2661: PetscOptionsGetBool(NULL,"-vecscatter_ssend",&use_ssend,NULL);
2662: if (use_rsend) {
2663: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2664: to->use_readyreceiver = PETSC_TRUE;
2665: from->use_readyreceiver = PETSC_TRUE;
2666: } else {
2667: to->use_readyreceiver = PETSC_FALSE;
2668: from->use_readyreceiver = PETSC_FALSE;
2669: }
2670: if (use_ssend) {
2671: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2672: }
2674: for (i=0; i<from->n; i++) {
2675: if (use_rsend) {
2676: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2677: } else if (use_ssend) {
2678: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2679: } else {
2680: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2681: }
2682: }
2684: for (i=0; i<to->n; i++) {
2685: if (use_rsend) {
2686: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2687: } else if (use_ssend) {
2688: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2689: } else {
2690: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2691: }
2692: }
2693: /* Register receives for scatter and reverse */
2694: for (i=0; i<from->n; i++) {
2695: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2696: }
2697: for (i=0; i<to->n; i++) {
2698: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2699: }
2700: if (use_rsend) {
2701: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2702: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2703: MPI_Barrier(comm);
2704: }
2706: ctx->copy = VecScatterCopy_PtoP_X;
2707: }
2708: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2710: #if defined(PETSC_USE_DEBUG)
2711: MPI_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2712: MPI_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2713: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2714: #endif
2716: switch (bs) {
2717: case 12:
2718: ctx->begin = VecScatterBegin_12;
2719: ctx->end = VecScatterEnd_12;
2720: break;
2721: case 11:
2722: ctx->begin = VecScatterBegin_11;
2723: ctx->end = VecScatterEnd_11;
2724: break;
2725: case 10:
2726: ctx->begin = VecScatterBegin_10;
2727: ctx->end = VecScatterEnd_10;
2728: break;
2729: case 9:
2730: ctx->begin = VecScatterBegin_9;
2731: ctx->end = VecScatterEnd_9;
2732: break;
2733: case 8:
2734: ctx->begin = VecScatterBegin_8;
2735: ctx->end = VecScatterEnd_8;
2736: break;
2737: case 7:
2738: ctx->begin = VecScatterBegin_7;
2739: ctx->end = VecScatterEnd_7;
2740: break;
2741: case 6:
2742: ctx->begin = VecScatterBegin_6;
2743: ctx->end = VecScatterEnd_6;
2744: break;
2745: case 5:
2746: ctx->begin = VecScatterBegin_5;
2747: ctx->end = VecScatterEnd_5;
2748: break;
2749: case 4:
2750: ctx->begin = VecScatterBegin_4;
2751: ctx->end = VecScatterEnd_4;
2752: break;
2753: case 3:
2754: ctx->begin = VecScatterBegin_3;
2755: ctx->end = VecScatterEnd_3;
2756: break;
2757: case 2:
2758: ctx->begin = VecScatterBegin_2;
2759: ctx->end = VecScatterEnd_2;
2760: break;
2761: case 1:
2762: ctx->begin = VecScatterBegin_1;
2763: ctx->end = VecScatterEnd_1;
2764: break;
2765: default:
2766: ctx->begin = VecScatterBegin_bs;
2767: ctx->end = VecScatterEnd_bs;
2769: }
2770: ctx->view = VecScatterView_MPI;
2771: /* Check if the local scatter is actually a copy; important special case */
2772: if (to->local.n) {
2773: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2774: }
2775: return(0);
2776: }
2780: /* ------------------------------------------------------------------------------------*/
2781: /*
2782: Scatter from local Seq vectors to a parallel vector.
2783: Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2784: reverses the result.
2785: */
2788: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2789: {
2790: PetscErrorCode ierr;
2791: MPI_Request *waits;
2792: VecScatter_MPI_General *to,*from;
2795: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2796: to = (VecScatter_MPI_General*)ctx->fromdata;
2797: from = (VecScatter_MPI_General*)ctx->todata;
2798: ctx->todata = (void*)to;
2799: ctx->fromdata = (void*)from;
2800: /* these two are special, they are ALWAYS stored in to struct */
2801: to->sstatus = from->sstatus;
2802: to->rstatus = from->rstatus;
2804: from->sstatus = 0;
2805: from->rstatus = 0;
2807: waits = from->rev_requests;
2808: from->rev_requests = from->requests;
2809: from->requests = waits;
2810: waits = to->rev_requests;
2811: to->rev_requests = to->requests;
2812: to->requests = waits;
2813: return(0);
2814: }
2816: /* ---------------------------------------------------------------------------------*/
2819: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2820: {
2822: PetscMPIInt size,rank,tag,imdex,n;
2823: PetscInt *owners = xin->map->range;
2824: PetscMPIInt *nprocs = NULL;
2825: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2826: PetscMPIInt *owner = NULL;
2827: PetscInt *starts = NULL,count,slen;
2828: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2829: PetscMPIInt *onodes1,*olengths1,nrecvs;
2830: MPI_Comm comm;
2831: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2832: MPI_Status recv_status,*send_status = NULL;
2833: PetscBool duplicate = PETSC_FALSE;
2834: #if defined(PETSC_USE_DEBUG)
2835: PetscBool found = PETSC_FALSE;
2836: #endif
2839: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2840: PetscObjectGetComm((PetscObject)xin,&comm);
2841: MPI_Comm_size(comm,&size);
2842: MPI_Comm_rank(comm,&rank);
2843: if (size == 1) {
2844: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2845: return(0);
2846: }
2848: /*
2849: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2850: They then call the StoPScatterCreate()
2851: */
2852: /* first count number of contributors to each processor */
2853: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2854: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2856: lastidx = -1;
2857: j = 0;
2858: for (i=0; i<nx; i++) {
2859: /* if indices are NOT locally sorted, need to start search at the beginning */
2860: if (lastidx > (idx = bs*inidx[i])) j = 0;
2861: lastidx = idx;
2862: for (; j<size; j++) {
2863: if (idx >= owners[j] && idx < owners[j+1]) {
2864: nprocs[j]++;
2865: owner[i] = j;
2866: #if defined(PETSC_USE_DEBUG)
2867: found = PETSC_TRUE;
2868: #endif
2869: break;
2870: }
2871: }
2872: #if defined(PETSC_USE_DEBUG)
2873: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2874: found = PETSC_FALSE;
2875: #endif
2876: }
2877: nsends = 0;
2878: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2880: /* inform other processors of number of messages and max length*/
2881: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2882: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2883: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2884: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2886: /* post receives: */
2887: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2889: count = 0;
2890: for (i=0; i<nrecvs; i++) {
2891: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2892: count += olengths1[i];
2893: }
2894: PetscFree(onodes1);
2896: /* do sends:
2897: 1) starts[i] gives the starting index in svalues for stuff going to
2898: the ith processor
2899: */
2900: starts[0]= 0;
2901: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2902: for (i=0; i<nx; i++) {
2903: svalues[2*starts[owner[i]]] = bs*inidx[i];
2904: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2905: }
2907: starts[0] = 0;
2908: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2909: count = 0;
2910: for (i=0; i<size; i++) {
2911: if (nprocs[i]) {
2912: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2913: count++;
2914: }
2915: }
2916: PetscFree3(nprocs,owner,starts);
2918: /* wait on receives */
2919: count = nrecvs;
2920: slen = 0;
2921: while (count) {
2922: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2923: /* unpack receives into our local space */
2924: MPI_Get_count(&recv_status,MPIU_INT,&n);
2925: slen += n/2;
2926: count--;
2927: }
2928: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2930: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2931: base = owners[rank];
2932: count = 0;
2933: rsvalues = rvalues;
2934: for (i=0; i<nrecvs; i++) {
2935: values = rsvalues;
2936: rsvalues += 2*olengths1[i];
2937: for (j=0; j<olengths1[i]; j++) {
2938: local_inidx[count] = values[2*j] - base;
2939: local_inidy[count++] = values[2*j+1];
2940: }
2941: }
2942: PetscFree(olengths1);
2944: /* wait on sends */
2945: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2946: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2948: /*
2949: should sort and remove duplicates from local_inidx,local_inidy
2950: */
2952: #if defined(do_it_slow)
2953: /* sort on the from index */
2954: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2955: start = 0;
2956: while (start < slen) {
2957: count = start+1;
2958: last = local_inidx[start];
2959: while (count < slen && last == local_inidx[count]) count++;
2960: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2961: /* sort on to index */
2962: PetscSortInt(count-start,local_inidy+start);
2963: }
2964: /* remove duplicates; not most efficient way, but probably good enough */
2965: i = start;
2966: while (i < count-1) {
2967: if (local_inidy[i] != local_inidy[i+1]) i++;
2968: else { /* found a duplicate */
2969: duplicate = PETSC_TRUE;
2970: for (j=i; j<slen-1; j++) {
2971: local_inidx[j] = local_inidx[j+1];
2972: local_inidy[j] = local_inidy[j+1];
2973: }
2974: slen--;
2975: count--;
2976: }
2977: }
2978: start = count;
2979: }
2980: #endif
2981: if (duplicate) {
2982: PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2983: }
2984: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2985: PetscFree2(local_inidx,local_inidy);
2986: return(0);
2987: }