Actual source code: mpiov.c
2: /*
3: Routines to compute overlapping regions of a parallel MPI matrix
4: and to find submatrices that are shared across processors.
5: */
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <petscbt.h>
9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS *);
10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char **,PetscInt*,PetscInt**);
11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt **,PetscInt**,PetscInt*);
17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
18: {
20: PetscInt i;
23: if (ov < 0) SETERRQ(((PetscObject)C)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
24: for (i=0; i<ov; ++i) {
25: MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
26: }
27: return(0);
28: }
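/*
   A minimal usage sketch (illustrative only; the matrix C and the array is[] are placeholders):
   the routine above is normally reached through the public MatIncreaseOverlap() interface,

      IS is[2];
      (create is[0] and is[1], e.g. with ISCreateGeneral)
      MatIncreaseOverlap(C,2,is,1);   (grow each index set by one level of overlap)

   Each call to MatIncreaseOverlap_MPIAIJ_Once() above performs one such level.
*/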
30: /*
31: Sample message format:
32: If a processor A wants processor B to process some elements corresponding
33: to index sets is[1],is[5]
34: mesg [0] = 2 (no of index sets in the mesg)
35: -----------
36: mesg [1] = 1 => is[1]
37: mesg [2] = sizeof(is[1]);
38: -----------
39: mesg [3] = 5 => is[5]
40: mesg [4] = sizeof(is[5]);
41: -----------
42: mesg [5]
43: mesg [n] data(is[1])
44: -----------
45: mesg [n+1]
46: mesg [m] data(is[5])
47: -----------
48:
49: Notes:
50: nrqs - no of requests sent (or to be sent out)
51: nrqr - no of requests received (which have to be, or which have been, processed)
52: */
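/*
   A concrete (hypothetical) instance of the format above: if this process must ask processor B
   about 3 rows {10,11,12} belonging to is[1] and 2 rows {40,41} belonging to is[5], the packed
   buffer is

      mesg = { 2,  1,3,  5,2,  10,11,12,  40,41 }

   of length 1 + 2*(no of IS) + (total no of rows) = 1 + 4 + 5 = 10, which is exactly the count
   accumulated in w1[] below.
*/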
55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
56: {
57: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
58: PetscMPIInt *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
59: const PetscInt **idx,*idx_i;
60: PetscInt *n,**data,len;
62: PetscMPIInt size,rank,tag1,tag2;
63: PetscInt M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
64: PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
65: PetscBT *table;
66: MPI_Comm comm;
67: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2;
68: MPI_Status *s_status,*recv_status;
69: char *t_p;
72: comm = ((PetscObject)C)->comm;
73: size = c->size;
74: rank = c->rank;
75: M = C->rmap->N;
77: PetscObjectGetNewTag((PetscObject)C,&tag1);
78: PetscObjectGetNewTag((PetscObject)C,&tag2);
79:
80: PetscMalloc2(imax,PetscInt*,&idx,imax,PetscInt,&n);
82: for (i=0; i<imax; i++) {
83: ISGetIndices(is[i],&idx[i]);
84: ISGetLocalSize(is[i],&n[i]);
85: }
87: /* evaluate communication - to whom messages go, the length of each message, and the buffer space
88: required. Based on this, buffers are allocated and data is copied into them */
89: PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4);
90: PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
91: PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
92: PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
93: for (i=0; i<imax; i++) {
94: PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
95: idx_i = idx[i];
96: len = n[i];
97: for (j=0; j<len; j++) {
98: row = idx_i[j];
99: if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100: PetscLayoutFindOwner(C->rmap,row,&proc);
101: w4[proc]++;
102: }
103: for (j=0; j<size; j++){
104: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105: }
106: }
108: nrqs = 0; /* no of outgoing messages */
109: msz = 0; /* total mesg length (for all procs) */
110: w1[rank] = 0; /* no mesg sent to itself */
111: w3[rank] = 0;
112: for (i=0; i<size; i++) {
113: if (w1[i]) {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114: }
115: /* pa - is list of processors to communicate with */
116: PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);
117: for (i=0,j=0; i<size; i++) {
118: if (w1[i]) {pa[j] = i; j++;}
119: }
121: /* Each message would have a header = 1 + 2*(no of IS) + data */
122: for (i=0; i<nrqs; i++) {
123: j = pa[i];
124: w1[j] += w2[j] + 2*w3[j];
125: msz += w1[j];
126: }
128: /* Determine the number of messages to expect, their lengths, and their from-ids (source processes) */
129: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
132: /* Now post the Irecvs corresponding to these messages */
133: PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);
135: /* Allocate Memory for outgoing messages */
136: PetscMalloc4(size,PetscInt*,&outdat,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);
137: PetscMemzero(outdat,size*sizeof(PetscInt*));
138: PetscMemzero(ptr,size*sizeof(PetscInt*));
140: {
141: PetscInt *iptr = tmp,ict = 0;
142: for (i=0; i<nrqs; i++) {
143: j = pa[i];
144: iptr += ict;
145: outdat[j] = iptr;
146: ict = w1[j];
147: }
148: }
150: /* Form the outgoing messages */
151: /*plug in the headers*/
152: for (i=0; i<nrqs; i++) {
153: j = pa[i];
154: outdat[j][0] = 0;
155: PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156: ptr[j] = outdat[j] + 2*w3[j] + 1;
157: }
158:
159: /* Memory for doing local proc's work*/
160: {
161: PetscMalloc5(imax,PetscBT,&table, imax,PetscInt*,&data, imax,PetscInt,&isz,
162: M*imax,PetscInt,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,char,&t_p);
163: PetscMemzero(table,imax*sizeof(PetscBT));
164: PetscMemzero(data,imax*sizeof(PetscInt*));
165: PetscMemzero(isz,imax*sizeof(PetscInt));
166: PetscMemzero(d_p,M*imax*sizeof(PetscInt));
167: PetscMemzero(t_p,(M/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));
169: for (i=0; i<imax; i++) {
170: table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
171: data[i] = d_p + M*i;
172: }
173: }
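/*
   Sketch of the de-duplication idiom used below and in MatIncreaseOverlap_MPIAIJ_Local()
   (bt and list are placeholder names): PetscBTLookupSet() returns the previous value of the
   bit and then sets it, so

      if (!PetscBTLookupSet(bt,row)) list[count++] = row;

   records each global row in a given data[] array at most once.
*/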
175: /* Parse the IS and update local tables and the outgoing buf with the data*/
176: {
177: PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
178: PetscBT table_i;
180: for (i=0; i<imax; i++) {
181: PetscMemzero(ctr,size*sizeof(PetscInt));
182: n_i = n[i];
183: table_i = table[i];
184: idx_i = idx[i];
185: data_i = data[i];
186: isz_i = isz[i];
187: for (j=0; j<n_i; j++) { /* parse the indices of each IS */
188: row = idx_i[j];
189: PetscLayoutFindOwner(C->rmap,row,&proc);
190: if (proc != rank) { /* copy to the outgoing buffer */
191: ctr[proc]++;
192: *ptr[proc] = row;
193: ptr[proc]++;
194: } else { /* Update the local table */
195: if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
196: }
197: }
198: /* Update the headers for the current IS */
199: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
200: if ((ctr_j = ctr[j])) {
201: outdat_j = outdat[j];
202: k = ++outdat_j[0];
203: outdat_j[2*k] = ctr_j;
204: outdat_j[2*k-1] = i;
205: }
206: }
207: isz[i] = isz_i;
208: }
209: }
211: /* Now post the sends */
212: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
213: for (i=0; i<nrqs; ++i) {
214: j = pa[i];
215: MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
216: }
217:
218: /* No longer need the original indices*/
219: for (i=0; i<imax; ++i) {
220: ISRestoreIndices(is[i],idx+i);
221: }
222: PetscFree2(idx,n);
224: for (i=0; i<imax; ++i) {
225: ISDestroy(&is[i]);
226: }
227:
228: /* Do Local work*/
229: MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);
231: /* Receive messages*/
232: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);
233: if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}
234:
235: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
236: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}
238: /* Phase 1 sends are complete - deallocate buffers */
239: PetscFree4(outdat,ptr,tmp,ctr);
240: PetscFree4(w1,w2,w3,w4);
242: PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);
243: PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);
244: MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
245: PetscFree(rbuf[0]);
246: PetscFree(rbuf);
248:
249: /* Send the data back*/
250: /* Do a global reduction to know the buffer space req for incoming messages*/
251: {
252: PetscMPIInt *rw1;
253:
254: PetscMalloc(size*sizeof(PetscMPIInt),&rw1);
255: PetscMemzero(rw1,size*sizeof(PetscMPIInt));
257: for (i=0; i<nrqr; ++i) {
258: proc = recv_status[i].MPI_SOURCE;
259: if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
260: rw1[proc] = isz1[i];
261: }
262: PetscFree(onodes1);
263: PetscFree(olengths1);
265: /* Determine the number of messages to expect, their lengths, and their from-ids (source processes) */
266: PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
267: PetscFree(rw1);
268: }
269: /* Now post the Irecvs corresponding to these messages */
270: PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);
272: /* Now post the sends */
273: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
274: for (i=0; i<nrqr; ++i) {
275: j = recv_status[i].MPI_SOURCE;
276: MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
277: }
279: /* receive work done on other processors*/
280: {
281: PetscInt is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
282: PetscMPIInt idex;
283: PetscBT table_i;
284: MPI_Status *status2;
285:
286: PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
287: for (i=0; i<nrqs; ++i) {
288: MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
289: /* Process the message*/
290: rbuf2_i = rbuf2[idex];
291: ct1 = 2*rbuf2_i[0]+1;
292: jmax = rbuf2[idex][0];
293: for (j=1; j<=jmax; j++) {
294: max = rbuf2_i[2*j];
295: is_no = rbuf2_i[2*j-1];
296: isz_i = isz[is_no];
297: data_i = data[is_no];
298: table_i = table[is_no];
299: for (k=0; k<max; k++,ct1++) {
300: row = rbuf2_i[ct1];
301: if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
302: }
303: isz[is_no] = isz_i;
304: }
305: }
307: if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
308: PetscFree(status2);
309: }
310:
311: for (i=0; i<imax; ++i) {
312: ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
313: }
314:
315: PetscFree(onodes2);
316: PetscFree(olengths2);
318: PetscFree(pa);
319: PetscFree(rbuf2[0]);
320: PetscFree(rbuf2);
321: PetscFree(s_waits1);
322: PetscFree(r_waits1);
323: PetscFree(s_waits2);
324: PetscFree(r_waits2);
325: PetscFree5(table,data,isz,d_p,t_p);
326: PetscFree(s_status);
327: PetscFree(recv_status);
328: PetscFree(xdata[0]);
329: PetscFree(xdata);
330: PetscFree(isz1);
331: return(0);
332: }
336: /*
337: MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap_MPIAIJ_Once to do
338: the work on the local processor.
340: Inputs:
341: C - the MATMPIAIJ matrix;
342: imax - total no of index sets processed at a time;
343: table - an array of PetscBT bit tables, each of size M bits.
344:
345: Output:
346: isz - array containing the count of the solution elements corresponding
347: to each index set;
348: data - pointer to the solutions (one array of global row indices per index set)
349: */
350: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
351: {
352: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
353: Mat A = c->A,B = c->B;
354: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
355: PetscInt start,end,val,max,rstart,cstart,*ai,*aj;
356: PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
357: PetscBT table_i;
360: rstart = C->rmap->rstart;
361: cstart = C->cmap->rstart;
362: ai = a->i;
363: aj = a->j;
364: bi = b->i;
365: bj = b->j;
366: garray = c->garray;
368:
369: for (i=0; i<imax; i++) {
370: data_i = data[i];
371: table_i = table[i];
372: isz_i = isz[i];
373: for (j=0,max=isz[i]; j<max; j++) {
374: row = data_i[j] - rstart;
375: start = ai[row];
376: end = ai[row+1];
377: for (k=start; k<end; k++) { /* Amat */
378: val = aj[k] + cstart;
379: if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
380: }
381: start = bi[row];
382: end = bi[row+1];
383: for (k=start; k<end; k++) { /* Bmat */
384: val = garray[bj[k]];
385: if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
386: }
387: }
388: isz[i] = isz_i;
389: }
390: return(0);
391: }
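/*
   Worked example for the loops above (hypothetical sizes): suppose this process owns global
   rows 4..7 (rstart = cstart = 4) and local row 2 (global row 6) has diagonal-block column
   indices aj = {0,2} and off-diagonal-block column indices bj = {1} with garray = {0,9}.
   Then row 6 contributes the candidates {4,6} (= cstart + aj[]) from the A block and {9}
   (= garray[bj[]]) from the B block, each appended to data_i only the first time its bit in
   table_i is set.
*/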
395: /*
396: MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages
397: and return the output
399: Input:
400: C - the matrix
401: nrqr - no of messages being processed
402: rbuf - an array of pointers to the received requests
403:
404: Output:
405: xdata - array of messages to be sent back
406: isz1 - size of each message
408: For better efficiency perhaps we should malloc each xdata[i] separately;
409: then if a re-allocation is required we would only need to copy the data for that
410: one row rather than all previous rows, as is done now where a single large
411: chunk of memory is used.
413: */
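/*
   The reply assembled in xdata[i] (as can be read off the code below) mirrors the request
   format: xdata[i][0] = no of index sets, followed by (IS number, count) pairs, followed by
   the overlap rows found for each index set; isz1[i] records the total length sent back.
*/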
414: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
415: {
416: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
417: Mat A = c->A,B = c->B;
418: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
420: PetscMPIInt rank;
421: PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
422: PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
423: PetscInt val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
424: PetscInt *rbuf_i,kmax,rbuf_0;
425: PetscBT xtable;
428: rank = c->rank;
429: m = C->rmap->N;
430: rstart = C->rmap->rstart;
431: cstart = C->cmap->rstart;
432: ai = a->i;
433: aj = a->j;
434: bi = b->i;
435: bj = b->j;
436: garray = c->garray;
437:
438:
439: for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
440: rbuf_i = rbuf[i];
441: rbuf_0 = rbuf_i[0];
442: ct += rbuf_0;
443: for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
444: }
445:
446: if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
447: else max1 = 1;
448: mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
449: PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);
450: ++no_malloc;
451: PetscBTCreate(m,xtable);
452: PetscMemzero(isz1,nrqr*sizeof(PetscInt));
453:
454: ct3 = 0;
455: for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
456: rbuf_i = rbuf[i];
457: rbuf_0 = rbuf_i[0];
458: ct1 = 2*rbuf_0+1;
459: ct2 = ct1;
460: ct3 += ct1;
461: for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
462: PetscBTMemzero(m,xtable);
463: oct2 = ct2;
464: kmax = rbuf_i[2*j];
465: for (k=0; k<kmax; k++,ct1++) {
466: row = rbuf_i[ct1];
467: if (!PetscBTLookupSet(xtable,row)) {
468: if (!(ct3 < mem_estimate)) {
469: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
470: PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
471: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
472: PetscFree(xdata[0]);
473: xdata[0] = tmp;
474: mem_estimate = new_estimate; ++no_malloc;
475: for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
476: }
477: xdata[i][ct2++] = row;
478: ct3++;
479: }
480: }
481: for (k=oct2,max2=ct2; k<max2; k++) {
482: row = xdata[i][k] - rstart;
483: start = ai[row];
484: end = ai[row+1];
485: for (l=start; l<end; l++) {
486: val = aj[l] + cstart;
487: if (!PetscBTLookupSet(xtable,val)) {
488: if (!(ct3 < mem_estimate)) {
489: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
490: PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
491: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
492: PetscFree(xdata[0]);
493: xdata[0] = tmp;
494: mem_estimate = new_estimate; ++no_malloc;
495: for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
496: }
497: xdata[i][ct2++] = val;
498: ct3++;
499: }
500: }
501: start = bi[row];
502: end = bi[row+1];
503: for (l=start; l<end; l++) {
504: val = garray[bj[l]];
505: if (!PetscBTLookupSet(xtable,val)) {
506: if (!(ct3 < mem_estimate)) {
507: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
508: PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
509: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
510: PetscFree(xdata[0]);
511: xdata[0] = tmp;
512: mem_estimate = new_estimate; ++no_malloc;
513: for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
514: }
515: xdata[i][ct2++] = val;
516: ct3++;
517: }
518: }
519: }
520: /* Update the header*/
521: xdata[i][2*j] = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
522: xdata[i][2*j-1] = rbuf_i[2*j-1];
523: }
524: xdata[i][0] = rbuf_0;
525: xdata[i+1] = xdata[i] + ct2;
526: isz1[i] = ct2; /* size of each message */
527: }
528: PetscBTDestroy(xtable);
529: PetscInfo4(C,"[%d] Allocated %D bytes, required %D bytes, no of mallocs = %D\n",rank,mem_estimate,ct3,no_malloc);
530: return(0);
531: }
532: /* -------------------------------------------------------------------------*/
535: /*
536: Every processor gets the entire matrix
537: */
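/*
   A minimal sketch of the MPI_Allgatherv bookkeeping used below (relying on the usual PETSc
   ownership array A->rmap->range[]): process p owns rows range[p]..range[p+1]-1, so

      recvcounts[p] = range[p+1] - range[p];    number of rows contributed by process p
      displs[p]     = range[p];                 offset of those rows in the gathered array

   and the gather leaves the complete row-length array lens[] on every process; the same idea
   is then reused (with nonzero counts instead of row counts) for the column indices and values.
*/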
540: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
541: {
542: Mat B;
543: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
544: Mat_SeqAIJ *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
546: PetscMPIInt size,rank,*recvcounts = 0,*displs = 0;
547: PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
548: PetscInt m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
549: MatScalar *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;
552: MPI_Comm_size(((PetscObject)A)->comm,&size);
553: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
555: if (scall == MAT_INITIAL_MATRIX) {
556: /* ----------------------------------------------------------------
557: Tell every processor the number of nonzeros per row
558: */
559: PetscMalloc(A->rmap->N*sizeof(PetscInt),&lens);
560: for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
561: lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
562: }
563: sendcount = A->rmap->rend - A->rmap->rstart;
564: PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
565: for (i=0; i<size; i++) {
566: recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
567: displs[i] = A->rmap->range[i];
568: }
569: #if defined(PETSC_HAVE_MPI_IN_PLACE)
570: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
571: #else
572: MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
573: #endif
574: /* ---------------------------------------------------------------
575: Create the sequential matrix of the same type as the local block diagonal
576: */
577: MatCreate(PETSC_COMM_SELF,&B);
578: MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
579: MatSetType(B,((PetscObject)a->A)->type_name);
580: MatSeqAIJSetPreallocation(B,0,lens);
581: PetscMalloc(sizeof(Mat),Bin);
582: **Bin = B;
583: b = (Mat_SeqAIJ *)B->data;
585: /*--------------------------------------------------------------------
586: Copy my part of matrix column indices over
587: */
588: sendcount = ad->nz + bd->nz;
589: jsendbuf = b->j + b->i[rstarts[rank]];
590: a_jsendbuf = ad->j;
591: b_jsendbuf = bd->j;
592: n = A->rmap->rend - A->rmap->rstart;
593: cnt = 0;
594: for (i=0; i<n; i++) {
596: /* put in lower diagonal portion */
597: m = bd->i[i+1] - bd->i[i];
598: while (m > 0) {
599: /* is it above diagonal (in bd (compressed) numbering) */
600: if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
601: jsendbuf[cnt++] = garray[*b_jsendbuf++];
602: m--;
603: }
605: /* put in diagonal portion */
606: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
607: jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
608: }
610: /* put in upper diagonal portion */
611: while (m-- > 0) {
612: jsendbuf[cnt++] = garray[*b_jsendbuf++];
613: }
614: }
615: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
617: /*--------------------------------------------------------------------
618: Gather all column indices to all processors
619: */
620: for (i=0; i<size; i++) {
621: recvcounts[i] = 0;
622: for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
623: recvcounts[i] += lens[j];
624: }
625: }
626: displs[0] = 0;
627: for (i=1; i<size; i++) {
628: displs[i] = displs[i-1] + recvcounts[i-1];
629: }
630: #if defined(PETSC_HAVE_MPI_IN_PLACE)
631: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
632: #else
633: MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
634: #endif
635: /*--------------------------------------------------------------------
636: Assemble the matrix into usable form (note numerical values not yet set)
637: */
638: /* set the b->ilen (length of each row) values */
639: PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
640: /* set the b->i indices */
641: b->i[0] = 0;
642: for (i=1; i<=A->rmap->N; i++) {
643: b->i[i] = b->i[i-1] + lens[i-1];
644: }
645: PetscFree(lens);
646: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
647: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
649: } else {
650: B = **Bin;
651: b = (Mat_SeqAIJ *)B->data;
652: }
654: /*--------------------------------------------------------------------
655: Copy my part of matrix numerical values into the values location
656: */
657: if (flag == MAT_GET_VALUES){
658: sendcount = ad->nz + bd->nz;
659: sendbuf = b->a + b->i[rstarts[rank]];
660: a_sendbuf = ad->a;
661: b_sendbuf = bd->a;
662: b_sendj = bd->j;
663: n = A->rmap->rend - A->rmap->rstart;
664: cnt = 0;
665: for (i=0; i<n; i++) {
667: /* put in lower diagonal portion */
668: m = bd->i[i+1] - bd->i[i];
669: while (m > 0) {
670: /* is it above diagonal (in bd (compressed) numbering) */
671: if (garray[*b_sendj] > A->rmap->rstart + i) break;
672: sendbuf[cnt++] = *b_sendbuf++;
673: m--;
674: b_sendj++;
675: }
677: /* put in diagonal portion */
678: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
679: sendbuf[cnt++] = *a_sendbuf++;
680: }
682: /* put in upper diagonal portion */
683: while (m-- > 0) {
684: sendbuf[cnt++] = *b_sendbuf++;
685: b_sendj++;
686: }
687: }
688: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
689:
690: /* -----------------------------------------------------------------
691: Gather all numerical values to all processors
692: */
693: if (!recvcounts) {
694: PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
695: }
696: for (i=0; i<size; i++) {
697: recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
698: }
699: displs[0] = 0;
700: for (i=1; i<size; i++) {
701: displs[i] = displs[i-1] + recvcounts[i-1];
702: }
703: recvbuf = b->a;
704: #if defined(PETSC_HAVE_MPI_IN_PLACE)
705: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
706: #else
707: MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
708: #endif
709: } /* end of (flag == MAT_GET_VALUES) */
710: PetscFree2(recvcounts,displs);
712: if (A->symmetric){
713: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
714: } else if (A->hermitian) {
715: MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
716: } else if (A->structurally_symmetric) {
717: MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
718: }
719: return(0);
720: }
726: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
727: {
729: PetscInt nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
730: PetscBool rowflag,colflag,wantallmatrix = PETSC_FALSE,twantallmatrix;
733: /*
734: Check for the special case where each processor gets the entire matrix
735: */
736: if (ismax == 1 && C->rmap->N == C->cmap->N) {
737: ISIdentity(*isrow,&rowflag);
738: ISIdentity(*iscol,&colflag);
739: ISGetLocalSize(*isrow,&nrow);
740: ISGetLocalSize(*iscol,&ncol);
741: if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
742: wantallmatrix = PETSC_TRUE;
743: PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,PETSC_NULL);
744: }
745: }
746: MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPI_INT,MPI_MIN,((PetscObject)C)->comm);
747: if (twantallmatrix) {
748: MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
749: return(0);
750: }
752: /* Allocate memory to hold all the submatrices */
753: if (scall != MAT_REUSE_MATRIX) {
754: PetscMalloc((ismax+1)*sizeof(Mat),submat);
755: }
756: /* Determine the number of stages through which submatrices are done */
757: nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt));
758: /*
759: Each stage will extract nmax submatrices.
760: nmax is determined by the matrix column dimension.
761: If the original matrix has 20M columns, only one submatrix per stage is allowed, etc.
762: */
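/* For example (assuming 4-byte PetscInt): a matrix with 1,000,000 columns gives
   nmax = 20000000/(1000000*4) = 5 submatrices per stage, while 20M or more columns drives
   nmax to 0, which is then bumped back up to 1 just below. */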
763: if (!nmax) nmax = 1;
764: nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
766: /* Make sure every processor loops through the nstages */
767: MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);
769: for (i=0,pos=0; i<nstages; i++) {
770: if (pos+nmax <= ismax) max_no = nmax;
771: else if (pos == ismax) max_no = 0;
772: else max_no = ismax-pos;
773: MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
774: pos += max_no;
775: }
776: return(0);
777: }
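/*
   Usage sketch for the staged driver above (the index sets are illustrative): it is normally
   reached through the public MatGetSubMatrices() interface, e.g.

      Mat *submats;
      MatGetSubMatrices(C,2,isrow,iscol,MAT_INITIAL_MATRIX,&submats);
      (use submats[0], submats[1], then refill them in place with)
      MatGetSubMatrices(C,2,isrow,iscol,MAT_REUSE_MATRIX,&submats);
*/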
779: /* -------------------------------------------------------------------------*/
782: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
783: {
784: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
785: Mat A = c->A;
786: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
787: const PetscInt **icol,**irow;
788: PetscInt *nrow,*ncol,start;
790: PetscMPIInt rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
791: PetscInt **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
792: PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
793: PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
794: PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
795: #if defined (PETSC_USE_CTABLE)
796: PetscTable *cmap,cmap_i,*rmap,rmap_i;
797: #else
798: PetscInt **cmap,*cmap_i,**rmap,*rmap_i;
799: #endif
800: const PetscInt *irow_i;
801: PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
802: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
803: MPI_Request *r_waits4,*s_waits3,*s_waits4;
804: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
805: MPI_Status *r_status3,*r_status4,*s_status4;
806: MPI_Comm comm;
807: PetscScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
808: PetscMPIInt *onodes1,*olengths1;
809: PetscMPIInt idex,idex2,end;
812: comm = ((PetscObject)C)->comm;
813: tag0 = ((PetscObject)C)->tag;
814: size = c->size;
815: rank = c->rank;
816:
817: /* Get some new tags to keep the communication clean */
818: PetscObjectGetNewTag((PetscObject)C,&tag1);
819: PetscObjectGetNewTag((PetscObject)C,&tag2);
820: PetscObjectGetNewTag((PetscObject)C,&tag3);
822: PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);
824: for (i=0; i<ismax; i++) {
825: ISGetIndices(isrow[i],&irow[i]);
826: ISGetIndices(iscol[i],&icol[i]);
827: ISGetLocalSize(isrow[i],&nrow[i]);
828: ISGetLocalSize(iscol[i],&ncol[i]);
829: }
831: /* evaluate communication - to whom messages go, the length of each message, and the buffer space
832: required. Based on this, buffers are allocated and data is copied into them */
833: PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4); /* mesg size */
834: PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialize work vector*/
835: PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialize work vector*/
836: PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialize work vector*/
837: for (i=0; i<ismax; i++) {
838: PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
839: jmax = nrow[i];
840: irow_i = irow[i];
841: for (j=0; j<jmax; j++) {
842: l = 0;
843: row = irow_i[j];
844: while (row >= C->rmap->range[l+1]) l++;
845: proc = l;
846: w4[proc]++;
847: }
848: for (j=0; j<size; j++) {
849: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
850: }
851: }
852:
853: nrqs = 0; /* no of outgoing messages */
854: msz = 0; /* total mesg length (for all procs) */
855: w1[rank] = 0; /* no mesg sent to self */
856: w3[rank] = 0;
857: for (i=0; i<size; i++) {
858: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
859: }
860: PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa); /*(proc -array)*/
861: for (i=0,j=0; i<size; i++) {
862: if (w1[i]) { pa[j] = i; j++; }
863: }
865: /* Each message would have a header = 1 + 2*(no of IS) + data */
866: for (i=0; i<nrqs; i++) {
867: j = pa[i];
868: w1[j] += w2[j] + 2* w3[j];
869: msz += w1[j];
870: }
871: PetscInfo2(C,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);
873: /* Determine the number of messages to expect, their lengths, and their from-ids (source processes) */
874: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
875: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
877: /* Now post the Irecvs corresponding to these messages */
878: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
879:
880: PetscFree(onodes1);
881: PetscFree(olengths1);
882:
883: /* Allocate Memory for outgoing messages */
884: PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);
885: PetscMemzero(sbuf1,size*sizeof(PetscInt*));
886: PetscMemzero(ptr,size*sizeof(PetscInt*));
888: {
889: PetscInt *iptr = tmp,ict = 0;
890: for (i=0; i<nrqs; i++) {
891: j = pa[i];
892: iptr += ict;
893: sbuf1[j] = iptr;
894: ict = w1[j];
895: }
896: }
898: /* Form the outgoing messages */
899: /* Initialize the header space */
900: for (i=0; i<nrqs; i++) {
901: j = pa[i];
902: sbuf1[j][0] = 0;
903: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
904: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
905: }
906:
907: /* Parse the isrow and copy data into outbuf */
908: for (i=0; i<ismax; i++) {
909: PetscMemzero(ctr,size*sizeof(PetscInt));
910: irow_i = irow[i];
911: jmax = nrow[i];
912: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
913: l = 0;
914: row = irow_i[j];
915: while (row >= C->rmap->range[l+1]) l++;
916: proc = l;
917: if (proc != rank) { /* copy to the outgoing buf*/
918: ctr[proc]++;
919: *ptr[proc] = row;
920: ptr[proc]++;
921: }
922: }
923: /* Update the headers for the current IS */
924: for (j=0; j<size; j++) { /* Can Optimise this loop too */
925: if ((ctr_j = ctr[j])) {
926: sbuf1_j = sbuf1[j];
927: k = ++sbuf1_j[0];
928: sbuf1_j[2*k] = ctr_j;
929: sbuf1_j[2*k-1] = i;
930: }
931: }
932: }
934: /* Now post the sends */
935: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
936: for (i=0; i<nrqs; ++i) {
937: j = pa[i];
938: MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
939: }
941: /* Post Receives to capture the buffer size */
942: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
943: PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);
944: rbuf2[0] = tmp + msz;
945: for (i=1; i<nrqs; ++i) {
946: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
947: }
948: for (i=0; i<nrqs; ++i) {
949: j = pa[i];
950: MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
951: }
953: /* Send to other procs the buf size they should allocate */
954:
956: /* Receive messages*/
957: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
958: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
959: PetscMalloc3(nrqr,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);
960: {
961: Mat_SeqAIJ *sA = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
962: PetscInt *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
963: PetscInt *sbuf2_i;
965: for (i=0; i<nrqr; ++i) {
966: MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);
967: req_size[idex] = 0;
968: rbuf1_i = rbuf1[idex];
969: start = 2*rbuf1_i[0] + 1;
970: MPI_Get_count(r_status1+i,MPIU_INT,&end);
971: PetscMalloc((end+1)*sizeof(PetscInt),&sbuf2[idex]);
972: sbuf2_i = sbuf2[idex];
973: for (j=start; j<end; j++) {
974: id = rbuf1_i[j] - rstart;
975: ncols = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
976: sbuf2_i[j] = ncols;
977: req_size[idex] += ncols;
978: }
979: req_source[idex] = r_status1[i].MPI_SOURCE;
980: /* form the header */
981: sbuf2_i[0] = req_size[idex];
982: for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
983: MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
984: }
985: }
986: PetscFree(r_status1);
987: PetscFree(r_waits1);
989: /* recv buffer sizes */
990: /* Receive messages*/
991:
992: PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);
993: PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf4);
994: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
995: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
996: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
998: for (i=0; i<nrqs; ++i) {
999: MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
1000: PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscInt),&rbuf3[idex]);
1001: PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscScalar),&rbuf4[idex]);
1002: MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1003: MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1004: }
1005: PetscFree(r_status2);
1006: PetscFree(r_waits2);
1007:
1008: /* Wait on sends1 and sends2 */
1009: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
1010: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
1012: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1013: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1014: PetscFree(s_status1);
1015: PetscFree(s_status2);
1016: PetscFree(s_waits1);
1017: PetscFree(s_waits2);
1019: /* Now allocate buffers for a->j, and send them off */
1020: PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);
1021: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1022: PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);
1023: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
1024:
1025: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
1026: {
1027: PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1028: PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1029: PetscInt cend = C->cmap->rend;
1030: PetscInt *a_j = a->j,*b_j = b->j,ctmp;
1032: for (i=0; i<nrqr; i++) {
1033: rbuf1_i = rbuf1[i];
1034: sbuf_aj_i = sbuf_aj[i];
1035: ct1 = 2*rbuf1_i[0] + 1;
1036: ct2 = 0;
1037: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1038: kmax = rbuf1[i][2*j];
1039: for (k=0; k<kmax; k++,ct1++) {
1040: row = rbuf1_i[ct1] - rstart;
1041: nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row];
1042: ncols = nzA + nzB;
1043: cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];
1045: /* load the column indices for this row into cols*/
1046: cols = sbuf_aj_i + ct2;
1047:
1048: lwrite = 0;
1049: for (l=0; l<nzB; l++) {
1050: if ((ctmp = bmap[cworkB[l]]) < cstart) cols[lwrite++] = ctmp;
1051: }
1052: for (l=0; l<nzA; l++) cols[lwrite++] = cstart + cworkA[l];
1053: for (l=0; l<nzB; l++) {
1054: if ((ctmp = bmap[cworkB[l]]) >= cend) cols[lwrite++] = ctmp;
1055: }
1057: ct2 += ncols;
1058: }
1059: }
1060: MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1061: }
1062: }
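/*
   Note on the column ordering produced above: for each requested row the global column indices
   are written as (off-diagonal columns < cstart), then the diagonal-block columns shifted by
   cstart, then (off-diagonal columns >= cend). Since garray is kept sorted for an assembled
   MPIAIJ matrix, this gives the row in increasing global column order; the value packing
   further below follows the same order so indices and values stay aligned.
*/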
1063: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
1064: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
1066: /* Allocate buffers for a->a, and send them off */
1067: PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf_aa);
1068: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1069: PetscMalloc((j+1)*sizeof(PetscScalar),&sbuf_aa[0]);
1070: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
1071:
1072: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
1073: {
1074: PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1075: PetscInt cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1076: PetscInt cend = C->cmap->rend;
1077: PetscInt *b_j = b->j;
1078: PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;
1079:
1080: for (i=0; i<nrqr; i++) {
1081: rbuf1_i = rbuf1[i];
1082: sbuf_aa_i = sbuf_aa[i];
1083: ct1 = 2*rbuf1_i[0]+1;
1084: ct2 = 0;
1085: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1086: kmax = rbuf1_i[2*j];
1087: for (k=0; k<kmax; k++,ct1++) {
1088: row = rbuf1_i[ct1] - rstart;
1089: nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row];
1090: ncols = nzA + nzB;
1091: cworkB = b_j + b_i[row];
1092: vworkA = a_a + a_i[row];
1093: vworkB = b_a + b_i[row];
1095: /* load the column values for this row into vals*/
1096: vals = sbuf_aa_i+ct2;
1097:
1098: lwrite = 0;
1099: for (l=0; l<nzB; l++) {
1100: if ((bmap[cworkB[l]]) < cstart) vals[lwrite++] = vworkB[l];
1101: }
1102: for (l=0; l<nzA; l++) vals[lwrite++] = vworkA[l];
1103: for (l=0; l<nzB; l++) {
1104: if ((bmap[cworkB[l]]) >= cend) vals[lwrite++] = vworkB[l];
1105: }
1106:
1107: ct2 += ncols;
1108: }
1109: }
1110: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1111: }
1112: }
1113: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
1114: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
1115: PetscFree(rbuf1[0]);
1116: PetscFree(rbuf1);
1118: /* Form the matrix */
1119: /* create col map: global col of C -> local col of submatrices */
1120: {
1121: const PetscInt *icol_i;
1122: #if defined (PETSC_USE_CTABLE)
1123: PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);
1124: for (i=0; i<ismax; i++) {
1125: PetscTableCreate(ncol[i]+1,&cmap[i]);
1126: jmax = ncol[i];
1127: icol_i = icol[i];
1128: cmap_i = cmap[i];
1129: for (j=0; j<jmax; j++) {
1130: PetscTableAdd(cmap[i],icol_i[j]+1,j+1);
1131: }
1132: }
1133: #else
1134: PetscMalloc(ismax*sizeof(PetscInt*),&cmap);
1135: PetscMalloc(ismax*C->cmap->N*sizeof(PetscInt),&cmap[0]);
1136: PetscMemzero(cmap[0],ismax*C->cmap->N*sizeof(PetscInt));
1137: for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->cmap->N; }
1138: for (i=0; i<ismax; i++) {
1139: jmax = ncol[i];
1140: icol_i = icol[i];
1141: cmap_i = cmap[i];
1142: for (j=0; j<jmax; j++) {
1143: cmap_i[icol_i[j]] = j+1;
1144: }
1145: }
1146: #endif
1147: }
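/*
   Convention for the column map created above (non-CTABLE case): cmap_i[gcol] holds the local
   column number plus one, so a value of 0 means global column gcol is not part of this
   submatrix; that is why the assembly code later tests "if (tcol)" and stores "tcol - 1".
   With PETSC_USE_CTABLE the same +1 shift is applied to both the keys and the values.
*/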
1149: /* Create lens which is required for MatCreate... */
1150: for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
1151: PetscMalloc(ismax*sizeof(PetscInt*),&lens);
1152: PetscMalloc(j*sizeof(PetscInt),&lens[0]);
1153: PetscMemzero(lens[0],j*sizeof(PetscInt));
1154: for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
1155:
1156: /* Update lens from local data */
1157: for (i=0; i<ismax; i++) {
1158: jmax = nrow[i];
1159: cmap_i = cmap[i];
1160: irow_i = irow[i];
1161: lens_i = lens[i];
1162: for (j=0; j<jmax; j++) {
1163: l = 0;
1164: row = irow_i[j];
1165: while (row >= C->rmap->range[l+1]) l++;
1166: proc = l;
1167: if (proc == rank) {
1168: MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1169: for (k=0; k<ncols; k++) {
1170: #if defined (PETSC_USE_CTABLE)
1171: PetscTableFind(cmap_i,cols[k]+1,&tcol);
1172: #else
1173: tcol = cmap_i[cols[k]];
1174: #endif
1175: if (tcol) { lens_i[j]++;}
1176: }
1177: MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1178: }
1179: }
1180: }
1181:
1182: /* Create row map: global row of C -> local row of submatrices */
1183: #if defined (PETSC_USE_CTABLE)
1184: PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);
1185: for (i=0; i<ismax; i++) {
1186: PetscTableCreate(nrow[i]+1,&rmap[i]);
1187: rmap_i = rmap[i];
1188: irow_i = irow[i];
1189: jmax = nrow[i];
1190: for (j=0; j<jmax; j++) {
1191: PetscTableAdd(rmap[i],irow_i[j]+1,j+1);
1192: }
1193: }
1194: #else
1195: PetscMalloc(ismax*sizeof(PetscInt*),&rmap);
1196: PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);
1197: PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1198: for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
1199: for (i=0; i<ismax; i++) {
1200: rmap_i = rmap[i];
1201: irow_i = irow[i];
1202: jmax = nrow[i];
1203: for (j=0; j<jmax; j++) {
1204: rmap_i[irow_i[j]] = j;
1205: }
1206: }
1207: #endif
1208:
1209: /* Update lens from offproc data */
1210: {
1211: PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;
1213: for (tmp2=0; tmp2<nrqs; tmp2++) {
1214: MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1215: idex = pa[idex2];
1216: sbuf1_i = sbuf1[idex];
1217: jmax = sbuf1_i[0];
1218: ct1 = 2*jmax+1;
1219: ct2 = 0;
1220: rbuf2_i = rbuf2[idex2];
1221: rbuf3_i = rbuf3[idex2];
1222: for (j=1; j<=jmax; j++) {
1223: is_no = sbuf1_i[2*j-1];
1224: max1 = sbuf1_i[2*j];
1225: lens_i = lens[is_no];
1226: cmap_i = cmap[is_no];
1227: rmap_i = rmap[is_no];
1228: for (k=0; k<max1; k++,ct1++) {
1229: #if defined (PETSC_USE_CTABLE)
1230: PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1231: row--;
1232: if (row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); }
1233: #else
1234: row = rmap_i[sbuf1_i[ct1]]; /* the row number in the new submatrix */
1235: #endif
1236: max2 = rbuf2_i[ct1];
1237: for (l=0; l<max2; l++,ct2++) {
1238: #if defined (PETSC_USE_CTABLE)
1239: PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1240: #else
1241: tcol = cmap_i[rbuf3_i[ct2]];
1242: #endif
1243: if (tcol) {
1244: lens_i[row]++;
1245: }
1246: }
1247: }
1248: }
1249: }
1250: }
1251: PetscFree(r_status3);
1252: PetscFree(r_waits3);
1253: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1254: PetscFree(s_status3);
1255: PetscFree(s_waits3);
1257: /* Create the submatrices */
1258: if (scall == MAT_REUSE_MATRIX) {
1259: PetscBool flag;
1261: /*
1262: Assumes the new rows are the same length as the old rows, hence a bug!
1263: */
1264: for (i=0; i<ismax; i++) {
1265: mat = (Mat_SeqAIJ *)(submats[i]->data);
1266: if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1267: PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1268: if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
1269: /* Initialize the matrix as if empty */
1270: PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));
1271: submats[i]->factortype = C->factortype;
1272: }
1273: } else {
1274: for (i=0; i<ismax; i++) {
1275: MatCreate(PETSC_COMM_SELF,submats+i);
1276: MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);
1277: MatSetType(submats[i],((PetscObject)A)->type_name);
1278: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1279: }
1280: }
1282: /* Assemble the matrices */
1283: /* First assemble the local rows */
1284: {
1285: PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1286: PetscScalar *imat_a;
1287:
1288: for (i=0; i<ismax; i++) {
1289: mat = (Mat_SeqAIJ*)submats[i]->data;
1290: imat_ilen = mat->ilen;
1291: imat_j = mat->j;
1292: imat_i = mat->i;
1293: imat_a = mat->a;
1294: cmap_i = cmap[i];
1295: rmap_i = rmap[i];
1296: irow_i = irow[i];
1297: jmax = nrow[i];
1298: for (j=0; j<jmax; j++) {
1299: l = 0;
1300: row = irow_i[j];
1301: while (row >= C->rmap->range[l+1]) l++;
1302: proc = l;
1303: if (proc == rank) {
1304: old_row = row;
1305: #if defined (PETSC_USE_CTABLE)
1306: PetscTableFind(rmap_i,row+1,&row);
1307: row--;
1308: #else
1309: row = rmap_i[row];
1310: #endif
1311: ilen_row = imat_ilen[row];
1312: MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1313: mat_i = imat_i[row] ;
1314: mat_a = imat_a + mat_i;
1315: mat_j = imat_j + mat_i;
1316: for (k=0; k<ncols; k++) {
1317: #if defined (PETSC_USE_CTABLE)
1318: PetscTableFind(cmap_i,cols[k]+1,&tcol);
1319: #else
1320: tcol = cmap_i[cols[k]];
1321: #endif
1322: if (tcol){
1323: *mat_j++ = tcol - 1;
1324: *mat_a++ = vals[k];
1325: ilen_row++;
1326: }
1327: }
1328: MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1329: imat_ilen[row] = ilen_row;
1330: }
1331: }
1332: }
1333: }
1335: /* Now assemble the off-process rows */
1336: {
1337: PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1338: PetscInt *imat_j,*imat_i;
1339: PetscScalar *imat_a,*rbuf4_i;
1341: for (tmp2=0; tmp2<nrqs; tmp2++) {
1342: MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1343: idex = pa[idex2];
1344: sbuf1_i = sbuf1[idex];
1345: jmax = sbuf1_i[0];
1346: ct1 = 2*jmax + 1;
1347: ct2 = 0;
1348: rbuf2_i = rbuf2[idex2];
1349: rbuf3_i = rbuf3[idex2];
1350: rbuf4_i = rbuf4[idex2];
1351: for (j=1; j<=jmax; j++) {
1352: is_no = sbuf1_i[2*j-1];
1353: rmap_i = rmap[is_no];
1354: cmap_i = cmap[is_no];
1355: mat = (Mat_SeqAIJ*)submats[is_no]->data;
1356: imat_ilen = mat->ilen;
1357: imat_j = mat->j;
1358: imat_i = mat->i;
1359: imat_a = mat->a;
1360: max1 = sbuf1_i[2*j];
1361: for (k=0; k<max1; k++,ct1++) {
1362: row = sbuf1_i[ct1];
1363: #if defined (PETSC_USE_CTABLE)
1364: PetscTableFind(rmap_i,row+1,&row);
1365: row--;
1366: #else
1367: row = rmap_i[row];
1368: #endif
1369: ilen = imat_ilen[row];
1370: mat_i = imat_i[row] ;
1371: mat_a = imat_a + mat_i;
1372: mat_j = imat_j + mat_i;
1373: max2 = rbuf2_i[ct1];
1374: for (l=0; l<max2; l++,ct2++) {
1375:
1376: #if defined (PETSC_USE_CTABLE)
1377: PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1378: #else
1379: tcol = cmap_i[rbuf3_i[ct2]];
1380: #endif
1381: if (tcol) {
1382: *mat_j++ = tcol - 1;
1383: *mat_a++ = rbuf4_i[ct2];
1384: ilen++;
1385: }
1386: }
1387: imat_ilen[row] = ilen;
1388: }
1389: }
1390: }
1391: }
1392: PetscFree(r_status4);
1393: PetscFree(r_waits4);
1394: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1395: PetscFree(s_waits4);
1396: PetscFree(s_status4);
1398: /* Restore the indices */
1399: for (i=0; i<ismax; i++) {
1400: ISRestoreIndices(isrow[i],irow+i);
1401: ISRestoreIndices(iscol[i],icol+i);
1402: }
1404: /* Destroy allocated memory */
1405: PetscFree4(irow,icol,nrow,ncol);
1406: PetscFree4(w1,w2,w3,w4);
1407: PetscFree(pa);
1409: PetscFree4(sbuf1,ptr,tmp,ctr);
1410: PetscFree(rbuf2);
1411: for (i=0; i<nrqr; ++i) {
1412: PetscFree(sbuf2[i]);
1413: }
1414: for (i=0; i<nrqs; ++i) {
1415: PetscFree(rbuf3[i]);
1416: PetscFree(rbuf4[i]);
1417: }
1419: PetscFree3(sbuf2,req_size,req_source);
1420: PetscFree(rbuf3);
1421: PetscFree(rbuf4);
1422: PetscFree(sbuf_aj[0]);
1423: PetscFree(sbuf_aj);
1424: PetscFree(sbuf_aa[0]);
1425: PetscFree(sbuf_aa);
1426: #if defined (PETSC_USE_CTABLE)
1427: for (i=0; i<ismax; i++) {
1428: PetscTableDestroy((PetscTable*)&cmap[i]);
1429: PetscTableDestroy((PetscTable*)&rmap[i]);
1430: }
1431: #else
1432: PetscFree(cmap[0]);
1433: PetscFree(rmap[0]);
1434: #endif
1435: PetscFree(cmap);
1436: PetscFree(rmap);
1437: PetscFree(lens[0]);
1438: PetscFree(lens);
1440: for (i=0; i<ismax; i++) {
1441: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1442: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1443: }
1444: return(0);
1445: }
1447: /*
1448: Observe that the Seq matrices used to construct this MPI matrix are not increfed.
1449: Be careful not to destroy them elsewhere.
1450: */
1453: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1454: {
1455: /* If making this function public, change the error returned in this function away from _PLIB. */
1457: Mat_MPIAIJ *aij;
1458: PetscBool seqaij;
1461: /* Check to make sure the component matrices are compatible with C. */
1462: PetscTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1463: if(!seqaij) {
1464: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1465: }
1466: PetscTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1467: if(!seqaij) {
1468: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1469: }
1470: if(A->rmap->n != B->rmap->n || A->rmap->bs != B->rmap->bs || A->cmap->bs != B->cmap->bs) {
1471: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1472: }
1473: MatCreate(comm, C);
1474: MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1475: PetscLayoutSetBlockSize((*C)->rmap,A->rmap->bs);
1476: PetscLayoutSetBlockSize((*C)->cmap,A->cmap->bs);
1477: PetscLayoutSetUp((*C)->rmap);
1478: PetscLayoutSetUp((*C)->cmap);
1479: if((*C)->cmap->N != A->cmap->n + B->cmap->n) {
1480: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1481: }
1482: MatSetType(*C, MATMPIAIJ);
1483: aij = (Mat_MPIAIJ*)((*C)->data);
1484: aij->A = A;
1485: aij->B = B;
1486: PetscLogObjectParent(*C,A);
1487: PetscLogObjectParent(*C,B);
1488: (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1489: (*C)->assembled = (PetscBool)(A->assembled && B->assembled);
1490: return(0);
1491: }
1495: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1496: {
1497: Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);
1501: *A = aij->A;
1502: *B = aij->B;
1503: /* Note that we don't incref *A and *B, so be careful! */
1504: return(0);
1505: }
1510: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1511: PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1512: PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1513: PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1514: {
1516: PetscMPIInt size, flag;
1517: PetscInt i,ii;
1518: PetscInt ismax_c;
1520: if(!ismax) {
1521: return(0);
1522: }
1523: for(i = 0, ismax_c = 0; i < ismax; ++i) {
1524: MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1525: if(flag != MPI_IDENT) {
1526: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1527: }
1528: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1529: if(size > 1) {
1530: ++ismax_c;
1531: }
1532: }
1533: if(!ismax_c) { /* Sequential ISs only, so can call the sequential matrix extraction subroutine. */
1534: (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1535: }
1536: else { /* if(ismax_c) */
1537: Mat *A,*B;
1538: IS *isrow_c, *iscol_c;
1539: PetscMPIInt size;
1540: /*
1541: Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate
1542: array of sequential matrices underlying the resulting parallel matrices.
1543: Which arrays to allocate is based on the value of MatReuse scall.
1544: There are as many diag matrices as there are original index sets.
1545: There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.
1547: Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists,
1548: we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which
1549: getsubmats_seq will deposit the extracted diag and off-diag parts.
1550: However, if reuse is taking place, we have to allocate the seq matrix arrays here.
1551: If reuse is NOT taking place, then the seq matrix arrays are allocated by getsubmats_seq.
1552: */
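/*
   In summary (a sketch of the branches below):
      scall == MAT_INITIAL_MATRIX : allocate *submat here; the seq arrays A,B are allocated
                                    by getsubmats_seq itself
      scall == MAT_REUSE_MATRIX   : allocate A,B here and fill them with the existing pieces
                                    of (*submat)[i] via extractseq (or A[i] = (*submat)[i]
                                    when the index set is sequential)
*/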
1553:
1554: /* Parallel matrix array is allocated only if no reuse is taking place. */
1555: if (scall != MAT_REUSE_MATRIX) {
1556: PetscMalloc((ismax)*sizeof(Mat),submat);
1557: }
1558: else {
1559: PetscMalloc(ismax*sizeof(Mat), &A);
1560: PetscMalloc(ismax_c*sizeof(Mat), &B);
1561: /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1562: for(i = 0, ii = 0; i < ismax; ++i) {
1563: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1564: if(size > 1) {
1565: (*extractseq)((*submat)[i],A+i,B+ii);
1566: ++ii;
1567: }
1568: else {
1569: A[i] = (*submat)[i];
1570: }
1571: }
1572: }
1573: /*
1574: Construct the complements of the iscol ISs for parallel ISs only.
1575: These are used to extract the off-diag portion of the resulting parallel matrix.
1576: The row IS for the off-diag portion is the same as for the diag portion,
1577: so we merely alias the row IS, while skipping those that are sequential.
1578: */
1579: PetscMalloc2(ismax_c,IS,&isrow_c, ismax_c, IS, &iscol_c);
1580: for(i = 0, ii = 0; i < ismax; ++i) {
1581: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1582: if(size > 1) {
1583: isrow_c[ii] = isrow[i];
1584: ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1585: ++ii;
1586: }
1587: }
1588: /* Now obtain the sequential A and B submatrices separately. */
1589: (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1590: (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1591: for(ii = 0; ii < ismax_c; ++ii) {
1592: ISDestroy(&iscol_c[ii]);
1593: }
1594: PetscFree2(isrow_c, iscol_c);
1595: /*
1596: If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1597: have been extracted directly into the parallel matrices containing them, or
1598: simply into the sequential matrix identical with the corresponding A (if size == 1).
1599: Otherwise, make sure that parallel matrices are constructed from A & B, or the
1600: A is put into the correct submat slot (if size == 1).
1601: */
1602: if(scall != MAT_REUSE_MATRIX) {
1603: for(i = 0, ii = 0; i < ismax; ++i) {
1604: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1605: if(size > 1) {
1606: /*
1607: For each parallel isrow[i], create parallel matrices from the extracted sequential matrices.
1608: */
1609: /* Construct submat[i] from the Seq pieces A and B. */
1610: (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);
1611:
1612: ++ii;
1613: }
1614: else {
1615: (*submat)[i] = A[i];
1616: }
1617: }
1618: }
1619: PetscFree(A);
1620: PetscFree(B);
1621: }
1622: return(0);
1623: }/* MatGetSubMatricesParallel_MPIXAIJ() */
1629: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1630: {
1633: MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,
1634: MatCreateMPIAIJFromSeqMatrices_Private,
1635: MatMPIAIJExtractSeqMatrices_Private);
1636:
1637: return(0);
1638: }