Actual source code: mpiov.c

petsc-3.3-p5 2012-12-01
  2: /*
  3:    Routines to compute overlapping regions of a parallel MPI matrix
  4:   and to find submatrices that are shared across processors.
  5: */
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <petscbt.h>

  9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS *);
 10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char **,PetscInt*,PetscInt**);
 11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt **,PetscInt**,PetscInt*);
 12: extern PetscErrorCode MatGetRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
 13: extern PetscErrorCode MatRestoreRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);

 17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
 18: {
 20:   PetscInt       i;

 23:   if (ov < 0) SETERRQ(((PetscObject)C)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 24:   for (i=0; i<ov; ++i) {
 25:     MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
 26:   }
 27:   return(0);
 28: }
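/*
   Usage sketch (illustrative note, not part of the original source): this routine is
   normally reached through the public interface, e.g.

      MatIncreaseOverlap(C,nis,is,2);

   which, for an MPIAIJ matrix, dispatches here and applies
   MatIncreaseOverlap_MPIAIJ_Once() twice, growing each of the nis index sets by
   two levels of overlap.
*/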

 30: /*
 31:   Sample message format:
 32:   If processor A wants processor B to process some elements corresponding
 33:   to index sets is[1], is[5]
 34:   mesg [0] = 2   (no of index sets in the mesg)
 35:   -----------  
 36:   mesg [1] = 1 => is[1]
 37:   mesg [2] = sizeof(is[1]);
 38:   -----------  
 39:   mesg [3] = 5  => is[5]
 40:   mesg [4] = sizeof(is[5]);
 41:   -----------
 42:   mesg [5]
 43:   mesg [n]   data(is[1])
 44:   -----------
 45:   mesg [n+1]
 46:   mesg [m]   data(is[5])
 47:   -----------
 48:   
 49:   Notes:
 50:   nrqs - no of requests sent (or to be sent out)
 51:   nrqr - no of requests received (which have to be or which have been processed)
 52: */
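/*
  Illustrative example (row numbers made up): if processor A asks processor B to
  process rows {10,11,12} for is[1] and rows {40,41} for is[5], the outgoing
  buffer is

     mesg = { 2,  1,3,  5,2,  10,11,12,  40,41 }

  i.e. one count, two (index set, length) header pairs, then the row data,
  matching the header length 1 + 2*(no of IS) used below.
*/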
 55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
 56: {
 57:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
 58:   PetscMPIInt    *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
 59:   const PetscInt **idx,*idx_i;
 60:   PetscInt       *n,**data,len;
 62:   PetscMPIInt    size,rank,tag1,tag2;
 63:   PetscInt       M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
 64:   PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
 65:   PetscBT        *table;
 66:   MPI_Comm       comm;
 67:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
 68:   MPI_Status     *s_status,*recv_status;
 69:   char           *t_p;

 72:   comm   = ((PetscObject)C)->comm;
 73:   size   = c->size;
 74:   rank   = c->rank;
 75:   M      = C->rmap->N;

 77:   PetscObjectGetNewTag((PetscObject)C,&tag1);
 78:   PetscObjectGetNewTag((PetscObject)C,&tag2);
 79: 
 80:   PetscMalloc2(imax,PetscInt*,&idx,imax,PetscInt,&n);

 82:   for (i=0; i<imax; i++) {
 83:     ISGetIndices(is[i],&idx[i]);
 84:     ISGetLocalSize(is[i],&n[i]);
 85:   }

 87:   /* evaluate communication - mesg to who, length of mesg, and buffer space
 88:      required. Based on this, buffers are allocated, and data copied into them*/
 89:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4);
 90:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 91:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 92:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 93:   for (i=0; i<imax; i++) {
 94:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 95:     idx_i = idx[i];
 96:     len   = n[i];
 97:     for (j=0; j<len; j++) {
 98:       row  = idx_i[j];
 99:       if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100:       PetscLayoutFindOwner(C->rmap,row,&proc);
101:       w4[proc]++;
102:     }
103:     for (j=0; j<size; j++){
104:       if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105:     }
106:   }

108:   nrqs     = 0;              /* no of outgoing messages */
109:   msz      = 0;              /* total mesg length (for all procs) */
110:   w1[rank] = 0;              /* no mesg sent to itself */
111:   w3[rank] = 0;
112:   for (i=0; i<size; i++) {
113:     if (w1[i])  {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114:   }
115:   /* pa - is list of processors to communicate with */
116:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);
117:   for (i=0,j=0; i<size; i++) {
118:     if (w1[i]) {pa[j] = i; j++;}
119:   }

121:   /* Each message would have a header = 1 + 2*(no of IS) + data */
122:   for (i=0; i<nrqs; i++) {
123:     j      = pa[i];
124:     w1[j] += w2[j] + 2*w3[j];
125:     msz   += w1[j];
126:   }

128:   /* Determine the number of messages to expect, their lengths, and the from-ids */
129:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

132:   /* Now post the Irecvs corresponding to these messages */
133:   PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);

135:   /* Allocate Memory for outgoing messages */
136:   PetscMalloc4(size,PetscInt*,&outdat,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);
137:   PetscMemzero(outdat,size*sizeof(PetscInt*));
138:   PetscMemzero(ptr,size*sizeof(PetscInt*));

140:   {
141:     PetscInt *iptr = tmp,ict  = 0;
142:     for (i=0; i<nrqs; i++) {
143:       j         = pa[i];
144:       iptr     +=  ict;
145:       outdat[j] = iptr;
146:       ict       = w1[j];
147:     }
148:   }

150:   /* Form the outgoing messages */
151:   /*plug in the headers*/
152:   for (i=0; i<nrqs; i++) {
153:     j            = pa[i];
154:     outdat[j][0] = 0;
155:     PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156:     ptr[j]       = outdat[j] + 2*w3[j] + 1;
157:   }
158: 
159:   /* Memory for doing local proc's work*/
160:   {
161:     PetscMalloc5(imax,PetscBT,&table, imax,PetscInt*,&data, imax,PetscInt,&isz,
162:                         M*imax,PetscInt,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,char,&t_p);
163:     PetscMemzero(table,imax*sizeof(PetscBT));
164:     PetscMemzero(data,imax*sizeof(PetscInt*));
165:     PetscMemzero(isz,imax*sizeof(PetscInt));
166:     PetscMemzero(d_p,M*imax*sizeof(PetscInt));
167:     PetscMemzero(t_p,(M/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));

169:     for (i=0; i<imax; i++) {
170:       table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
171:       data[i]  = d_p + M*i;
172:     }
173:   }

175:   /* Parse the IS and update local tables and the outgoing buf with the data*/
176:   {
177:     PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
178:     PetscBT  table_i;

180:     for (i=0; i<imax; i++) {
181:       PetscMemzero(ctr,size*sizeof(PetscInt));
182:       n_i     = n[i];
183:       table_i = table[i];
184:       idx_i   = idx[i];
185:       data_i  = data[i];
186:       isz_i   = isz[i];
187:       for (j=0;  j<n_i; j++) {  /* parse the indices of each IS */
188:         row  = idx_i[j];
189:         PetscLayoutFindOwner(C->rmap,row,&proc);
190:         if (proc != rank) { /* copy to the outgoing buffer */
191:           ctr[proc]++;
192:           *ptr[proc] = row;
193:           ptr[proc]++;
194:         } else { /* Update the local table */
195:           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
196:         }
197:       }
198:       /* Update the headers for the current IS */
199:       for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
200:         if ((ctr_j = ctr[j])) {
201:           outdat_j        = outdat[j];
202:           k               = ++outdat_j[0];
203:           outdat_j[2*k]   = ctr_j;
204:           outdat_j[2*k-1] = i;
205:         }
206:       }
207:       isz[i] = isz_i;
208:     }
209:   }

211:   /*  Now  post the sends */
212:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
213:   for (i=0; i<nrqs; ++i) {
214:     j    = pa[i];
215:     MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
216:   }
217: 
218:   /* No longer need the original indices*/
219:   for (i=0; i<imax; ++i) {
220:     ISRestoreIndices(is[i],idx+i);
221:   }
222:   PetscFree2(idx,n);

224:   for (i=0; i<imax; ++i) {
225:     ISDestroy(&is[i]);
226:   }
227: 
228:   /* Do Local work*/
229:   MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);

231:   /* Receive messages*/
232:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);
233:   if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}
234: 
235:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
236:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

238:   /* Phase 1 sends are complete - deallocate buffers */
239:   PetscFree4(outdat,ptr,tmp,ctr);
240:   PetscFree4(w1,w2,w3,w4);

242:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);
243:   PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);
244:   MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
245:   PetscFree(rbuf[0]);
246:   PetscFree(rbuf);

248: 
249:  /* Send the data back*/
250:   /* Do a global reduction to know the buffer space req for incoming messages*/
251:   {
252:     PetscMPIInt *rw1;
253: 
254:     PetscMalloc(size*sizeof(PetscMPIInt),&rw1);
255:     PetscMemzero(rw1,size*sizeof(PetscMPIInt));

257:     for (i=0; i<nrqr; ++i) {
258:       proc      = recv_status[i].MPI_SOURCE;
259:       if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
260:       rw1[proc] = isz1[i];
261:     }
262:     PetscFree(onodes1);
263:     PetscFree(olengths1);

265:     /* Determine the number of messages to expect, their lengths, and the from-ids */
266:     PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
267:     PetscFree(rw1);
268:   }
269:   /* Now post the Irecvs corresponding to these messages */
270:   PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);

272:   /*  Now  post the sends */
273:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
274:   for (i=0; i<nrqr; ++i) {
275:     j    = recv_status[i].MPI_SOURCE;
276:     MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
277:   }

279:   /* receive work done on other processors*/
280:   {
281:     PetscInt    is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
282:     PetscMPIInt idex;
283:     PetscBT     table_i;
284:     MPI_Status  *status2;
285: 
286:     PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
287:     for (i=0; i<nrqs; ++i) {
288:       MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
289:       /* Process the message*/
290:       rbuf2_i = rbuf2[idex];
291:       ct1     = 2*rbuf2_i[0]+1;
292:       jmax    = rbuf2[idex][0];
293:       for (j=1; j<=jmax; j++) {
294:         max     = rbuf2_i[2*j];
295:         is_no   = rbuf2_i[2*j-1];
296:         isz_i   = isz[is_no];
297:         data_i  = data[is_no];
298:         table_i = table[is_no];
299:         for (k=0; k<max; k++,ct1++) {
300:           row = rbuf2_i[ct1];
301:           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
302:         }
303:         isz[is_no] = isz_i;
304:       }
305:     }

307:     if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
308:     PetscFree(status2);
309:   }
310: 
311:   for (i=0; i<imax; ++i) {
312:     ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
313:   }
314: 
315:   PetscFree(onodes2);
316:   PetscFree(olengths2);

318:   PetscFree(pa);
319:   PetscFree(rbuf2[0]);
320:   PetscFree(rbuf2);
321:   PetscFree(s_waits1);
322:   PetscFree(r_waits1);
323:   PetscFree(s_waits2);
324:   PetscFree(r_waits2);
325:   PetscFree5(table,data,isz,d_p,t_p);
326:   PetscFree(s_status);
327:   PetscFree(recv_status);
328:   PetscFree(xdata[0]);
329:   PetscFree(xdata);
330:   PetscFree(isz1);
331:   return(0);
332: }

336: /*  
337:    MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap, to do
338:        the work on the local processor.

340:      Inputs:
341:       C      - MAT_MPIAIJ;
342:       imax - total no of index sets processed at a time;
343:       table  - an array of bit tables (PetscBT), one per index set, each of size M bits.
344:       
345:      Output:
346:       isz    - array containing the count of the solution elements corresponding
347:                to each index set;
348:       data   - pointer to the solutions
349: */
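/*
   Descriptive note: for each index set, every row already present in the set
   contributes the global column indices of its nonzeros, taken from both the
   diagonal block (c->A) and the off-diagonal block (c->B, mapped through
   garray); the bit table table[i] filters out duplicates.
*/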
350: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
351: {
352:   Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
353:   Mat        A = c->A,B = c->B;
354:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
355:   PetscInt   start,end,val,max,rstart,cstart,*ai,*aj;
356:   PetscInt   *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
357:   PetscBT    table_i;

360:   rstart = C->rmap->rstart;
361:   cstart = C->cmap->rstart;
362:   ai     = a->i;
363:   aj     = a->j;
364:   bi     = b->i;
365:   bj     = b->j;
366:   garray = c->garray;

368: 
369:   for (i=0; i<imax; i++) {
370:     data_i  = data[i];
371:     table_i = table[i];
372:     isz_i   = isz[i];
373:     for (j=0,max=isz[i]; j<max; j++) {
374:       row   = data_i[j] - rstart;
375:       start = ai[row];
376:       end   = ai[row+1];
377:       for (k=start; k<end; k++) { /* Amat */
378:         val = aj[k] + cstart;
379:         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
380:       }
381:       start = bi[row];
382:       end   = bi[row+1];
383:       for (k=start; k<end; k++) { /* Bmat */
384:         val = garray[bj[k]];
385:         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
386:       }
387:     }
388:     isz[i] = isz_i;
389:   }
390:   return(0);
391: }

395: /*     
396:       MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages,
397:          and return the output

399:          Input:
400:            C    - the matrix
401:            nrqr - no of messages being processed.
402:            rbuf - an array of pointers to the received requests
403:            
404:          Output:
405:            xdata - array of messages to be sent back
406:            isz1  - size of each message

408:   For better efficiency perhaps we should malloc each xdata[i] separately;
409: then if a re-malloc is required we need only copy the data for that one row
410: rather than all previous rows, as is done now where a single large chunk of
411: memory is used.

413: */
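/*
   Descriptive note: each reply xdata[i] mirrors the request layout:
   xdata[i][0] is the number of index sets, followed by (index set, count)
   header pairs, followed by the duplicate-free row and column indices found
   for each set; isz1[i] is the total length of that reply.
*/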
414: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
415: {
416:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
417:   Mat            A = c->A,B = c->B;
418:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
420:   PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
421:   PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
422:   PetscInt       val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
423:   PetscInt       *rbuf_i,kmax,rbuf_0;
424:   PetscBT        xtable;

427:   m      = C->rmap->N;
428:   rstart = C->rmap->rstart;
429:   cstart = C->cmap->rstart;
430:   ai     = a->i;
431:   aj     = a->j;
432:   bi     = b->i;
433:   bj     = b->j;
434:   garray = c->garray;
435: 
436: 
437:   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
438:     rbuf_i  =  rbuf[i];
439:     rbuf_0  =  rbuf_i[0];
440:     ct     += rbuf_0;
441:     for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
442:   }
443: 
444:   if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
445:   else      max1 = 1;
446:   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
447:   PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);
448:   ++no_malloc;
449:   PetscBTCreate(m,&xtable);
450:   PetscMemzero(isz1,nrqr*sizeof(PetscInt));
451: 
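  /* Descriptive note: all replies share the single buffer xdata[0]; whenever the
     running total ct3 reaches mem_estimate, the buffer is grown by a factor of
     1.5 and the xdata[] pointers are rebased onto the new allocation. */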
452:   ct3 = 0;
453:   for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
454:     rbuf_i =  rbuf[i];
455:     rbuf_0 =  rbuf_i[0];
456:     ct1    =  2*rbuf_0+1;
457:     ct2    =  ct1;
458:     ct3    += ct1;
459:     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
460:       PetscBTMemzero(m,xtable);
461:       oct2 = ct2;
462:       kmax = rbuf_i[2*j];
463:       for (k=0; k<kmax; k++,ct1++) {
464:         row = rbuf_i[ct1];
465:         if (!PetscBTLookupSet(xtable,row)) {
466:           if (!(ct3 < mem_estimate)) {
467:             new_estimate = (PetscInt)(1.5*mem_estimate)+1;
468:             PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
469:             PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
470:             PetscFree(xdata[0]);
471:             xdata[0]     = tmp;
472:             mem_estimate = new_estimate; ++no_malloc;
473:             for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
474:           }
475:           xdata[i][ct2++] = row;
476:           ct3++;
477:         }
478:       }
479:       for (k=oct2,max2=ct2; k<max2; k++) {
480:         row   = xdata[i][k] - rstart;
481:         start = ai[row];
482:         end   = ai[row+1];
483:         for (l=start; l<end; l++) {
484:           val = aj[l] + cstart;
485:           if (!PetscBTLookupSet(xtable,val)) {
486:             if (!(ct3 < mem_estimate)) {
487:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
488:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
489:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
490:               PetscFree(xdata[0]);
491:               xdata[0]     = tmp;
492:               mem_estimate = new_estimate; ++no_malloc;
493:               for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
494:             }
495:             xdata[i][ct2++] = val;
496:             ct3++;
497:           }
498:         }
499:         start = bi[row];
500:         end   = bi[row+1];
501:         for (l=start; l<end; l++) {
502:           val = garray[bj[l]];
503:           if (!PetscBTLookupSet(xtable,val)) {
504:             if (!(ct3 < mem_estimate)) {
505:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
506:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
507:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
508:               PetscFree(xdata[0]);
509:               xdata[0]     = tmp;
510:               mem_estimate = new_estimate; ++no_malloc;
511:               for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
512:             }
513:             xdata[i][ct2++] = val;
514:             ct3++;
515:           }
516:         }
517:       }
518:       /* Update the header*/
519:       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
520:       xdata[i][2*j-1] = rbuf_i[2*j-1];
521:     }
522:     xdata[i][0] = rbuf_0;
523:     xdata[i+1]  = xdata[i] + ct2;
524:     isz1[i]     = ct2; /* size of each message */
525:   }
526:   PetscBTDestroy(&xtable);
527:   PetscInfo3(C,"Allocated %D bytes, required %D bytes, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);
528:   return(0);
529: }
530: /* -------------------------------------------------------------------------*/
531: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
532: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
533: /*
534:     Every processor gets the entire matrix
535: */
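/*
    Descriptive note: the whole matrix is replicated on every process with three
    MPI_Allgatherv calls: first the row lengths, then the column indices, and
    (only when flag == MAT_GET_VALUES) the numerical values.
*/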
538: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
539: {
540:   Mat            B;
541:   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
542:   Mat_SeqAIJ     *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
544:   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
545:   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
546:   PetscInt       m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
547:   MatScalar      *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;

550:   MPI_Comm_size(((PetscObject)A)->comm,&size);
551:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);

553:   if (scall == MAT_INITIAL_MATRIX) {
554:     /* ----------------------------------------------------------------
555:          Tell every processor the number of nonzeros per row
556:     */
557:     PetscMalloc(A->rmap->N*sizeof(PetscInt),&lens);
558:     for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
559:       lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
560:     }
561:     sendcount = A->rmap->rend - A->rmap->rstart;
562:     PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
563:     for (i=0; i<size; i++) {
564:       recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
565:       displs[i]     = A->rmap->range[i];
566:     }
567: #if defined(PETSC_HAVE_MPI_IN_PLACE)
568:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
569: #else
570:     MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
571: #endif
572:     /* ---------------------------------------------------------------
573:          Create the sequential matrix of the same type as the local block diagonal
574:     */
575:     MatCreate(PETSC_COMM_SELF,&B);
576:     MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
577:     MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
578:     MatSetType(B,((PetscObject)a->A)->type_name);
579:     MatSeqAIJSetPreallocation(B,0,lens);
580:     PetscMalloc(sizeof(Mat),Bin);
581:     **Bin = B;
582:     b = (Mat_SeqAIJ *)B->data;

584:     /*--------------------------------------------------------------------
585:        Copy my part of matrix column indices over
586:     */
587:     sendcount  = ad->nz + bd->nz;
588:     jsendbuf   = b->j + b->i[rstarts[rank]];
589:     a_jsendbuf = ad->j;
590:     b_jsendbuf = bd->j;
591:     n          = A->rmap->rend - A->rmap->rstart;
592:     cnt        = 0;
593:     for (i=0; i<n; i++) {

595:       /* put in lower diagonal portion */
596:       m = bd->i[i+1] - bd->i[i];
597:       while (m > 0) {
598:         /* is it above diagonal (in bd (compressed) numbering) */
599:         if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
600:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
601:         m--;
602:       }

604:       /* put in diagonal portion */
605:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
606:         jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
607:       }

609:       /* put in upper diagonal portion */
610:       while (m-- > 0) {
611:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
612:       }
613:     }
614:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

616:     /*--------------------------------------------------------------------
617:        Gather all column indices to all processors
618:     */
619:     for (i=0; i<size; i++) {
620:       recvcounts[i] = 0;
621:       for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
622:         recvcounts[i] += lens[j];
623:       }
624:     }
625:     displs[0]  = 0;
626:     for (i=1; i<size; i++) {
627:       displs[i] = displs[i-1] + recvcounts[i-1];
628:     }
629: #if defined(PETSC_HAVE_MPI_IN_PLACE)
630:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
631: #else
632:     MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
633: #endif
634:     /*--------------------------------------------------------------------
635:         Assemble the matrix into usable form (note numerical values not yet set)
636:     */
637:     /* set the b->ilen (length of each row) values */
638:     PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
639:     /* set the b->i indices */
640:     b->i[0] = 0;
641:     for (i=1; i<=A->rmap->N; i++) {
642:       b->i[i] = b->i[i-1] + lens[i-1];
643:     }
644:     PetscFree(lens);
645:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
646:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

648:   } else {
649:     B  = **Bin;
650:     b = (Mat_SeqAIJ *)B->data;
651:   }

653:   /*--------------------------------------------------------------------
654:        Copy my part of matrix numerical values into the values location 
655:   */
656:   if (flag == MAT_GET_VALUES){
657:     sendcount = ad->nz + bd->nz;
658:     sendbuf   = b->a + b->i[rstarts[rank]];
659:     a_sendbuf = ad->a;
660:     b_sendbuf = bd->a;
661:     b_sendj   = bd->j;
662:     n         = A->rmap->rend - A->rmap->rstart;
663:     cnt       = 0;
664:     for (i=0; i<n; i++) {

666:       /* put in lower diagonal portion */
667:       m = bd->i[i+1] - bd->i[i];
668:       while (m > 0) {
669:         /* is it above diagonal (in bd (compressed) numbering) */
670:         if (garray[*b_sendj] > A->rmap->rstart + i) break;
671:         sendbuf[cnt++] = *b_sendbuf++;
672:         m--;
673:         b_sendj++;
674:       }

676:       /* put in diagonal portion */
677:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
678:         sendbuf[cnt++] = *a_sendbuf++;
679:       }

681:       /* put in upper diagonal portion */
682:       while (m-- > 0) {
683:         sendbuf[cnt++] = *b_sendbuf++;
684:         b_sendj++;
685:       }
686:     }
687:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
688: 
689:     /* ----------------------------------------------------------------- 
690:        Gather all numerical values to all processors 
691:     */
692:     if (!recvcounts) {
693:       PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
694:     }
695:     for (i=0; i<size; i++) {
696:       recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
697:     }
698:     displs[0]  = 0;
699:     for (i=1; i<size; i++) {
700:       displs[i] = displs[i-1] + recvcounts[i-1];
701:     }
702:     recvbuf   = b->a;
703: #if defined(PETSC_HAVE_MPI_IN_PLACE)
704:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
705: #else
706:     MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
707: #endif
708:   }  /* end of (flag == MAT_GET_VALUES) */
709:   PetscFree2(recvcounts,displs);

711:   if (A->symmetric){
712:     MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
713:   } else if (A->hermitian) {
714:     MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
715:   } else if (A->structurally_symmetric) {
716:     MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
717:   }
718:   return(0);
719: }



725: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
726: {
728:   PetscInt       nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
729:   PetscBool      rowflag,colflag,wantallmatrix=PETSC_FALSE,twantallmatrix,*allcolumns;


733:   /* Currently, unsorted column indices will result in inverted column indices in the resulting submatrices. */
734:   /* It would make sense to error out in MatGetSubMatrices_MPIAIJ_Local(), the most impl-specific level.
735:      However, there are more careful users of MatGetSubMatrices_MPIAIJ_Local() -- MatPermute_MPIAIJ() -- that 
736:      take care to order the result correctly by assembling it with MatSetValues() (after preallocating).
737:    */
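  /* (A caller holding unsorted column index sets can sort them first, e.g. with
     ISSort(iscol[i]), before invoking this routine -- an illustrative suggestion,
     not something done here.) */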
738:   for(i = 0; i < ismax; ++i) {
739:     PetscBool sorted;
740:     ISSorted(iscol[i], &sorted);
741:     if(!sorted) SETERRQ1(((PetscObject)iscol[i])->comm, PETSC_ERR_SUP, "Column index set %D not sorted", i);
742:   }

744:   /*
745:        Check for special case: each processor gets entire matrix
746:   */
747:   if (ismax == 1 && C->rmap->N == C->cmap->N) {
748:     ISIdentity(*isrow,&rowflag);
749:     ISIdentity(*iscol,&colflag);
750:     ISGetLocalSize(*isrow,&nrow);
751:     ISGetLocalSize(*iscol,&ncol);
752:     if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
753:       wantallmatrix = PETSC_TRUE;
754:       PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,PETSC_NULL);
755:     }
756:   }
757:   MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPI_INT,MPI_MIN,((PetscObject)C)->comm);
758:   if (twantallmatrix) {
759:     MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
760:     return(0);
761:   }

763:   /* Allocate memory to hold all the submatrices */
764:   if (scall != MAT_REUSE_MATRIX) {
765:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
766:   }

768:   /* Check for special case: each processor gets entire matrix columns */
769:   PetscMalloc((ismax+1)*sizeof(PetscBool),&allcolumns);
770:   for (i=0; i<ismax; i++) {
771:     ISIdentity(iscol[i],&colflag);
772:     ISGetLocalSize(iscol[i],&ncol);
773:     if (colflag && ncol == C->cmap->N){
774:       allcolumns[i] = PETSC_TRUE;
775:     } else {
776:       allcolumns[i] = PETSC_FALSE;
777:     }
778:   }

780:   /* Determine the number of stages through which submatrices are done */
781:   nmax          = 20*1000000 / (C->cmap->N * sizeof(PetscInt));

783:   /* 
784:      Each stage will extract nmax submatrices.  
785:      nmax is determined by the matrix column dimension.
786:      If the original matrix has 20M columns, only one submatrix per stage is allowed, etc.
787:   */
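  /*
     Worked example (illustrative numbers): with C->cmap->N = 1000000 columns and
     a 4-byte PetscInt, nmax = 20000000/(1000000*4) = 5, so at most 5 submatrices
     are extracted per stage; ismax = 12 then gives nstages_local = 3.
  */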
788:   if (!nmax) nmax = 1;
789:   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

791:   /* Make sure every processor loops through the nstages */
792:   MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);

794:   for (i=0,pos=0; i<nstages; i++) {
795:     if (pos+nmax <= ismax) max_no = nmax;
796:     else if (pos == ismax) max_no = 0;
797:     else                   max_no = ismax-pos;
798:     MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,allcolumns+pos,*submat+pos);
799:     pos += max_no;
800:   }

802:   PetscFree(allcolumns);
803:   return(0);
804: }

806: /* -------------------------------------------------------------------------*/
809: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool *allcolumns,Mat *submats)
810: {
811:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
812:   Mat            A = c->A;
813:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
814:   const PetscInt **icol,**irow;
815:   PetscInt       *nrow,*ncol,start;
817:   PetscMPIInt    rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
818:   PetscInt       **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
819:   PetscInt       nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
820:   PetscInt       **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
821:   PetscInt       **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
822: #if defined (PETSC_USE_CTABLE)
823:   PetscTable     *cmap,cmap_i=PETSC_NULL,*rmap,rmap_i;
824: #else
825:   PetscInt       **cmap,*cmap_i=PETSC_NULL,**rmap,*rmap_i;
826: #endif
827:   const PetscInt *irow_i;
828:   PetscInt       ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
829:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
830:   MPI_Request    *r_waits4,*s_waits3,*s_waits4;
831:   MPI_Status     *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
832:   MPI_Status     *r_status3,*r_status4,*s_status4;
833:   MPI_Comm       comm;
834:   PetscScalar    **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
835:   PetscMPIInt    *onodes1,*olengths1;
836:   PetscMPIInt    idex,idex2,end;


840:   comm   = ((PetscObject)C)->comm;
841:   tag0   = ((PetscObject)C)->tag;
842:   size   = c->size;
843:   rank   = c->rank;
844: 
845:   /* Get some new tags to keep the communication clean */
846:   PetscObjectGetNewTag((PetscObject)C,&tag1);
847:   PetscObjectGetNewTag((PetscObject)C,&tag2);
848:   PetscObjectGetNewTag((PetscObject)C,&tag3);
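  /* Descriptive note on the communication phases: tag0 carries the requested
     global row numbers, tag1 the reply sizes (number of nonzeros in each
     requested row), tag2 the column indices, and tag3 the numerical values. */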

850:   PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);

852:   for (i=0; i<ismax; i++) {
853:     ISGetIndices(isrow[i],&irow[i]);
854:     ISGetLocalSize(isrow[i],&nrow[i]);
855:     if (allcolumns[i]){
856:       icol[i] = PETSC_NULL;
857:       ncol[i] = C->cmap->N;
858:     } else {
859:       ISGetIndices(iscol[i],&icol[i]);
860:       ISGetLocalSize(iscol[i],&ncol[i]);
861:     }
862:   }

864:   /* evaluate communication - mesg to who, length of mesg, and buffer space
865:      required. Based on this, buffers are allocated, and data copied into them*/
866:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4); /* mesg size */
867:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialize work vector*/
868:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialize work vector*/
869:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialize work vector*/
870:   for (i=0; i<ismax; i++) {
871:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
872:     jmax   = nrow[i];
873:     irow_i = irow[i];
874:     for (j=0; j<jmax; j++) {
875:       l = 0;
876:       row  = irow_i[j];
877:       while (row >= C->rmap->range[l+1]) l++;
878:       proc = l;
879:       w4[proc]++;
880:     }
881:     for (j=0; j<size; j++) {
882:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
883:     }
884:   }
885: 
886:   nrqs     = 0;              /* no of outgoing messages */
887:   msz      = 0;              /* total mesg length (for all procs) */
888:   w1[rank] = 0;              /* no mesg sent to self */
889:   w3[rank] = 0;
890:   for (i=0; i<size; i++) {
891:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
892:   }
893:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa); /*(proc -array)*/
894:   for (i=0,j=0; i<size; i++) {
895:     if (w1[i]) { pa[j] = i; j++; }
896:   }

898:   /* Each message would have a header = 1 + 2*(no of IS) + data */
899:   for (i=0; i<nrqs; i++) {
900:     j     = pa[i];
901:     w1[j] += w2[j] + 2* w3[j];
902:     msz   += w1[j];
903:   }
904:   PetscInfo2(C,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);

906:   /* Determine the number of messages to expect, their lengths, and the from-ids */
907:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
908:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

910:   /* Now post the Irecvs corresponding to these messages */
911:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
912: 
913:   PetscFree(onodes1);
914:   PetscFree(olengths1);
915: 
916:   /* Allocate Memory for outgoing messages */
917:   PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);
918:   PetscMemzero(sbuf1,size*sizeof(PetscInt*));
919:   PetscMemzero(ptr,size*sizeof(PetscInt*));

921:   {
922:     PetscInt *iptr = tmp,ict = 0;
923:     for (i=0; i<nrqs; i++) {
924:       j         = pa[i];
925:       iptr     += ict;
926:       sbuf1[j]  = iptr;
927:       ict       = w1[j];
928:     }
929:   }

931:   /* Form the outgoing messages */
932:   /* Initialize the header space */
933:   for (i=0; i<nrqs; i++) {
934:     j           = pa[i];
935:     sbuf1[j][0] = 0;
936:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
937:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
938:   }
939: 
940:   /* Parse the isrow and copy data into outbuf */
941:   for (i=0; i<ismax; i++) {
942:     PetscMemzero(ctr,size*sizeof(PetscInt));
943:     irow_i = irow[i];
944:     jmax   = nrow[i];
945:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
946:       l = 0;
947:       row  = irow_i[j];
948:       while (row >= C->rmap->range[l+1]) l++;
949:       proc = l;
950:       if (proc != rank) { /* copy to the outgoing buf*/
951:         ctr[proc]++;
952:         *ptr[proc] = row;
953:         ptr[proc]++;
954:       }
955:     }
956:     /* Update the headers for the current IS */
957:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
958:       if ((ctr_j = ctr[j])) {
959:         sbuf1_j        = sbuf1[j];
960:         k              = ++sbuf1_j[0];
961:         sbuf1_j[2*k]   = ctr_j;
962:         sbuf1_j[2*k-1] = i;
963:       }
964:     }
965:   }

967:   /*  Now  post the sends */
968:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
969:   for (i=0; i<nrqs; ++i) {
970:     j    = pa[i];
971:     MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
972:   }

974:   /* Post Receives to capture the buffer size */
975:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
976:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);
977:   rbuf2[0] = tmp + msz;
978:   for (i=1; i<nrqs; ++i) {
979:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
980:   }
981:   for (i=0; i<nrqs; ++i) {
982:     j    = pa[i];
983:     MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
984:   }

986:   /* Send to other procs the buf size they should allocate */
987: 

989:   /* Receive messages*/
990:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
991:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
992:   PetscMalloc3(nrqr,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);
993:   {
994:     Mat_SeqAIJ  *sA = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
995:     PetscInt    *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
996:     PetscInt    *sbuf2_i;

998:     for (i=0; i<nrqr; ++i) {
999:       MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);
1000:       req_size[idex] = 0;
1001:       rbuf1_i         = rbuf1[idex];
1002:       start           = 2*rbuf1_i[0] + 1;
1003:       MPI_Get_count(r_status1+i,MPIU_INT,&end);
1004:       PetscMalloc((end+1)*sizeof(PetscInt),&sbuf2[idex]);
1005:       sbuf2_i         = sbuf2[idex];
1006:       for (j=start; j<end; j++) {
1007:         id               = rbuf1_i[j] - rstart;
1008:         ncols            = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
1009:         sbuf2_i[j]       = ncols;
1010:         req_size[idex] += ncols;
1011:       }
1012:       req_source[idex] = r_status1[i].MPI_SOURCE;
1013:       /* form the header */
1014:       sbuf2_i[0]   = req_size[idex];
1015:       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
1016:       MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
1017:     }
1018:   }
1019:   PetscFree(r_status1);
1020:   PetscFree(r_waits1);

1022:   /*  recv buffer sizes */
1023:   /* Receive messages*/

1025:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);
1026:   PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf4);
1027:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
1028:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
1029:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

1031:   for (i=0; i<nrqs; ++i) {
1032:     MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
1033:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscInt),&rbuf3[idex]);
1034:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscScalar),&rbuf4[idex]);
1035:     MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1036:     MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1037:   }
1038:   PetscFree(r_status2);
1039:   PetscFree(r_waits2);
1040: 
1041:   /* Wait on sends1 and sends2 */
1042:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
1043:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

1045:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1046:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1047:   PetscFree(s_status1);
1048:   PetscFree(s_status2);
1049:   PetscFree(s_waits1);
1050:   PetscFree(s_waits2);

1052:   /* Now allocate buffers for a->j, and send them off */
1053:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);
1054:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1055:   PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);
1056:   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
1057: 
1058:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
1059:   {
1060:     PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1061:     PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1062:     PetscInt cend = C->cmap->rend;
1063:     PetscInt *a_j = a->j,*b_j = b->j,ctmp;

1065:     for (i=0; i<nrqr; i++) {
1066:       rbuf1_i   = rbuf1[i];
1067:       sbuf_aj_i = sbuf_aj[i];
1068:       ct1       = 2*rbuf1_i[0] + 1;
1069:       ct2       = 0;
1070:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1071:         kmax = rbuf1[i][2*j];
1072:         for (k=0; k<kmax; k++,ct1++) {
1073:           row    = rbuf1_i[ct1] - rstart;
1074:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1075:           ncols  = nzA + nzB;
1076:           cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];

1078:           /* load the column indices for this row into cols*/
1079:           cols  = sbuf_aj_i + ct2;
1080: 
1081:           lwrite = 0;
1082:           for (l=0; l<nzB; l++) {
1083:             if ((ctmp = bmap[cworkB[l]]) < cstart)  cols[lwrite++] = ctmp;
1084:           }
1085:           for (l=0; l<nzA; l++)   cols[lwrite++] = cstart + cworkA[l];
1086:           for (l=0; l<nzB; l++) {
1087:             if ((ctmp = bmap[cworkB[l]]) >= cend)  cols[lwrite++] = ctmp;
1088:           }

1090:           ct2 += ncols;
1091:         }
1092:       }
1093:       MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1094:     }
1095:   }
1096:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
1097:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

1099:   /* Allocate buffers for a->a, and send them off */
1100:   PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf_aa);
1101:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1102:   PetscMalloc((j+1)*sizeof(PetscScalar),&sbuf_aa[0]);
1103:   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
1104: 
1105:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
1106:   {
1107:     PetscInt    nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1108:     PetscInt    cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1109:     PetscInt    cend = C->cmap->rend;
1110:     PetscInt    *b_j = b->j;
1111:     PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;
1112: 
1113:     for (i=0; i<nrqr; i++) {
1114:       rbuf1_i   = rbuf1[i];
1115:       sbuf_aa_i = sbuf_aa[i];
1116:       ct1       = 2*rbuf1_i[0]+1;
1117:       ct2       = 0;
1118:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1119:         kmax = rbuf1_i[2*j];
1120:         for (k=0; k<kmax; k++,ct1++) {
1121:           row    = rbuf1_i[ct1] - rstart;
1122:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1123:           ncols  = nzA + nzB;
1124:           cworkB = b_j + b_i[row];
1125:           vworkA = a_a + a_i[row];
1126:           vworkB = b_a + b_i[row];

1128:           /* load the column values for this row into vals*/
1129:           vals  = sbuf_aa_i+ct2;
1130: 
1131:           lwrite = 0;
1132:           for (l=0; l<nzB; l++) {
1133:             if ((bmap[cworkB[l]]) < cstart)  vals[lwrite++] = vworkB[l];
1134:           }
1135:           for (l=0; l<nzA; l++)   vals[lwrite++] = vworkA[l];
1136:           for (l=0; l<nzB; l++) {
1137:             if ((bmap[cworkB[l]]) >= cend)  vals[lwrite++] = vworkB[l];
1138:           }
1139: 
1140:           ct2 += ncols;
1141:         }
1142:       }
1143:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1144:     }
1145:   }
1146:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
1147:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
1148:   PetscFree(rbuf1[0]);
1149:   PetscFree(rbuf1);

1151:   /* Form the matrix */
1152:   /* create col map: global col of C -> local col of submatrices */
1153:   {
1154:     const PetscInt *icol_i;
1155: #if defined (PETSC_USE_CTABLE)
1156:     PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);
1157:     for (i=0; i<ismax; i++) {
1158:       if (!allcolumns[i]){
1159:         PetscTableCreate(ncol[i]+1,C->cmap->N+1,&cmap[i]);
1160:         jmax   = ncol[i];
1161:         icol_i = icol[i];
1162:         cmap_i = cmap[i];
1163:         for (j=0; j<jmax; j++) {
1164:           PetscTableAdd(cmap[i],icol_i[j]+1,j+1,INSERT_VALUES);
1165:         }
1166:       } else {
1167:         cmap[i] = PETSC_NULL;
1168:       }
1169:     }
1170: #else
1171:     PetscMalloc(ismax*sizeof(PetscInt*),&cmap);
1172:     for (i=0; i<ismax; i++) {
1173:       if (!allcolumns[i]){
1174:         PetscMalloc(C->cmap->N*sizeof(PetscInt),&cmap[i]);
1175:         PetscMemzero(cmap[i],C->cmap->N*sizeof(PetscInt));
1176:         jmax   = ncol[i];
1177:         icol_i = icol[i];
1178:         cmap_i = cmap[i];
1179:         for (j=0; j<jmax; j++) {
1180:           cmap_i[icol_i[j]] = j+1;
1181:         }
1182:       } else {
1183:         cmap[i] = PETSC_NULL;
1184:       }
1185:     }
1186: #endif
1187:   }

1189:   /* Create lens which is required for MatCreate... */
1190:   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
1191:   PetscMalloc(ismax*sizeof(PetscInt*),&lens);
1192:   if (ismax) {
1193:     PetscMalloc(j*sizeof(PetscInt),&lens[0]);
1194:     PetscMemzero(lens[0],j*sizeof(PetscInt));
1195:   }
1196:   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
1197: 
1198:   /* Update lens from local data */
1199:   for (i=0; i<ismax; i++) {
1200:     jmax   = nrow[i];
1201:     if (!allcolumns[i]) cmap_i = cmap[i];
1202:     irow_i = irow[i];
1203:     lens_i = lens[i];
1204:     for (j=0; j<jmax; j++) {
1205:       l = 0;
1206:       row  = irow_i[j];
1207:       while (row >= C->rmap->range[l+1]) l++;
1208:       proc = l;
1209:       if (proc == rank) {
1210:         MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1211:         if (!allcolumns[i]){
1212:           for (k=0; k<ncols; k++) {
1213: #if defined (PETSC_USE_CTABLE)
1214:             PetscTableFind(cmap_i,cols[k]+1,&tcol);
1215: #else
1216:             tcol = cmap_i[cols[k]];
1217: #endif 
1218:             if (tcol) { lens_i[j]++;}
1219:           }
1220:         } else { /* allcolumns */
1221:           lens_i[j] = ncols;
1222:         }
1223:         MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1224:       }
1225:     }
1226:   }
1227: 
1228:   /* Create row map: global row of C -> local row of submatrices */
1229: #if defined (PETSC_USE_CTABLE)
1230:   PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);
1231:     for (i=0; i<ismax; i++) {
1232:       PetscTableCreate(nrow[i]+1,C->rmap->N+1,&rmap[i]);
1233:       rmap_i = rmap[i];
1234:       irow_i = irow[i];
1235:       jmax   = nrow[i];
1236:       for (j=0; j<jmax; j++) {
1237:         PetscTableAdd(rmap[i],irow_i[j]+1,j+1,INSERT_VALUES);
1238:       }
1239:     }
1240: #else
1241:   PetscMalloc(ismax*sizeof(PetscInt*),&rmap);
1242:   if (ismax) {
1243:     PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);
1244:     PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1245:   }
1246:   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
1247:   for (i=0; i<ismax; i++) {
1248:     rmap_i = rmap[i];
1249:     irow_i = irow[i];
1250:     jmax   = nrow[i];
1251:     for (j=0; j<jmax; j++) {
1252:       rmap_i[irow_i[j]] = j;
1253:     }
1254:   }
1255: #endif
1256: 
1257:   /* Update lens from offproc data */
1258:   {
1259:     PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;

1261:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1262:       MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1263:       idex   = pa[idex2];
1264:       sbuf1_i = sbuf1[idex];
1265:       jmax    = sbuf1_i[0];
1266:       ct1     = 2*jmax+1;
1267:       ct2     = 0;
1268:       rbuf2_i = rbuf2[idex2];
1269:       rbuf3_i = rbuf3[idex2];
1270:       for (j=1; j<=jmax; j++) {
1271:         is_no   = sbuf1_i[2*j-1];
1272:         max1    = sbuf1_i[2*j];
1273:         lens_i  = lens[is_no];
1274:         if (!allcolumns[is_no]){cmap_i  = cmap[is_no];}
1275:         rmap_i  = rmap[is_no];
1276:         for (k=0; k<max1; k++,ct1++) {
1277: #if defined (PETSC_USE_CTABLE)
1278:           PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1279:           row--;
1280:           if (row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); }
1281: #else
1282:           row  = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1283: #endif
1284:           max2 = rbuf2_i[ct1];
1285:           for (l=0; l<max2; l++,ct2++) {
1286:             if (!allcolumns[is_no]){
1287: #if defined (PETSC_USE_CTABLE)
1288:               PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1289: #else
1290:               tcol = cmap_i[rbuf3_i[ct2]];
1291: #endif
1292:               if (tcol) {
1293:                 lens_i[row]++;
1294:               }
1295:             } else { /* allcolumns */
1296:               lens_i[row]++; /* lens_i[row] += max2 ? */
1297:             }
1298:           }
1299:         }
1300:       }
1301:     }
1302:   }
1303:   PetscFree(r_status3);
1304:   PetscFree(r_waits3);
1305:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1306:   PetscFree(s_status3);
1307:   PetscFree(s_waits3);

1309:   /* Create the submatrices */
1310:   if (scall == MAT_REUSE_MATRIX) {
1311:     PetscBool  flag;

1313:     /*
1314:         Assumes new rows are the same length as the old rows, hence bug!
1315:     */
1316:     for (i=0; i<ismax; i++) {
1317:       mat = (Mat_SeqAIJ *)(submats[i]->data);
1318:       if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1319:       PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1320:       if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
1321:       /* Initial matrix as if empty */
1322:       PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));
1323:       submats[i]->factortype = C->factortype;
1324:     }
1325:   } else {
1326:     for (i=0; i<ismax; i++) {
1327:       PetscInt rbs,cbs;
1328:       ISGetBlockSize(isrow[i],&rbs);
1329:       ISGetBlockSize(iscol[i],&cbs);

1331:       MatCreate(PETSC_COMM_SELF,submats+i);
1332:       MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);

1334:       MatSetBlockSizes(submats[i],rbs,cbs);
1335:       MatSetType(submats[i],((PetscObject)A)->type_name);
1336:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1337:     }
1338:   }

1340:   /* Assemble the matrices */
1341:   /* First assemble the local rows */
1342:   {
1343:     PetscInt    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1344:     PetscScalar *imat_a;
1345: 
1346:     for (i=0; i<ismax; i++) {
1347:       mat       = (Mat_SeqAIJ*)submats[i]->data;
1348:       imat_ilen = mat->ilen;
1349:       imat_j    = mat->j;
1350:       imat_i    = mat->i;
1351:       imat_a    = mat->a;
1352:       if (!allcolumns[i]) cmap_i = cmap[i];
1353:       rmap_i    = rmap[i];
1354:       irow_i    = irow[i];
1355:       jmax      = nrow[i];
1356:       for (j=0; j<jmax; j++) {
1357:         l = 0;
1358:         row      = irow_i[j];
1359:         while (row >= C->rmap->range[l+1]) l++;
1360:         proc = l;
1361:         if (proc == rank) {
1362:           old_row  = row;
1363: #if defined (PETSC_USE_CTABLE)
1364:           PetscTableFind(rmap_i,row+1,&row);
1365:           row--;
1366: #else
1367:           row      = rmap_i[row];
1368: #endif
1369:           ilen_row = imat_ilen[row];
1370:           MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1371:           mat_i    = imat_i[row] ;
1372:           mat_a    = imat_a + mat_i;
1373:           mat_j    = imat_j + mat_i;
1374:           if (!allcolumns[i]){
1375:             for (k=0; k<ncols; k++) {
1376: #if defined (PETSC_USE_CTABLE)
1377:               PetscTableFind(cmap_i,cols[k]+1,&tcol);
1378: #else
1379:               tcol = cmap_i[cols[k]];
1380: #endif
1381:               if (tcol){
1382:                 *mat_j++ = tcol - 1;
1383:                 *mat_a++ = vals[k];
1384:                 ilen_row++;
1385:               }
1386:             }
1387:           } else { /* allcolumns */
1388:             for (k=0; k<ncols; k++) {
1389:               *mat_j++ = cols[k] ; /* global col index! */
1390:               *mat_a++ = vals[k];
1391:               ilen_row++;
1392:             }
1393:           }
1394:           MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1395:           imat_ilen[row] = ilen_row;
1396:         }
1397:       }
1398:     }
1399:   }

1401:   /*   Now assemble the off proc rows*/
1402:   {
1403:     PetscInt    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1404:     PetscInt    *imat_j,*imat_i;
1405:     PetscScalar *imat_a,*rbuf4_i;

1407:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1408:       MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1409:       idex   = pa[idex2];
1410:       sbuf1_i = sbuf1[idex];
1411:       jmax    = sbuf1_i[0];
1412:       ct1     = 2*jmax + 1;
1413:       ct2     = 0;
1414:       rbuf2_i = rbuf2[idex2];
1415:       rbuf3_i = rbuf3[idex2];
1416:       rbuf4_i = rbuf4[idex2];
1417:       for (j=1; j<=jmax; j++) {
1418:         is_no     = sbuf1_i[2*j-1];
1419:         rmap_i    = rmap[is_no];
1420:         if (!allcolumns[is_no]){cmap_i = cmap[is_no];}
1421:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
1422:         imat_ilen = mat->ilen;
1423:         imat_j    = mat->j;
1424:         imat_i    = mat->i;
1425:         imat_a    = mat->a;
1426:         max1      = sbuf1_i[2*j];
1427:         for (k=0; k<max1; k++,ct1++) {
1428:           row   = sbuf1_i[ct1];
1429: #if defined (PETSC_USE_CTABLE)
1430:           PetscTableFind(rmap_i,row+1,&row);
1431:           row--;
1432: #else
1433:           row   = rmap_i[row];
1434: #endif
1435:           ilen  = imat_ilen[row];
1436:           mat_i = imat_i[row] ;
1437:           mat_a = imat_a + mat_i;
1438:           mat_j = imat_j + mat_i;
1439:           max2 = rbuf2_i[ct1];
1440:           if (!allcolumns[is_no]){
1441:             for (l=0; l<max2; l++,ct2++) {
1442: 
1443: #if defined (PETSC_USE_CTABLE)
1444:               PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1445: #else
1446:               tcol = cmap_i[rbuf3_i[ct2]];
1447: #endif
1448:               if (tcol) {
1449:                 *mat_j++ = tcol - 1;
1450:                 *mat_a++ = rbuf4_i[ct2];
1451:                 ilen++;
1452:               }
1453:             }
1454:           } else { /* allcolumns */
1455:             for (l=0; l<max2; l++,ct2++) {
1456:               *mat_j++ = rbuf3_i[ct2]; /* same global column index of C */
1457:               *mat_a++ = rbuf4_i[ct2];
1458:               ilen++;
1459:             }
1460:           }
1461:           imat_ilen[row] = ilen;
1462:         }
1463:       }
1464:     }
1465:   }

1467:   PetscFree(r_status4);
1468:   PetscFree(r_waits4);
1469:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1470:   PetscFree(s_waits4);
1471:   PetscFree(s_status4);

1473:   /* Restore the indices */
1474:   for (i=0; i<ismax; i++) {
1475:     ISRestoreIndices(isrow[i],irow+i);
1476:     if (!allcolumns[i]){
1477:       ISRestoreIndices(iscol[i],icol+i);
1478:     }
1479:   }

1481:   /* Destroy allocated memory */
1482:   PetscFree4(irow,icol,nrow,ncol);
1483:   PetscFree4(w1,w2,w3,w4);
1484:   PetscFree(pa);

1486:   PetscFree4(sbuf1,ptr,tmp,ctr);
1487:   PetscFree(rbuf2);
1488:   for (i=0; i<nrqr; ++i) {
1489:     PetscFree(sbuf2[i]);
1490:   }
1491:   for (i=0; i<nrqs; ++i) {
1492:     PetscFree(rbuf3[i]);
1493:     PetscFree(rbuf4[i]);
1494:   }

1496:   PetscFree3(sbuf2,req_size,req_source);
1497:   PetscFree(rbuf3);
1498:   PetscFree(rbuf4);
1499:   PetscFree(sbuf_aj[0]);
1500:   PetscFree(sbuf_aj);
1501:   PetscFree(sbuf_aa[0]);
1502:   PetscFree(sbuf_aa);

1504: #if defined (PETSC_USE_CTABLE)
1505:   for (i=0; i<ismax; i++) {PetscTableDestroy((PetscTable*)&rmap[i]);}
1506: #else
1507:   if (ismax) {PetscFree(rmap[0]);}
1508: #endif
1509:   PetscFree(rmap);

1511:   for (i=0; i<ismax; i++) {
1512:     if (!allcolumns[i]){
1513: #if defined (PETSC_USE_CTABLE)
1514:       PetscTableDestroy((PetscTable*)&cmap[i]);
1515: #else
1516:       PetscFree(cmap[i]);
1517: #endif
1518:     }
1519:   }
1520:   PetscFree(cmap);
1521:   if (ismax) {PetscFree(lens[0]);}
1522:   PetscFree(lens);

1524:   for (i=0; i<ismax; i++) {
1525:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1526:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1527:   }
1528:   return(0);
1529: }

1531: /* 
1532:  Observe that the Seq matrices used to construct this MPI matrix are not increfed.
1533:  Be careful not to destroy them elsewhere.  
1534:  */
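/*
   (In particular, destroying *C is expected to free A and B along with it,
   precisely because their reference counts were not increased here.)
*/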
1537: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1538: {
1539:   /* If making this function public, change the error returned in this function away from _PLIB. */
1541:   Mat_MPIAIJ     *aij;
1542:   PetscBool      seqaij;

1545:   /* Check that the component matrices are of the right type and compatible with each other. */
1546:   PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1547:   if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1548:   PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1549:   if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1550:   if (A->rmap->n != B->rmap->n || A->rmap->bs != B->rmap->bs || A->cmap->bs != B->cmap->bs) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");

1552:   MatCreate(comm, C);
1553:   MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1554:   MatSetBlockSizes(*C,A->rmap->bs, A->cmap->bs);
1555:   PetscLayoutSetUp((*C)->rmap);
1556:   PetscLayoutSetUp((*C)->cmap);
1557:   if((*C)->cmap->N != A->cmap->n + B->cmap->n) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1558:   MatSetType(*C, MATMPIAIJ);
1559:   aij = (Mat_MPIAIJ*)((*C)->data);
1560:   aij->A = A;
1561:   aij->B = B;
1562:   PetscLogObjectParent(*C,A);
1563:   PetscLogObjectParent(*C,B);
1564:   (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1565:   (*C)->assembled    = (PetscBool)(A->assembled && B->assembled);
1566:   return(0);
1567: }
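/*
  Editorial sketch (not part of the original source): a minimal illustration of the ownership
  convention noted above.  BuildExampleC, Adiag, and Boffdiag are hypothetical names; the
  sketch assumes Adiag and Boffdiag are assembled MATSEQAIJ matrices with the same local row
  layout.  Error checking is omitted, as in the listing above.
*/
#if 0
static PetscErrorCode BuildExampleC(MPI_Comm comm,Mat Adiag,Mat Boffdiag)
{
  Mat C;
  MatCreateMPIAIJFromSeqMatrices_Private(comm,Adiag,Boffdiag,&C);
  /* ... use C ... */
  MatDestroy(&C);  /* since Adiag and Boffdiag were not increfed, destroying C is expected to release them as well */
  /* do NOT call MatDestroy(&Adiag) or MatDestroy(&Boffdiag) after this point */
  return(0);
}
#endif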

1571: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1572: {
1573:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);
1577:   *A = aij->A;
1578:   *B = aij->B;
1579:   /* Note that we don't incref *A and *B, so be careful! */
1580:   return(0);
1581: }
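/*
  Editorial sketch (not part of the original source): the handles returned above are borrowed
  references that remain owned by C.  InspectParts is a hypothetical caller name.
*/
#if 0
static PetscErrorCode InspectParts(Mat C)
{
  Mat Ad,Ao;
  MatMPIAIJExtractSeqMatrices_Private(C,&Ad,&Ao);
  /* ... read from Ad (diag block) and Ao (off-diag block) ... */
  /* no MatDestroy() on Ad or Ao here: they are still owned by C */
  return(0);
}
#endif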

1585: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1586:                                                  PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1587:                                                  PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1588:                                                  PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1589: {
1591:   PetscMPIInt    size, flag;
1592:   PetscInt       i,ii;
1593:   PetscInt       ismax_c;

1596:   if (!ismax) {
1597:     return(0);
1598:   }
1599:   for (i = 0, ismax_c = 0; i < ismax; ++i) {
1600:     MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1601:     if(flag != MPI_IDENT) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1602:     MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1603:     if(size > 1) {
1604:       ++ismax_c;
1605:     }
1606:   }
1607:   if(!ismax_c) { /* Sequential ISs only, so we can call the sequential matrix extraction subroutine directly. */
1608:     (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1609:   } else { /* if(ismax_c) */
1610:     Mat         *A,*B;
1611:     IS          *isrow_c, *iscol_c;
1612:     PetscMPIInt size;
1613:     /* 
1614:      Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate 
1615:      array of sequential matrices underlying the resulting parallel matrices.  
1616:      Which arrays to allocate is based on the value of MatReuse scall.
1617:      There are as many diag matrices as there are original index sets.
1618:      There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.

1620:      Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists, 
1621:      we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which
1622:      getsubmats_seq will deposit the extracted diag and off-diag parts.
1623:      Who allocates them depends on reuse: if reuse is taking place, we have to allocate the seq matrix arrays here;
1624:      if reuse is NOT taking place, they are allocated by getsubmats_seq.
1625:     */
1626: 
1627:     /* Parallel matrix array is allocated only if no reuse is taking place. */
1628:     if (scall != MAT_REUSE_MATRIX) {
1629:       PetscMalloc((ismax)*sizeof(Mat),submat);
1630:     } else {
1631:       PetscMalloc(ismax*sizeof(Mat), &A);
1632:       PetscMalloc(ismax_c*sizeof(Mat), &B);
1633:       /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1634:       for(i = 0, ii = 0; i < ismax; ++i) {
1635:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1636:         if(size > 1) {
1637:           (*extractseq)((*submat)[i],A+i,B+ii);
1638:           ++ii;
1639:         } else {
1640:           A[i] = (*submat)[i];
1641:         }
1642:       }
1643:     }
1644:     /* 
1645:      Construct the complements of the iscol ISs for parallel ISs only.
1646:      These are used to extract the off-diag portion of the resulting parallel matrix.
1647:      The row IS for the off-diag portion is the same as for the diag portion,
1648:      so we merely alias the row IS, while skipping those that are sequential.
1649:     */
1650:     PetscMalloc2(ismax_c,IS,&isrow_c, ismax_c, IS, &iscol_c);
1651:     for(i = 0, ii = 0; i < ismax; ++i) {
1652:       MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1653:       if(size > 1) {
1654:         isrow_c[ii] = isrow[i];
1655:         ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1656:         ++ii;
1657:       }
1658:     }
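    /*
     Editorial note (not in the original source): after this loop, iscol_c[ii] on each process
     presumably holds the entries of iscol[i] stored on the other processes of that IS's
     communicator; these give the columns of the off-diag block B, while the locally stored
     entries of iscol[i] give the columns of the diag block A extracted in the calls below.
    */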
1659:     /* Now obtain the sequential A and B submatrices separately. */
1660:     (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1661:     (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1662:     for(ii = 0; ii < ismax_c; ++ii) {
1663:       ISDestroy(&iscol_c[ii]);
1664:     }
1665:     PetscFree2(isrow_c, iscol_c);
1666:     /* 
1667:      If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1668:      have been extracted directly into the parallel matrices containing them, or
1669:      simply into the sequential matrix identical to the corresponding A (if size == 1).
1670:      Otherwise, make sure that the parallel matrices are constructed from A & B, or that
1671:      A is put into the correct submat slot (if size == 1).
1672:      */
1673:     if(scall != MAT_REUSE_MATRIX) {
1674:       for(i = 0, ii = 0; i < ismax; ++i) {
1675:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1676:         if(size > 1) {
1677:           /* 
1678:            For each parallel isrow[i], create parallel matrices from the extracted sequential matrices. 
1679:            */
1680:           /* Construct submat[i] from the Seq pieces A and B. */
1681:           (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);
1682: 
1683:           ++ii;
1684:         } else {
1685:           (*submat)[i] = A[i];
1686:         }
1687:       }
1688:     }
1689:     PetscFree(A);
1690:     PetscFree(B);
1691:   }
1692:   return(0);
1693: }/* MatGetSubMatricesParallel_MPIXAIJ() */



1699: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1700: {

1704:   MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,MatCreateMPIAIJFromSeqMatrices_Private,MatMPIAIJExtractSeqMatrices_Private);
1705:   return(0);
1706: }
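/*
  Editorial sketch (not part of the original source): hypothetical caller-side use of the
  routine above.  It assumes C is an assembled MPIAIJ matrix and that isrow[]/iscol[] may live
  on sub-communicators spanning more than one process; ExampleExtractSubmatrices, nis, and
  submats are placeholder names.  Error checking is omitted, as in the listing above.
*/
#if 0
static PetscErrorCode ExampleExtractSubmatrices(Mat C,PetscInt nis,IS isrow[],IS iscol[])
{
  Mat *submats;
  /* first call allocates submats and creates the submatrices */
  MatGetSubMatricesParallel_MPIAIJ(C,nis,isrow,iscol,MAT_INITIAL_MATRIX,&submats);
  /* submats[i] is MPIAIJ when isrow[i] spans more than one process, SEQAIJ otherwise */
  /* ... later, after the values of C change but its nonzero pattern does not ... */
  MatGetSubMatricesParallel_MPIAIJ(C,nis,isrow,iscol,MAT_REUSE_MATRIX,&submats);
  return(0);
}
#endif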