Actual source code: mpiov.c

  2: /*
  3:    Routines to compute overlapping regions of a parallel MPI matrix
  4:   and to find submatrices that are shared across processors.
  5: */
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <petscbt.h>

  9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS *);
 10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char **,PetscInt*,PetscInt**);
 11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt **,PetscInt**,PetscInt*);

 17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
 18: {
 20:   PetscInt       i;

 23:   if (ov < 0) SETERRQ(((PetscObject)C)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 24:   for (i=0; i<ov; ++i) {
 25:     MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
 26:   }
 27:   return(0);
 28: }
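
/*
   Usage sketch (added for illustration only; it is not part of the original file).
   MatIncreaseOverlap() is the public interface that reaches the routine above for
   MPIAIJ matrices; the index sets in is[] are replaced in place by the enlarged
   ones.  The stride ranges below are arbitrary example values.
*/
static PetscErrorCode ExampleIncreaseOverlap(Mat A)
{
  IS       is[2];
  PetscInt ov = 1;                                  /* levels of overlap to add         */
  ISCreateStride(PETSC_COMM_SELF,10,0,1,&is[0]);    /* example block: rows 0..9         */
  ISCreateStride(PETSC_COMM_SELF,10,50,1,&is[1]);   /* example block: rows 50..59       */
  MatIncreaseOverlap(A,2,is,ov);                    /* each IS is grown by ov layers    */
  ISDestroy(&is[0]);                                /* the caller owns the enlarged ISs */
  ISDestroy(&is[1]);
  return(0);
}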

 30: /*
 31:   Sample message format:
 32:   If a processor A wants processor B to process some elements corresponding
 33:   to index sets is[1],is[5]
 34:   mesg [0] = 2   (no of index sets in the mesg)
 35:   -----------  
 36:   mesg [1] = 1 => is[1]
 37:   mesg [2] = sizeof(is[1]);
 38:   -----------  
 39:   mesg [3] = 5  => is[5]
 40:   mesg [4] = sizeof(is[5]);
 41:   -----------
 42:   mesg [5]
 43:   mesg [n]  data(is[1])
 44:   -----------  
 45:   mesg[n+1]
 46:   mesg[m]  data(is[5])
 47:   -----------  
 48:   
 49:   Notes:
 50:   nrqs - no of requests sent (or to be sent out)
 51:   nrqr - no of requests received (which have to be, or which have been, processed)
 52: */
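
/*
  Worked example (added for illustration; the concrete counts are hypothetical):
  if processor A asks processor B to process 3 rows from is[1] and 2 rows from
  is[5], the packed message, following the layout above, is

     mesg = { 2,              <- no of index sets in the mesg
              1, 3,           <- is[1] contributes 3 indices
              5, 2,           <- is[5] contributes 2 indices
              r0, r1, r2,     <- the 3 indices belonging to is[1]
              s0, s1 };       <- the 2 indices belonging to is[5]

  so the total length is 1 + 2*(no of index sets) + (no of indices) = 1 + 4 + 5 = 10,
  which matches the per-message length computed below as w1[j] = (data count) + 1 + 2*w3[j].
*/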
 55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
 56: {
 57:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
 58:   PetscMPIInt    *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
 59:   const PetscInt **idx,*idx_i;
 60:   PetscInt       *n,**data,len;
 62:   PetscMPIInt    size,rank,tag1,tag2;
 63:   PetscInt       M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
 64:   PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
 65:   PetscBT        *table;
 66:   MPI_Comm       comm;
 67:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
 68:   MPI_Status     *s_status,*recv_status;
 69:   char           *t_p;

 72:   comm   = ((PetscObject)C)->comm;
 73:   size   = c->size;
 74:   rank   = c->rank;
 75:   M      = C->rmap->N;

 77:   PetscObjectGetNewTag((PetscObject)C,&tag1);
 78:   PetscObjectGetNewTag((PetscObject)C,&tag2);
 79: 
 80:   PetscMalloc2(imax,PetscInt*,&idx,imax,PetscInt,&n);

 82:   for (i=0; i<imax; i++) {
 83:     ISGetIndices(is[i],&idx[i]);
 84:     ISGetLocalSize(is[i],&n[i]);
 85:   }

 87:   /* evaluate communication - mesg to whom, length of mesg, and buffer space
 88:      required. Based on this, buffers are allocated, and data copied into them*/
 89:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4);
 90:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 91:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 92:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 93:   for (i=0; i<imax; i++) {
 94:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 95:     idx_i = idx[i];
 96:     len   = n[i];
 97:     for (j=0; j<len; j++) {
 98:       row  = idx_i[j];
 99:       if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100:       PetscLayoutFindOwner(C->rmap,row,&proc);
101:       w4[proc]++;
102:     }
103:     for (j=0; j<size; j++){
104:       if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105:     }
106:   }

108:   nrqs     = 0;              /* no of outgoing messages */
 109:   msz      = 0;              /* total mesg length (for all procs) */
 110:   w1[rank] = 0;              /* no mesg sent to itself */
111:   w3[rank] = 0;
112:   for (i=0; i<size; i++) {
113:     if (w1[i])  {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114:   }
115:   /* pa - is list of processors to communicate with */
116:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);
117:   for (i=0,j=0; i<size; i++) {
118:     if (w1[i]) {pa[j] = i; j++;}
119:   }

121:   /* Each message would have a header = 1 + 2*(no of IS) + data */
122:   for (i=0; i<nrqs; i++) {
123:     j      = pa[i];
124:     w1[j] += w2[j] + 2*w3[j];
125:     msz   += w1[j];
126:   }

 128:   /* Determine the number of messages to expect, their lengths, and their source (from) ids */
129:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

132:   /* Now post the Irecvs corresponding to these messages */
133:   PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);

135:   /* Allocate Memory for outgoing messages */
136:   PetscMalloc4(size,PetscInt*,&outdat,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);
137:   PetscMemzero(outdat,size*sizeof(PetscInt*));
138:   PetscMemzero(ptr,size*sizeof(PetscInt*));

140:   {
141:     PetscInt *iptr = tmp,ict  = 0;
142:     for (i=0; i<nrqs; i++) {
143:       j         = pa[i];
144:       iptr     +=  ict;
145:       outdat[j] = iptr;
146:       ict       = w1[j];
147:     }
148:   }

150:   /* Form the outgoing messages */
151:   /*plug in the headers*/
152:   for (i=0; i<nrqs; i++) {
153:     j            = pa[i];
154:     outdat[j][0] = 0;
155:     PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156:     ptr[j]       = outdat[j] + 2*w3[j] + 1;
157:   }
158: 
159:   /* Memory for doing local proc's work*/
160:   {
161:     PetscMalloc5(imax,PetscBT,&table, imax,PetscInt*,&data, imax,PetscInt,&isz,
162:                         M*imax,PetscInt,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,char,&t_p);
163:     PetscMemzero(table,imax*sizeof(PetscBT));
164:     PetscMemzero(data,imax*sizeof(PetscInt*));
165:     PetscMemzero(isz,imax*sizeof(PetscInt));
166:     PetscMemzero(d_p,M*imax*sizeof(PetscInt));
167:     PetscMemzero(t_p,(M/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));

169:     for (i=0; i<imax; i++) {
170:       table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
171:       data[i]  = d_p + M*i;
172:     }
173:   }

175:   /* Parse the IS and update local tables and the outgoing buf with the data*/
176:   {
177:     PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
178:     PetscBT  table_i;

180:     for (i=0; i<imax; i++) {
181:       PetscMemzero(ctr,size*sizeof(PetscInt));
182:       n_i     = n[i];
183:       table_i = table[i];
184:       idx_i   = idx[i];
185:       data_i  = data[i];
186:       isz_i   = isz[i];
187:       for (j=0;  j<n_i; j++) {  /* parse the indices of each IS */
188:         row  = idx_i[j];
189:         PetscLayoutFindOwner(C->rmap,row,&proc);
190:         if (proc != rank) { /* copy to the outgoing buffer */
191:           ctr[proc]++;
192:           *ptr[proc] = row;
193:           ptr[proc]++;
194:         } else { /* Update the local table */
195:           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
196:         }
197:       }
198:       /* Update the headers for the current IS */
199:       for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
200:         if ((ctr_j = ctr[j])) {
201:           outdat_j        = outdat[j];
202:           k               = ++outdat_j[0];
203:           outdat_j[2*k]   = ctr_j;
204:           outdat_j[2*k-1] = i;
205:         }
206:       }
207:       isz[i] = isz_i;
208:     }
209:   }

211:   /*  Now  post the sends */
212:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
213:   for (i=0; i<nrqs; ++i) {
214:     j    = pa[i];
215:     MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
216:   }
217: 
218:   /* No longer need the original indices*/
219:   for (i=0; i<imax; ++i) {
220:     ISRestoreIndices(is[i],idx+i);
221:   }
222:   PetscFree2(idx,n);

224:   for (i=0; i<imax; ++i) {
225:     ISDestroy(&is[i]);
226:   }
227: 
228:   /* Do Local work*/
229:   MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);

231:   /* Receive messages*/
232:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);
233:   if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}
234: 
235:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
236:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

238:   /* Phase 1 sends are complete - deallocate buffers */
239:   PetscFree4(outdat,ptr,tmp,ctr);
240:   PetscFree4(w1,w2,w3,w4);

242:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);
243:   PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);
244:   MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
245:   PetscFree(rbuf[0]);
246:   PetscFree(rbuf);

248: 
 249:   /* Send the data back */
250:   /* Do a global reduction to know the buffer space req for incoming messages*/
251:   {
252:     PetscMPIInt *rw1;
253: 
254:     PetscMalloc(size*sizeof(PetscMPIInt),&rw1);
255:     PetscMemzero(rw1,size*sizeof(PetscMPIInt));

257:     for (i=0; i<nrqr; ++i) {
258:       proc      = recv_status[i].MPI_SOURCE;
259:       if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
260:       rw1[proc] = isz1[i];
261:     }
262:     PetscFree(onodes1);
263:     PetscFree(olengths1);

 265:     /* Determine the number of messages to expect, their lengths, and their source (from) ids */
266:     PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
267:     PetscFree(rw1);
268:   }
269:   /* Now post the Irecvs corresponding to these messages */
270:   PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);

272:   /*  Now  post the sends */
273:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
274:   for (i=0; i<nrqr; ++i) {
275:     j    = recv_status[i].MPI_SOURCE;
276:     MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
277:   }

279:   /* receive work done on other processors*/
280:   {
281:     PetscInt    is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
282:     PetscMPIInt idex;
283:     PetscBT     table_i;
284:     MPI_Status  *status2;
285: 
286:     PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
287:     for (i=0; i<nrqs; ++i) {
288:       MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
289:       /* Process the message*/
290:       rbuf2_i = rbuf2[idex];
291:       ct1     = 2*rbuf2_i[0]+1;
292:       jmax    = rbuf2[idex][0];
293:       for (j=1; j<=jmax; j++) {
294:         max     = rbuf2_i[2*j];
295:         is_no   = rbuf2_i[2*j-1];
296:         isz_i   = isz[is_no];
297:         data_i  = data[is_no];
298:         table_i = table[is_no];
299:         for (k=0; k<max; k++,ct1++) {
300:           row = rbuf2_i[ct1];
301:           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
302:         }
303:         isz[is_no] = isz_i;
304:       }
305:     }

307:     if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
308:     PetscFree(status2);
309:   }
310: 
311:   for (i=0; i<imax; ++i) {
312:     ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
313:   }
314: 
315:   PetscFree(onodes2);
316:   PetscFree(olengths2);

318:   PetscFree(pa);
319:   PetscFree(rbuf2[0]);
320:   PetscFree(rbuf2);
321:   PetscFree(s_waits1);
322:   PetscFree(r_waits1);
323:   PetscFree(s_waits2);
324:   PetscFree(r_waits2);
325:   PetscFree5(table,data,isz,d_p,t_p);
326:   PetscFree(s_status);
327:   PetscFree(recv_status);
328:   PetscFree(xdata[0]);
329:   PetscFree(xdata);
330:   PetscFree(isz1);
331:   return(0);
332: }

336: /*  
 337:    MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap, to do 
338:        the work on the local processor.

340:      Inputs:
 341:       C      - the MPIAIJ matrix;
 342:       imax   - total no of index sets processed at a time;
 343:       table  - an array of PetscBT bit tables, one per index set, each M bits long (M = global number of rows).
344:       
345:      Output:
346:       isz    - array containing the count of the solution elements corresponding
347:                to each index set;
348:       data   - pointer to the solutions
349: */
350: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
351: {
352:   Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
353:   Mat        A = c->A,B = c->B;
354:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
355:   PetscInt   start,end,val,max,rstart,cstart,*ai,*aj;
356:   PetscInt   *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
357:   PetscBT    table_i;

360:   rstart = C->rmap->rstart;
361:   cstart = C->cmap->rstart;
362:   ai     = a->i;
363:   aj     = a->j;
364:   bi     = b->i;
365:   bj     = b->j;
366:   garray = c->garray;

368: 
369:   for (i=0; i<imax; i++) {
370:     data_i  = data[i];
371:     table_i = table[i];
372:     isz_i   = isz[i];
373:     for (j=0,max=isz[i]; j<max; j++) {
374:       row   = data_i[j] - rstart;
375:       start = ai[row];
376:       end   = ai[row+1];
377:       for (k=start; k<end; k++) { /* Amat */
378:         val = aj[k] + cstart;
379:         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
380:       }
381:       start = bi[row];
382:       end   = bi[row+1];
383:       for (k=start; k<end; k++) { /* Bmat */
384:         val = garray[bj[k]];
385:         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
386:       }
387:     }
388:     isz[i] = isz_i;
389:   }
390:   return(0);
391: }
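
/*
   Sketch (added for illustration; not part of the original file) of the PetscBT
   idiom used above: the bit table records which global columns have already been
   emitted, so each column index is appended to the growing list at most once.
*/
static void ExampleAppendUnique(PetscBT seen,PetscInt val,PetscInt *list,PetscInt *len)
{
  /* PetscBTLookupSet() returns the previous state of the bit and then sets it */
  if (!PetscBTLookupSet(seen,val)) list[(*len)++] = val;
}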

395: /*     
 396:       MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages,
397:          and return the output

399:          Input:
400:            C    - the matrix
401:            nrqr - no of messages being processed.
 402:            rbuf - an array of pointers to the received requests
403:            
404:          Output:
405:            xdata - array of messages to be sent back
406:            isz1  - size of each message

 408:   For better efficiency perhaps we should malloc each xdata[i] separately;
 409: then if a reallocation is required we need only copy the data for that one message
 410: rather than all previous messages, as is done now where a single large chunk of 
 411: memory is used.

413: */
414: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
415: {
416:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
417:   Mat            A = c->A,B = c->B;
418:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
420:   PetscMPIInt    rank;
421:   PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
422:   PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
423:   PetscInt       val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
424:   PetscInt       *rbuf_i,kmax,rbuf_0;
425:   PetscBT        xtable;

428:   rank   = c->rank;
429:   m      = C->rmap->N;
430:   rstart = C->rmap->rstart;
431:   cstart = C->cmap->rstart;
432:   ai     = a->i;
433:   aj     = a->j;
434:   bi     = b->i;
435:   bj     = b->j;
436:   garray = c->garray;
437: 
438: 
439:   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
440:     rbuf_i  =  rbuf[i];
441:     rbuf_0  =  rbuf_i[0];
442:     ct     += rbuf_0;
443:     for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
444:   }
445: 
446:   if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
447:   else      max1 = 1;
448:   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
449:   PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);
450:   ++no_malloc;
451:   PetscBTCreate(m,xtable);
452:   PetscMemzero(isz1,nrqr*sizeof(PetscInt));
453: 
454:   ct3 = 0;
 455:   for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
456:     rbuf_i =  rbuf[i];
457:     rbuf_0 =  rbuf_i[0];
458:     ct1    =  2*rbuf_0+1;
459:     ct2    =  ct1;
460:     ct3    += ct1;
461:     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
462:       PetscBTMemzero(m,xtable);
463:       oct2 = ct2;
464:       kmax = rbuf_i[2*j];
465:       for (k=0; k<kmax; k++,ct1++) {
466:         row = rbuf_i[ct1];
467:         if (!PetscBTLookupSet(xtable,row)) {
468:           if (!(ct3 < mem_estimate)) {
469:             new_estimate = (PetscInt)(1.5*mem_estimate)+1;
470:             PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
471:             PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
472:             PetscFree(xdata[0]);
473:             xdata[0]     = tmp;
474:             mem_estimate = new_estimate; ++no_malloc;
475:             for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
476:           }
477:           xdata[i][ct2++] = row;
478:           ct3++;
479:         }
480:       }
481:       for (k=oct2,max2=ct2; k<max2; k++) {
482:         row   = xdata[i][k] - rstart;
483:         start = ai[row];
484:         end   = ai[row+1];
485:         for (l=start; l<end; l++) {
486:           val = aj[l] + cstart;
487:           if (!PetscBTLookupSet(xtable,val)) {
488:             if (!(ct3 < mem_estimate)) {
489:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
490:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
491:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
492:               PetscFree(xdata[0]);
493:               xdata[0]     = tmp;
494:               mem_estimate = new_estimate; ++no_malloc;
495:               for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
496:             }
497:             xdata[i][ct2++] = val;
498:             ct3++;
499:           }
500:         }
501:         start = bi[row];
502:         end   = bi[row+1];
503:         for (l=start; l<end; l++) {
504:           val = garray[bj[l]];
505:           if (!PetscBTLookupSet(xtable,val)) {
506:             if (!(ct3 < mem_estimate)) {
507:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
508:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
509:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
510:               PetscFree(xdata[0]);
511:               xdata[0]     = tmp;
512:               mem_estimate = new_estimate; ++no_malloc;
513:               for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
514:             }
515:             xdata[i][ct2++] = val;
516:             ct3++;
517:           }
518:         }
519:       }
520:       /* Update the header*/
521:       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
522:       xdata[i][2*j-1] = rbuf_i[2*j-1];
523:     }
524:     xdata[i][0] = rbuf_0;
525:     xdata[i+1]  = xdata[i] + ct2;
526:     isz1[i]     = ct2; /* size of each message */
527:   }
528:   PetscBTDestroy(xtable);
 529:   PetscInfo4(C,"[%d] Allocated %D bytes, required %D bytes, no of mallocs = %D\n",rank,mem_estimate,ct3,no_malloc);
530:   return(0);
531: }
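
/*
   Sketch (added for illustration; not part of the original file) of the growth
   strategy used in the routine above: all reply messages share the single buffer
   xdata[0]; when the running estimate is exceeded, the buffer is enlarged by
   roughly 1.5x and the per-message pointers xdata[1..i] are rebased into it.
*/
static PetscErrorCode ExampleGrowReplyBuffer(PetscInt **xdata,PetscInt *isz1,PetscInt i,PetscInt *mem_estimate)
{
  PetscInt ctr,*tmp,new_estimate = (PetscInt)(1.5*(*mem_estimate))+1;
  PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
  PetscMemcpy(tmp,xdata[0],(*mem_estimate)*sizeof(PetscInt));
  PetscFree(xdata[0]);
  xdata[0] = tmp;
  /* message ctr starts immediately after message ctr-1 ends */
  for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
  *mem_estimate = new_estimate;
  return(0);
}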
532: /* -------------------------------------------------------------------------*/
535: /*
536:     Every processor gets the entire matrix
537: */
540: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
541: {
542:   Mat            B;
543:   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
544:   Mat_SeqAIJ     *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
546:   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
547:   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
548:   PetscInt       m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
549:   MatScalar      *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;

552:   MPI_Comm_size(((PetscObject)A)->comm,&size);
553:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);

555:   if (scall == MAT_INITIAL_MATRIX) {
556:     /* ----------------------------------------------------------------
557:          Tell every processor the number of nonzeros per row
558:     */
559:     PetscMalloc(A->rmap->N*sizeof(PetscInt),&lens);
560:     for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
561:       lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
562:     }
563:     sendcount = A->rmap->rend - A->rmap->rstart;
564:     PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
565:     for (i=0; i<size; i++) {
566:       recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
567:       displs[i]     = A->rmap->range[i];
568:     }
569: #if defined(PETSC_HAVE_MPI_IN_PLACE)
570:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
571: #else
572:     MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
573: #endif
574:     /* ---------------------------------------------------------------
575:          Create the sequential matrix of the same type as the local block diagonal
576:     */
577:     MatCreate(PETSC_COMM_SELF,&B);
578:     MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
579:     MatSetType(B,((PetscObject)a->A)->type_name);
580:     MatSeqAIJSetPreallocation(B,0,lens);
581:     PetscMalloc(sizeof(Mat),Bin);
582:     **Bin = B;
583:     b = (Mat_SeqAIJ *)B->data;

585:     /*--------------------------------------------------------------------
586:        Copy my part of matrix column indices over
587:     */
588:     sendcount  = ad->nz + bd->nz;
589:     jsendbuf   = b->j + b->i[rstarts[rank]];
590:     a_jsendbuf = ad->j;
591:     b_jsendbuf = bd->j;
592:     n          = A->rmap->rend - A->rmap->rstart;
593:     cnt        = 0;
594:     for (i=0; i<n; i++) {

596:       /* put in lower diagonal portion */
597:       m = bd->i[i+1] - bd->i[i];
598:       while (m > 0) {
599:         /* is it above diagonal (in bd (compressed) numbering) */
600:         if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
601:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
602:         m--;
603:       }

605:       /* put in diagonal portion */
606:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
607:         jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
608:       }

610:       /* put in upper diagonal portion */
611:       while (m-- > 0) {
612:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
613:       }
614:     }
615:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

617:     /*--------------------------------------------------------------------
618:        Gather all column indices to all processors
619:     */
620:     for (i=0; i<size; i++) {
621:       recvcounts[i] = 0;
622:       for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
623:         recvcounts[i] += lens[j];
624:       }
625:     }
626:     displs[0]  = 0;
627:     for (i=1; i<size; i++) {
628:       displs[i] = displs[i-1] + recvcounts[i-1];
629:     }
630: #if defined(PETSC_HAVE_MPI_IN_PLACE)
631:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
632: #else
633:     MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
634: #endif
635:     /*--------------------------------------------------------------------
 636:         Assemble the matrix into usable form (note that the numerical values are not yet set)
637:     */
638:     /* set the b->ilen (length of each row) values */
639:     PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
640:     /* set the b->i indices */
641:     b->i[0] = 0;
642:     for (i=1; i<=A->rmap->N; i++) {
643:       b->i[i] = b->i[i-1] + lens[i-1];
644:     }
645:     PetscFree(lens);
646:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
647:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

649:   } else {
650:     B  = **Bin;
651:     b = (Mat_SeqAIJ *)B->data;
652:   }

654:   /*--------------------------------------------------------------------
655:        Copy my part of matrix numerical values into the values location 
656:   */
657:   if (flag == MAT_GET_VALUES){
658:     sendcount = ad->nz + bd->nz;
659:     sendbuf   = b->a + b->i[rstarts[rank]];
660:     a_sendbuf = ad->a;
661:     b_sendbuf = bd->a;
662:     b_sendj   = bd->j;
663:     n         = A->rmap->rend - A->rmap->rstart;
664:     cnt       = 0;
665:     for (i=0; i<n; i++) {

667:       /* put in lower diagonal portion */
668:       m = bd->i[i+1] - bd->i[i];
669:       while (m > 0) {
670:         /* is it above diagonal (in bd (compressed) numbering) */
671:         if (garray[*b_sendj] > A->rmap->rstart + i) break;
672:         sendbuf[cnt++] = *b_sendbuf++;
673:         m--;
674:         b_sendj++;
675:       }

677:       /* put in diagonal portion */
678:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
679:         sendbuf[cnt++] = *a_sendbuf++;
680:       }

682:       /* put in upper diagonal portion */
683:       while (m-- > 0) {
684:         sendbuf[cnt++] = *b_sendbuf++;
685:         b_sendj++;
686:       }
687:     }
688:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
689: 
690:     /* ----------------------------------------------------------------- 
691:        Gather all numerical values to all processors 
692:     */
693:     if (!recvcounts) {
694:       PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
695:     }
696:     for (i=0; i<size; i++) {
697:       recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
698:     }
699:     displs[0]  = 0;
700:     for (i=1; i<size; i++) {
701:       displs[i] = displs[i-1] + recvcounts[i-1];
702:     }
703:     recvbuf   = b->a;
704: #if defined(PETSC_HAVE_MPI_IN_PLACE)
705:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
706: #else
707:     MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
708: #endif
 709:   }  /* end of (flag == MAT_GET_VALUES) */
710:   PetscFree2(recvcounts,displs);

712:   if (A->symmetric){
713:     MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
714:   } else if (A->hermitian) {
715:     MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
716:   } else if (A->structurally_symmetric) {
717:     MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
718:   }
719:   return(0);
720: }



726: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
727: {
729:   PetscInt       nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
730:   PetscBool      rowflag,colflag,wantallmatrix = PETSC_FALSE,twantallmatrix;

733:   /*
 734:        Check for the special case in which each processor gets the entire matrix
735:   */
736:   if (ismax == 1 && C->rmap->N == C->cmap->N) {
737:     ISIdentity(*isrow,&rowflag);
738:     ISIdentity(*iscol,&colflag);
739:     ISGetLocalSize(*isrow,&nrow);
740:     ISGetLocalSize(*iscol,&ncol);
741:     if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
742:       wantallmatrix = PETSC_TRUE;
743:       PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,PETSC_NULL);
744:     }
745:   }
746:   MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPI_INT,MPI_MIN,((PetscObject)C)->comm);
747:   if (twantallmatrix) {
748:     MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
749:     return(0);
750:   }

752:   /* Allocate memory to hold all the submatrices */
753:   if (scall != MAT_REUSE_MATRIX) {
754:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
755:   }
756:   /* Determine the number of stages through which submatrices are done */
757:   nmax          = 20*1000000 / (C->cmap->N * sizeof(PetscInt));
 758:   /* 
 759:      Each stage will extract nmax submatrices.
 760:      nmax is chosen so that roughly 20 MB of column-map workspace is needed per stage,
 761:      so a matrix with on the order of 20M columns allows only one submatrix per stage.
 762:   */
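  /*
     Illustrative arithmetic (added; the sizes are hypothetical): with 1,000,000 columns
     and 4-byte PetscInt, nmax = 20*1000000/(1000000*4) = 5 submatrices per stage;
     with 8-byte indices the same matrix would give nmax = 2.
  */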
763:   if (!nmax) nmax = 1;
764:   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

766:   /* Make sure every processor loops through the nstages */
767:   MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);

769:   for (i=0,pos=0; i<nstages; i++) {
770:     if (pos+nmax <= ismax) max_no = nmax;
771:     else if (pos == ismax) max_no = 0;
772:     else                   max_no = ismax-pos;
773:     MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
774:     pos += max_no;
775:   }
776:   return(0);
777: }
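
/*
   Usage sketch (added for illustration; not part of the original file): requesting the
   whole square matrix with identity row and column index sets on every process triggers
   the fast path above, which calls MatGetSubMatrix_MPIAIJ_All() instead of the staged
   algorithm (unless -use_fast_submatrix is used to turn it off).
*/
static PetscErrorCode ExampleGetWholeMatrix(Mat C,Mat **submat)
{
  IS       isrow,iscol;
  PetscInt M,N;
  MatGetSize(C,&M,&N);
  ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);    /* identity over all rows    */
  ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);    /* identity over all columns */
  MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,submat);
  ISDestroy(&isrow);
  ISDestroy(&iscol);
  return(0);
}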

779: /* -------------------------------------------------------------------------*/
782: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
783: {
784:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
785:   Mat            A = c->A;
786:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
787:   const PetscInt **icol,**irow;
788:   PetscInt       *nrow,*ncol,start;
790:   PetscMPIInt    rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
791:   PetscInt       **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
792:   PetscInt       nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
793:   PetscInt       **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
794:   PetscInt       **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
795: #if defined (PETSC_USE_CTABLE)
796:   PetscTable     *cmap,cmap_i,*rmap,rmap_i;
797: #else
798:   PetscInt       **cmap,*cmap_i,**rmap,*rmap_i;
799: #endif
800:   const PetscInt *irow_i;
801:   PetscInt       ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
802:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
803:   MPI_Request    *r_waits4,*s_waits3,*s_waits4;
804:   MPI_Status     *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
805:   MPI_Status     *r_status3,*r_status4,*s_status4;
806:   MPI_Comm       comm;
807:   PetscScalar    **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
808:   PetscMPIInt    *onodes1,*olengths1;
809:   PetscMPIInt    idex,idex2,end;

812:   comm   = ((PetscObject)C)->comm;
813:   tag0   = ((PetscObject)C)->tag;
814:   size   = c->size;
815:   rank   = c->rank;
816: 
817:   /* Get some new tags to keep the communication clean */
818:   PetscObjectGetNewTag((PetscObject)C,&tag1);
819:   PetscObjectGetNewTag((PetscObject)C,&tag2);
820:   PetscObjectGetNewTag((PetscObject)C,&tag3);

822:   PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);

824:   for (i=0; i<ismax; i++) {
825:     ISGetIndices(isrow[i],&irow[i]);
826:     ISGetIndices(iscol[i],&icol[i]);
827:     ISGetLocalSize(isrow[i],&nrow[i]);
828:     ISGetLocalSize(iscol[i],&ncol[i]);
829:   }

 831:   /* evaluate communication - mesg to whom, length of mesg, and buffer space
832:      required. Based on this, buffers are allocated, and data copied into them*/
833:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4); /* mesg size */
834:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialize work vector*/
835:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialize work vector*/
836:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialize work vector*/
837:   for (i=0; i<ismax; i++) {
838:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
839:     jmax   = nrow[i];
840:     irow_i = irow[i];
841:     for (j=0; j<jmax; j++) {
842:       l = 0;
843:       row  = irow_i[j];
844:       while (row >= C->rmap->range[l+1]) l++;
845:       proc = l;
846:       w4[proc]++;
847:     }
848:     for (j=0; j<size; j++) {
849:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
850:     }
851:   }
852: 
853:   nrqs     = 0;              /* no of outgoing messages */
854:   msz      = 0;              /* total mesg length (for all procs) */
855:   w1[rank] = 0;              /* no mesg sent to self */
856:   w3[rank] = 0;
857:   for (i=0; i<size; i++) {
858:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
859:   }
860:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa); /*(proc -array)*/
861:   for (i=0,j=0; i<size; i++) {
862:     if (w1[i]) { pa[j] = i; j++; }
863:   }

865:   /* Each message would have a header = 1 + 2*(no of IS) + data */
866:   for (i=0; i<nrqs; i++) {
867:     j     = pa[i];
868:     w1[j] += w2[j] + 2* w3[j];
869:     msz   += w1[j];
870:   }
871:   PetscInfo2(C,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);

 873:   /* Determine the number of messages to expect, their lengths, and their source (from) ids */
874:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
875:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

877:   /* Now post the Irecvs corresponding to these messages */
878:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
879: 
880:   PetscFree(onodes1);
881:   PetscFree(olengths1);
882: 
883:   /* Allocate Memory for outgoing messages */
884:   PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);
885:   PetscMemzero(sbuf1,size*sizeof(PetscInt*));
886:   PetscMemzero(ptr,size*sizeof(PetscInt*));

888:   {
889:     PetscInt *iptr = tmp,ict = 0;
890:     for (i=0; i<nrqs; i++) {
891:       j         = pa[i];
892:       iptr     += ict;
893:       sbuf1[j]  = iptr;
894:       ict       = w1[j];
895:     }
896:   }

898:   /* Form the outgoing messages */
899:   /* Initialize the header space */
900:   for (i=0; i<nrqs; i++) {
901:     j           = pa[i];
902:     sbuf1[j][0] = 0;
903:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
904:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
905:   }
906: 
907:   /* Parse the isrow and copy data into outbuf */
908:   for (i=0; i<ismax; i++) {
909:     PetscMemzero(ctr,size*sizeof(PetscInt));
910:     irow_i = irow[i];
911:     jmax   = nrow[i];
912:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
913:       l = 0;
914:       row  = irow_i[j];
915:       while (row >= C->rmap->range[l+1]) l++;
916:       proc = l;
917:       if (proc != rank) { /* copy to the outgoing buf*/
918:         ctr[proc]++;
919:         *ptr[proc] = row;
920:         ptr[proc]++;
921:       }
922:     }
923:     /* Update the headers for the current IS */
924:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
925:       if ((ctr_j = ctr[j])) {
926:         sbuf1_j        = sbuf1[j];
927:         k              = ++sbuf1_j[0];
928:         sbuf1_j[2*k]   = ctr_j;
929:         sbuf1_j[2*k-1] = i;
930:       }
931:     }
932:   }

934:   /*  Now  post the sends */
935:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
936:   for (i=0; i<nrqs; ++i) {
937:     j    = pa[i];
938:     MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
939:   }

941:   /* Post Receives to capture the buffer size */
942:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
943:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);
944:   rbuf2[0] = tmp + msz;
945:   for (i=1; i<nrqs; ++i) {
946:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
947:   }
948:   for (i=0; i<nrqs; ++i) {
949:     j    = pa[i];
950:     MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
951:   }

953:   /* Send to other procs the buf size they should allocate */
954: 

956:   /* Receive messages*/
957:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
958:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
959:   PetscMalloc3(nrqr,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);
960:   {
961:     Mat_SeqAIJ  *sA = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
962:     PetscInt    *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
963:     PetscInt    *sbuf2_i;

965:     for (i=0; i<nrqr; ++i) {
966:       MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);
967:       req_size[idex] = 0;
968:       rbuf1_i         = rbuf1[idex];
969:       start           = 2*rbuf1_i[0] + 1;
970:       MPI_Get_count(r_status1+i,MPIU_INT,&end);
971:       PetscMalloc((end+1)*sizeof(PetscInt),&sbuf2[idex]);
972:       sbuf2_i         = sbuf2[idex];
973:       for (j=start; j<end; j++) {
974:         id               = rbuf1_i[j] - rstart;
975:         ncols            = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
976:         sbuf2_i[j]       = ncols;
977:         req_size[idex] += ncols;
978:       }
979:       req_source[idex] = r_status1[i].MPI_SOURCE;
980:       /* form the header */
981:       sbuf2_i[0]   = req_size[idex];
982:       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
983:       MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
984:     }
985:   }
986:   PetscFree(r_status1);
987:   PetscFree(r_waits1);

 989:   /* Receive the buffer sizes the other processes computed for us */
 990: 
991: 
992:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);
993:   PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf4);
994:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
995:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
996:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

998:   for (i=0; i<nrqs; ++i) {
999:     MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
1000:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscInt),&rbuf3[idex]);
1001:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscScalar),&rbuf4[idex]);
1002:     MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1003:     MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1004:   }
1005:   PetscFree(r_status2);
1006:   PetscFree(r_waits2);
1007: 
1008:   /* Wait on sends1 and sends2 */
1009:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
1010:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

1012:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1013:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1014:   PetscFree(s_status1);
1015:   PetscFree(s_status2);
1016:   PetscFree(s_waits1);
1017:   PetscFree(s_waits2);

1019:   /* Now allocate buffers for a->j, and send them off */
1020:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);
1021:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1022:   PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);
1023:   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
1024: 
1025:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
1026:   {
1027:     PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1028:     PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1029:     PetscInt cend = C->cmap->rend;
1030:     PetscInt *a_j = a->j,*b_j = b->j,ctmp;

1032:     for (i=0; i<nrqr; i++) {
1033:       rbuf1_i   = rbuf1[i];
1034:       sbuf_aj_i = sbuf_aj[i];
1035:       ct1       = 2*rbuf1_i[0] + 1;
1036:       ct2       = 0;
1037:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1038:         kmax = rbuf1[i][2*j];
1039:         for (k=0; k<kmax; k++,ct1++) {
1040:           row    = rbuf1_i[ct1] - rstart;
1041:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1042:           ncols  = nzA + nzB;
1043:           cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];

1045:           /* load the column indices for this row into cols*/
1046:           cols  = sbuf_aj_i + ct2;
1047: 
1048:           lwrite = 0;
1049:           for (l=0; l<nzB; l++) {
1050:             if ((ctmp = bmap[cworkB[l]]) < cstart)  cols[lwrite++] = ctmp;
1051:           }
1052:           for (l=0; l<nzA; l++)   cols[lwrite++] = cstart + cworkA[l];
1053:           for (l=0; l<nzB; l++) {
1054:             if ((ctmp = bmap[cworkB[l]]) >= cend)  cols[lwrite++] = ctmp;
1055:           }

1057:           ct2 += ncols;
1058:         }
1059:       }
1060:       MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1061:     }
1062:   }
1063:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
1064:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

1066:   /* Allocate buffers for a->a, and send them off */
1067:   PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf_aa);
1068:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1069:   PetscMalloc((j+1)*sizeof(PetscScalar),&sbuf_aa[0]);
1070:   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
1071: 
1072:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
1073:   {
1074:     PetscInt    nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1075:     PetscInt    cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1076:     PetscInt    cend = C->cmap->rend;
1077:     PetscInt    *b_j = b->j;
1078:     PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;
1079: 
1080:     for (i=0; i<nrqr; i++) {
1081:       rbuf1_i   = rbuf1[i];
1082:       sbuf_aa_i = sbuf_aa[i];
1083:       ct1       = 2*rbuf1_i[0]+1;
1084:       ct2       = 0;
1085:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1086:         kmax = rbuf1_i[2*j];
1087:         for (k=0; k<kmax; k++,ct1++) {
1088:           row    = rbuf1_i[ct1] - rstart;
1089:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1090:           ncols  = nzA + nzB;
1091:           cworkB = b_j + b_i[row];
1092:           vworkA = a_a + a_i[row];
1093:           vworkB = b_a + b_i[row];

1095:           /* load the column values for this row into vals*/
1096:           vals  = sbuf_aa_i+ct2;
1097: 
1098:           lwrite = 0;
1099:           for (l=0; l<nzB; l++) {
1100:             if ((bmap[cworkB[l]]) < cstart)  vals[lwrite++] = vworkB[l];
1101:           }
1102:           for (l=0; l<nzA; l++)   vals[lwrite++] = vworkA[l];
1103:           for (l=0; l<nzB; l++) {
1104:             if ((bmap[cworkB[l]]) >= cend)  vals[lwrite++] = vworkB[l];
1105:           }
1106: 
1107:           ct2 += ncols;
1108:         }
1109:       }
1110:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1111:     }
1112:   }
1113:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
1114:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
1115:   PetscFree(rbuf1[0]);
1116:   PetscFree(rbuf1);

1118:   /* Form the matrix */
1119:   /* create col map: global col of C -> local col of submatrices */
1120:   {
1121:     const PetscInt *icol_i;
1122: #if defined (PETSC_USE_CTABLE)
1123:     PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);
1124:     for (i=0; i<ismax; i++) {
1125:       PetscTableCreate(ncol[i]+1,&cmap[i]);
1126:       jmax   = ncol[i];
1127:       icol_i = icol[i];
1128:       cmap_i = cmap[i];
1129:       for (j=0; j<jmax; j++) {
1130:         PetscTableAdd(cmap[i],icol_i[j]+1,j+1);
1131:       }
1132:     }
1133: #else
1134:     PetscMalloc(ismax*sizeof(PetscInt*),&cmap);
1135:     PetscMalloc(ismax*C->cmap->N*sizeof(PetscInt),&cmap[0]);
1136:     PetscMemzero(cmap[0],ismax*C->cmap->N*sizeof(PetscInt));
1137:     for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->cmap->N; }
1138:     for (i=0; i<ismax; i++) {
1139:       jmax   = ncol[i];
1140:       icol_i = icol[i];
1141:       cmap_i = cmap[i];
1142:       for (j=0; j<jmax; j++) {
1143:         cmap_i[icol_i[j]] = j+1;
1144:       }
1145:     }
1146: #endif
1147:   }

1149:   /* Create lens which is required for MatCreate... */
1150:   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
1151:   PetscMalloc(ismax*sizeof(PetscInt*),&lens);
1152:   PetscMalloc(j*sizeof(PetscInt),&lens[0]);
1153:   PetscMemzero(lens[0],j*sizeof(PetscInt));
1154:   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
1155: 
1156:   /* Update lens from local data */
1157:   for (i=0; i<ismax; i++) {
1158:     jmax   = nrow[i];
1159:     cmap_i = cmap[i];
1160:     irow_i = irow[i];
1161:     lens_i = lens[i];
1162:     for (j=0; j<jmax; j++) {
1163:       l = 0;
1164:       row  = irow_i[j];
1165:       while (row >= C->rmap->range[l+1]) l++;
1166:       proc = l;
1167:       if (proc == rank) {
1168:         MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1169:         for (k=0; k<ncols; k++) {
1170: #if defined (PETSC_USE_CTABLE)
1171:           PetscTableFind(cmap_i,cols[k]+1,&tcol);
1172: #else
1173:           tcol = cmap_i[cols[k]];
1174: #endif
1175:           if (tcol) { lens_i[j]++;}
1176:         }
1177:         MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1178:       }
1179:     }
1180:   }
1181: 
1182:   /* Create row map: global row of C -> local row of submatrices */
1183: #if defined (PETSC_USE_CTABLE)
1184:   PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);
1185:     for (i=0; i<ismax; i++) {
1186:       PetscTableCreate(nrow[i]+1,&rmap[i]);
1187:       rmap_i = rmap[i];
1188:       irow_i = irow[i];
1189:       jmax   = nrow[i];
1190:       for (j=0; j<jmax; j++) {
1191:         PetscTableAdd(rmap[i],irow_i[j]+1,j+1);
1192:       }
1193:     }
1194: #else
1195:   PetscMalloc(ismax*sizeof(PetscInt*),&rmap);
1196:   PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);
1197:   PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1198:   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
1199:   for (i=0; i<ismax; i++) {
1200:     rmap_i = rmap[i];
1201:     irow_i = irow[i];
1202:     jmax   = nrow[i];
1203:     for (j=0; j<jmax; j++) {
1204:       rmap_i[irow_i[j]] = j;
1205:     }
1206:   }
1207: #endif
1208: 
1209:   /* Update lens from offproc data */
1210:   {
1211:     PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;

1213:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1214:       MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1215:       idex   = pa[idex2];
1216:       sbuf1_i = sbuf1[idex];
1217:       jmax    = sbuf1_i[0];
1218:       ct1     = 2*jmax+1;
1219:       ct2     = 0;
1220:       rbuf2_i = rbuf2[idex2];
1221:       rbuf3_i = rbuf3[idex2];
1222:       for (j=1; j<=jmax; j++) {
1223:         is_no   = sbuf1_i[2*j-1];
1224:         max1    = sbuf1_i[2*j];
1225:         lens_i  = lens[is_no];
1226:         cmap_i  = cmap[is_no];
1227:         rmap_i  = rmap[is_no];
1228:         for (k=0; k<max1; k++,ct1++) {
1229: #if defined (PETSC_USE_CTABLE)
1230:           PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1231:           row--;
1232:           if (row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); }
1233: #else
1234:           row  = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1235: #endif
1236:           max2 = rbuf2_i[ct1];
1237:           for (l=0; l<max2; l++,ct2++) {
1238: #if defined (PETSC_USE_CTABLE)
1239:             PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1240: #else
1241:             tcol = cmap_i[rbuf3_i[ct2]];
1242: #endif
1243:             if (tcol) {
1244:               lens_i[row]++;
1245:             }
1246:           }
1247:         }
1248:       }
1249:     }
1250:   }
1251:   PetscFree(r_status3);
1252:   PetscFree(r_waits3);
1253:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1254:   PetscFree(s_status3);
1255:   PetscFree(s_waits3);

1257:   /* Create the submatrices */
1258:   if (scall == MAT_REUSE_MATRIX) {
1259:     PetscBool  flag;

1261:     /*
 1262:         Assumes new rows are the same length as the old rows, hence bug!
1263:     */
1264:     for (i=0; i<ismax; i++) {
1265:       mat = (Mat_SeqAIJ *)(submats[i]->data);
1266:       if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1267:       PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1268:       if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
 1269:       /* Initialize the matrix as if it were empty */
1270:       PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));
1271:       submats[i]->factortype = C->factortype;
1272:     }
1273:   } else {
1274:     for (i=0; i<ismax; i++) {
1275:       MatCreate(PETSC_COMM_SELF,submats+i);
1276:       MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);
1277:       MatSetType(submats[i],((PetscObject)A)->type_name);
1278:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1279:     }
1280:   }

1282:   /* Assemble the matrices */
1283:   /* First assemble the local rows */
1284:   {
1285:     PetscInt    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1286:     PetscScalar *imat_a;
1287: 
1288:     for (i=0; i<ismax; i++) {
1289:       mat       = (Mat_SeqAIJ*)submats[i]->data;
1290:       imat_ilen = mat->ilen;
1291:       imat_j    = mat->j;
1292:       imat_i    = mat->i;
1293:       imat_a    = mat->a;
1294:       cmap_i    = cmap[i];
1295:       rmap_i    = rmap[i];
1296:       irow_i    = irow[i];
1297:       jmax      = nrow[i];
1298:       for (j=0; j<jmax; j++) {
1299:         l = 0;
1300:         row      = irow_i[j];
1301:         while (row >= C->rmap->range[l+1]) l++;
1302:         proc = l;
1303:         if (proc == rank) {
1304:           old_row  = row;
1305: #if defined (PETSC_USE_CTABLE)
1306:           PetscTableFind(rmap_i,row+1,&row);
1307:           row--;
1308: #else
1309:           row      = rmap_i[row];
1310: #endif
1311:           ilen_row = imat_ilen[row];
1312:           MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1313:           mat_i    = imat_i[row] ;
1314:           mat_a    = imat_a + mat_i;
1315:           mat_j    = imat_j + mat_i;
1316:           for (k=0; k<ncols; k++) {
1317: #if defined (PETSC_USE_CTABLE)
1318:             PetscTableFind(cmap_i,cols[k]+1,&tcol);
1319: #else
1320:             tcol = cmap_i[cols[k]];
1321: #endif
1322:             if (tcol){
1323:               *mat_j++ = tcol - 1;
1324:               *mat_a++ = vals[k];
1325:               ilen_row++;
1326:             }
1327:           }
1328:           MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1329:           imat_ilen[row] = ilen_row;
1330:         }
1331:       }
1332:     }
1333:   }

1335:   /*   Now assemble the off proc rows*/
1336:   {
1337:     PetscInt    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1338:     PetscInt    *imat_j,*imat_i;
1339:     PetscScalar *imat_a,*rbuf4_i;

1341:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1342:       MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1343:       idex   = pa[idex2];
1344:       sbuf1_i = sbuf1[idex];
1345:       jmax    = sbuf1_i[0];
1346:       ct1     = 2*jmax + 1;
1347:       ct2     = 0;
1348:       rbuf2_i = rbuf2[idex2];
1349:       rbuf3_i = rbuf3[idex2];
1350:       rbuf4_i = rbuf4[idex2];
1351:       for (j=1; j<=jmax; j++) {
1352:         is_no     = sbuf1_i[2*j-1];
1353:         rmap_i    = rmap[is_no];
1354:         cmap_i    = cmap[is_no];
1355:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
1356:         imat_ilen = mat->ilen;
1357:         imat_j    = mat->j;
1358:         imat_i    = mat->i;
1359:         imat_a    = mat->a;
1360:         max1      = sbuf1_i[2*j];
1361:         for (k=0; k<max1; k++,ct1++) {
1362:           row   = sbuf1_i[ct1];
1363: #if defined (PETSC_USE_CTABLE)
1364:           PetscTableFind(rmap_i,row+1,&row);
1365:           row--;
1366: #else
1367:           row   = rmap_i[row];
1368: #endif
1369:           ilen  = imat_ilen[row];
1370:           mat_i = imat_i[row] ;
1371:           mat_a = imat_a + mat_i;
1372:           mat_j = imat_j + mat_i;
1373:           max2 = rbuf2_i[ct1];
1374:           for (l=0; l<max2; l++,ct2++) {
1375: 
1376: #if defined (PETSC_USE_CTABLE)
1377:             PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1378: #else
1379:             tcol = cmap_i[rbuf3_i[ct2]];
1380: #endif
1381:             if (tcol) {
1382:               *mat_j++ = tcol - 1;
1383:               *mat_a++ = rbuf4_i[ct2];
1384:               ilen++;
1385:             }
1386:           }
1387:           imat_ilen[row] = ilen;
1388:         }
1389:       }
1390:     }
1391:   }
1392:   PetscFree(r_status4);
1393:   PetscFree(r_waits4);
1394:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1395:   PetscFree(s_waits4);
1396:   PetscFree(s_status4);

1398:   /* Restore the indices */
1399:   for (i=0; i<ismax; i++) {
1400:     ISRestoreIndices(isrow[i],irow+i);
1401:     ISRestoreIndices(iscol[i],icol+i);
1402:   }

1404:   /* Destroy allocated memory */
1405:   PetscFree4(irow,icol,nrow,ncol);
1406:   PetscFree4(w1,w2,w3,w4);
1407:   PetscFree(pa);

1409:   PetscFree4(sbuf1,ptr,tmp,ctr);
1410:   PetscFree(rbuf2);
1411:   for (i=0; i<nrqr; ++i) {
1412:     PetscFree(sbuf2[i]);
1413:   }
1414:   for (i=0; i<nrqs; ++i) {
1415:     PetscFree(rbuf3[i]);
1416:     PetscFree(rbuf4[i]);
1417:   }

1419:   PetscFree3(sbuf2,req_size,req_source);
1420:   PetscFree(rbuf3);
1421:   PetscFree(rbuf4);
1422:   PetscFree(sbuf_aj[0]);
1423:   PetscFree(sbuf_aj);
1424:   PetscFree(sbuf_aa[0]);
1425:   PetscFree(sbuf_aa);
1426: #if defined (PETSC_USE_CTABLE)
1427:   for (i=0; i<ismax; i++) {
1428:     PetscTableDestroy((PetscTable*)&cmap[i]);
1429:     PetscTableDestroy((PetscTable*)&rmap[i]);
1430:   }
1431: #else
1432:   PetscFree(cmap[0]);
1433:   PetscFree(rmap[0]);
1434: #endif
1435:   PetscFree(cmap);
1436:   PetscFree(rmap);
1437:   PetscFree(lens[0]);
1438:   PetscFree(lens);

1440:   for (i=0; i<ismax; i++) {
1441:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1442:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1443:   }
1444:   return(0);
1445: }

1447: /* 
1448:  Observe that the Seq matrices used to construct this MPI matrix are not increfed.
1449:  Be careful not to destroy them elsewhere.  
1450:  */
1453: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1454: {
1455:   /* If making this function public, change the error returned in this function away from _PLIB. */
1457:   Mat_MPIAIJ *aij;
1458:   PetscBool seqaij;

1461:   /* Check to make sure the component matrices are compatible with C. */
1462:   PetscTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1463:   if(!seqaij) {
1464:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1465:   }
1466:   PetscTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1467:   if(!seqaij) {
1468:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1469:   }
1470:   if(A->rmap->n != B->rmap->n || A->rmap->bs != B->rmap->bs || A->cmap->bs != B->cmap->bs) {
1471:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1472:   }
1473:   MatCreate(comm, C);
1474:   MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1475:   PetscLayoutSetBlockSize((*C)->rmap,A->rmap->bs);
1476:   PetscLayoutSetBlockSize((*C)->cmap,A->cmap->bs);
1477:   PetscLayoutSetUp((*C)->rmap);
1478:   PetscLayoutSetUp((*C)->cmap);
1479:   if((*C)->cmap->N != A->cmap->n + B->cmap->n) {
1480:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1481:   }
1482:   MatSetType(*C, MATMPIAIJ);
1483:   aij = (Mat_MPIAIJ*)((*C)->data);
1484:   aij->A = A;
1485:   aij->B = B;
1486:   PetscLogObjectParent(*C,A);
1487:   PetscLogObjectParent(*C,B);
1488:   (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1489:   (*C)->assembled    = (PetscBool)(A->assembled && B->assembled);
1490:   return(0);
1491: }

1495: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1496: {
1497:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);
1501:   *A = aij->A;
1502:   *B = aij->B;
1503:   /* Note that we don't incref *A and *B, so be careful! */
1504:   return(0);
1505: }


1510: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1511:                                                  PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1512:                                                  PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1513:                                                  PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1514: {
1516:   PetscMPIInt size, flag;
1517:   PetscInt    i,ii;
1518:   PetscInt    ismax_c;
1520:   if(!ismax) {
1521:     return(0);
1522:   }
1523:   for(i = 0, ismax_c = 0; i < ismax; ++i) {
1524:     MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1525:     if(flag != MPI_IDENT) {
1526:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1527:     }
1528:     MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1529:     if(size > 1) {
1530:       ++ismax_c;
1531:     }
1532:   }
1533:   if(!ismax_c) { /* Sequential ISs only, so can call the sequential matrix extraction subroutine. */
1534:     (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1535:   }
1536:   else { /* if(ismax_c) */
1537:     Mat *A,*B;
1538:     IS  *isrow_c, *iscol_c;
1539:     PetscMPIInt size;
1540:     /* 
1541:      Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate 
1542:      array of sequential matrices underlying the resulting parallel matrices.  
1543:      Which arrays to allocate is based on the value of MatReuse scall.
1544:      There are as many diag matrices as there are original index sets.
 1545:      There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.

1547:      Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists, 
 1548:      we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which 
 1549:      getsubmats_seq will deposit the extracted diag and off-diag parts. 
1550:      However, if reuse is taking place, we have to allocate the seq matrix arrays here.
1551:      If reuse is NOT taking place, then the seq matrix arrays are allocated by getsubmats_seq.
1552:     */
1553: 
1554:     /* Parallel matrix array is allocated only if no reuse is taking place. */
1555:     if (scall != MAT_REUSE_MATRIX) {
1556:       PetscMalloc((ismax)*sizeof(Mat),submat);
1557:     }
1558:     else {
1559:       PetscMalloc(ismax*sizeof(Mat), &A);
1560:       PetscMalloc(ismax_c*sizeof(Mat), &B);
1561:       /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1562:       for(i = 0, ii = 0; i < ismax; ++i) {
1563:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1564:         if(size > 1) {
1565:           (*extractseq)((*submat)[i],A+i,B+ii);
1566:           ++ii;
1567:         }
1568:         else {
1569:           A[i] = (*submat)[i];
1570:         }
1571:       }
1572:     }
1573:     /* 
1574:      Construct the complements of the iscol ISs for parallel ISs only.
1575:      These are used to extract the off-diag portion of the resulting parallel matrix.
1576:      The row IS for the off-diag portion is the same as for the diag portion,
1577:      so we merely alias the row IS, while skipping those that are sequential.
1578:     */
1579:     PetscMalloc2(ismax_c,IS,&isrow_c, ismax_c, IS, &iscol_c);
1580:     for(i = 0, ii = 0; i < ismax; ++i) {
1581:       MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1582:       if(size > 1) {
1583:         isrow_c[ii] = isrow[i];
1584:         ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1585:         ++ii;
1586:       }
1587:     }
1588:     /* Now obtain the sequential A and B submatrices separately. */
1589:     (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1590:     (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1591:     for(ii = 0; ii < ismax_c; ++ii) {
1592:       ISDestroy(&iscol_c[ii]);
1593:     }
1594:     PetscFree2(isrow_c, iscol_c);
1595:     /* 
1596:      If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1597:      have been extracted directly into the parallel matrices containing them, or
1598:      simply into the sequential matrix identical with the corresponding A (if size == 1).
1599:      Otherwise, make sure that parallel matrices are constructed from A & B, or the
1600:      A is put into the correct submat slot (if size == 1).
1601:      */
1602:     if(scall != MAT_REUSE_MATRIX) {
1603:       for(i = 0, ii = 0; i < ismax; ++i) {
1604:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1605:         if(size > 1) {
1606:           /* 
1607:            For each parallel isrow[i], create parallel matrices from the extracted sequential matrices. 
1608:            */
1609:           /* Construct submat[i] from the Seq pieces A and B. */
1610:           (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);
1611: 
1612:           ++ii;
1613:         }
1614:         else {
1615:           (*submat)[i] = A[i];
1616:         }
1617:       }
1618:     }
1619:     PetscFree(A);
1620:     PetscFree(B);
1621:   }
1622:   return(0);
1623: }/* MatGetSubMatricesParallel_MPIXAIJ() */



1629: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1630: {
1633:   MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,
1634:                                            MatCreateMPIAIJFromSeqMatrices_Private,
1635:                                            MatMPIAIJExtractSeqMatrices_Private);
1636: 
1637:   return(0);
1638: }