Actual source code: mpirowbs.c

  1: #define PETSCMAT_DLL

  3:  #include "../src/mat/impls/rowbs/mpi/mpirowbs.h"

  5: #define CHUNCKSIZE_LOCAL   10

  9: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
 10: {

 14:   if (v) {
 15: #if defined(PETSC_USE_LOG)
 16:     int len = -n*(sizeof(int)+sizeof(PetscScalar));
 17: #endif
 18:     PetscFree(v);
 19:     PetscLogObjectMemory(A,len);
 20:   }
 21:   return(0);
 22: }

 26: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
 27: {
 29:   int          len;

 31:   if (!n) {
 32:     *i = 0; *v = 0;
 33:   } else {
 34:     len = n*(sizeof(int) + sizeof(PetscScalar)); PetscMalloc(len,v);
 35:     PetscLogObjectMemory(A,len);
 36:     *i = (int*)(*v + n);
 37:   }
 38:   return(0);
 39: }

 43: PetscErrorCode MatScale_MPIRowbs(Mat inA,PetscScalar alpha)
 44: {
 45:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)inA->data;
 46:   BSspmat        *A = a->A;
 47:   BSsprow        *vs;
 48:   PetscScalar    *ap;
 49:   int            i,m = inA->rmap->n,nrow,j;

 53:   for (i=0; i<m; i++) {
 54:     vs   = A->rows[i];
 55:     nrow = vs->length;
 56:     ap   = vs->nz;
 57:     for (j=0; j<nrow; j++) {
 58:       ap[j] *= alpha;
 59:     }
 60:   }
 61:   PetscLogFlops(a->nz);
 62:   return(0);
 63: }

 65: /* ----------------------------------------------------------------- */
 68: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
 69: {
 70:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
 72:   int   i,len,m = A->rmap->n,*tnnz;
 73:   BSspmat      *bsmat;
 74:   BSsprow      *vs;

 77:   PetscMalloc((m+1)*sizeof(int),&tnnz);
 78:   if (!nnz) {
 79:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
 80:     if (nz <= 0)             nz = 1;
 81:     for (i=0; i<m; i++) tnnz[i] = nz;
 82:     nz      = nz*m;
 83:   } else {
 84:     nz = 0;
 85:     for (i=0; i<m; i++) {
 86:       if (nnz[i] <= 0) tnnz[i] = 1;
 87:       else             tnnz[i] = nnz[i];
 88:       nz += tnnz[i];
 89:     }
 90:   }

 92:   /* Allocate BlockSolve matrix context */
 93:   PetscNewLog(A,BSspmat,&bsif->A);
 94:   bsmat = bsif->A;
 95:   BSset_mat_icc_storage(bsmat,PETSC_FALSE);
 96:   BSset_mat_symmetric(bsmat,PETSC_FALSE);
 97:   len = m*(sizeof(BSsprow*) + sizeof(BSsprow)); PetscMalloc(len,&bsmat->rows);
 98:   bsmat->num_rows        = m;
 99:   bsmat->global_num_rows = A->rmap->N;
100:   bsmat->map             = bsif->bsmap;
101:   vs                     = (BSsprow*)(bsmat->rows + m);
102:   for (i=0; i<m; i++) {
103:     bsmat->rows[i]  = vs;
104:     bsif->imax[i]   = tnnz[i];
105:     vs->diag_ind    = -1;
106:     MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
107:     /* put zero on diagonal */
108:     /*vs->length            = 1;
109:     vs->col[0]      = i + bsif->rstart;
110:     vs->nz[0]       = 0.0;*/
111:     vs->length = 0;
112:     vs++;
113:   }
114:   PetscLogObjectMemory(A,sizeof(BSspmat) + len);
115:   bsif->nz               = 0;
116:   bsif->maxnz            = nz;
117:   bsif->sorted           = 0;
118:   bsif->roworiented      = PETSC_TRUE;
119:   bsif->nonew            = 0;
120:   bsif->bs_color_single  = 0;

122:   PetscFree(tnnz);
123:   return(0);
124: }

128: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
129: {
130:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
131:   BSspmat      *A = mat->A;
132:   BSsprow      *vs;
134:   int          *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
135:   int          *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
136:   PetscScalar  *ap,value;

139:   for (k=0; k<m; k++) { /* loop over added rows */
140:     row = im[k];
141:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
142:     if (row >= AA->rmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->rmap->n-1);
143:     vs   = A->rows[row];
144:     ap   = vs->nz; rp = vs->col;
145:     rmax = imax[row]; nrow = vs->length;
146:     a    = 0;
147:     for (l=0; l<n; l++) { /* loop over added columns */
148:       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
149:       if (in[l] >= AA->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->cmap->N-1);
150:       col = in[l]; value = *v++;
151:       if (!sorted) a = 0; b = nrow;
152:       while (b-a > 5) {
153:         t = (b+a)/2;
154:         if (rp[t] > col) b = t;
155:         else             a = t;
156:       }
157:       for (i=a; i<b; i++) {
158:         if (rp[i] > col) break;
159:         if (rp[i] == col) {
160:           if (addv == ADD_VALUES) ap[i] += value;
161:           else                    ap[i] = value;
162:           goto noinsert;
163:         }
164:       }
165:       if (nonew) goto noinsert;
166:       if (nrow >= rmax) {
167:         /* there is no extra room in row, therefore enlarge */
168:         int    *itemp,*iout,*iin = vs->col;
169:         PetscScalar *vout,*vin = vs->nz,*vtemp;

171:         /* malloc new storage space */
172:         imax[row] += CHUNCKSIZE_LOCAL;
173:         MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
174:         vout = vtemp; iout = itemp;
175:         for (ii=0; ii<i; ii++) {
176:           vout[ii] = vin[ii];
177:           iout[ii] = iin[ii];
178:         }
179:         vout[i] = value;
180:         iout[i] = col;
181:         for (ii=i+1; ii<=nrow; ii++) {
182:           vout[ii] = vin[ii-1];
183:           iout[ii] = iin[ii-1];
184:         }
185:         /* free old row storage */
186:         if (rmax > 0) {
187:           MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
188:         }
189:         vs->col           =  iout; vs->nz = vout;
190:         rmax              =  imax[row];
191:         mat->maxnz        += CHUNCKSIZE_LOCAL;
192:         mat->reallocs++;
193:       } else {
194:         /* shift higher columns over to make room for newie */
195:         for (ii=nrow-1; ii>=i; ii--) {
196:           rp[ii+1] = rp[ii];
197:           ap[ii+1] = ap[ii];
198:         }
199:         rp[i] = col;
200:         ap[i] = value;
201:       }
202:       nrow++;
203:       mat->nz++;
204:       AA->same_nonzero = PETSC_FALSE;
205:       noinsert:;
206:       a = i + 1;
207:     }
208:     vs->length = nrow;
209:   }
210:   return(0);
211: }


216: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
217: {
219:   return(0);
220: }

224: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
225: {
226:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
227:   BSspmat      *A = a->A;
228:   BSsprow      *vs;
229:   int          i,j,rstart = AA->rmap->rstart;

232:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

234:   /* Mark location of diagonal */
235:   for (i=0; i<AA->rmap->n; i++) {
236:     vs = A->rows[i];
237:     for (j=0; j<vs->length; j++) {
238:       if (vs->col[j] == i + rstart) {
239:         vs->diag_ind = j;
240:         break;
241:       }
242:     }
243:     if (vs->diag_ind == -1) {
244:       SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
245:     }
246:   }
247:   return(0);
248: }

252: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,PetscInt N,const PetscInt rz[],PetscScalar diag)
253: {
254:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)A->data;
255:   BSspmat        *l = a->A;
257:   int            i,m = A->rmap->n - 1,col,base=A->rmap->rstart;

260:   if (a->keepzeroedrows) {
261:     for (i=0; i<N; i++) {
262:       if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
263:       PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
264:       if (diag != 0.0) {
265:         col=rz[i]+base;
266:         MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
267:       }
268:     }
269:   } else {
270:     if (diag != 0.0) {
271:       for (i=0; i<N; i++) {
272:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
273:         if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
274:           l->rows[rz[i]]->length = 1;
275:           l->rows[rz[i]]->nz[0]  = diag;
276:           l->rows[rz[i]]->col[0] = A->rmap->rstart + rz[i];
277:         } else {
278:           col=rz[i]+base;
279:           MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
280:         }
281:       }
282:     } else {
283:       for (i=0; i<N; i++) {
284:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
285:         l->rows[rz[i]]->length = 0;
286:       }
287:     }
288:     A->same_nonzero = PETSC_FALSE;
289:   }
290:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
291:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
292:   return(0);
293: }

297: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
298: {
299:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
300:   BSsprow      *vs,**rs;
301:   PetscScalar  *xv;
302:   PetscReal    sum = 0.0;
304:   int          *xi,nz,i,j;

307:   rs = mat->A->rows;
308:   if (type == NORM_FROBENIUS) {
309:     for (i=0; i<A->rmap->n; i++) {
310:       vs = *rs++;
311:       nz = vs->length;
312:       xv = vs->nz;
313:       while (nz--) {
314: #if defined(PETSC_USE_COMPLEX)
315:         sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
316: #else
317:         sum += (*xv)*(*xv); xv++;
318: #endif
319:       }
320:     }
321:     *norm = sqrt(sum);
322:   } else if (type == NORM_1) { /* max column norm */
323:     PetscReal *tmp;
324:     PetscMalloc(A->cmap->n*sizeof(PetscReal),&tmp);
325:     PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));
326:     *norm = 0.0;
327:     for (i=0; i<A->rmap->n; i++) {
328:       vs = *rs++;
329:       nz = vs->length;
330:       xi = vs->col;
331:       xv = vs->nz;
332:       while (nz--) {
333:         tmp[*xi] += PetscAbsScalar(*xv);
334:         xi++; xv++;
335:       }
336:     }
337:     for (j=0; j<A->cmap->n; j++) {
338:       if (tmp[j] > *norm) *norm = tmp[j];
339:     }
340:     PetscFree(tmp);
341:   } else if (type == NORM_INFINITY) { /* max row norm */
342:     *norm = 0.0;
343:     for (i=0; i<A->rmap->n; i++) {
344:       vs = *rs++;
345:       nz = vs->length;
346:       xv = vs->nz;
347:       sum = 0.0;
348:       while (nz--) {
349:         sum += PetscAbsScalar(*xv); xv++;
350:       }
351:       if (sum > *norm) *norm = sum;
352:     }
353:   } else {
354:     SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
355:   }
356:   return(0);
357: }

359: /* ----------------------------------------------------------------- */

363: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
364: {
365:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
367:   int   i,j,row,col,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
368:   PetscTruth   roworiented = a->roworiented;

371:   /* Note:  There's no need to "unscale" the matrix, since scaling is
372:      confined to a->pA, and we're working with a->A here */
373:   for (i=0; i<m; i++) {
374:     if (im[i] < 0) continue;
375:     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->rmap->N-1);
376:     if (im[i] >= rstart && im[i] < rend) {
377:       row = im[i] - rstart;
378:       for (j=0; j<n; j++) {
379:         if (in[j] < 0) continue;
380:         if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->cmap->N-1);
381:         if (in[j] >= 0 && in[j] < mat->cmap->N){
382:           col = in[j];
383:           if (roworiented) {
384:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
385:           } else {
386:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
387:           }
388:         } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
389:       }
390:     } else {
391:       if (!a->donotstash) {
392:         if (roworiented) {
393:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
394:         } else {
395:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
396:         }
397:       }
398:     }
399:   }
400:   return(0);
401: }

405: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
406: {
407:   MPI_Comm      comm = ((PetscObject)mat)->comm;
409:   int         nstash,reallocs;
410:   InsertMode    addv;

413:   /* Note:  There's no need to "unscale" the matrix, since scaling is
414:             confined to a->pA, and we're working with a->A here */

416:   /* make sure all processors are either in INSERTMODE or ADDMODE */
417:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
418:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
419:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
420:   }
421:   mat->insertmode = addv; /* in case this processor had no cache */

423:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
424:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
425:   PetscInfo2(mat,"Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
426:   return(0);
427: }

431: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
432: {
433:   Mat_MPIRowbs      *a = (Mat_MPIRowbs*)mat->data;
435:   int               i,j;
436:   PetscTruth        iascii;
437:   BSspmat           *A = a->A;
438:   BSsprow           **rs = A->rows;
439:   PetscViewerFormat format;

442:   PetscViewerGetFormat(viewer,&format);
443:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);

445:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
446:     int ind_l,ind_g,clq_l,clq_g,color;
447:     ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
448:     ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
449:     clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
450:     clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
451:     color = BSnum_colors(a->pA);CHKERRBS(0);
452:     PetscViewerASCIIPrintf(viewer,"  %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
453:     PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
454:   } else  if (format == PETSC_VIEWER_ASCII_COMMON) {
455:     for (i=0; i<A->num_rows; i++) {
456:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap->rstart);
457:       for (j=0; j<rs[i]->length; j++) {
458:         if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
459:       }
460:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
461:     }
462:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
463:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
464:   } else {
465:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
466:     for (i=0; i<A->num_rows; i++) {
467:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap->rstart);
468:       for (j=0; j<rs[i]->length; j++) {
469:         PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
470:       }
471:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
472:     }
473:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
474:   }
475:   PetscViewerFlush(viewer);
476:   return(0);
477: }

481: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
482: {
483:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)mat->data;
485:   PetscMPIInt    rank,size,mnz;
486:   PetscInt       i,M,m,*sbuff,*rowlengths;
487:   PetscInt       *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
488:   BSspmat        *A = a->A;
489:   BSsprow        **rs = A->rows;
490:   MPI_Comm       comm = ((PetscObject)mat)->comm;
491:   MPI_Status     status;
492:   PetscScalar    *vals;
493:   MatInfo        info;

496:   MPI_Comm_size(comm,&size);
497:   MPI_Comm_rank(comm,&rank);

499:   M = mat->rmap->N; m = mat->rmap->n;
500:   /* First gather together on the first processor the lengths of 
501:      each row, and write them out to the file */
502:   PetscMalloc(m*sizeof(int),&sbuff);
503:   for (i=0; i<A->num_rows; i++) {
504:     sbuff[i] = rs[i]->length;
505:   }
506:   MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
507:   if (!rank) {
508:     PetscViewerBinaryGetDescriptor(viewer,&fd);
509:     PetscMalloc((4+M)*sizeof(int),&rowlengths);
510:     PetscMalloc(size*sizeof(int),&recvcts);
511:     recvdisp = mat->rmap->range;
512:     for (i=0; i<size; i++) {
513:       recvcts[i] = recvdisp[i+1] - recvdisp[i];
514:     }
515:     /* first four elements of rowlength are the header */
516:     rowlengths[0] = ((PetscObject)mat)->cookie;
517:     rowlengths[1] = mat->rmap->N;
518:     rowlengths[2] = mat->cmap->N;
519:     rowlengths[3] = (int)info.nz_used;
520:     MPI_Gatherv(sbuff,m,MPIU_INT,rowlengths+4,recvcts,recvdisp,MPIU_INT,0,comm);
521:     PetscFree(sbuff);
522:     PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
523:     /* count the number of nonzeros on each processor */
524:     PetscMemzero(recvcts,size*sizeof(int));
525:     for (i=0; i<size; i++) {
526:       for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
527:         recvcts[i] += rowlengths[j+4]; /* row lengths start after the 4-int header */
528:       }
529:     }
530:     /* allocate buffer long enough to hold largest one */
531:     maxnz = 0;
532:     for (i=0; i<size; i++) {
533:       maxnz = PetscMax(maxnz,recvcts[i]);
534:     }
535:     PetscFree(rowlengths);
536:     PetscFree(recvcts);
537:     PetscMalloc(maxnz*sizeof(int),&cols);

539:     /* binary store column indices for 0th processor */
540:     nz = 0;
541:     for (i=0; i<A->num_rows; i++) {
542:       for (j=0; j<rs[i]->length; j++) {
543:         cols[nz++] = rs[i]->col[j];
544:       }
545:     }
546:     PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);

548:     /* receive and store column indices for all other processors */
549:     for (i=1; i<size; i++) {
550:       /* should tell processor that I am now ready and to begin the send */
551:       MPI_Recv(cols,maxnz,MPIU_INT,i,((PetscObject)mat)->tag,comm,&status);
552:       MPI_Get_count(&status,MPIU_INT,&mnz);
553:       PetscBinaryWrite(fd,cols,mnz,PETSC_INT,PETSC_FALSE);
554:     }
555:     PetscFree(cols);
556:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

558:     /* binary store values for 0th processor */
559:     nz = 0;
560:     for (i=0; i<A->num_rows; i++) {
561:       for (j=0; j<rs[i]->length; j++) {
562:         vals[nz++] = rs[i]->nz[j];
563:       }
564:     }
565:     PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);

567:     /* receive and store nonzeros for all other processors */
568:     for (i=1; i<size; i++) {
569:       /* should tell processor that I am now ready and to begin the send */
570:       MPI_Recv(vals,maxnz,MPIU_SCALAR,i,((PetscObject)mat)->tag,comm,&status);
571:       MPI_Get_count(&status,MPIU_SCALAR,&nz);
572:       PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
573:     }
574:     PetscFree(vals);
575:   } else {
576:     MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
577:     PetscFree(sbuff);

579:     /* count local nonzeros */
580:     nz = 0;
581:     for (i=0; i<A->num_rows; i++) {
582:       for (j=0; j<rs[i]->length; j++) {
583:         nz++;
584:       }
585:     }
586:     /* copy into buffer column indices */
587:     PetscMalloc(nz*sizeof(int),&cols);
588:     nz = 0;
589:     for (i=0; i<A->num_rows; i++) {
590:       for (j=0; j<rs[i]->length; j++) {
591:         cols[nz++] = rs[i]->col[j];
592:       }
593:     }
594:     /* send */  /* should wait until processor zero tells me to go */
595:     MPI_Send(cols,nz,MPI_INT,0,((PetscObject)mat)->tag,comm);
596:     PetscFree(cols);

598:     /* copy into buffer column values */
599:     PetscMalloc(nz*sizeof(PetscScalar),&vals);
600:     nz   = 0;
601:     for (i=0; i<A->num_rows; i++) {
602:       for (j=0; j<rs[i]->length; j++) {
603:         vals[nz++] = rs[i]->nz[j];
604:       }
605:     }
606:     /* send */  /* should wait until processor zero tells me to go */
607:     MPI_Send(vals,nz,MPIU_SCALAR,0,((PetscObject)mat)->tag,comm);
608:     PetscFree(vals);
609:   }

611:   return(0);
612: }

616: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
617: {
618:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
620:   PetscTruth   iascii,isbinary;

623:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
624:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
625:   if (!bsif->blocksolveassembly) {
626:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
627:   }
628:   if (iascii) {
629:     MatView_MPIRowbs_ASCII(mat,viewer);
630:   } else if (isbinary) {
631:     MatView_MPIRowbs_Binary(mat,viewer);
632:   } else {
633:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
634:   }
635:   return(0);
636: }
637: 
640: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
641: {
642:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
643:   BSspmat      *A = a->A;
644:   BSsprow      *vs;
645:   int          size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
646:   int          msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
648:   int          ctr_j,*sbuf1_j,k;
649:   PetscScalar  val=0.0;
650:   MPI_Comm     comm;
651:   MPI_Request  *s_waits1,*r_waits1;
652:   MPI_Status   *s_status,*r_status;

655:   comm   = ((PetscObject)mat)->comm;
656:   tag    = ((PetscObject)mat)->tag;
657:   size   = a->size;
658:   rank   = a->rank;
659:   M      = mat->rmap->N;
660:   rstart = mat->rmap->rstart;

662:   PetscMalloc(M*sizeof(int),&rtable);
663:   /* Create hash table for the mapping :row -> proc */
664:   for (i=0,j=0; i<size; i++) {
665:     len = mat->rmap->range[i+1];
666:     for (; j<len; j++) {
667:       rtable[j] = i;
668:     }
669:   }

671:   /* Evaluate communication - mesg to whom, length of mesg, and buffer space
672:      required. Based on this, buffers are allocated, and data copied into them. */
673:   PetscMalloc(size*4*sizeof(int),&w1);/*  mesg size */
674:   w3   = w1 + 2*size;       /* no of IS that needs to be sent to proc i */
675:   w4   = w3 + size;       /* temp work space used in determining w1,  w3 */
676:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */

678:   for (i=0;  i<mat->rmap->n; i++) {
679:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
680:     vs = A->rows[i];
681:     for (j=0; j<vs->length; j++) {
682:       proc = rtable[vs->col[j]];
683:       w4[proc]++;
684:     }
685:     for (j=0; j<size; j++) {
686:       if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
687:     }
688:   }
689: 
690:   nrqs       = 0;              /* number of outgoing messages */
691:   msz        = 0;              /* total mesg length (for all proc */
692:   w1[2*rank] = 0;              /* no mesg sent to itself */
693:   w3[rank]   = 0;
694:   for (i=0; i<size; i++) {
695:     if (w1[2*i])  {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
696:   }
697:   /* pa - is list of processors to communicate with */
698:   PetscMalloc((nrqs+1)*sizeof(int),&pa);
699:   for (i=0,j=0; i<size; i++) {
700:     if (w1[2*i]) {pa[j] = i; j++;}
701:   }

703:   /* Each message would have a header = 1 + 2*(no of ROWS) + data */
704:   for (i=0; i<nrqs; i++) {
705:     j       = pa[i];
706:     w1[2*j] += w1[2*j+1] + 2*w3[j];
707:     msz     += w1[2*j];
708:   }
709: 
710:   /* Do a global reduction to determine how many messages to expect */
711:   PetscMaxSum(comm,w1,&bsz,&nrqr);

713:   /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */
714:   PetscMalloc((nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int),&rbuf1);
715:   rbuf1[0] = (int*)(rbuf1 + nrqr);
716:   for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

718:   /* Post the receives */
719:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
720:   for (i=0; i<nrqr; ++i){
721:     MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
722:   }
723: 
724:   /* Allocate Memory for outgoing messages */
725:   PetscMalloc(2*size*sizeof(int*) + (size+msz)*sizeof(int),&sbuf1);
726:   ptr   = sbuf1 + size;     /* Pointers to the data in outgoing buffers */
727:   PetscMemzero(sbuf1,2*size*sizeof(int*));
728:   tmp   = (int*)(sbuf1 + 2*size);
729:   ctr   = tmp + msz;

731:   {
732:     int *iptr = tmp,ict  = 0;
733:     for (i=0; i<nrqs; i++) {
734:       j        = pa[i];
735:       iptr    += ict;
736:       sbuf1[j] = iptr;
737:       ict      = w1[2*j];
738:     }
739:   }

741:   /* Form the outgoing messages */
742:   /* Clean up the header space */
743:   for (i=0; i<nrqs; i++) {
744:     j           = pa[i];
745:     sbuf1[j][0] = 0;
746:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
747:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
748:   }

750:   /* Parse the matrix and copy the data into sbuf1 */
751:   for (i=0; i<mat->rmap->n; i++) {
752:     PetscMemzero(ctr,size*sizeof(int));
753:     vs = A->rows[i];
754:     for (j=0; j<vs->length; j++) {
755:       col  = vs->col[j];
756:       proc = rtable[col];
757:       if (proc != rank) { /* copy to the outgoing buffer */
758:         ctr[proc]++;
759:           *ptr[proc] = col;
760:           ptr[proc]++;
761:       } else {
762:         row = col - rstart;
763:         col = i + rstart;
764:         MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
765:       }
766:     }
767:     /* Update the headers for the current row */
768:     for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
769:       if ((ctr_j = ctr[j])) {
770:         sbuf1_j        = sbuf1[j];
771:         k               = ++sbuf1_j[0];
772:         sbuf1_j[2*k]   = ctr_j;
773:         sbuf1_j[2*k-1] = i + rstart;
774:       }
775:     }
776:   }
777:    /* Check Validity of the outgoing messages */
778:   {
779:     int sum;
780:     for (i=0 ; i<nrqs ; i++) {
781:       j = pa[i];
782:       if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
783:     }

785:     for (i=0 ; i<nrqs ; i++) {
786:       j = pa[i];
787:       sum = 1;
788:       for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
789:       if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
790:     }
791:   }
792: 
793:   /* Now post the sends */
794:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
795:   for (i=0; i<nrqs; ++i) {
796:     j    = pa[i];
797:     MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
798:   }
799: 
800:   /* Receive messages*/
801:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
802:   for (i=0; i<nrqr; ++i) {
803:     MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
804:     /* Process the Message */
805:     {
806:       int    *rbuf1_i,n_row,ct1;

808:       rbuf1_i = rbuf1[idx];
809:       n_row   = rbuf1_i[0];
810:       ct1     = 2*n_row+1;
811:       val     = 0.0;
812:       /* Optimise this later */
813:       for (j=1; j<=n_row; j++) {
814:         col = rbuf1_i[2*j-1];
815:         for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
816:           row = rbuf1_i[ct1] - rstart;
817:           MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
818:         }
819:       }
820:     }
821:   }

823:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
824:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

826:   PetscFree(rtable);
827:   PetscFree(w1);
828:   PetscFree(pa);
829:   PetscFree(rbuf1);
830:   PetscFree(sbuf1);
831:   PetscFree(r_waits1);
832:   PetscFree(s_waits1);
833:   PetscFree(r_status);
834:   PetscFree(s_status);
835:   return(0);
836: }

838: /*
839:      This does the BlockSolve portion of the matrix assembly.
840:    It is provided in a separate routine so that users can
841:    operate on the matrix (using MatScale(), MatShift() etc.) after 
842:    the matrix has been assembled but before BlockSolve has sucked it
843:    in and devoured it.
844: */
847: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
848: {
849:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
851:   int          ldim,low,high,i;
852:   PetscScalar  *diag;

855:   if ((mat->was_assembled) && (!mat->same_nonzero)) {  /* Free the old info */
856:     if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
857:     if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
858:   }

860:   if ((!mat->same_nonzero) || (!mat->was_assembled)) {
861:     /* Indicates bypassing cliques in coloring */
862:     if (a->bs_color_single) {
863:       BSctx_set_si(a->procinfo,100);
864:     }
865:     /* Form permuted matrix for efficient parallel execution */
866:     a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
867:     /* Set up the communication */
868:     a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
869:   } else {
870:     /* Repermute the matrix */
871:     BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
872:   }

874:   /* Symmetrically scale the matrix by the diagonal */
875:   BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);

877:   /* Store inverse of square root of permuted diagonal scaling matrix */
878:   VecGetLocalSize(a->diag,&ldim);
879:   VecGetOwnershipRange(a->diag,&low,&high);
880:   VecGetArray(a->diag,&diag);
881:   for (i=0; i<ldim; i++) {
882:     if (a->pA->scale_diag[i] != 0.0) {
883:       diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
884:     } else {
885:       diag[i] = 1.0;
886:     }
887:   }
888:   VecRestoreArray(a->diag,&diag);
889:   a->assembled_icc_storage = a->A->icc_storage;
890:   a->blocksolveassembly = 1;
891:   mat->was_assembled    = PETSC_TRUE;
892:   mat->same_nonzero     = PETSC_TRUE;
893:   PetscInfo(mat,"Completed BlockSolve95 matrix assembly\n");
894:   return(0);
895: }
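
/*
   A hedged usage sketch (not part of the original source) of the assembly
   sequence described in the comment above MatAssemblyEnd_MPIRowbs_ForBlockSolve():
   the user assembles the matrix, may still modify it with routines such as
   MatScale(), and only then is the BlockSolve portion of the assembly performed
   (normally it is triggered lazily by MatMult() or MatView()).  Error checking
   is omitted; A and alpha are assumed to be defined elsewhere.

     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatScale(A,alpha);                             // still operates on a->A
     MatAssemblyEnd_MPIRowbs_ForBlockSolve(A);      // hand the matrix to BlockSolve95
*/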

899: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
900: {
901:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
903:   int          i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
904:   PetscScalar  *vals,val;
905:   InsertMode   addv = mat->insertmode;

908:   while (1) {
909:     MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
910:     if (!flg) break;
911: 
912:     for (i=0; i<n;) {
913:       /* Now identify the consecutive vals belonging to the same row */
914:       for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
915:       if (j < n) ncols = j-i;
916:       else       ncols = n-i;
917:       /* Now assemble all these values with a single function call */
918:       MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
919:       i = j;
920:     }
921:   }
922:   MatStashScatterEnd_Private(&mat->stash);

924:   rstart = mat->rmap->rstart;
925:   nzcount = a->nz; /* This is the number of nonzeros entered by the user */
926:   /* BlockSolve requires that the matrix is structurally symmetric */
927:   if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
928:     MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
929:   }
930: 
931:   /* BlockSolve requires that all the diagonal elements are set */
932:   val  = 0.0;
933:   for (i=0; i<mat->rmap->n; i++) {
934:     row = i; col = i + rstart;
935:     MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
936:   }
937: 
938:   MatAssemblyBegin_MPIRowbs_local(mat,mode);
939:   MatAssemblyEnd_MPIRowbs_local(mat,mode);
940: 
941:   a->blocksolveassembly = 0;
942:   PetscInfo4(mat,"Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->rmap->n,mat->cmap->n,a->maxnz-a->nz,a->nz);
943:   PetscInfo2(mat,"User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
944:   PetscInfo1(mat,"Number of mallocs during MatSetValues is %d\n",a->reallocs);
945:   return(0);
946: }

950: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
951: {
952:   Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
953:   BSspmat      *A = l->A;
954:   BSsprow      *vs;
955:   int          i,j;

958:   for (i=0; i <mat->rmap->n; i++) {
959:     vs = A->rows[i];
960:     for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
961:   }
962:   return(0);
963: }

 965: /* This code does not handle the diagonal entries correctly unless the
 966:    matrix is square and the column and row ownerships are identical.
967:    This is a BUG.
968: */

972: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
973: {
974:   Mat_MPIRowbs   *l = (Mat_MPIRowbs*)A->data;
976:   int            i,*owners = A->rmap->range,size = l->size;
977:   int            *nprocs,j,idx,nsends;
978:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
979:   int            *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,n,*source;
980:   int            *lens,imdex,*lrows,*values;
981:   MPI_Comm       comm = ((PetscObject)A)->comm;
982:   MPI_Request    *send_waits,*recv_waits;
983:   MPI_Status     recv_status,*send_status;
984:   PetscTruth     found;

987:   /*  first count number of contributors to each processor */
988:   PetscMalloc(2*size*sizeof(int),&nprocs);
989:   PetscMemzero(nprocs,2*size*sizeof(int));
990:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
991:   for (i=0; i<N; i++) {
992:     idx   = rows[i];
993:     found = PETSC_FALSE;
994:     for (j=0; j<size; j++) {
995:       if (idx >= owners[j] && idx < owners[j+1]) {
996:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
997:       }
998:     }
999:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1000:   }
1001:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

1003:   /* inform other processors of number of messages and max length*/
1004:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

1006:   /* post receives:   */
1007:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1008:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1009:   for (i=0; i<nrecvs; i++) {
1010:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1011:   }

1013:   /* do sends:
1014:       1) starts[i] gives the starting index in svalues for stuff going to 
1015:          the ith processor
1016:   */
1017:   PetscMalloc((N+1)*sizeof(int),&svalues);
1018:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1019:   PetscMalloc((size+1)*sizeof(int),&starts);
1020:   starts[0] = 0;
1021:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1022:   for (i=0; i<N; i++) {
1023:     svalues[starts[owner[i]]++] = rows[i];
1024:   }

1026:   starts[0] = 0;
1027:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1028:   count = 0;
1029:   for (i=0; i<size; i++) {
1030:     if (nprocs[2*i+1]) {
1031:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1032:     }
1033:   }
1034:   PetscFree(starts);

1036:   base = owners[rank];

1038:   /*  wait on receives */
1039:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1040:   source = lens + nrecvs;
1041:   count = nrecvs; slen = 0;
1042:   while (count) {
1043:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1044:     /* unpack receives into our local space */
1045:     MPI_Get_count(&recv_status,MPI_INT,&n);
1046:     source[imdex]  = recv_status.MPI_SOURCE;
1047:     lens[imdex]    = n;
1048:     slen           += n;
1049:     count--;
1050:   }
1051:   PetscFree(recv_waits);
1052: 
1053:   /* move the data into the send scatter */
1054:   PetscMalloc((slen+1)*sizeof(int),&lrows);
1055:   count = 0;
1056:   for (i=0; i<nrecvs; i++) {
1057:     values = rvalues + i*nmax;
1058:     for (j=0; j<lens[i]; j++) {
1059:       lrows[count++] = values[j] - base;
1060:     }
1061:   }
1062:   PetscFree(rvalues);
1063:   PetscFree(lens);
1064:   PetscFree(owner);
1065:   PetscFree(nprocs);
1066: 
1067:   /* actually zap the local rows */
1068:   MatZeroRows_MPIRowbs_local(A,slen,lrows,diag);
1069:   PetscFree(lrows);

1071:   /* wait on sends */
1072:   if (nsends) {
1073:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1074:     MPI_Waitall(nsends,send_waits,send_status);
1075:     PetscFree(send_status);
1076:   }
1077:   PetscFree(send_waits);
1078:   PetscFree(svalues);

1080:   return(0);
1081: }

1085: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1086: {
1087:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1088:   BSsprow      *vs,**rs;
1089:   PetscScalar  *xv;
1090:   PetscReal    sum = 0.0;
1092:   int          *xi,nz,i,j;

1095:   if (a->size == 1) {
1096:     MatNorm_MPIRowbs_local(mat,type,norm);
1097:   } else {
1098:     rs = a->A->rows;
1099:     if (type == NORM_FROBENIUS) {
1100:       for (i=0; i<mat->rmap->n; i++) {
1101:         vs = *rs++;
1102:         nz = vs->length;
1103:         xv = vs->nz;
1104:         while (nz--) {
1105: #if defined(PETSC_USE_COMPLEX)
1106:           sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1107: #else
1108:           sum += (*xv)*(*xv); xv++;
1109: #endif
1110:         }
1111:       }
1112:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
1113:       *norm = sqrt(*norm);
1114:     } else if (type == NORM_1) { /* max column norm */
1115:       PetscReal *tmp,*tmp2;
1116:       PetscMalloc(mat->cmap->N*sizeof(PetscReal),&tmp);
1117:       PetscMalloc(mat->cmap->N*sizeof(PetscReal),&tmp2);
1118:       PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
1119:       *norm = 0.0;
1120:       for (i=0; i<mat->rmap->n; i++) {
1121:         vs = *rs++;
1122:         nz = vs->length;
1123:         xi = vs->col;
1124:         xv = vs->nz;
1125:         while (nz--) {
1126:           tmp[*xi] += PetscAbsScalar(*xv);
1127:           xi++; xv++;
1128:         }
1129:       }
1130:       MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
1131:       for (j=0; j<mat->cmap->N; j++) {
1132:         if (tmp2[j] > *norm) *norm = tmp2[j];
1133:       }
1134:       PetscFree(tmp);
1135:       PetscFree(tmp2);
1136:     } else if (type == NORM_INFINITY) { /* max row norm */
1137:       PetscReal ntemp = 0.0;
1138:       for (i=0; i<mat->rmap->n; i++) {
1139:         vs = *rs++;
1140:         nz = vs->length;
1141:         xv = vs->nz;
1142:         sum = 0.0;
1143:         while (nz--) {
1144:           sum += PetscAbsScalar(*xv); xv++;
1145:         }
1146:         if (sum > ntemp) ntemp = sum;
1147:       }
1148:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);
1149:     } else {
1150:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1151:     }
1152:   }
1153:   return(0);
1154: }

1158: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1159: {
1160:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1161:   BSprocinfo   *bspinfo = bsif->procinfo;
1162:   PetscScalar  *xxa,*xworka,*yya;

1166:   if (!bsif->blocksolveassembly) {
1167:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1168:   }

1170:   /* Permute and apply diagonal scaling:  [ xwork = D^{1/2} * x ] */
1171:   if (!bsif->vecs_permscale) {
1172:     VecGetArray(bsif->xwork,&xworka);
1173:     VecGetArray(xx,&xxa);
1174:     BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1175:     VecRestoreArray(bsif->xwork,&xworka);
1176:     VecRestoreArray(xx,&xxa);
1177:     VecPointwiseDivide(xx,bsif->xwork,bsif->diag);
1178:   }

1180:   VecGetArray(xx,&xxa);
1181:   VecGetArray(yy,&yya);
1182:   /* Do lower triangular multiplication:  [ y = L * xwork ] */
1183:   if (bspinfo->single) {
1184:     BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1185:   }  else {
1186:     BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1187:   }
1188: 
1189:   /* Do upper triangular multiplication:  [ y = y + L^{T} * xwork ] */
1190:   if (mat->symmetric) {
1191:     if (bspinfo->single){
1192:       BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1193:     } else {
1194:       BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1195:     }
1196:   }
1197:   /* not needed for ILU version since forward does it all */
1198:   VecRestoreArray(xx,&xxa);
1199:   VecRestoreArray(yy,&yya);

1201:   /* Apply diagonal scaling to vector:  [  y = D^{1/2} * y ] */
1202:   if (!bsif->vecs_permscale) {
1203:     VecGetArray(bsif->xwork,&xworka);
1204:     VecGetArray(xx,&xxa);
1205:     BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1206:     VecRestoreArray(bsif->xwork,&xworka);
1207:     VecRestoreArray(xx,&xxa);
1208:     VecPointwiseDivide(bsif->xwork,yy,bsif->diag);
1209:     VecGetArray(bsif->xwork,&xworka);
1210:     VecGetArray(yy,&yya);
1211:     BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1212:     VecRestoreArray(bsif->xwork,&xworka);
1213:     VecRestoreArray(yy,&yya);
1214:   }
1215:   PetscLogFlops(2*bsif->nz - mat->cmap->n);

1217:   return(0);
1218: }

1222: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1223: {
1225:   PetscScalar  one = 1.0;

1228:   (*mat->ops->mult)(mat,xx,zz);
1229:   VecAXPY(zz,one,yy);
1230:   return(0);
1231: }

1235: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1236: {
1237:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1238:   PetscReal    isend[5],irecv[5];

1242:   info->block_size     = 1.0;
1243:   info->mallocs        = (double)mat->reallocs;
1244:   isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] =  mat->maxnz -  mat->nz;
1245:   isend[3] = ((PetscObject)A)->mem;  isend[4] = info->mallocs;

1247:   if (flag == MAT_LOCAL) {
1248:     info->nz_used      = isend[0];
1249:     info->nz_allocated = isend[1];
1250:     info->nz_unneeded  = isend[2];
1251:     info->memory       = isend[3];
1252:     info->mallocs      = isend[4];
1253:   } else if (flag == MAT_GLOBAL_MAX) {
1254:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
1255:     info->nz_used      = irecv[0];
1256:     info->nz_allocated = irecv[1];
1257:     info->nz_unneeded  = irecv[2];
1258:     info->memory       = irecv[3];
1259:     info->mallocs      = irecv[4];
1260:   } else if (flag == MAT_GLOBAL_SUM) {
1261:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
1262:     info->nz_used      = irecv[0];
1263:     info->nz_allocated = irecv[1];
1264:     info->nz_unneeded  = irecv[2];
1265:     info->memory       = irecv[3];
1266:     info->mallocs      = irecv[4];
1267:   }
1268:   return(0);
1269: }

1273: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1274: {
1275:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1276:   BSsprow      **rs = a->A->rows;
1278:   int          i,n;
1279:   PetscScalar  *x,zero = 0.0;

1282:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1283:   if (!a->blocksolveassembly) {
1284:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1285:   }

1287:   VecSet(v,zero);
1288:   VecGetLocalSize(v,&n);
1289:   if (n != mat->rmap->n) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1290:   VecGetArray(v,&x);
1291:   for (i=0; i<mat->rmap->n; i++) {
1292:     x[i] = rs[i]->nz[rs[i]->diag_ind];
1293:   }
1294:   VecRestoreArray(v,&x);
1295:   return(0);
1296: }

1300: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1301: {
1302:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1303:   BSspmat      *A = a->A;
1304:   BSsprow      *vs;
1306:   int          i;

1309: #if defined(PETSC_USE_LOG)
1310:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap->N,mat->cmap->N);
1311: #endif
1312:   MatStashDestroy_Private(&mat->stash);
1313:   if (a->bsmap) {
1314:     PetscFree(a->bsmap->vlocal2global);
1315:     PetscFree(a->bsmap->vglobal2local);
1316:     if (a->bsmap->vglobal2proc)  (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1317:     PetscFree(a->bsmap);
1318:   }

1320:   if (A) {
1321:     for (i=0; i<mat->rmap->n; i++) {
1322:       vs = A->rows[i];
1323:       MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1324:     }
1325:     /* Note: A->map = a->bsmap is freed above */
1326:     PetscFree(A->rows);
1327:     PetscFree(A);
1328:   }
1329:   if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1330:   if (a->diag)     {VecDestroy(a->diag);}
1331:   if (a->xwork)    {VecDestroy(a->xwork);}
1332:   if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
1333:   if (a->fpA)      {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1334:   if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1335:   if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1336:   PetscFree(a->imax);
1337:   MPI_Comm_free(&(a->comm_mpirowbs));
1338:   PetscFree(a);

1340:   PetscObjectChangeTypeName((PetscObject)mat,0);
1341:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1342:   return(0);
1343: }

1347: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op,PetscTruth flg)
1348: {
1349:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)A->data;

1353:   switch (op) {
1354:   case MAT_ROW_ORIENTED:
1355:     a->roworiented = flg;
1356:     break;
1357:   case MAT_NEW_NONZERO_LOCATIONS:
1358:     a->nonew       = (flg ? 0 : 1);
1359:     break;
1360:   case MAT_USE_INODES:
1361:     a->bs_color_single = (flg ? 0 : 1);
1362:     break;
1363:   case MAT_NEW_DIAGONALS:
1364:   case MAT_NEW_NONZERO_LOCATION_ERR:
1365:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1366:   case MAT_USE_HASH_TABLE:
1367:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1368:     break;
1369:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1370:     a->donotstash = flg;
1371:     break;
1372:   case MAT_KEEP_ZEROED_ROWS:
1373:     a->keepzeroedrows    = flg;
1374:     break;
1375:   case MAT_SYMMETRIC:
1376:     BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1377:     break;
1378:   case MAT_STRUCTURALLY_SYMMETRIC:
1379:   case MAT_HERMITIAN:
1380:   case MAT_SYMMETRY_ETERNAL:
1381:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1382:     break;
1383:   default:
1384:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1385:     break;
1386:   }
1387:   return(0);
1388: }

1392: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1393: {
1394:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1395:   BSspmat      *A = mat->A;
1396:   BSsprow      *rs;
1397: 
1399:   if (row < AA->rmap->rstart || row >= AA->rmap->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");

1401:   rs  = A->rows[row - AA->rmap->rstart];
1402:   *nz = rs->length;
1403:   if (v)   *v   = rs->nz;
1404:   if (idx) *idx = rs->col;
1405:   return(0);
1406: }

1410: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1411: {
1413:   return(0);
1414: }

1416: /* ------------------------------------------------------------------ */

1420: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1421: {

1425:   MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1426:   return(0);
1427: }

1429: /* -------------------------------------------------------------------*/
1430: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1431:        MatGetRow_MPIRowbs,
1432:        MatRestoreRow_MPIRowbs,
1433:        MatMult_MPIRowbs,
1434: /* 4*/ MatMultAdd_MPIRowbs,
1435:        MatMult_MPIRowbs,
1436:        MatMultAdd_MPIRowbs,
1437:        0,
1438:        0,
1439:        0,
1440: /*10*/ 0,
1441:        0,
1442:        0,
1443:        0,
1444:        0,
1445: /*15*/ MatGetInfo_MPIRowbs,
1446:        0,
1447:        MatGetDiagonal_MPIRowbs,
1448:        0,
1449:        MatNorm_MPIRowbs,
1450: /*20*/ MatAssemblyBegin_MPIRowbs,
1451:        MatAssemblyEnd_MPIRowbs,
1452:        0,
1453:        MatSetOption_MPIRowbs,
1454:        MatZeroEntries_MPIRowbs,
1455: /*25*/ MatZeroRows_MPIRowbs,
1456:        0,
1457:        0,
1458:        0,
1459:        0,
1460: /*30*/ MatSetUpPreallocation_MPIRowbs,
1461:        0,
1462:        0,
1463:        0,
1464:        0,
1465: /*35*/ 0,
1466:        0,
1467:        0,
1468:        0,
1469:        0,
1470: /*40*/ 0,
1471:        MatGetSubMatrices_MPIRowbs,
1472:        0,
1473:        0,
1474:        0,
1475: /*45*/ 0,
1476:        MatScale_MPIRowbs,
1477:        0,
1478:        0,
1479:        0,
1480: /*50*/ 0,
1481:        0,
1482:        0,
1483:        0,
1484:        0,
1485: /*55*/ 0,
1486:        0,
1487:        0,
1488:        0,
1489:        0,
1490: /*60*/ MatGetSubMatrix_MPIRowbs,
1491:        MatDestroy_MPIRowbs,
1492:        MatView_MPIRowbs,
1493:        0,
1494:        MatUseScaledForm_MPIRowbs,
1495: /*65*/ MatScaleSystem_MPIRowbs,
1496:        MatUnScaleSystem_MPIRowbs,
1497:        0,
1498:        0,
1499:        0,
1500: /*70*/ 0,
1501:        0,
1502:        0,
1503:        0,
1504:        0,
1505: /*75*/ 0,
1506:        0,
1507:        0,
1508:        0,
1509:        0,
1510: /*80*/ 0,
1511:        0,
1512:        0,
1513:        0,
1514:        MatLoad_MPIRowbs,
1515: /*85*/ 0,
1516:        0,
1517:        0,
1518:        0,
1519:        0,
1520: /*90*/ 0,
1521:        0,
1522:        0,
1523:        0,
1524:        0,
1525: /*95*/ 0,
1526:        0,
1527:        0,
1528:        0};

1530: /* ------------------------------------------------------------------- */

1535: PetscErrorCode  MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1536: {

1540:   mat->preallocated = PETSC_TRUE;
1541:   MatCreateMPIRowbs_local(mat,nz,nnz);
1542:   return(0);
1543: }

1546: /*MC
1547:    MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1548:    with the external package BlockSolve95.  If BlockSolve95 is installed (see the manual for instructions
1549:    on how to declare the existence of external packages), a matrix type can be constructed which invokes
1550:    BlockSolve95 preconditioners and solvers. 

1552:    Options Database Keys:
1553: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()

1555:   Level: beginner

1557: .seealso: MatCreateMPIRowbs
1558: M*/
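
/*
   A minimal creation sketch (not part of the original source).  The calls shown
   are standard PETSc routines also used elsewhere in this file; comm, m, M and
   the preallocation value are illustrative placeholders, and error checking is
   omitted.

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,m,M,M);
     MatSetType(A,MATMPIROWBS);
     MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);   // about 5 nonzeros per row
     // ... insert values with MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/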

1563: PetscErrorCode  MatCreate_MPIRowbs(Mat A)
1564: {
1565:   Mat_MPIRowbs *a;
1566:   BSmapping    *bsmap;
1567:   BSoff_map    *bsoff;
1569:   int          *offset,m,M;
1570:   PetscTruth   flg1,flg3;
1571:   BSprocinfo   *bspinfo;
1572:   MPI_Comm     comm;
1573: 
1575:   comm = ((PetscObject)A)->comm;

1577:   PetscNewLog(A,Mat_MPIRowbs,&a);
1578:   A->data               = (void*)a;
1579:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1580:   A->factor             = 0;
1581:   A->mapping            = 0;
1582:   a->vecs_permscale     = PETSC_FALSE;
1583:   A->insertmode         = NOT_SET_VALUES;
1584:   a->blocksolveassembly = 0;
1585:   a->keepzeroedrows     = PETSC_FALSE;

1587:   MPI_Comm_rank(comm,&a->rank);
1588:   MPI_Comm_size(comm,&a->size);


1591:   PetscMapSetBlockSize(A->rmap,1);
1592:   PetscMapSetBlockSize(A->cmap,1);
1593:   PetscMapSetUp(A->rmap);
1594:   PetscMapSetUp(A->cmap);
1595:   m    = A->rmap->n;
1596:   M    = A->rmap->N;

1598:   PetscMalloc((A->rmap->n+1)*sizeof(int),&a->imax);
1599:   a->reallocs                      = 0;

1601:   /* build cache for off array entries formed */
1602:   MatStashCreate_Private(((PetscObject)A)->comm,1,&A->stash);
1603:   a->donotstash = PETSC_FALSE;

1605:   /* Initialize BlockSolve information */
1606:   a->A              = 0;
1607:   a->pA              = 0;
1608:   a->comm_pA  = 0;
1609:   a->fpA      = 0;
1610:   a->comm_fpA = 0;
1611:   a->alpha    = 1.0;
1612:   a->0;
1613:   a->failures = 0;
1614:   MPI_Comm_dup(((PetscObject)A)->comm,&(a->comm_mpirowbs));
1615:   VecCreateMPI(((PetscObject)A)->comm,A->rmap->n,A->rmap->N,&(a->diag));
1616:   VecDuplicate(a->diag,&(a->xwork));
1617:   PetscLogObjectParent(A,a->diag);  PetscLogObjectParent(A,a->xwork);
1618:   PetscLogObjectMemory(A,(A->rmap->n+1)*sizeof(PetscScalar));
1619:   bspinfo = BScreate_ctx();CHKERRBS(0);
1620:   a->procinfo = bspinfo;
1621:   BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1622:   BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1623:   BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1624:   BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1625:   BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1626:   BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1627: #if defined(PETSC_USE_DEBUG)
1628:   BSctx_set_err(bspinfo,1);CHKERRBS(0);  /* BS error checking */
1629: #endif
1630:   BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1631: #if defined (PETSC_USE_INFO)
1632:   PetscOptionsHasName(PETSC_NULL,"-info",&flg1);
1633:   if (flg1) {
1634:     BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1635:   }
1636: #endif
1637:   PetscOptionsBegin(((PetscObject)A)->comm,PETSC_NULL,"Options for MPIROWBS matrix","Mat");
1638:     PetscOptionsTruth("-pc_factor_factorpointwise","Do not optimize for inodes (slow)",PETSC_NULL,PETSC_FALSE,&flg1,PETSC_NULL);
1639:     PetscOptionsTruth("-mat_rowbs_no_inode","Do not optimize for inodes (slow)",PETSC_NULL,PETSC_FALSE,&flg3,PETSC_NULL);
1640:   PetscOptionsEnd();
1641:   if (flg1 || flg3) {
1642:     BSctx_set_si(bspinfo,1);CHKERRBS(0);
1643:   } else {
1644:     BSctx_set_si(bspinfo,0);CHKERRBS(0);
1645:   }
1646: #if defined(PETSC_USE_LOG)
1647:   MLOG_INIT();  /* Initialize logging */
1648: #endif

1650:   /* Compute global offsets */
1651:   offset = &A->rmap->rstart;

1653:   PetscNewLog(A,BSmapping,&a->bsmap);
1654:   bsmap = a->bsmap;
1655:   PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1656:   *((int*)bsmap->vlocal2global) = (*offset);
1657:   bsmap->flocal2global                 = BSloc2glob;
1658:   bsmap->free_l2g                = 0;
1659:   PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1660:   *((int*)bsmap->vglobal2local) = (*offset);
1661:   bsmap->fglobal2local                 = BSglob2loc;
1662:   bsmap->free_g2l                 = 0;
1663:   bsoff                          = BSmake_off_map(*offset,bspinfo,A->rmap->N);
1664:   bsmap->vglobal2proc                 = (void*)bsoff;
1665:   bsmap->fglobal2proc                 = BSglob2proc;
1666:   bsmap->free_g2p                = (void(*)(void*)) BSfree_off_map;
1667:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1668:                                     "MatMPIRowbsSetPreallocation_MPIRowbs",
1669:                                      MatMPIRowbsSetPreallocation_MPIRowbs);
1670:   PetscObjectChangeTypeName((PetscObject)A,MATMPIROWBS);
1671:   return(0);
1672: }

1677: /* @
1678:   MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros 
1679:   per row in the matrix.

1681:   Input Parameters:
1682: +  mat - matrix
1683: .  nz - maximum number of nonzeros expected in any row
1684: -  nnz - number of nonzeros expected in each row (or PETSC_NULL to use nz for all rows)

1686:   Note:
1687:   This routine is valid only for matrices stored in the MATMPIROWBS
1688:   format.
1689: @ */
1690: PetscErrorCode  MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1691: {
1692:   PetscErrorCode ierr,(*f)(Mat,int,const int[]);

1695:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1696:   if (f) {
1697:     (*f)(mat,nz,nnz);
1698:   }
1699:   return(0);
1700: }
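
/*
   Example sketch (not part of the original source): preallocating with a
   per-row count instead of a single estimate.  The nnz array must contain one
   entry per local row; the values below are illustrative only.

     int nnz[3] = {3,5,4};                     // assumes 3 local rows
     MatMPIRowbsSetPreallocation(mat,0,nnz);   // nz is ignored when nnz is given
*/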

1702: /* --------------- extra BlockSolve-specific routines -------------- */
1705: /* @
1706:   MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1707:   user can then manipulate to alter the default parameters.

1709:   Input Parameter:
1710:   mat - matrix

1712:   Output Parameter:
1713:   procinfo - processor information context

1715:   Note:
1716:   This routine is valid only for matrices stored in the MATMPIROWBS
1717:   format.
1718: @ */
1719: PetscErrorCode  MatGetBSProcinfo(Mat mat,BSprocinfo **procinfo)
1720: {
1721:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1722:   PetscTruth   ismpirowbs;

1726:   PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1727:   if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1728:   *procinfo = a->procinfo;
1729:   return(0);
1730: }
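
/*
   Example sketch (not part of the original source), assuming the
   output-parameter form of MatGetBSProcinfo() above; error checking omitted.

     BSprocinfo *procinfo;
     MatGetBSProcinfo(mat,&procinfo);
     // procinfo may now be manipulated to alter the BlockSolve defaults
*/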

1734: PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1735: {
1736:   Mat_MPIRowbs *a;
1737:   BSspmat      *A;
1738:   BSsprow      **rs;
1739:   Mat          mat;
1741:   int          i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1742:   int          header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1743:   PetscScalar  *vals;
1744:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1745:   MPI_Status   status;

1748:   MPI_Comm_size(comm,&size);
1749:   MPI_Comm_rank(comm,&rank);
1750:   if (!rank) {
1751:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1752:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1753:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1754:     if (header[3] < 0) {
1755:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIRowbs");
1756:     }
1757:   }

1759:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1760:   M = header[1];

1762:   /* determine ownership of all rows */
1763:   m          = M/size + ((M % size) > rank);
1764:   PetscMalloc((size+2)*sizeof(int),&rowners);
1765:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1766:   rowners[0] = 0;
1767:   for (i=2; i<=size; i++) {
1768:     rowners[i] += rowners[i-1];
1769:   }
1770:   rstart = rowners[rank];
1771:   rend   = rowners[rank+1];

1773:   /* distribute row lengths to all processors */
1774:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1775:   if (!rank) {
1776:     PetscMalloc(M*sizeof(int),&rowlengths);
1777:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1778:     PetscMalloc(size*sizeof(int),&sndcounts);
1779:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1780:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1781:     PetscFree(sndcounts);
1782:   } else {
1783:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1784:   }

1786:   /* create our matrix */
1787:   MatCreate(comm,newmat);
1788:   MatSetSizes(*newmat,m,m,M,M);
1789:   MatSetType(*newmat,type);
1790:   MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1791:   mat = *newmat;
1792:   PetscFree(ourlens);

1794:   a = (Mat_MPIRowbs*)mat->data;
1795:   A = a->A;
1796:   rs = A->rows;

1798:   if (!rank) {
1799:     /* calculate the number of nonzeros on each processor */
1800:     PetscMalloc(size*sizeof(int),&procsnz);
1801:     PetscMemzero(procsnz,size*sizeof(int));
1802:     for (i=0; i<size; i++) {
1803:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1804:         procsnz[i] += rowlengths[j];
1805:       }
1806:     }
1807:     PetscFree(rowlengths);

1809:     /* determine max buffer needed and allocate it */
1810:     maxnz = 0;
1811:     for (i=0; i<size; i++) {
1812:       maxnz = PetscMax(maxnz,procsnz[i]);
1813:     }
1814:     PetscMalloc(maxnz*sizeof(int),&cols);

1816:     /* read in my part of the matrix column indices  */
1817:     nz = procsnz[0];
1818:     PetscBinaryRead(fd,cols,nz,PETSC_INT);
1819: 
1820:     /* insert it into my part of matrix */
1821:     nz = 0;
1822:     for (i=0; i<A->num_rows; i++) {
1823:       for (j=0; j<a->imax[i]; j++) {
1824:         rs[i]->col[j] = cols[nz++];
1825:       }
1826:       rs[i]->length = a->imax[i];
1827:     }
1828:     /* read in parts for all other processors */
1829:     for (i=1; i<size; i++) {
1830:       nz   = procsnz[i];
1831:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1832:       MPI_Send(cols,nz,MPI_INT,i,((PetscObject)mat)->tag,comm);
1833:     }
1834:     PetscFree(cols);
1835:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1837:     /* read in my part of the matrix numerical values  */
1838:     nz   = procsnz[0];
1839:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1840: 
1841:     /* insert it into my part of matrix */
1842:     nz = 0;
1843:     for (i=0; i<A->num_rows; i++) {
1844:       for (j=0; j<a->imax[i]; j++) {
1845:         rs[i]->nz[j] = vals[nz++];
1846:       }
1847:     }
1848:     /* read in parts for all other processors */
1849:     for (i=1; i<size; i++) {
1850:       nz   = procsnz[i];
1851:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1852:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)mat)->tag,comm);
1853:     }
1854:     PetscFree(vals);
1855:     PetscFree(procsnz);
1856:   } else {
1857:     /* determine buffer space needed for message */
1858:     nz = 0;
1859:     for (i=0; i<A->num_rows; i++) {
1860:       nz += a->imax[i];
1861:     }
1862:     PetscMalloc(nz*sizeof(int),&cols);

1864:     /* receive message of column indices*/
1865:     MPI_Recv(cols,nz,MPI_INT,0,((PetscObject)mat)->tag,comm,&status);
1866:     MPI_Get_count(&status,MPI_INT,&maxnz);
1867:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received column index message of unexpected length");

1869:     /* insert it into my part of matrix */
1870:     nz = 0;
1871:     for (i=0; i<A->num_rows; i++) {
1872:       for (j=0; j<a->imax[i]; j++) {
1873:         rs[i]->col[j] = cols[nz++];
1874:       }
1875:       rs[i]->length = a->imax[i];
1876:     }
1877:     PetscFree(cols);
1878:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1880:     /* receive message of values*/
1881:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)mat)->tag,comm,&status);
1882:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1883:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received value message of unexpected length");

1885:     /* insert it into my part of matrix */
1886:     nz = 0;
1887:     for (i=0; i<A->num_rows; i++) {
1888:       for (j=0; j<a->imax[i]; j++) {
1889:         rs[i]->nz[j] = vals[nz++];
1890:       }
1891:       rs[i]->length = a->imax[i];
1892:     }
1893:     PetscFree(vals);
1894:   }
1895:   PetscFree(rowners);
1896:   a->nz = a->maxnz;
1897:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1898:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1899:   return(0);
1900: }
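
/*
   Example (a sketch of reaching MatLoad_MPIRowbs() through the generic MatLoad()
   interface of this PETSc generation; the file name is an assumption):

      PetscViewer viewer;
      Mat         A;
      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatLoad(viewer,MATMPIROWBS,&A);
      PetscViewerDestroy(viewer);
*/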

1902: /* 
1903:     Special destroy and view routines for factored matrices 
1904: */
1907: static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat)
1908: {
1910: #if defined(PETSC_USE_LOG)
1911:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap->N,mat->cmap->N);
1912: #endif
1913:   return(0);
1914: }

1918: static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1919: {

1923:   MatView((Mat) mat->data,viewer);
1924:   return(0);
1925: }

1929: PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat newmat,Mat mat,IS isrow,const MatFactorInfo *info)
1930: {
1931:   /* Note: the fill factor (info->fill) is not currently used by BlockSolve */
1932:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1934:   PetscTruth   idn;

1937:   if (isrow) {
1938:     ISIdentity(isrow,&idn);
1939:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1940:   }

1942:   if (!mat->symmetric) {
1943:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
1944:         preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
1945:         symmetric using the option MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE)");
1946:   }

1948:   /* If the icc_storage flag wasn't set before the last blocksolveassembly,          */
1949:   /* we must completely redo the assembly as a different storage format is required. */
1950:   if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
1951:     mat->same_nonzero       = PETSC_FALSE;
1952:     mbs->blocksolveassembly = 0;
1953:   }

1955:   if (!mbs->blocksolveassembly) {
1956:     BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
1957:     BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
1958:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1959:   }

1961:   /* Copy permuted matrix */
1962:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1963:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

1965:   /* Set up the communication for factorization */
1966:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
1967:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

1969:   /*
1970:       Create a new Mat structure to hold the "factored" matrix;
1971:     note that this merely contains a pointer to the original matrix, since
1972:     the original matrix contains the factor information.
1973:   */
1974:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",((PetscObject)mat)->comm,MatDestroy,MatView);

1976:   newmat->data         = (void*)mat;
1977:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
1978:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
1979:   newmat->ops->view    = MatView_MPIRowbs_Factored;
1980:   newmat->factor       = 1;
1981:   newmat->preallocated = PETSC_TRUE;
1982:   PetscMapCopy(((PetscObject)mat)->comm,mat->rmap,newmat->rmap);
1983:   PetscMapCopy(((PetscObject)mat)->comm,mat->cmap,newmat->cmap);

1985:   PetscStrallocpy(MATMPIROWBS,&((PetscObject)newmat)->type_name);
1986:   newmat->ops->lufactornumeric = MatLUFactorNumeric_MPIRowbs;
1987:   return(0);
1988: }
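
/*
   Example (a sketch of the calling pattern the routine above expects; the ICC
   factorization is normally requested through the KSP/PC interface, and the objects
   ksp and pc below are assumed to exist already):

      MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);       must precede MatAssemblyBegin()
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCICC);
*/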

1992: PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat newmat,Mat mat,IS isrow,IS iscol,const MatFactorInfo* info)
1993: {
1994:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1996:   PetscTruth   idn;

1999:   if (info->levels) SETERRQ(PETSC_ERR_SUP,"Blocksolve ILU only supports 0 fill");
2000:   if (isrow) {
2001:     ISIdentity(isrow,&idn);
2002:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2003:   }
2004:   if (iscol) {
2005:     ISIdentity(iscol,&idn);
2006:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2007:   }

2009:   if (!mbs->blocksolveassembly) {
2010:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2011:   }
2012: 
2013: /*   if (mat->symmetric) { */
2014: /*     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2015: /*         MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2016: /*         using the option MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE)"); */
2017: /*   } */

2019:   /* Copy permuted matrix */
2020:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2021:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

2023:   /* Set up the communication for factorization */
2024:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2025:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2027:   /*
2028:       Create a new Mat structure to hold the "factored" matrix;
2029:     note that this merely contains a pointer to the original matrix, since
2030:     the original matrix contains the factor information.
2031:   */
2032:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",((PetscObject)mat)->comm,MatDestroy,MatView);

2034:   newmat->data         = (void*)mat;
2035:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2036:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2037:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2038:   newmat->factor       = 1;
2039:   newmat->preallocated = PETSC_TRUE;

2041:   PetscMapCopy(((PetscObject)mat)->comm,mat->rmap,newmat->rmap);
2042:   PetscMapCopy(((PetscObject)mat)->comm,mat->cmap,newmat->cmap);

2044:   PetscStrallocpy(MATMPIROWBS,&((PetscObject)newmat)->type_name);

2046:   newmat->ops->lufactornumeric = MatLUFactorNumeric_MPIRowbs;
2048:   return(0);
2049: }

2053: /*@C
2054:    MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2055:    format.  This format is intended primarily as an interface for BlockSolve95.

2057:    Collective on MPI_Comm

2059:    Input Parameters:
2060: +  comm - MPI communicator
2061: .  m - number of local rows (or PETSC_DECIDE to have calculated)
2062: .  M - number of global rows (or PETSC_DECIDE to have calculated)
2063: .  nz - number of nonzeros per row (same for all local rows)
2064: -  nnz - number of nonzeros per row (possibly different for each row).

2066:    Output Parameter:
2067: .  newA - the matrix 

2069:    Notes:
2070:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2071:    then it must be used on all processors that share the object for that argument.

2073:    The user MUST specify either the local or global matrix dimensions
2074:    (possibly both).

2076:    Specify the preallocated storage with either nz or nnz (not both).  Set 
2077:    nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2078:    allocation.

2080:    Notes:
2081:    By default, the matrix is assumed to be nonsymmetric; the user can
2082:    take advantage of special optimizations for symmetric matrices by calling
2083: $     MatSetOption(mat,MAT_SYMMETRIC,PETSC_TRUE)
2084: $     MatSetOption(mat,MAT_SYMMETRY_ETERNAL,PETSC_TRUE)
2085:    BEFORE calling the routine MatAssemblyBegin().

2087:    Internally, the MATMPIROWBS format inserts zero elements to the
2088:    matrix if necessary, so that nonsymmetric matrices are considered
2089:    to be symmetric in terms of their sparsity structure; this format
2090:    is required for use of the parallel communication routines within
2091:    BlockSolve95. In particular, if the matrix element A[i,j] exists,
2092:    then PETSc will internally allocate a 0 value for the element
2093:    A[j,i] during MatAssemblyEnd() if the user has not already set
2094:    a value for the matrix element A[j,i].

2096:    Options Database Keys:
2097: .  -mat_rowbs_no_inode - Do not use inodes.

2099:    Level: intermediate
2100:   
2101: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve

2103: .seealso: MatCreate(), MatSetValues()
2104: @*/
2105: PetscErrorCode  MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2106: {
2108: 
2110:   MatCreate(comm,newA);
2111:   MatSetSizes(*newA,m,m,M,M);
2112:   MatSetType(*newA,MATMPIROWBS);
2113:   MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2114:   return(0);
2115: }
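
/*
   Example (a minimal creation sketch following the notes above; the global size 100
   and the per-row estimate 5 are arbitrary):

      Mat A;
      MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,100,5,PETSC_NULL,&A);
      MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
      MatSetOption(A,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
      then MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd()
*/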


2118: /* -------------------------------------------------------------------------*/

2120:  #include ../src/mat/impls/aij/seq/aij.h
2121:  #include ../src/mat/impls/aij/mpi/mpiaij.h

2125: PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2126: {
2128:   int         nmax,nstages_local,nstages,i,pos,max_no;


2132:   /* Allocate memory to hold all the submatrices */
2133:   if (scall != MAT_REUSE_MATRIX) {
2134:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
2135:   }
2136: 
2137:   /* Determine the number of stages through which submatrices are done */
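  /* (each stage handles at most nmax index sets, bounding the per-stage column-index workspace at roughly 20*10^6 bytes) */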
2138:   nmax          = 20*1000000 / (C->cmap->N * sizeof(int));
2139:   if (!nmax) nmax = 1;
2140:   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

2142:   /* Make sure every processor loops through the nstages */
2143:   MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,((PetscObject)C)->comm);

2145:   for (i=0,pos=0; i<nstages; i++) {
2146:     if (pos+nmax <= ismax) max_no = nmax;
2147:     else if (pos == ismax) max_no = 0;
2148:     else                   max_no = ismax-pos;
2149:     MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2150:     pos += max_no;
2151:   }
2152:   return(0);
2153: }
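
/*
   Example (a sketch of extracting one sequential submatrix through the generic
   MatGetSubMatrices() interface; the index sets must be sorted, and the range 0..9
   is arbitrary):

      IS  isrow,iscol;
      Mat *submats;
      ISCreateStride(PETSC_COMM_SELF,10,0,1,&isrow);
      ISCreateStride(PETSC_COMM_SELF,10,0,1,&iscol);
      MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&submats);
      submats[0] is then a SeqAIJ matrix owned by this process
*/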
2154: /* -------------------------------------------------------------------------*/
2155: /* for now MatGetSubMatrices_MPIRowbs_Local gets SeqAIJ submatrices of the input
2156:    matrix and preserves the zeros that arise from structural symmetry
2157:  */
2160: PetscErrorCode MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2161: {
2162:   Mat_MPIRowbs  *c = (Mat_MPIRowbs *)(C->data);
2163:   BSspmat       *A = c->A;
2164:   Mat_SeqAIJ    *mat;
2166:   const int      **irow,**icol,*irow_i;
2167:   int         *nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2168:   int         **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
2169:   int         nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2170:   int         **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2171:   int         **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
2172:   int         len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2173:   int         *rmap_i,tag0,tag1,tag2,tag3;
2174:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2175:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2176:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2177:   MPI_Status  *r_status3,*r_status4,*s_status4;
2178:   MPI_Comm    comm;
2179:   FLOAT       **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2180:   PetscScalar *mat_a;
2181:   PetscTruth  sorted;
2182:   int         *onodes1,*olengths1;

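  /* Communication pattern used below: tag0 carries the row requests (sbuf1/rbuf1), tag1
     returns the per-row sizes (sbuf2/rbuf2), tag2 the column indices (sbuf_aj/rbuf3) and
     tag3 the numerical values (sbuf_aa/rbuf4). */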
2185:   comm   = ((PetscObject)C)->comm;
2186:   tag0   = ((PetscObject)C)->tag;
2187:   size   = c->size;
2188:   rank   = c->rank;
2189:   m      = C->rmap->N;
2190: 
2191:   /* Get some new tags to keep the communication clean */
2192:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2193:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2194:   PetscObjectGetNewTag((PetscObject)C,&tag3);

2196:   /* Check whether the row and column indices are sorted */
2197:   for (i=0; i<ismax; i++) {
2198:     ISSorted(isrow[i],&sorted);
2199:     if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2200:     ISSorted(iscol[i],&sorted);
2201:     /*    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2202:   }

2204:   PetscMalloc((2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int),&irow);
2205:   icol   = irow + ismax;
2206:   nrow   = (int*)(icol + ismax);
2207:   ncol   = nrow + ismax;
2208:   rtable = ncol + ismax;

2210:   for (i=0; i<ismax; i++) {
2211:     ISGetIndices(isrow[i],&irow[i]);
2212:     ISGetIndices(iscol[i],&icol[i]);
2213:     ISGetLocalSize(isrow[i],&nrow[i]);
2214:     ISGetLocalSize(iscol[i],&ncol[i]);
2215:   }

2217:   /* Create hash table for the mapping :row -> proc*/
2218:   for (i=0,j=0; i<size; i++) {
2219:     jmax = C->rmap->range[i+1];
2220:     for (; j<jmax; j++) {
2221:       rtable[j] = i;
2222:     }
2223:   }

2225:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2226:      required. Based on this, buffers are allocated, and data copied into them*/
2227:   PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2228:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2229:   w3     = w2 + size;      /* no of IS that needs to be sent to proc i */
2230:   w4     = w3 + size;      /* temp work space used in determining w1, w2, w3 */
2231:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2232:   for (i=0; i<ismax; i++) {
2233:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2234:     jmax   = nrow[i];
2235:     irow_i = irow[i];
2236:     for (j=0; j<jmax; j++) {
2237:       row  = irow_i[j];
2238:       proc = rtable[row];
2239:       w4[proc]++;
2240:     }
2241:     for (j=0; j<size; j++) {
2242:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
2243:     }
2244:   }
2245: 
2246:   nrqs     = 0;              /* no of outgoing messages */
2247:   msz      = 0;              /* total mesg length (for all procs) */
2248:   w1[rank] = 0;              /* no mesg sent to self */
2249:   w3[rank] = 0;
2250:   for (i=0; i<size; i++) {
2251:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2252:   }
2253:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2254:   for (i=0,j=0; i<size; i++) {
2255:     if (w1[i]) { pa[j] = i; j++; }
2256:   }

2258:   /* Each message would have a header = 1 + 2*(no of IS) + data */
2259:   for (i=0; i<nrqs; i++) {
2260:     j     = pa[i];
2261:     w1[j] += w2[j] + 2* w3[j];
2262:     msz   += w1[j];
2263:   }

2265:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2266:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2267:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

2269:   /* Now post the Irecvs corresponding to these messages */
2270:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2271: 
2272:   PetscFree(onodes1);
2273:   PetscFree(olengths1);
2274: 
2275:   /* Allocate Memory for outgoing messages */
2276:   PetscMalloc(2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int),&sbuf1);
2277:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2278:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2279:   /* allocate memory for outgoing data + buf to receive the first reply */
2280:   tmp      = (int*)(ptr + size);
2281:   ctr      = tmp + 2*msz;

2283:   {
2284:     int *iptr = tmp,ict = 0;
2285:     for (i=0; i<nrqs; i++) {
2286:       j         = pa[i];
2287:       iptr     += ict;
2288:       sbuf1[j]  = iptr;
2289:       ict       = w1[j];
2290:     }
2291:   }

2293:   /* Form the outgoing messages */
2294:   /* Initialize the header space */
2295:   for (i=0; i<nrqs; i++) {
2296:     j           = pa[i];
2297:     sbuf1[j][0] = 0;
2298:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2299:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
2300:   }
2301: 
2302:   /* Parse the isrow and copy data into outbuf */
2303:   for (i=0; i<ismax; i++) {
2304:     PetscMemzero(ctr,size*sizeof(int));
2305:     irow_i = irow[i];
2306:     jmax   = nrow[i];
2307:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
2308:       row  = irow_i[j];
2309:       proc = rtable[row];
2310:       if (proc != rank) { /* copy to the outgoing buf*/
2311:         ctr[proc]++;
2312:         *ptr[proc] = row;
2313:         ptr[proc]++;
2314:       }
2315:     }
2316:     /* Update the headers for the current IS */
2317:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
2318:       if ((ctr_j = ctr[j])) {
2319:         sbuf1_j        = sbuf1[j];
2320:         k              = ++sbuf1_j[0];
2321:         sbuf1_j[2*k]   = ctr_j;
2322:         sbuf1_j[2*k-1] = i;
2323:       }
2324:     }
2325:   }

2327:   /*  Now  post the sends */
2328:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2329:   for (i=0; i<nrqs; ++i) {
2330:     j    = pa[i];
2331:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2332:   }

2334:   /* Post Receives to capture the buffer size */
2335:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2336:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2337:   rbuf2[0] = tmp + msz;
2338:   for (i=1; i<nrqs; ++i) {
2339:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2340:   }
2341:   for (i=0; i<nrqs; ++i) {
2342:     j    = pa[i];
2343:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2344:   }

2346:   /* Send to other procs the buf size they should allocate */
2347: 

2349:   /* Receive messages*/
2350:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2351:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2352:   PetscMalloc(2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*),&sbuf2);
2353:   req_size    = (int*)(sbuf2 + nrqr);
2354:   req_source  = req_size + nrqr;
2355: 
2356:   {
2357:     BSsprow    **sAi = A->rows;
2358:     int        id,rstart = C->rmap->rstart;
2359:     int        *sbuf2_i;

2361:     for (i=0; i<nrqr; ++i) {
2362:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2363:       req_size[idx]   = 0;
2364:       rbuf1_i         = rbuf1[idx];
2365:       start           = 2*rbuf1_i[0] + 1;
2366:       MPI_Get_count(r_status1+i,MPI_INT,&end);
2367:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2368:       sbuf2_i         = sbuf2[idx];
2369:       for (j=start; j<end; j++) {
2370:         id               = rbuf1_i[j] - rstart;
2371:         ncols            = (sAi[id])->length;
2372:         sbuf2_i[j]       = ncols;
2373:         req_size[idx]   += ncols;
2374:       }
2375:       req_source[idx] = r_status1[i].MPI_SOURCE;
2376:       /* form the header */
2377:       sbuf2_i[0]   = req_size[idx];
2378:       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2379:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2380:     }
2381:   }
2382:   PetscFree(r_status1);
2383:   PetscFree(r_waits1);

2385:   /*  recv buffer sizes */
2386:   /* Receive messages*/
2387: 
2388:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2389:   PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2390:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2391:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2392:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

2394:   for (i=0; i<nrqs; ++i) {
2395:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2396:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2397:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2398:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2399:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2400:   }
2401:   PetscFree(r_status2);
2402:   PetscFree(r_waits2);
2403: 
2404:   /* Wait on sends1 and sends2 */
2405:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2406:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

2408:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
2409:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
2410:   PetscFree(s_status1);
2411:   PetscFree(s_status2);
2412:   PetscFree(s_waits1);
2413:   PetscFree(s_waits2);

2415:   /* Now allocate buffers for a->j, and send them off */
2416:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2417:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2418:   PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2419:   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2420: 
2421:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2422:   {
2423:     BSsprow *brow;
2424:     int *Acol;
2425:     int rstart = C->rmap->rstart;

2427:     for (i=0; i<nrqr; i++) {
2428:       rbuf1_i   = rbuf1[i];
2429:       sbuf_aj_i = sbuf_aj[i];
2430:       ct1       = 2*rbuf1_i[0] + 1;
2431:       ct2       = 0;
2432:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2433:         kmax = rbuf1[i][2*j];
2434:         for (k=0; k<kmax; k++,ct1++) {
2435:           brow   = A->rows[rbuf1_i[ct1] - rstart];
2436:           ncols  = brow->length;
2437:           Acol   = brow->col;
2438:           /* load the column indices for this row into cols*/
2439:           cols  = sbuf_aj_i + ct2;
2440:           PetscMemcpy(cols,Acol,ncols*sizeof(int));
2441:           /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How is it with
2442:                                                           mappings?? */
2443:           ct2 += ncols;
2444:         }
2445:       }
2446:       MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2447:     }
2448:   }
2449:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2450:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

2452:   /* Allocate buffers for a->a, and send them off */
2453:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2454:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2455:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2456:   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2457: 
2458:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2459:   {
2460:     BSsprow *brow;
2461:     FLOAT *Aval;
2462:     int rstart = C->rmap->rstart;
2463: 
2464:     for (i=0; i<nrqr; i++) {
2465:       rbuf1_i   = rbuf1[i];
2466:       sbuf_aa_i = sbuf_aa[i];
2467:       ct1       = 2*rbuf1_i[0]+1;
2468:       ct2       = 0;
2469:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2470:         kmax = rbuf1_i[2*j];
2471:         for (k=0; k<kmax; k++,ct1++) {
2472:           brow  = A->rows[rbuf1_i[ct1] - rstart];
2473:           ncols = brow->length;
2474:           Aval  = brow->nz;
2475:           /* load the column values for this row into vals*/
2476:           vals  = sbuf_aa_i+ct2;
2477:           PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2478:           ct2 += ncols;
2479:         }
2480:       }
2481:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2482:     }
2483:   }
2484:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2485:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2486:   PetscFree(rbuf1);

2488:   /* Form the matrix */
2489:   /* create col map */
2490:   {
2491:     int *icol_i;
2492: 
2493:     PetscMalloc((1+ismax)*sizeof(int*)+ ismax*C->cmap->N*sizeof(int),&cmap);
2494:     cmap[0] = (int*)(cmap + ismax);
2495:     PetscMemzero(cmap[0],(1+ismax*C->cmap->N)*sizeof(int));
2496:     for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->cmap->N; }
2497:     for (i=0; i<ismax; i++) {
2498:       jmax   = ncol[i];
2499:       icol_i = icol[i];
2500:       cmap_i = cmap[i];
2501:       for (j=0; j<jmax; j++) {
2502:         cmap_i[icol_i[j]] = j+1;
2503:       }
2504:     }
2505:   }

2507:   /* Create lens which is required for MatCreate... */
2508:   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2509:   PetscMalloc((1+ismax)*sizeof(int*)+ j*sizeof(int),&lens);
2510:   lens[0] = (int*)(lens + ismax);
2511:   PetscMemzero(lens[0],j*sizeof(int));
2512:   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2513: 
2514:   /* Update lens from local data */
2515:   { BSsprow *Arow;
2516:     for (i=0; i<ismax; i++) {
2517:       jmax   = nrow[i];
2518:       cmap_i = cmap[i];
2519:       irow_i = irow[i];
2520:       lens_i = lens[i];
2521:       for (j=0; j<jmax; j++) {
2522:         row  = irow_i[j];
2523:         proc = rtable[row];
2524:         if (proc == rank) {
2525:           Arow=A->rows[row-C->rmap->rstart];
2526:           ncols=Arow->length;
2527:           cols=Arow->col;
2528:           for (k=0; k<ncols; k++) {
2529:             if (cmap_i[cols[k]]) { lens_i[j]++;}
2530:           }
2531:         }
2532:       }
2533:     }
2534:   }
2535: 
2536:   /* Create row map*/
2537:   PetscMalloc((1+ismax)*sizeof(int*)+ ismax*C->rmap->N*sizeof(int),&rmap);
2538:   rmap[0] = (int*)(rmap + ismax);
2539:   PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(int));
2540:   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
2541:   for (i=0; i<ismax; i++) {
2542:     rmap_i = rmap[i];
2543:     irow_i = irow[i];
2544:     jmax   = nrow[i];
2545:     for (j=0; j<jmax; j++) {
2546:       rmap_i[irow_i[j]] = j;
2547:     }
2548:   }
2549: 
2550:   /* Update lens from offproc data */
2551:   {
2552:     int *rbuf2_i,*rbuf3_i,*sbuf1_i;

2554:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2555:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2556:       idx     = pa[i];
2557:       sbuf1_i = sbuf1[idx];
2558:       jmax    = sbuf1_i[0];
2559:       ct1     = 2*jmax+1;
2560:       ct2     = 0;
2561:       rbuf2_i = rbuf2[i];
2562:       rbuf3_i = rbuf3[i];
2563:       for (j=1; j<=jmax; j++) {
2564:         is_no   = sbuf1_i[2*j-1];
2565:         max1    = sbuf1_i[2*j];
2566:         lens_i  = lens[is_no];
2567:         cmap_i  = cmap[is_no];
2568:         rmap_i  = rmap[is_no];
2569:         for (k=0; k<max1; k++,ct1++) {
2570:           row  = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
2571:           max2 = rbuf2_i[ct1];
2572:           for (l=0; l<max2; l++,ct2++) {
2573:             if (cmap_i[rbuf3_i[ct2]]) {
2574:               lens_i[row]++;
2575:             }
2576:           }
2577:         }
2578:       }
2579:     }
2580:   }
2581:   PetscFree(r_status3);
2582:   PetscFree(r_waits3);
2583:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
2584:   PetscFree(s_status3);
2585:   PetscFree(s_waits3);

2587:   /* Create the submatrices */
2588:   if (scall == MAT_REUSE_MATRIX) {
2589:     PetscTruth same;
2590: 
2591:     /*
2592:         Assumes the new rows are the same length as the old rows, hence the bug!
2593:     */
2594:     for (i=0; i<ismax; i++) {
2595:       PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2596:       if (!same) {
2597:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2598:       }
2599:       mat = (Mat_SeqAIJ*)(submats[i]->data);
2600:       if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) {
2601:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2602:       }
2603:       PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(int),&same);
2604:       if (!same) {
2605:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2606:       }
2607:       /* Initialize the matrix as if empty */
2608:       PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(int));
2609:       submats[i]->factor = C->factor;
2610:     }
2611:   } else {
2612:     for (i=0; i<ismax; i++) {
2613:       /* Here we want to explicitly generate SeqAIJ matrices */
2614:       MatCreate(PETSC_COMM_SELF,submats+i);
2615:       MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);
2616:       MatSetType(submats[i],MATSEQAIJ);
2617:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2618:     }
2619:   }

2621:   /* Assemble the matrices */
2622:   /* First assemble the local rows */
2623:   {
2624:     int    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2625:     PetscScalar *imat_a;
2626:     BSsprow *Arow;
2627: 
2628:     for (i=0; i<ismax; i++) {
2629:       mat       = (Mat_SeqAIJ*)submats[i]->data;
2630:       imat_ilen = mat->ilen;
2631:       imat_j    = mat->j;
2632:       imat_i    = mat->i;
2633:       imat_a    = mat->a;
2634:       cmap_i    = cmap[i];
2635:       rmap_i    = rmap[i];
2636:       irow_i    = irow[i];
2637:       jmax      = nrow[i];
2638:       for (j=0; j<jmax; j++) {
2639:         row      = irow_i[j];
2640:         proc     = rtable[row];
2641:         if (proc == rank) {
2642:           old_row  = row;
2643:           row      = rmap_i[row];
2644:           ilen_row = imat_ilen[row];
2645: 
2646:           Arow=A->rows[old_row-C->rmap->rstart];
2647:           ncols=Arow->length;
2648:           cols=Arow->col;
2649:           vals=Arow->nz;
2650: 
2651:           mat_i    = imat_i[row];
2652:           mat_a    = imat_a + mat_i;
2653:           mat_j    = imat_j + mat_i;
2654:           for (k=0; k<ncols; k++) {
2655:             if ((tcol = cmap_i[cols[k]])) {
2656:               *mat_j++ = tcol - 1;
2657:               *mat_a++ = (PetscScalar)vals[k];
2658:               ilen_row++;
2659:             }
2660:           }
2661:           imat_ilen[row] = ilen_row;
2662:         }
2663:       }
2664:     }
2665:   }

2667:   /* Now assemble the off-processor rows */
2668:   {
2669:     int    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2670:     int    *imat_j,*imat_i;
2671:     PetscScalar *imat_a;
2672:     FLOAT *rbuf4_i;
2673: 
2674:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2675:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2676:       idx     = pa[i];
2677:       sbuf1_i = sbuf1[idx];
2678:       jmax    = sbuf1_i[0];
2679:       ct1     = 2*jmax + 1;
2680:       ct2     = 0;
2681:       rbuf2_i = rbuf2[i];
2682:       rbuf3_i = rbuf3[i];
2683:       rbuf4_i = rbuf4[i];
2684:       for (j=1; j<=jmax; j++) {
2685:         is_no     = sbuf1_i[2*j-1];
2686:         rmap_i    = rmap[is_no];
2687:         cmap_i    = cmap[is_no];
2688:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
2689:         imat_ilen = mat->ilen;
2690:         imat_j    = mat->j;
2691:         imat_i    = mat->i;
2692:         imat_a    = mat->a;
2693:         max1      = sbuf1_i[2*j];
2694:         for (k=0; k<max1; k++,ct1++) {
2695:           row   = sbuf1_i[ct1];
2696:           row   = rmap_i[row];
2697:           ilen  = imat_ilen[row];
2698:           mat_i = imat_i[row];
2699:           mat_a = imat_a + mat_i;
2700:           mat_j = imat_j + mat_i;
2701:           max2 = rbuf2_i[ct1];
2702:           for (l=0; l<max2; l++,ct2++) {
2703:             if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2704:               *mat_j++ = tcol - 1;
2705:               *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2706:               ilen++;
2707:             }
2708:           }
2709:           imat_ilen[row] = ilen;
2710:         }
2711:       }
2712:     }
2713:   }
2714:   PetscFree(r_status4);
2715:   PetscFree(r_waits4);
2716:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
2717:   PetscFree(s_waits4);
2718:   PetscFree(s_status4);

2720:   /* Restore the indices */
2721:   for (i=0; i<ismax; i++) {
2722:     ISRestoreIndices(isrow[i],irow+i);
2723:     ISRestoreIndices(iscol[i],icol+i);
2724:   }

2726:   /* Destroy allocated memory */
2727:   PetscFree(irow);
2728:   PetscFree(w1);
2729:   PetscFree(pa);

2731:   PetscFree(sbuf1);
2732:   PetscFree(rbuf2);
2733:   for (i=0; i<nrqr; ++i) {
2734:     PetscFree(sbuf2[i]);
2735:   }
2736:   for (i=0; i<nrqs; ++i) {
2737:     PetscFree(rbuf3[i]);
2738:     PetscFree(rbuf4[i]);
2739:   }

2741:   PetscFree(sbuf2);
2742:   PetscFree(rbuf3);
2743:   PetscFree(rbuf4);
2744:   PetscFree(sbuf_aj[0]);
2745:   PetscFree(sbuf_aj);
2746:   PetscFree(sbuf_aa[0]);
2747:   PetscFree(sbuf_aa);
2748: 
2749:   PetscFree(cmap);
2750:   PetscFree(rmap);
2751:   PetscFree(lens);

2753:   for (i=0; i<ismax; i++) {
2754:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2755:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2756:   }
2757:   return(0);
2758: }

2760: /*
2761:   Can be optimized by sending only the nonzeros selected by the iscol IS,
2762:   i.e. prebuild the submatrix on the sending side, including the A,B partitioning.
2763:   */
2766:  #include ../src/vec/is/impls/general/general.h
2767: PetscErrorCode MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2768: {
2769:   Mat_MPIRowbs  *c = (Mat_MPIRowbs*)C->data;
2770:   BSspmat       *A = c->A;
2771:   BSsprow *Arow;
2772:   Mat_SeqAIJ    *matA,*matB; /* on-process and off-process parts of submat */
2773:   Mat_MPIAIJ    *mat;  /* submat->data */
2775:   const int      *irow,*icol;
2776:   int    nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2777:   int    *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2778:   int    i,j,k,l,len,jmax,proc,idx;
2779:   int    **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2780:   FLOAT  **rbuf4,**sbuf4; /* FLOAT is from the BlockSolve95 library */

2782:   int    *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2783:   int    *req_source;
2784:   int    ncols_t;
2785: 
2786: 
2787:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2788:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2789: 
2790:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2791:   MPI_Status  *r_status3,*r_status4,*s_status4;
2792:   MPI_Comm    comm;


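  /* Same four-phase protocol as in MatGetSubMatrices_MPIRowbs_Local(): tag0 carries the row
     requests (sbuf1/rbuf1), tag1 the per-row sizes (sbuf2/rbuf2), tag2 the column indices
     (sbuf3/rbuf3) and tag3 the numerical values (sbuf4/rbuf4). */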
2796:   comm   = ((PetscObject)C)->comm;
2797:   tag0   = ((PetscObject)C)->tag;
2798:   size   = c->size;
2799:   rank   = c->rank;

2801:   if (size==1) {
2802:     if (scall == MAT_REUSE_MATRIX) {
2803:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2804:       return(0);
2805:     } else {
2806:       Mat *newsubmat;
2807: 
2808:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2809:       *submat=*newsubmat;
2810:       ierr=PetscFree(newsubmat);
2811:       return(0);
2812:     }
2813:   }
2814: 
2815:   /* Get some new tags to keep the communication clean */
2816:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2817:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2818:   PetscObjectGetNewTag((PetscObject)C,&tag3);

2820:   /* Check if the col indices are sorted */
2821:   {PetscTruth sorted;
2822:   ISSorted(isrow,&sorted);
2823:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2824:   ISSorted(iscol,&sorted);
2825:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2826:   }
2827: 
2828:   ISGetIndices(isrow,&irow);
2829:   ISGetIndices(iscol,&icol);
2830:   ISGetLocalSize(isrow,&nrow);
2831:   ISGetLocalSize(iscol,&ncol);
2832: 
2833:   if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2834:   if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2835: 
2836: 
2837:   PetscMalloc((C->rmap->N+1)*sizeof(int),&rtable);
2838:   /* Create hash table for the mapping :row -> proc*/
2839:   for (i=0,j=0; i<size; i++) {
2840:     jmax = C->rmap->range[i+1];
2841:     for (; j<jmax; j++) {
2842:       rtable[j] = i;
2843:     }
2844:   }

2846:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2847:      required. Based on this, buffers are allocated, and data copied into them*/
2848:   PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2849:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2850:   PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2851:   for (j=0; j<nrow; j++) {
2852:     row_t  = irow[j];
2853:     proc   = rtable[row_t];
2854:     w1[proc]++;
2855:   }
2856:   nrqs     = 0;              /* no of outgoing messages */
2857:   msz      = 0;              /* total mesg length (for all procs) */
2858:   w1[rank] = 0;              /* no mesg sent to self */
2859:   for (i=0; i<size; i++) {
2860:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2861:   }
2862: 
2863:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2864:   for (i=0,j=0; i<size; i++) {
2865:     if (w1[i]) {
2866:       pa[j++] = i;
2867:       w1[i]++;  /* header for return data */
2868:       msz+=w1[i];
2869:     }
2870:   }
2871: 
2872:   {int  *onodes1,*olengths1;
2873:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2874:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2875:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2876:   /* Now post the Irecvs corresponding to these messages */
2877:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2878:   PetscFree(onodes1);
2879:   PetscFree(olengths1);
2880:   }
2881: 
2882: { int **ptr,*iptr,*tmp;
2883:   /* Allocate Memory for outgoing messages */
2884:   PetscMalloc(2*size*sizeof(int*) + msz*sizeof(int),&sbuf1);
2885:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2886:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2887:   /* allocate memory for outgoing data + buf to receive the first reply */
2888:   tmp      = (int*)(ptr + size);

2890:   for (i=0,iptr=tmp; i<nrqs; i++) {
2891:     j         = pa[i];
2892:     sbuf1[j]  = iptr;
2893:     iptr     += w1[j];
2894:   }

2896:   /* Form the outgoing messages */
2897:   for (i=0; i<nrqs; i++) {
2898:     j           = pa[i];
2899:     sbuf1[j][0] = 0;   /*header */
2900:     ptr[j]      = sbuf1[j] + 1;
2901:   }
2902: 
2903:   /* Parse the isrow and copy data into outbuf */
2904:   for (j=0; j<nrow; j++) {
2905:     row_t  = irow[j];
2906:     proc = rtable[row_t];
2907:     if (proc != rank) { /* copy to the outgoing buf*/
2908:       sbuf1[proc][0]++;
2909:       *ptr[proc] = row_t;
2910:       ptr[proc]++;
2911:     }
2912:   }
2913: } /* block */

2915:   /*  Now  post the sends */
2916: 
2917:   /* structure of sbuf1[i]/rbuf1[i]: 1 (number of requested rows) followed by the
2918:    * global numbers of the requested (non-local) rows */

2920:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2921:   for (i=0; i<nrqs; ++i) {
2922:     j    = pa[i];
2923:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2924:   }

2926:   /* Post Receives to capture the buffer size */
2927:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2928:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2929:   PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
2930:   for (i=1; i<nrqs; ++i) {
2931:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2932:   }
2933:   for (i=0; i<nrqs; ++i) {
2934:     j    = pa[i];
2935:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2936:   }

2938:   /* Send to other procs the buf size they should allocate */
2939:   /* structure of sbuf2[i]/rbuf2[i]: 1 (total size to allocate) followed by the
2940:    * individual sizes of the requested (non-local) rows */

2942:   /* Receive messages*/
2943:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2944:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2945:   PetscMalloc(2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*),&sbuf2);
2946:   req_size    = (int*)(sbuf2 + nrqr);
2947:   req_source  = req_size + nrqr;
2948: 
2949:   {
2950:     BSsprow    **sAi = A->rows;
2951:     int        id,rstart = C->rmap->rstart;
2952:     int        *sbuf2_i,*rbuf1_i,end;

2954:     for (i=0; i<nrqr; ++i) {
2955:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2956:       req_size[idx]   = 0;
2957:       rbuf1_i         = rbuf1[idx];
2958:       MPI_Get_count(r_status1+i,MPI_INT,&end);
2959:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2960:       sbuf2_i         = sbuf2[idx];
2961:       for (j=1; j<end; j++) {
2962:         id               = rbuf1_i[j] - rstart;
2963:         ncols_t          = (sAi[id])->length;
2964:         sbuf2_i[j]       = ncols_t;
2965:         req_size[idx]   += ncols_t;
2966:       }
2967:       req_source[idx] = r_status1[i].MPI_SOURCE;
2968:       /* form the header */
2969:       sbuf2_i[0]   = req_size[idx];
2970:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2971:     }
2972:   }
2973:   PetscFree(r_status1);
2974:   PetscFree(r_waits1);

2976:   /*  recv buffer sizes */
2977:   /* Receive messages*/
2978: 
2979:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2980:   PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
2981:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2982:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2983:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

2985:   for (i=0; i<nrqs; ++i) {
2986:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2987:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2988:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2989:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2990:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2991:   }
2992:   PetscFree(r_status2);
2993:   PetscFree(r_waits2);
2994: 
2995:   /* Wait on sends1 and sends2 */
2996:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2997:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

2999:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
3000:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
3001:   PetscFree(s_status1);
3002:   PetscFree(s_status2);
3003:   PetscFree(s_waits1);
3004:   PetscFree(s_waits2);

3006:   /* Now allocate buffers for a->j, and send them off */
3007:   /* structure of sbuf3[i]/rbuf3[i] and sbuf4[i]/rbuf4[i]: req_size[i] entries (the columns,
3008:    * resp. values, of all requested rows); the row sizes were sent in rbuf2; values are of FLOAT type */
3009: 
3010:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3011:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3012:   PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3013:   for (i=1; i<nrqr; i++)  sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3014: 
3015:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3016:   {
3017:     int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3018:     int rstart = C->rmap->rstart;

3020:     for (i=0; i<nrqr; i++) {
3021:       rbuf1_i   = rbuf1[i];
3022:       sbuf3_i   = sbuf3[i];
3023:       noutcols  = 0;
3024:       kmax = rbuf1_i[0];  /* num. of req. rows */
3025:       for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3026:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3027:         ncols  = Arow->length;
3028:         Acol   = Arow->col;
3029:         /* load the column indices for this row into cols*/
3030:         cols  = sbuf3_i + noutcols;
3031:         PetscMemcpy(cols,Acol,ncols*sizeof(int));
3032:         /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How is it with mappings?? */
3033:         noutcols += ncols;
3034:       }
3035:       MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3036:     }
3037:   }
3038:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3039:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

3041:   /* Allocate buffers for a->a, and send them off */
3042:   /* can be optimized by combining with the previous block */
3043:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3044:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3045:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3046:   for (i=1; i<nrqr; i++)  sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3047: 
3048:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3049:   {
3050:     FLOAT *Aval,*vals,*sbuf4_i;
3051:     int rstart = C->rmap->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3052: 
3053: 
3054:     for (i=0; i<nrqr; i++) {
3055:       rbuf1_i   = rbuf1[i];
3056:       sbuf4_i   = sbuf4[i];
3057:       rqrow     = 1;
3058:       noutvals  = 0;
3059:       kmax      = rbuf1_i[0];  /* num of req. rows */
3060:       for (k=0; k<kmax; k++,rqrow++) {
3061:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3062:         ncols  = Arow->length;
3063:         Aval = Arow->nz;
3064:         /* load the column values for this row into vals*/
3065:         vals  = sbuf4_i+noutvals;
3066:         PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3067:         noutvals += ncols;
3068:       }
3069:       MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3070:     }
3071:   }
3072:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3073:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3074:   PetscFree(rbuf1);

3076:   /* Form the matrix */

3078:   /* create col map */
3079:   PetscMalloc(C->cmap->N*sizeof(int),&cmap);
3080:   PetscMemzero(cmap,C->cmap->N*sizeof(int));
3081:   for (j=0; j<ncol; j++) {
3082:       cmap[icol[j]] = j+1;
3083:   }
3084: 
3085:   /* Create row map; a global rowmap may be needed eventually, but a local rowmap suffices here */
3086:   PetscMalloc(C->rmap->N*sizeof(int),&rmap);
3087:   PetscMemzero(rmap,C->rmap->N*sizeof(int));
3088:   for (j=0; j<nrow; j++) {
3089:     rmap[irow[j]] = j;
3090:   }

3092:   /*
3093:      Determine the number of non-zeros in the diagonal and off-diagonal 
3094:      portions of the matrix in order to do correct preallocation
3095:    */

3097:   /* first get start and end of "diagonal" columns */
3098:   if (csize == PETSC_DECIDE) {
3099:     nlocal = ncol/size + ((ncol % size) > rank);
3100:   } else {
3101:     nlocal = csize;
3102:   }
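  /* nlocal is the number of "diagonal" (locally owned) columns of the new parallel submatrix */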
3103:   {
3104:     int ncols,*cols,olen,dlen,thecol;
3105:     int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3106: 
3107:     MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3108:     cstart = cend - nlocal;
3109:     if (rank == size - 1 && cend != ncol) {
3110:       SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns");
3111:     }

3113:     PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3114:     o_nz = d_nz + nrow;
3115: 
3116:     /* Update lens from local data */
3117:     for (j=0; j<nrow; j++) {
3118:       row  = irow[j];
3119:       proc = rtable[row];
3120:       if (proc == rank) {
3121:         Arow=A->rows[row-C->rmap->rstart];
3122:         ncols=Arow->length;
3123:         cols=Arow->col;
3124:         olen=dlen=0;
3125:         for (k=0; k<ncols; k++) {
3126:           if ((thecol=cmap[cols[k]])) {
3127:             if (cstart<thecol && thecol<=cend) dlen++; /* thecol is from 1 */
3128:             else olen++;
3129:           }
3130:         }
3131:         o_nz[j]=olen;
3132:         d_nz[j]=dlen;
3133:       } else d_nz[j]=o_nz[j]=0;
3134:     }
3135:     /* Update lens from offproc data and done waits */
3136:     /* this will be much simpler after sending only the appropriate columns */
3137:     for (j=0; j<nrqs;j++) {
3138:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3139:       proc   = pa[i];
3140:       sbuf1_i = sbuf1[proc];
3141:       cidx    = 0;
3142:       rbuf2_i = rbuf2[i];
3143:       rbuf3_i = rbuf3[i];
3144:       kmax    = sbuf1_i[0]; /*num of rq. rows*/
3145:       for (k=1; k<=kmax; k++) {
3146:         row  = rmap[sbuf1_i[k]]; /* the val in the new matrix to be */
3147:         for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3148:           if ((thecol=cmap[rbuf3_i[cidx]])) {
3149: 
3150:             if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is from 1 */
3151:             else o_nz[row]++;
3152:           }
3153:         }
3154:       }
3155:     }
3156:   }
3157:   PetscFree(r_status3);
3158:   PetscFree(r_waits3);
3159:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
3160:   PetscFree(s_status3);
3161:   PetscFree(s_waits3);

3163:   if (scall ==  MAT_INITIAL_MATRIX) {
3164:     MatCreate(comm,submat);
3165:     MatSetSizes(*submat,nrow,nlocal,PETSC_DECIDE,ncol);
3166:     MatSetType(*submat,MATMPIAIJ);  /* the submatrix is built as an MPIAIJ matrix (see the Mat_MPIAIJ casts below) */
3167:     MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3168:     mat=(Mat_MPIAIJ *)((*submat)->data);
3169:     matA=(Mat_SeqAIJ *)(mat->A->data);
3170:     matB=(Mat_SeqAIJ *)(mat->B->data);
3171: 
3172:   } else {
3173:     PetscTruth same;
3174:     /* The following checks could optionally be dropped for debugged user
3175:      * programs, but there is no PETSc option to switch off such safety
3176:      * tests; likewise the counting of o_nz,d_nz could be dropped for the
3177:      * REUSE case */
3178: 
3179:     PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3180:     if (!same) {
3181:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3182:     }
3183:     if (((*submat)->rmap->n != nrow) || ((*submat)->cmap->N != ncol)) {
3184:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3185:     }
3186:     mat=(Mat_MPIAIJ *)((*submat)->data);
3187:     matA=(Mat_SeqAIJ *)(mat->A->data);
3188:     matB=(Mat_SeqAIJ *)(mat->B->data);
3189:     PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3190:     if (!same) {
3191:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3192:     }
3193:     PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3194:     if (!same) {
3195:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3196:     }
3197:     /* Initialize the matrix as if empty */
3198:     PetscMemzero(matA->ilen,nrow*sizeof(int));
3199:     PetscMemzero(matB->ilen,nrow*sizeof(int));
3200:     /* Perhaps MatZeroEntries would be better - check exactly what it does; all
3201:      * possibly stale information must be deleted */
3202:     /*submats[i]->factor = C->factor; !!! ??? if the factor is to be the same then some
3203:      * factor information must be copied - but where is it? */
3204:     (*submat)->was_assembled=PETSC_FALSE;
3205:     (*submat)->assembled=PETSC_FALSE;
3206: 
3207:   }
3208:   PetscFree(d_nz);

3210:   /* Assemble the matrix */
3211:   /* First assemble from local rows */
3212:   {
3213:     int    i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3214:     FLOAT  *vals;
3215:     PetscScalar *matA_a,*matB_a;
3216: 
3217:     for (j=0; j<nrow; j++) {
3218:       oldrow = irow[j];
3219:       proc   = rtable[oldrow];
3220:       if (proc == rank) {
3221:         row  = rmap[oldrow];
3222: 
3223:         Arow  = A->rows[oldrow-C->rmap->rstart];
3224:         ncols = Arow->length;
3225:         cols  = Arow->col;
3226:         vals  = Arow->nz;
3227: 
3228:         i_row   = matA->i[row];
3229:         matA_a = matA->a + i_row;
3230:         matA_j = matA->j + i_row;
3231:         i_row   = matB->i[row];
3232:         matB_a = matB->a + i_row;
3233:         matB_j = matB->j + i_row;
3234:         for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3235:           if ((tcol = cmap[cols[k]])) {
3236:             if (tcol<=cstart) {
3237:               *matB_j++ = tcol-1;
3238:               *matB_a++ = vals[k];
3239:               ilenB++;
3240:             } else if (tcol<=cend) {
3241:               *matA_j++ = (tcol-1)-cstart;
3242:               *matA_a++ = (PetscScalar)(vals[k]);
3243:               ilenA++;
3244:             } else {
3245:               *matB_j++ = tcol-1;
3246:               *matB_a++ = vals[k];
3247:               ilenB++;
3248:             }
3249:           }
3250:         }
3251:         matA->ilen[row]=ilenA;
3252:         matB->ilen[row]=ilenB;
3253: 
3254:       }
3255:     }
3256:   }

3258:   /* Now assemble the off-processor rows */
3259:   {
3260:     int  *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3261:     int  *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3262:     PetscScalar *matA_a,*matB_a;
3263:     FLOAT *rbuf4_i;

3265:     for (j=0; j<nrqs; j++) {
3266:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3267:       proc   = pa[i];
3268:       sbuf1_i = sbuf1[proc];
3269: 
3270:       cidx    = 0;
3271:       rbuf2_i = rbuf2[i];
3272:       rbuf3_i = rbuf3[i];
3273:       rbuf4_i = rbuf4[i];
3274:       kmax    = sbuf1_i[0];
3275:       for (k=1; k<=kmax; k++) {
3276:         row = rmap[sbuf1_i[k]];
3277: 
3278:         i_row  = matA->i[row];
3279:         matA_a = matA->a + i_row;
3280:         matA_j = matA->j + i_row;
3281:         i_row  = matB->i[row];
3282:         matB_a = matB->a + i_row;
3283:         matB_j = matB->j + i_row;
3284: 
3285:         lmax = rbuf2_i[k];
3286:         for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3287:           if ((tcol = cmap[rbuf3_i[cidx]])) {
3288:             if (tcol<=cstart) {
3289:               *matB_j++ = tcol-1;
3290:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3291:               ilenB++;
3292:             } else if (tcol<=cend) {
3293:               *matA_j++ = (tcol-1)-cstart;
3294:               *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3295:               ilenA++;
3296:             } else {
3297:               *matB_j++ = tcol-1;
3298:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3299:               ilenB++;
3300:             }
3301:           }
3302:         }
3303:         matA->ilen[row]=ilenA;
3304:         matB->ilen[row]=ilenB;
3305:       }
3306:     }
3307:   }

3309:   PetscFree(r_status4);
3310:   PetscFree(r_waits4);
3311:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
3312:   PetscFree(s_waits4);
3313:   PetscFree(s_status4);

3315:   /* Restore the indices */
3316:   ISRestoreIndices(isrow,&irow);
3317:   ISRestoreIndices(iscol,&icol);

3319:   /* Destroy allocated memory */
3320:   PetscFree(rtable);
3321:   PetscFree(w1);
3322:   PetscFree(pa);

3324:   PetscFree(sbuf1);
3325:   PetscFree(rbuf2[0]);
3326:   PetscFree(rbuf2);
3327:   for (i=0; i<nrqr; ++i) {
3328:     PetscFree(sbuf2[i]);
3329:   }
3330:   for (i=0; i<nrqs; ++i) {
3331:     PetscFree(rbuf3[i]);
3332:     PetscFree(rbuf4[i]);
3333:   }

3335:   PetscFree(sbuf2);
3336:   PetscFree(rbuf3);
3337:   PetscFree(rbuf4);
3338:   PetscFree(sbuf3[0]);
3339:   PetscFree(sbuf3);
3340:   PetscFree(sbuf4[0]);
3341:   PetscFree(sbuf4);
3342: 
3343:   PetscFree(cmap);
3344:   PetscFree(rmap);


3347:   MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3348:   MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);


3351:   return(0);
3352: }
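
/*
   Example (a sketch of calling the parallel submatrix extraction above through the
   generic MatGetSubMatrix() interface; both index sets must be sorted, and the local
   row range rstart..rend-1 used below is an assumption):

      IS  isrow,iscol;
      Mat S;
      ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);
      ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&iscol);
      MatGetSubMatrix(C,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);
      S is then a parallel MPIAIJ matrix distributed over the same communicator
*/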