Actual source code: mpibdiag.c

  1: #define PETSCMAT_DLL

  3: /*
  4:    The basic matrix operations for the Block diagonal parallel 
  5:   matrices.
  6: */
 7:  #include "src/mat/impls/bdiag/mpi/mpibdiag.h"

 11: PetscErrorCode MatSetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
 12: {
 13:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
 15:   PetscInt       i,j,row,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
 16:   PetscTruth     roworiented = mbd->roworiented;

 19:   for (i=0; i<m; i++) {
 20:     if (idxm[i] < 0) continue;
 21:     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 22:     if (idxm[i] >= rstart && idxm[i] < rend) {
 23:       row = idxm[i] - rstart;
 24:       for (j=0; j<n; j++) {
 25:         if (idxn[j] < 0) continue;
 26:         if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 27:         if (roworiented) {
 28:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
 29:         } else {
 30:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 31:         }
 32:       }
 33:     } else {
 34:       if (!mbd->donotstash) {
 35:         if (roworiented) {
 36:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 37:         } else {
 38:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 39:         }
 40:       }
 41:     }
 42:   }
 43:   return(0);
 44: }

 48: PetscErrorCode MatGetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
 49: {
 50:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
 52:   PetscInt       i,j,row,rstart = mat->rmap.rstart,rend = mat->rmap.rend;

 55:   for (i=0; i<m; i++) {
 56:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 57:     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 58:     if (idxm[i] >= rstart && idxm[i] < rend) {
 59:       row = idxm[i] - rstart;
 60:       for (j=0; j<n; j++) {
 61:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 62:         if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 63:         MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
 64:       }
 65:     } else {
 66:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 67:     }
 68:   }
 69:   return(0);
 70: }

 74: PetscErrorCode MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
 75: {
 76:   MPI_Comm       comm = mat->comm;
 78:   PetscInt       nstash,reallocs;
 79:   InsertMode     addv;

 82:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
 83:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
 84:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
 85:   }
 86:   mat->insertmode = addv; /* in case this processor had no cache */
 87:   MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
 88:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 89:   PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
 90:   return(0);
 91: }

 95: PetscErrorCode MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
 96: {
 97:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
 98:   Mat_SeqBDiag   *mlocal;
100:   PetscMPIInt    n;
101:   PetscInt       i,*row,*col;
102:   PetscInt       *tmp1,*tmp2,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
103:   PetscScalar    *val;
104:   InsertMode     addv = mat->insertmode;


108:   while (1) {
109:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
110:     if (!flg) break;
111: 
112:     for (i=0; i<n;) {
113:       /* Now identify the consecutive vals belonging to the same row */
114:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
115:       if (j < n) ncols = j-i;
116:       else       ncols = n-i;
117:       /* Now assemble all these values with a single function call */
118:       MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
119:       i = j;
120:     }
121:   }
122:   MatStashScatterEnd_Private(&mat->stash);

124:   MatAssemblyBegin(mbd->A,mode);
125:   MatAssemblyEnd(mbd->A,mode);

127:   /* Fix main diagonal location and determine global diagonals */
128:   mlocal         = (Mat_SeqBDiag*)mbd->A->data;
129:   Mblock         = mat->rmap.N/mat->rmap.bs; Nblock = mat->cmap.N/mat->rmap.bs;
130:   len            = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
131:   PetscMalloc(2*len*sizeof(PetscInt),&tmp1);
132:   tmp2           = tmp1 + len;
133:   PetscMemzero(tmp1,2*len*sizeof(PetscInt));
134:   mlocal->mainbd = -1;
135:   for (i=0; i<mlocal->nd; i++) {
136:     if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
137:     tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
138:   }
139:   MPI_Allreduce(tmp1,tmp2,len,MPIU_INT,MPI_SUM,mat->comm);
140:   ict  = 0;
141:   for (i=0; i<len; i++) {
142:     if (tmp2[i]) {
143:       mbd->gdiag[ict] = i - Mblock;
144:       ict++;
145:     }
146:   }
147:   mbd->gnd = ict;
148:   PetscFree(tmp1);

150:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
151:     MatSetUpMultiply_MPIBDiag(mat);
152:   }
153:   return(0);
154: }

158: PetscErrorCode MatZeroEntries_MPIBDiag(Mat A)
159: {
160:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;

164:   MatZeroEntries(l->A);
165:   return(0);
166: }

 168: /* Again this uses the same basic strategy as in the assembly and
 169:    scatter create routines; we should try to do it systematically
 170:    if we can figure out the proper level of generality. */

 172: /* The code does not handle the diagonal entries correctly unless the
 173:    matrix is square and the column and row ownerships are identical.
 174:    This is a BUG. The only way to fix it seems to be to access
 175:    aij->A and aij->B directly and not through the MatZeroRows()
 176:    routine.
 177: */

181: PetscErrorCode MatZeroRows_MPIBDiag(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
182: {
183:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;
185:   PetscMPIInt    n,imdex,size = l->size,rank = l->rank,tag = A->tag;
186:   PetscInt       i,*owners = A->rmap.range;
187:   PetscInt       *nprocs,j,idx,nsends;
188:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
189:   PetscInt       *rvalues,count,base,slen,*source;
190:   PetscInt       *lens,*lrows,*values;
191:   MPI_Comm       comm = A->comm;
192:   MPI_Request    *send_waits,*recv_waits;
193:   MPI_Status     recv_status,*send_status;
194:   PetscTruth     found;

197:   /*  first count number of contributors to each processor */
198:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
199:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
200:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
201:   for (i=0; i<N; i++) {
202:     idx = rows[i];
203:     found = PETSC_FALSE;
204:     for (j=0; j<size; j++) {
205:       if (idx >= owners[j] && idx < owners[j+1]) {
206:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
207:       }
208:     }
209:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
210:   }
211:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

213:   /* inform other processors of number of messages and max length*/
214:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

216:   /* post receives:   */
217:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
218:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
219:   for (i=0; i<nrecvs; i++) {
220:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
221:   }

223:   /* do sends:
224:       1) starts[i] gives the starting index in svalues for stuff going to 
225:          the ith processor
226:   */
227:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
228:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
229:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
230:   starts[0] = 0;
231:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
232:   for (i=0; i<N; i++) {
233:     svalues[starts[owner[i]]++] = rows[i];
234:   }

236:   starts[0] = 0;
237:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
238:   count = 0;
239:   for (i=0; i<size; i++) {
240:     if (nprocs[2*i+1]) {
241:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
242:     }
243:   }
244:   PetscFree(starts);

246:   base = owners[rank];

248:   /*  wait on receives */
249:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
250:   source = lens + nrecvs;
251:   count  = nrecvs;
252:   slen   = 0;
253:   while (count) {
254:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
255:     /* unpack receives into our local space */
256:     MPI_Get_count(&recv_status,MPIU_INT,&n);
257:     source[imdex]  = recv_status.MPI_SOURCE;
258:     lens[imdex]  = n;
259:     slen += n;
260:     count--;
261:   }
262:   PetscFree(recv_waits);
263: 
264:   /* move the data into the send scatter */
265:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
266:   count = 0;
267:   for (i=0; i<nrecvs; i++) {
268:     values = rvalues + i*nmax;
269:     for (j=0; j<lens[i]; j++) {
270:       lrows[count++] = values[j] - base;
271:     }
272:   }
273:   PetscFree(rvalues);
274:   PetscFree(lens);
275:   PetscFree(owner);
276:   PetscFree(nprocs);
277: 
278:   /* actually zap the local rows */
279:   MatZeroRows(l->A,slen,lrows,diag);
280:   PetscFree(lrows);

282:   /* wait on sends */
283:   if (nsends) {
284:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
285:     MPI_Waitall(nsends,send_waits,send_status);
286:     PetscFree(send_status);
287:   }
288:   PetscFree(send_waits);
289:   PetscFree(svalues);

291:   return(0);
292: }

296: PetscErrorCode MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
297: {
298:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;

302:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
303:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
304:   (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
305:   return(0);
306: }

310: PetscErrorCode MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
311: {
312:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;

316:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
317:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318:   (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
319:   return(0);
320: }

324: PetscErrorCode MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
325: {
326:   Mat_MPIBDiag   *a = (Mat_MPIBDiag*)A->data;
328:   PetscScalar    zero = 0.0;

331:   VecSet(yy,zero);
332:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
333:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
334:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
335:   return(0);
336: }

340: PetscErrorCode MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
341: {
342:   Mat_MPIBDiag   *a = (Mat_MPIBDiag*)A->data;

346:   VecCopy(yy,zz);
347:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
348:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
349:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
350:   return(0);
351: }

355: PetscErrorCode MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
356: {
357:   Mat_MPIBDiag   *mat = (Mat_MPIBDiag*)matin->data;
359:   PetscReal      isend[5],irecv[5];

362:   info->block_size     = (PetscReal)mat->A->rmap.bs;
363:   MatGetInfo(mat->A,MAT_LOCAL,info);
364:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
365:   isend[3] = info->memory;  isend[4] = info->mallocs;
366:   if (flag == MAT_LOCAL) {
367:     info->nz_used      = isend[0];
368:     info->nz_allocated = isend[1];
369:     info->nz_unneeded  = isend[2];
370:     info->memory       = isend[3];
371:     info->mallocs      = isend[4];
372:   } else if (flag == MAT_GLOBAL_MAX) {
373:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
374:     info->nz_used      = irecv[0];
375:     info->nz_allocated = irecv[1];
376:     info->nz_unneeded  = irecv[2];
377:     info->memory       = irecv[3];
378:     info->mallocs      = irecv[4];
379:   } else if (flag == MAT_GLOBAL_SUM) {
380:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
381:     info->nz_used      = irecv[0];
382:     info->nz_allocated = irecv[1];
383:     info->nz_unneeded  = irecv[2];
384:     info->memory       = irecv[3];
385:     info->mallocs      = irecv[4];
386:   }
387:   info->rows_global    = (double)matin->rmap.N;
388:   info->columns_global = (double)matin->cmap.N;
389:   info->rows_local     = (double)matin->rmap.n;
390:   info->columns_local  = (double)matin->cmap.N;
391:   return(0);
392: }

396: PetscErrorCode MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
397: {
399:   Mat_MPIBDiag   *A = (Mat_MPIBDiag*)mat->data;

402:   MatGetDiagonal(A->A,v);
403:   return(0);
404: }

408: PetscErrorCode MatDestroy_MPIBDiag(Mat mat)
409: {
410:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
412: #if defined(PETSC_USE_LOG)
413:   Mat_SeqBDiag   *ms = (Mat_SeqBDiag*)mbd->A->data;

416:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D, BSize=%D, NDiag=%D",mat->rmap.N,mat->cmap.N,mat->rmap.bs,ms->nd);
417: #else
419: #endif
420:   MatStashDestroy_Private(&mat->stash);
421:   PetscFree(mbd->gdiag);
422:   MatDestroy(mbd->A);
423:   if (mbd->lvec) {VecDestroy(mbd->lvec);}
424:   if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
425:   PetscFree(mbd);
426:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
427:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIBDiagSetPreallocation_C","",PETSC_NULL);
428:   return(0);
429: }


434: static PetscErrorCode MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
435: {
436:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;

440:   if (mbd->size == 1) {
441:     MatView(mbd->A,viewer);
442:   } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
443:   return(0);
444: }

448: static PetscErrorCode MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
449: {
450:   Mat_MPIBDiag      *mbd = (Mat_MPIBDiag*)mat->data;
451:   PetscErrorCode    ierr;
452:   PetscMPIInt       size = mbd->size,rank = mbd->rank;
453:   PetscInt          i;
454:   PetscTruth        iascii,isdraw;
455:   PetscViewer       sviewer;
456:   PetscViewerFormat format;

459:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
460:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
461:   if (iascii) {
462:     PetscViewerGetFormat(viewer,&format);
463:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
464:       PetscInt nline = PetscMin(10,mbd->gnd),k,nk,np;
465:       PetscViewerASCIIPrintf(viewer,"  block size=%D, total number of diagonals=%D\n",mat->rmap.bs,mbd->gnd);
466:       nk = (mbd->gnd-1)/nline + 1;
467:       for (k=0; k<nk; k++) {
468:         PetscViewerASCIIPrintf(viewer,"  global diag numbers:");
469:         np = PetscMin(nline,mbd->gnd - nline*k);
470:         for (i=0; i<np; i++) {
471:           PetscViewerASCIIPrintf(viewer,"  %D",mbd->gdiag[i+nline*k]);
472:         }
473:         PetscViewerASCIIPrintf(viewer,"\n");
474:       }
475:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
476:         MatInfo info;
477:         MPI_Comm_rank(mat->comm,&rank);
478:         MatGetInfo(mat,MAT_LOCAL,&info);
479:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.N,
480:             (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
481:         PetscViewerFlush(viewer);
482:         VecScatterView(mbd->Mvctx,viewer);
483:       }
484:       return(0);
485:     }
486:   }

488:   if (isdraw) {
489:     PetscDraw       draw;
490:     PetscTruth isnull;
491:     PetscViewerDrawGetDraw(viewer,0,&draw);
492:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
493:   }

495:   if (size == 1) {
496:     MatView(mbd->A,viewer);
497:   } else {
498:     /* assemble the entire matrix onto first processor. */
499:     Mat          A;
500:     PetscInt     M = mat->rmap.N,N = mat->cmap.N,m,row,nz,*cols;
501:     PetscScalar  *vals;

503:     /* Here we are constructing a temporary matrix, so we will explicitly set the type to MPIBDiag */
504:     MatCreate(mat->comm,&A);
505:     if (!rank) {
506:       MatSetSizes(A,M,N,M,N);
507:       MatSetType(A,MATMPIBDIAG);
508:       MatMPIBDiagSetPreallocation(A,mbd->gnd,mbd->A->rmap.bs,mbd->gdiag,PETSC_NULL);
509:     } else {
510:       MatSetSizes(A,0,0,M,N);
511:       MatSetType(A,MATMPIBDIAG);
512:       MatMPIBDiagSetPreallocation(A,0,mbd->A->rmap.bs,PETSC_NULL,PETSC_NULL);
513:     }
514:     PetscLogObjectParent(mat,A);

516:     /* Copy the matrix ... This isn't the most efficient means,
517:        but it's quick for now */
518:     row = mat->rmap.rstart;
519:     m = mbd->A->rmap.N;
520:     for (i=0; i<m; i++) {
521:       MatGetRow_MPIBDiag(mat,row,&nz,&cols,&vals);
522:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
523:       MatRestoreRow_MPIBDiag(mat,row,&nz,&cols,&vals);
524:       row++;
525:     }
526:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
527:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
528:     PetscViewerGetSingleton(viewer,&sviewer);
529:     if (!rank) {
530:       MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
531:     }
532:     PetscViewerRestoreSingleton(viewer,&sviewer);
533:     PetscViewerFlush(viewer);
534:     MatDestroy(A);
535:   }
536:   return(0);
537: }

541: PetscErrorCode MatView_MPIBDiag(Mat mat,PetscViewer viewer)
542: {
544:   PetscTruth     iascii,isdraw,isbinary;

547:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
548:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
549:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
550:   if (iascii || isdraw) {
551:     MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
552:   } else if (isbinary) {
553:     MatView_MPIBDiag_Binary(mat,viewer);
554:   } else {
555:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
556:   }
557:   return(0);
558: }

562: PetscErrorCode MatSetOption_MPIBDiag(Mat A,MatOption op)
563: {
564:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)A->data;

567:   switch (op) {
568:   case MAT_NO_NEW_NONZERO_LOCATIONS:
569:   case MAT_YES_NEW_NONZERO_LOCATIONS:
570:   case MAT_NEW_NONZERO_LOCATION_ERR:
571:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
572:   case MAT_NO_NEW_DIAGONALS:
573:   case MAT_YES_NEW_DIAGONALS:
574:     MatSetOption(mbd->A,op);
575:     break;
576:   case MAT_ROW_ORIENTED:
577:     mbd->roworiented = PETSC_TRUE;
578:     MatSetOption(mbd->A,op);
579:     break;
580:   case MAT_COLUMN_ORIENTED:
581:     mbd->roworiented = PETSC_FALSE;
582:     MatSetOption(mbd->A,op);
583:     break;
584:   case MAT_IGNORE_OFF_PROC_ENTRIES:
585:     mbd->donotstash = PETSC_TRUE;
586:     break;
587:   case MAT_ROWS_SORTED:
588:   case MAT_ROWS_UNSORTED:
589:   case MAT_COLUMNS_SORTED:
590:   case MAT_COLUMNS_UNSORTED:
591:     PetscInfo1(A,"Option %d ignored\n",op);
592:     break;
593:   case MAT_SYMMETRIC:
594:   case MAT_STRUCTURALLY_SYMMETRIC:
595:   case MAT_NOT_SYMMETRIC:
596:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
597:   case MAT_HERMITIAN:
598:   case MAT_NOT_HERMITIAN:
599:   case MAT_SYMMETRY_ETERNAL:
600:   case MAT_NOT_SYMMETRY_ETERNAL:
601:     break;
602:   default:
603:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
604:   }
605:   return(0);
606: }


611: PetscErrorCode MatGetRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
612: {
613:   Mat_MPIBDiag   *mat = (Mat_MPIBDiag*)matin->data;
615:   PetscInt       lrow;

 618:   if (row < matin->rmap.rstart || row >= matin->rmap.rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
619:   lrow = row - matin->rmap.rstart;
620:   MatGetRow_SeqBDiag(mat->A,lrow,nz,idx,v);
621:   return(0);
622: }

626: PetscErrorCode MatRestoreRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
627: {
628:   Mat_MPIBDiag   *mat = (Mat_MPIBDiag*)matin->data;
630:   PetscInt       lrow;

633:   lrow = row - matin->rmap.rstart;
634:   MatRestoreRow_SeqBDiag(mat->A,lrow,nz,idx,v);
635:   return(0);
636: }


641: PetscErrorCode MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
642: {
643:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)A->data;
644:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)mbd->A->data;
645:   PetscReal      sum = 0.0;
647:   PetscInt       d,i,nd = a->nd,bs = A->rmap.bs,len;
648:   PetscScalar    *dv;

651:   if (type == NORM_FROBENIUS) {
652:     for (d=0; d<nd; d++) {
653:       dv   = a->diagv[d];
654:       len  = a->bdlen[d]*bs*bs;
655:       for (i=0; i<len; i++) {
656: #if defined(PETSC_USE_COMPLEX)
657:         sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
658: #else
659:         sum += dv[i]*dv[i];
660: #endif
661:       }
662:     }
663:     MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
664:     *nrm = sqrt(*nrm);
665:     PetscLogFlops(2*A->rmap.n*A->rmap.N);
666:   } else if (type == NORM_1) { /* max column norm */
667:     PetscReal *tmp,*tmp2;
668:     PetscInt    j;
669:     PetscMalloc((mbd->A->cmap.n+1)*sizeof(PetscReal),&tmp);
670:     PetscMalloc((mbd->A->cmap.n+1)*sizeof(PetscReal),&tmp2);
671:     MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->cmap.n);
672:     *nrm = 0.0;
673:     MPI_Allreduce(tmp,tmp2,mbd->A->cmap.n,MPIU_REAL,MPI_SUM,A->comm);
674:     for (j=0; j<mbd->A->cmap.n; j++) {
675:       if (tmp2[j] > *nrm) *nrm = tmp2[j];
676:     }
677:     PetscFree(tmp);
678:     PetscFree(tmp2);
679:   } else if (type == NORM_INFINITY) { /* max row norm */
680:     PetscReal normtemp;
681:     MatNorm(mbd->A,type,&normtemp);
682:     MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
683:   }
684:   return(0);
685: }

689: PetscErrorCode MatScale_MPIBDiag(Mat A,PetscScalar alpha)
690: {
692:   Mat_MPIBDiag   *a = (Mat_MPIBDiag*)A->data;

695:   MatScale_SeqBDiag(a->A,alpha);
696:   return(0);
697: }

701: PetscErrorCode MatSetUpPreallocation_MPIBDiag(Mat A)
702: {

706:    MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
707:   return(0);
708: }

710: /* -------------------------------------------------------------------*/

712: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
713:        MatGetRow_MPIBDiag,
714:        MatRestoreRow_MPIBDiag,
715:        MatMult_MPIBDiag,
716: /* 4*/ MatMultAdd_MPIBDiag,
717:        MatMultTranspose_MPIBDiag,
718:        MatMultTransposeAdd_MPIBDiag,
719:        0,
720:        0,
721:        0,
722: /*10*/ 0,
723:        0,
724:        0,
725:        0,
726:        0,
727: /*15*/ MatGetInfo_MPIBDiag,
728:        0,
729:        MatGetDiagonal_MPIBDiag,
730:        0,
731:        MatNorm_MPIBDiag,
732: /*20*/ MatAssemblyBegin_MPIBDiag,
733:        MatAssemblyEnd_MPIBDiag,
734:        0,
735:        MatSetOption_MPIBDiag,
736:        MatZeroEntries_MPIBDiag,
737: /*25*/ MatZeroRows_MPIBDiag,
738:        0,
739:        0,
740:        0,
741:        0,
742: /*30*/ MatSetUpPreallocation_MPIBDiag,
743:        0,
744:        0,
745:        0,
746:        0,
747: /*35*/ 0,
748:        0,
749:        0,
750:        0,
751:        0,
752: /*40*/ 0,
753:        0,
754:        0,
755:        MatGetValues_MPIBDiag,
756:        0,
757: /*45*/ 0,
758:        MatScale_MPIBDiag,
759:        0,
760:        0,
761:        0,
762: /*50*/ 0,
763:        0,
764:        0,
765:        0,
766:        0,
767: /*55*/ 0,
768:        0,
769:        0,
770:        0,
771:        0,
772: /*60*/ 0,
773:        MatDestroy_MPIBDiag,
774:        MatView_MPIBDiag,
775:        0,
776:        0,
777: /*65*/ 0,
778:        0,
779:        0,
780:        0,
781:        0,
782: /*70*/ 0,
783:        0,
784:        0,
785:        0,
786:        0,
787: /*75*/ 0,
788:        0,
789:        0,
790:        0,
791:        0,
792: /*80*/ 0,
793:        0,
794:        0,
795:        0,
796:        MatLoad_MPIBDiag,
797: /*85*/ 0,
798:        0,
799:        0,
800:        0,
801:        0,
802: /*90*/ 0,
803:        0,
804:        0,
805:        0,
806:        0,
807: /*95*/ 0,
808:        0,
809:        0,
810:        0};

815: PetscErrorCode  MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
816: {
817:   Mat_MPIBDiag   *matin = (Mat_MPIBDiag *)A->data;
819:   PetscInt       lrows,lcols,rstart,rend;
820:   IS             localc,localr;

823:   MatGetLocalSize(A,&lrows,&lcols);
824:   MatGetOwnershipRange(A,&rstart,&rend);
825:   ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
826:   ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
827:   MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
828:   ISDestroy(localr);
829:   ISDestroy(localc);

831:   *iscopy = PETSC_TRUE;
832:   return(0);
833: }

839: PetscErrorCode  MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,PetscInt nd,PetscInt bs,PetscInt *diag,PetscScalar **diagv)
840: {
841:   Mat_MPIBDiag   *b;
843:   PetscInt       i,k,*ldiag,len,nd2;
844:   PetscScalar    **ldiagv = 0;
845:   PetscTruth     flg2;

848:   B->preallocated = PETSC_TRUE;
849:   if (bs == PETSC_DEFAULT) bs = 1;
850:   if (nd == PETSC_DEFAULT) nd = 0;
851:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
852:   PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
853:   PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
854:   if (nd && !diag) {
855:     PetscMalloc(nd*sizeof(PetscInt),&diag);
856:     nd2  = nd;
857:     PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
858:     if (nd2 != nd) {
859:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
860:     }
861:   } else if (flg2) {
862:     SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
863:   }

865:   if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");

867:   B->rmap.bs = B->cmap.bs = bs;

869:   PetscMapInitialize(B->comm,&B->rmap);
870:   PetscMapInitialize(B->comm,&B->cmap);

872:   if ((B->cmap.N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
 873:   if ((B->rmap.n%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
874:   if ((B->rmap.N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");


877:   b          = (Mat_MPIBDiag*)B->data;
878:   b->gnd     = nd;
879:   b->brstart = (B->rmap.rstart)/bs;
880:   b->brend   = (B->rmap.rend)/bs;


883:   /* Determine local diagonals; for now, assume global rows = global cols */
884:   /* These are sorted in MatCreateSeqBDiag */
885:   PetscMalloc((nd+1)*sizeof(PetscInt),&ldiag);
886:   len  = B->rmap.N/bs + B->cmap.N/bs + 1;
887:   PetscMalloc(len*sizeof(PetscInt),&b->gdiag);
888:   k    = 0;
889:   if (diagv) {
890:     PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
891:   }
892:   for (i=0; i<nd; i++) {
893:     b->gdiag[i] = diag[i];
894:     if (diag[i] > 0) { /* lower triangular */
895:       if (diag[i] < b->brend) {
896:         ldiag[k] = diag[i] - b->brstart;
897:         if (diagv) ldiagv[k] = diagv[i];
898:         k++;
899:       }
900:     } else { /* upper triangular */
901:       if (B->rmap.N/bs - diag[i] > B->cmap.N/bs) {
902:         if (B->rmap.N/bs + diag[i] > b->brstart) {
903:           ldiag[k] = diag[i] - b->brstart;
904:           if (diagv) ldiagv[k] = diagv[i];
905:           k++;
906:         }
907:       } else {
908:         if (B->rmap.N/bs > b->brstart) {
909:           ldiag[k] = diag[i] - b->brstart;
910:           if (diagv) ldiagv[k] = diagv[i];
911:           k++;
912:         }
913:       }
914:     }
915:   }

917:   /* Form local matrix */
918:   MatCreate(PETSC_COMM_SELF,&b->A);
919:   MatSetSizes(b->A,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);
920:   MatSetType(b->A,MATSEQBDIAG);
921:   MatSeqBDiagSetPreallocation(b->A,k,bs,ldiag,ldiagv);
922:   PetscLogObjectParent(B,b->A);
923:   PetscFree(ldiag);
924:   PetscFree(ldiagv);

926:   return(0);
927: }

930: /*MC
931:    MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.

933:    Options Database Keys:
934: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()

936:   Level: beginner

938: .seealso: MatCreateMPIBDiag
939: M*/
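A minimal usage sketch (illustrative sizes; error checking omitted, as in the listing above): the type is picked up from the options database when MatSetFromOptions() is called, e.g. by running with -mat_type mpibdiag -mat_block_size 2 on the command line.

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetFromOptions(A);   /* honors -mat_type mpibdiag */
      /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix ... */
      MatDestroy(A);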

944: PetscErrorCode  MatCreate_MPIBDiag(Mat B)
945: {
946:   Mat_MPIBDiag   *b;

950:   PetscNew(Mat_MPIBDiag,&b);
951:   B->data         = (void*)b;
952:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
953:   B->factor       = 0;
954:   B->mapping      = 0;

956:   B->insertmode = NOT_SET_VALUES;
957:   MPI_Comm_rank(B->comm,&b->rank);
958:   MPI_Comm_size(B->comm,&b->size);

960:   /* build cache for off array entries formed */
961:   MatStashCreate_Private(B->comm,1,&B->stash);
962:   b->donotstash = PETSC_FALSE;

964:   /* stuff used for matrix-vector multiply */
965:   b->lvec        = 0;
966:   b->Mvctx       = 0;

968:   /* used for MatSetValues() input */
969:   b->roworiented = PETSC_TRUE;

971:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
972:                                      "MatGetDiagonalBlock_MPIBDiag",
973:                                       MatGetDiagonalBlock_MPIBDiag);
974:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
975:                                      "MatMPIBDiagSetPreallocation_MPIBDiag",
976:                                       MatMPIBDiagSetPreallocation_MPIBDiag);
977:   PetscObjectChangeTypeName((PetscObject)B,MATMPIBDIAG);
978:   return(0);
979: }

982: /*MC
983:    MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.

985:    This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
986:    and MATMPIBDIAG otherwise.

988:    Options Database Keys:
989: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()

991:   Level: beginner

993: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
994: M*/

999: PetscErrorCode  MatCreate_BDiag(Mat A)
1000: {
1002:   PetscMPIInt   size;

1005:   MPI_Comm_size(A->comm,&size);
1006:   if (size == 1) {
1007:     MatSetType(A,MATSEQBDIAG);
1008:   } else {
1009:     MatSetType(A,MATMPIBDIAG);
1010:   }
1011:   return(0);
1012: }

1017: /*@C
1018:    MatMPIBDiagSetPreallocation - Sets the block diagonal structure and preallocates memory for a parallel block diagonal matrix.

1020:    Collective on Mat

1022:    Input Parameters:
1023: +  A - the matrix 
1024: .  nd - number of block diagonals (global) (optional)
1025: .  bs - each element of a diagonal is a bs x bs dense matrix
1026: .  diag - optional array of block diagonal numbers (length nd).
1027:    For a matrix element A[i,j], where i=row and j=column, the
1028:    diagonal number is
1029: $     diag = i/bs - j/bs  (integer division)
1030:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1031:    needed (expensive).
1032: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1033:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1034:    to control memory allocation.


1037:    Options Database Keys:
1038: .  -mat_block_size <bs> - Sets blocksize
1039: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1041:    Notes:
1042:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1043:    then it must be used on all processors that share the object for that argument.

1045:    The parallel matrix is partitioned across the processors by rows, where
1046:    each local rectangular matrix is stored in the uniprocessor block 
1047:    diagonal format.  See the users manual for further details.

1049:    The user MUST specify either the local or global numbers of rows
1050:    (possibly both).

1052:    The case bs=1 (conventional diagonal storage) is implemented as
1053:    a special case.

1055:    Fortran Notes:
1056:    Fortran programmers cannot set diagv; this variable is ignored.

1058:    Level: intermediate

1060: .keywords: matrix, block, diagonal, parallel, sparse

1062: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1063: @*/
1064: PetscErrorCode  MatMPIBDiagSetPreallocation(Mat B,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[])
1065: {
1066:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscScalar*[]);

1069:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1070:   if (f) {
1071:     (*f)(B,nd,bs,diag,diagv);
1072:   }
1073:   return(0);
1074: }
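As a hedged illustration of the calling sequence above (the sizes, block size, and diagonal numbers are made up), a block-tridiagonal layout could be preallocated like this; recall that diag = i/bs - j/bs, so diagonals -1, 0, and +1 give the block tridiagonal:

      PetscInt diags[3] = {-1,0,1};
      Mat      B;
      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,64,64);
      MatSetType(B,MATMPIBDIAG);
      MatMPIBDiagSetPreallocation(B,3,2,diags,PETSC_NULL);
      /* ... MatSetValues(), assembly, solve ... */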

1078: /*@C
1079:    MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.

1081:    Collective on MPI_Comm

1083:    Input Parameters:
1084: +  comm - MPI communicator
1085: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1086: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1087: .  N - number of columns (local and global)
1088: .  nd - number of block diagonals (global) (optional)
1089: .  bs - each element of a diagonal is a bs x bs dense matrix
1090: .  diag - optional array of block diagonal numbers (length nd).
1091:    For a matrix element A[i,j], where i=row and j=column, the
1092:    diagonal number is
1093: $     diag = i/bs - j/bs  (integer division)
1094:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1095:    needed (expensive).
1096: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1097:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1098:    to control memory allocation.

1100:    Output Parameter:
1101: .  A - the matrix 

1103:    Options Database Keys:
1104: .  -mat_block_size <bs> - Sets blocksize
1105: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1107:    Notes:
1108:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1109:    then it must be used on all processors that share the object for that argument.

1111:    The parallel matrix is partitioned across the processors by rows, where
1112:    each local rectangular matrix is stored in the uniprocessor block 
1113:    diagonal format.  See the users manual for further details.

1115:    The user MUST specify either the local or global numbers of rows
1116:    (possibly both).

1118:    The case bs=1 (conventional diagonal storage) is implemented as
1119:    a special case.

1121:    Fortran Notes:
1122:    Fortran programmers cannot set diagv; this variable is ignored.

1124:    Level: intermediate

1126: .keywords: matrix, block, diagonal, parallel, sparse

1128: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1129: @*/
1130: PetscErrorCode  MatCreateMPIBDiag(MPI_Comm comm,PetscInt m,PetscInt M,PetscInt N,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[],Mat *A)
1131: {
1133:   PetscMPIInt    size;

1136:   MatCreate(comm,A);
1137:   MatSetSizes(*A,m,m,M,N);
1138:   MPI_Comm_size(comm,&size);
1139:   if (size > 1) {
1140:     MatSetType(*A,MATMPIBDIAG);
1141:     MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1142:   } else {
1143:     MatSetType(*A,MATSEQBDIAG);
1144:     MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1145:   }
1146:   return(0);
1147: }
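A minimal sketch of the convenience routine (illustrative sizes; error checking omitted): create a 100 x 100 matrix with bs=1 that stores the main diagonal and the first subdiagonal (diag = i/bs - j/bs = +1), letting PETSc allocate the diagonal storage.

      PetscInt diags[2] = {0,1};
      Mat      C;
      MatCreateMPIBDiag(PETSC_COMM_WORLD,PETSC_DECIDE,100,100,2,1,diags,PETSC_NULL,&C);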

1151: /*@C
1152:    MatBDiagGetData - Gets the data for the block diagonal matrix format.
1153:    For the parallel case, this returns information for the local submatrix.

1155:    Input Parameters:
1156: .  mat - the matrix, stored in block diagonal format.

1158:    Not Collective

1160:    Output Parameters:
1163: +  nd - number of block diagonals
1164: .  bs - each element of a diagonal is a bs x bs dense matrix
1165: .  bdlen - array of total block lengths of block diagonals
1166: .  diag - array of block diagonal numbers (length nd).
1167:    For a matrix element A[i,j], where i=row and j=column, the
1168:    diagonal number is
1169: $     diag = i/bs - j/bs  (integer division)
1172: -  diagv - pointer to the actual diagonals (in same order as the diag array)

1174:    Level: advanced

1176:    Notes:
1177:    See the users manual for further details regarding this storage format.

1179: .keywords: matrix, block, diagonal, get, data

1181: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1182: @*/
1183: PetscErrorCode  MatBDiagGetData(Mat mat,PetscInt *nd,PetscInt *bs,PetscInt *diag[],PetscInt *bdlen[],PetscScalar ***diagv)
1184: {
1185:   Mat_MPIBDiag   *pdmat;
1186:   Mat_SeqBDiag   *dmat = 0;
1187:   PetscTruth     isseq,ismpi;

1192:   PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1193:   PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1194:   if (isseq) {
1195:     dmat = (Mat_SeqBDiag*)mat->data;
1196:   } else if (ismpi) {
1197:     pdmat = (Mat_MPIBDiag*)mat->data;
1198:     dmat = (Mat_SeqBDiag*)pdmat->A->data;
1199:   } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1200:   *nd    = dmat->nd;
1201:   *bs    = mat->rmap.bs;
1202:   *diag  = dmat->diag;
1203:   *bdlen = dmat->bdlen;
1204:   *diagv = dmat->diagv;
1205:   return(0);
1206: }
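A short, hedged example of querying the storage (the variable names are illustrative; on more than one process the returned arrays describe only the local submatrix):

      PetscInt    nd,bs,*diag,*bdlen;
      PetscScalar **diagv;
      MatBDiagGetData(mat,&nd,&bs,&diag,&bdlen,&diagv);
      /* diag[k] is the k-th block-diagonal number, bdlen[k] its length in
         blocks, and diagv[k] points to its bs*bs*bdlen[k] values. */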

 1208:  #include "petscsys.h"

1212: PetscErrorCode MatLoad_MPIBDiag(PetscViewer viewer, MatType type,Mat *newmat)
1213: {
1214:   Mat            A;
1215:   PetscScalar    *vals,*svals;
1216:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1217:   MPI_Status     status;
1219:   int            fd;
1220:   PetscMPIInt    tag = ((PetscObject)viewer)->tag,rank,size,*sndcounts = 0,*rowners,maxnz,mm;
1221:   PetscInt       bs,i,nz,j,rstart,rend,*cols;
1222:   PetscInt       header[4],*rowlengths = 0,M,N,m,Mbs;
1223:   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1224:   PetscInt       extra_rows;

1227:   MPI_Comm_size(comm,&size);
1228:   MPI_Comm_rank(comm,&rank);
1229:   if (!rank) {
1230:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1231:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1232:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1233:     if (header[3] < 0) {
1234:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIBDiag");
1235:     }
1236:   }
1237:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1238:   M = header[1]; N = header[2];

1240:   bs = 1;   /* uses a block size of 1 by default; */
1241:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1243:   /* 
1244:      This code adds extra rows to make sure the number of rows is 
1245:      divisible by the blocksize
1246:   */
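        /* Worked example (illustrative numbers): with M = 10 and bs = 4,
           Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 4*2 = 2, so two padding
           rows (each given a unit diagonal entry below) are appended and
           Mbs becomes 3.  If M is already divisible by bs, extra_rows
           evaluates to bs and is reset to 0. */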
1247:   Mbs        = M/bs;
1248:   extra_rows = bs - M + bs*(Mbs);
1249:   if (extra_rows == bs) extra_rows = 0;
1250:   else                  Mbs++;
1251:   if (extra_rows && !rank) {
1252:     PetscInfo(0,"Padding loaded matrix to match blocksize\n");
1253:   }

1255:   /* determine ownership of all rows */
1256:   m          = bs*(Mbs/size + ((Mbs % size) > rank));
1257:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1258:   mm         = (PetscMPIInt)m;
1259:   MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1260:   rowners[0] = 0;
1261:   for (i=2; i<=size; i++) {
1262:     rowners[i] += rowners[i-1];
1263:   }
1264:   rstart = rowners[rank];
1265:   rend   = rowners[rank+1];

1267:   /* distribute row lengths to all processors */
1268:   PetscMalloc((rend-rstart)*sizeof(PetscInt),&ourlens);
1269:   if (!rank) {
1270:     PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
1271:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1272:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1273:     PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1274:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1275:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1276:     PetscFree(sndcounts);
1277:   } else {
1278:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1279:   }

1281:   if (!rank) {
1282:     /* calculate the number of nonzeros on each processor */
1283:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
1284:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1285:     for (i=0; i<size; i++) {
1286:       for (j=rowners[i]; j<rowners[i+1]; j++) {
1287:         procsnz[i] += rowlengths[j];
1288:       }
1289:     }
1290:     PetscFree(rowlengths);

1292:     /* determine max buffer needed and allocate it */
1293:     maxnz = 0;
1294:     for (i=0; i<size; i++) {
1295:       maxnz = PetscMax(maxnz,procsnz[i]);
1296:     }
1297:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

1299:     /* read in my part of the matrix column indices  */
1300:     nz   = procsnz[0];
1301:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
1302:     if (size == 1)  nz -= extra_rows;
1303:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1304:     if (size == 1)  for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

1306:     /* read in everyone else's rows and ship them off */
1307:     for (i=1; i<size-1; i++) {
1308:       nz   = procsnz[i];
1309:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1310:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1311:     }
1312:     /* read in the stuff for the last proc */
1313:     if (size != 1) {
1314:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1315:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1316:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1317:       MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
1318:     }
1319:     PetscFree(cols);
1320:   } else {
1321:     /* determine buffer space needed for message */
1322:     nz = 0;
1323:     for (i=0; i<m; i++) {
1324:       nz += ourlens[i];
1325:     }
1326:     PetscMalloc(nz*sizeof(PetscInt),&mycols);

1328:     /* receive message of column indices*/
1329:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1330:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1331:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1332:   }

1334:   MatCreate(comm,newmat);
1335:   MatSetSizes(*newmat,m,m,M+extra_rows,N+extra_rows);
1336:   MatSetType(*newmat,type);
1337:   MatMPIBDiagSetPreallocation(*newmat,0,bs,PETSC_NULL,PETSC_NULL);
1338:   A = *newmat;

1340:   if (!rank) {
1341:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1343:     /* read in my part of the matrix numerical values  */
1344:     nz = procsnz[0];
1345:     if (size == 1)  nz -= extra_rows;
1346:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1347:     if (size == 1)  for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

1349:     /* insert into matrix */
1350:     jj      = rstart;
1351:     smycols = mycols;
1352:     svals   = vals;
1353:     for (i=0; i<m; i++) {
1354:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1355:       smycols += ourlens[i];
1356:       svals   += ourlens[i];
1357:       jj++;
1358:     }

1360:     /* read in other processors (except the last one) and ship out */
1361:     for (i=1; i<size-1; i++) {
1362:       nz   = procsnz[i];
1363:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1364:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1365:     }
1366:     /* the last proc */
1367:     if (size != 1){
1368:       nz   = procsnz[i] - extra_rows;
1369:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1370:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1371:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1372:     }
1373:     PetscFree(procsnz);
1374:   } else {
1375:     /* receive numeric values */
1376:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1378:     /* receive message of values*/
1379:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1380:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1381:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1383:     /* insert into matrix */
1384:     jj      = rstart;
1385:     smycols = mycols;
1386:     svals   = vals;
1387:     for (i=0; i<m; i++) {
1388:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1389:       smycols += ourlens[i];
1390:       svals   += ourlens[i];
1391:       jj++;
1392:     }
1393:   }
1394:   PetscFree(ourlens);
1395:   PetscFree(vals);
1396:   PetscFree(mycols);
1397:   PetscFree(rowners);

1399:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1400:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1401:   return(0);
1402: }