Actual source code: mpidense.c

petsc-dev 2014-08-28
Report Typos and Errors
  2: /*
  3:    Basic functions for basic parallel dense matrices.
  4: */


  7: #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
  8: #include <../src/mat/impls/aij/mpi/mpiaij.h>

 12: /*@

 14:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
 15:               matrix that represents the operator. For sequential matrices it returns itself.

 17:     Input Parameter:
 18: .      A - the Seq or MPI dense matrix

 20:     Output Parameter:
 21: .      B - the inner matrix

 23:     Level: intermediate

 25: @*/
 26: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 27: {
 28:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 30:   PetscBool      flg;

 33:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 34:   if (flg) *B = mat->A;
 35:   else *B = A;
 36:   return(0);
 37: }

 41: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 42: {
 43:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 45:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 48:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 49:   lrow = row - rstart;
 50:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 51:   return(0);
 52: }

 56: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 57: {

 61:   if (idx) {PetscFree(*idx);}
 62:   if (v) {PetscFree(*v);}
 63:   return(0);
 64: }

 68: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 69: {
 70:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 72:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 73:   PetscScalar    *array;
 74:   MPI_Comm       comm;
 75:   Mat            B;

 78:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 80:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 81:   if (!B) {
 82:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 83:     MatCreate(comm,&B);
 84:     MatSetSizes(B,m,m,m,m);
 85:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 86:     MatDenseGetArray(mdn->A,&array);
 87:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 88:     MatDenseRestoreArray(mdn->A,&array);
 89:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 90:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 91:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 92:     *a   = B;
 93:     MatDestroy(&B);
 94:   } else *a = B;
 95:   return(0);
 96: }

100: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
101: {
102:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
105:   PetscBool      roworiented = A->roworiented;

108:   for (i=0; i<m; i++) {
109:     if (idxm[i] < 0) continue;
110:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
111:     if (idxm[i] >= rstart && idxm[i] < rend) {
112:       row = idxm[i] - rstart;
113:       if (roworiented) {
114:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
115:       } else {
116:         for (j=0; j<n; j++) {
117:           if (idxn[j] < 0) continue;
118:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
119:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
120:         }
121:       }
122:     } else if (!A->donotstash) {
123:       mat->assembled = PETSC_FALSE;
124:       if (roworiented) {
125:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
126:       } else {
127:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
128:       }
129:     }
130:   }
131:   return(0);
132: }

136: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
137: {
138:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

143:   for (i=0; i<m; i++) {
144:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
145:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
146:     if (idxm[i] >= rstart && idxm[i] < rend) {
147:       row = idxm[i] - rstart;
148:       for (j=0; j<n; j++) {
149:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
150:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
152:       }
153:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
154:   }
155:   return(0);
156: }

160: PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
161: {
162:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

166:   MatDenseGetArray(a->A,array);
167:   return(0);
168: }

172: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
173: {
174:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
175:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
177:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
178:   const PetscInt *irow,*icol;
179:   PetscScalar    *av,*bv,*v = lmat->v;
180:   Mat            newmat;
181:   IS             iscol_local;

184:   ISAllGather(iscol,&iscol_local);
185:   ISGetIndices(isrow,&irow);
186:   ISGetIndices(iscol_local,&icol);
187:   ISGetLocalSize(isrow,&nrows);
188:   ISGetLocalSize(iscol,&ncols);
189:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

191:   /* No parallel redistribution currently supported! Should really check each index set
192:      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
193:      original matrix! */

195:   MatGetLocalSize(A,&nlrows,&nlcols);
196:   MatGetOwnershipRange(A,&rstart,&rend);

198:   /* Check submatrix call */
199:   if (scall == MAT_REUSE_MATRIX) {
200:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
201:     /* Really need to test rows and column sizes! */
202:     newmat = *B;
203:   } else {
204:     /* Create and fill new matrix */
205:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
206:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
207:     MatSetType(newmat,((PetscObject)A)->type_name);
208:     MatMPIDenseSetPreallocation(newmat,NULL);
209:   }

211:   /* Now extract the data pointers and do the copy, column at a time */
212:   newmatd = (Mat_MPIDense*)newmat->data;
213:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

215:   for (i=0; i<Ncols; i++) {
216:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
217:     for (j=0; j<nrows; j++) {
218:       *bv++ = av[irow[j] - rstart];
219:     }
220:   }

222:   /* Assemble the matrices so that the correct flags are set */
223:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
224:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

226:   /* Free work space */
227:   ISRestoreIndices(isrow,&irow);
228:   ISRestoreIndices(iscol_local,&icol);
229:   ISDestroy(&iscol_local);
230:   *B   = newmat;
231:   return(0);
232: }

236: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
237: {
238:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

242:   MatDenseRestoreArray(a->A,array);
243:   return(0);
244: }

248: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
249: {
250:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
251:   MPI_Comm       comm;
253:   PetscInt       nstash,reallocs;
254:   InsertMode     addv;

257:   PetscObjectGetComm((PetscObject)mat,&comm);
258:   /* make sure all processors are either in INSERTMODE or ADDMODE */
259:   MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
260:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
261:   mat->insertmode = addv; /* in case this processor had no cache */

263:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
264:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
265:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
266:   return(0);
267: }

271: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
272: {
273:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
276:   PetscMPIInt    n;
277:   PetscScalar    *val;
278:   InsertMode     addv=mat->insertmode;

281:   /*  wait on receives */
282:   while (1) {
283:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
284:     if (!flg) break;

286:     for (i=0; i<n;) {
287:       /* Now identify the consecutive vals belonging to the same row */
288:       for (j=i,rstart=row[j]; j<n; j++) {
289:         if (row[j] != rstart) break;
290:       }
291:       if (j < n) ncols = j-i;
292:       else       ncols = n-i;
293:       /* Now assemble all these values with a single function call */
294:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
295:       i    = j;
296:     }
297:   }
298:   MatStashScatterEnd_Private(&mat->stash);

300:   MatAssemblyBegin(mdn->A,mode);
301:   MatAssemblyEnd(mdn->A,mode);

303:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304:     MatSetUpMultiply_MPIDense(mat);
305:   }
306:   return(0);
307: }

311: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312: {
314:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

317:   MatZeroEntries(l->A);
318:   return(0);
319: }

321: /* the code does not do the diagonal entries correctly unless the
322:    matrix is square and the column and row owerships are identical.
323:    This is a BUG. The only way to fix it seems to be to access
324:    mdn->A and mdn->B directly and not through the MatZeroRows()
325:    routine.
326: */
329: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330: {
331:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332:   PetscErrorCode    ierr;
333:   PetscInt          i,*owners = A->rmap->range;
334:   PetscInt          *sizes,j,idx,nsends;
335:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337:   PetscInt          *lens,*lrows,*values;
338:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339:   MPI_Comm          comm;
340:   MPI_Request       *send_waits,*recv_waits;
341:   MPI_Status        recv_status,*send_status;
342:   PetscBool         found;
343:   const PetscScalar *xx;
344:   PetscScalar       *bb;

347:   PetscObjectGetComm((PetscObject)A,&comm);
348:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350:   /*  first count number of contributors to each processor */
351:   PetscCalloc1(2*size,&sizes);
352:   PetscMalloc1(N+1,&owner);  /* see note*/
353:   for (i=0; i<N; i++) {
354:     idx   = rows[i];
355:     found = PETSC_FALSE;
356:     for (j=0; j<size; j++) {
357:       if (idx >= owners[j] && idx < owners[j+1]) {
358:         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359:       }
360:     }
361:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362:   }
363:   nsends = 0;
364:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

366:   /* inform other processors of number of messages and max length*/
367:   PetscMaxSum(comm,sizes,&nmax,&nrecvs);

369:   /* post receives:   */
370:   PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
371:   PetscMalloc1((nrecvs+1),&recv_waits);
372:   for (i=0; i<nrecvs; i++) {
373:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
374:   }

376:   /* do sends:
377:       1) starts[i] gives the starting index in svalues for stuff going to
378:          the ith processor
379:   */
380:   PetscMalloc1((N+1),&svalues);
381:   PetscMalloc1((nsends+1),&send_waits);
382:   PetscMalloc1((size+1),&starts);

384:   starts[0] = 0;
385:   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

388:   starts[0] = 0;
389:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390:   count = 0;
391:   for (i=0; i<size; i++) {
392:     if (sizes[2*i+1]) {
393:       MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
394:     }
395:   }
396:   PetscFree(starts);

398:   base = owners[rank];

400:   /*  wait on receives */
401:   PetscMalloc2(nrecvs,&lens,nrecvs,&source);
402:   count = nrecvs;
403:   slen  = 0;
404:   while (count) {
405:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
406:     /* unpack receives into our local space */
407:     MPI_Get_count(&recv_status,MPIU_INT,&n);

409:     source[imdex] = recv_status.MPI_SOURCE;
410:     lens[imdex]   = n;
411:     slen += n;
412:     count--;
413:   }
414:   PetscFree(recv_waits);

416:   /* move the data into the send scatter */
417:   PetscMalloc1((slen+1),&lrows);
418:   count = 0;
419:   for (i=0; i<nrecvs; i++) {
420:     values = rvalues + i*nmax;
421:     for (j=0; j<lens[i]; j++) {
422:       lrows[count++] = values[j] - base;
423:     }
424:   }
425:   PetscFree(rvalues);
426:   PetscFree2(lens,source);
427:   PetscFree(owner);
428:   PetscFree(sizes);

430:   /* fix right hand side if needed */
431:   if (x && b) {
432:     VecGetArrayRead(x,&xx);
433:     VecGetArray(b,&bb);
434:     for (i=0; i<slen; i++) {
435:       bb[lrows[i]] = diag*xx[lrows[i]];
436:     }
437:     VecRestoreArrayRead(x,&xx);
438:     VecRestoreArray(b,&bb);
439:   }

441:   /* actually zap the local rows */
442:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
443:   if (diag != 0.0) {
444:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445:     PetscInt     m   = ll->lda, i;

447:     for (i=0; i<slen; i++) {
448:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
449:     }
450:   }
451:   PetscFree(lrows);

453:   /* wait on sends */
454:   if (nsends) {
455:     PetscMalloc1(nsends,&send_status);
456:     MPI_Waitall(nsends,send_waits,send_status);
457:     PetscFree(send_status);
458:   }
459:   PetscFree(send_waits);
460:   PetscFree(svalues);
461:   return(0);
462: }

466: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
467: {
468:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

472:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
473:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
474:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
475:   return(0);
476: }

480: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481: {
482:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

486:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
487:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
488:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
489:   return(0);
490: }

494: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
495: {
496:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
498:   PetscScalar    zero = 0.0;

501:   VecSet(yy,zero);
502:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
503:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
504:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
505:   return(0);
506: }

510: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
511: {
512:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

516:   VecCopy(yy,zz);
517:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
518:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
519:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
520:   return(0);
521: }

525: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
526: {
527:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
528:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
530:   PetscInt       len,i,n,m = A->rmap->n,radd;
531:   PetscScalar    *x,zero = 0.0;

534:   VecSet(v,zero);
535:   VecGetArray(v,&x);
536:   VecGetSize(v,&n);
537:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
538:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
539:   radd = A->rmap->rstart*m;
540:   for (i=0; i<len; i++) {
541:     x[i] = aloc->v[radd + i*m + i];
542:   }
543:   VecRestoreArray(v,&x);
544:   return(0);
545: }

549: PetscErrorCode MatDestroy_MPIDense(Mat mat)
550: {
551:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

555: #if defined(PETSC_USE_LOG)
556:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
557: #endif
558:   MatStashDestroy_Private(&mat->stash);
559:   MatDestroy(&mdn->A);
560:   VecDestroy(&mdn->lvec);
561:   VecScatterDestroy(&mdn->Mvctx);

563:   PetscFree(mat->data);
564:   PetscObjectChangeTypeName((PetscObject)mat,0);

566:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
567:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
568: #if defined(PETSC_HAVE_ELEMENTAL)
569:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
570: #endif
571:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
572:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
573:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
574:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
575:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
576:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
577:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
578:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
579:   return(0);
580: }

584: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585: {
586:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
587:   PetscErrorCode    ierr;
588:   PetscViewerFormat format;
589:   int               fd;
590:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
591:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
592:   PetscScalar       *work,*v,*vv;
593:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

596:   if (mdn->size == 1) {
597:     MatView(mdn->A,viewer);
598:   } else {
599:     PetscViewerBinaryGetDescriptor(viewer,&fd);
600:     MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
601:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);

603:     PetscViewerGetFormat(viewer,&format);
604:     if (format == PETSC_VIEWER_NATIVE) {

606:       if (!rank) {
607:         /* store the matrix as a dense matrix */
608:         header[0] = MAT_FILE_CLASSID;
609:         header[1] = mat->rmap->N;
610:         header[2] = N;
611:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
612:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

614:         /* get largest work array needed for transposing array */
615:         mmax = mat->rmap->n;
616:         for (i=1; i<size; i++) {
617:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
618:         }
619:         PetscMalloc1(mmax*N,&work);

621:         /* write out local array, by rows */
622:         m = mat->rmap->n;
623:         v = a->v;
624:         for (j=0; j<N; j++) {
625:           for (i=0; i<m; i++) {
626:             work[j + i*N] = *v++;
627:           }
628:         }
629:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
630:         /* get largest work array to receive messages from other processes, excludes process zero */
631:         mmax = 0;
632:         for (i=1; i<size; i++) {
633:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
634:         }
635:         PetscMalloc1(mmax*N,&vv);
636:         for (k = 1; k < size; k++) {
637:           v    = vv;
638:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
639:           MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));

641:           for (j = 0; j < N; j++) {
642:             for (i = 0; i < m; i++) {
643:               work[j + i*N] = *v++;
644:             }
645:           }
646:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
647:         }
648:         PetscFree(work);
649:         PetscFree(vv);
650:       } else {
651:         MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
652:       }
653:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
654:   }
655:   return(0);
656: }

658: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
659: #include <petscdraw.h>
662: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663: {
664:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665:   PetscErrorCode    ierr;
666:   PetscMPIInt       rank = mdn->rank;
667:   PetscViewerType   vtype;
668:   PetscBool         iascii,isdraw;
669:   PetscViewer       sviewer;
670:   PetscViewerFormat format;

673:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
674:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
675:   if (iascii) {
676:     PetscViewerGetType(viewer,&vtype);
677:     PetscViewerGetFormat(viewer,&format);
678:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679:       MatInfo info;
680:       MatGetInfo(mat,MAT_LOCAL,&info);
681:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
682:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
683:       PetscViewerFlush(viewer);
684:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
685:       VecScatterView(mdn->Mvctx,viewer);
686:       return(0);
687:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
688:       return(0);
689:     }
690:   } else if (isdraw) {
691:     PetscDraw draw;
692:     PetscBool isnull;

694:     PetscViewerDrawGetDraw(viewer,0,&draw);
695:     PetscDrawIsNull(draw,&isnull);
696:     if (isnull) return(0);
697:   }

699:   {
700:     /* assemble the entire matrix onto first processor. */
701:     Mat         A;
702:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
703:     PetscInt    *cols;
704:     PetscScalar *vals;

706:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
707:     if (!rank) {
708:       MatSetSizes(A,M,N,M,N);
709:     } else {
710:       MatSetSizes(A,0,0,M,N);
711:     }
712:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
713:     MatSetType(A,MATMPIDENSE);
714:     MatMPIDenseSetPreallocation(A,NULL);
715:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

717:     /* Copy the matrix ... This isn't the most efficient means,
718:        but it's quick for now */
719:     A->insertmode = INSERT_VALUES;

721:     row = mat->rmap->rstart;
722:     m   = mdn->A->rmap->n;
723:     for (i=0; i<m; i++) {
724:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
725:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
726:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
727:       row++;
728:     }

730:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
731:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
732:     PetscViewerGetSingleton(viewer,&sviewer);
733:     if (!rank) {
734:         MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
735:     }
736:     PetscViewerRestoreSingleton(viewer,&sviewer);
737:     PetscViewerFlush(viewer);
738:     MatDestroy(&A);
739:   }
740:   return(0);
741: }

745: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
746: {
748:   PetscBool      iascii,isbinary,isdraw,issocket;

751:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
752:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
753:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
754:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

756:   if (iascii || issocket || isdraw) {
757:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
758:   } else if (isbinary) {
759:     MatView_MPIDense_Binary(mat,viewer);
760:   }
761:   return(0);
762: }

766: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767: {
768:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769:   Mat            mdn  = mat->A;
771:   PetscReal      isend[5],irecv[5];

774:   info->block_size = 1.0;

776:   MatGetInfo(mdn,MAT_LOCAL,info);

778:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779:   isend[3] = info->memory;  isend[4] = info->mallocs;
780:   if (flag == MAT_LOCAL) {
781:     info->nz_used      = isend[0];
782:     info->nz_allocated = isend[1];
783:     info->nz_unneeded  = isend[2];
784:     info->memory       = isend[3];
785:     info->mallocs      = isend[4];
786:   } else if (flag == MAT_GLOBAL_MAX) {
787:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));

789:     info->nz_used      = irecv[0];
790:     info->nz_allocated = irecv[1];
791:     info->nz_unneeded  = irecv[2];
792:     info->memory       = irecv[3];
793:     info->mallocs      = irecv[4];
794:   } else if (flag == MAT_GLOBAL_SUM) {
795:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));

797:     info->nz_used      = irecv[0];
798:     info->nz_allocated = irecv[1];
799:     info->nz_unneeded  = irecv[2];
800:     info->memory       = irecv[3];
801:     info->mallocs      = irecv[4];
802:   }
803:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
804:   info->fill_ratio_needed = 0;
805:   info->factor_mallocs    = 0;
806:   return(0);
807: }

811: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
812: {
813:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

817:   switch (op) {
818:   case MAT_NEW_NONZERO_LOCATIONS:
819:   case MAT_NEW_NONZERO_LOCATION_ERR:
820:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
821:     MatSetOption(a->A,op,flg);
822:     break;
823:   case MAT_ROW_ORIENTED:
824:     a->roworiented = flg;

826:     MatSetOption(a->A,op,flg);
827:     break;
828:   case MAT_NEW_DIAGONALS:
829:   case MAT_KEEP_NONZERO_PATTERN:
830:   case MAT_USE_HASH_TABLE:
831:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
832:     break;
833:   case MAT_IGNORE_OFF_PROC_ENTRIES:
834:     a->donotstash = flg;
835:     break;
836:   case MAT_SYMMETRIC:
837:   case MAT_STRUCTURALLY_SYMMETRIC:
838:   case MAT_HERMITIAN:
839:   case MAT_SYMMETRY_ETERNAL:
840:   case MAT_IGNORE_LOWER_TRIANGULAR:
841:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
842:     break;
843:   default:
844:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
845:   }
846:   return(0);
847: }


852: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
853: {
854:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
855:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
856:   PetscScalar    *l,*r,x,*v;
858:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

861:   MatGetLocalSize(A,&s2,&s3);
862:   if (ll) {
863:     VecGetLocalSize(ll,&s2a);
864:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
865:     VecGetArray(ll,&l);
866:     for (i=0; i<m; i++) {
867:       x = l[i];
868:       v = mat->v + i;
869:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
870:     }
871:     VecRestoreArray(ll,&l);
872:     PetscLogFlops(n*m);
873:   }
874:   if (rr) {
875:     VecGetLocalSize(rr,&s3a);
876:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
877:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
878:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
879:     VecGetArray(mdn->lvec,&r);
880:     for (i=0; i<n; i++) {
881:       x = r[i];
882:       v = mat->v + i*m;
883:       for (j=0; j<m; j++) (*v++) *= x;
884:     }
885:     VecRestoreArray(mdn->lvec,&r);
886:     PetscLogFlops(n*m);
887:   }
888:   return(0);
889: }

893: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
894: {
895:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
896:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
898:   PetscInt       i,j;
899:   PetscReal      sum = 0.0;
900:   PetscScalar    *v  = mat->v;

903:   if (mdn->size == 1) {
904:      MatNorm(mdn->A,type,nrm);
905:   } else {
906:     if (type == NORM_FROBENIUS) {
907:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
908:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909:       }
910:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
911:       *nrm = PetscSqrtReal(*nrm);
912:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
913:     } else if (type == NORM_1) {
914:       PetscReal *tmp,*tmp2;
915:       PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
916:       PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
917:       PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
918:       *nrm = 0.0;
919:       v    = mat->v;
920:       for (j=0; j<mdn->A->cmap->n; j++) {
921:         for (i=0; i<mdn->A->rmap->n; i++) {
922:           tmp[j] += PetscAbsScalar(*v);  v++;
923:         }
924:       }
925:       MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
926:       for (j=0; j<A->cmap->N; j++) {
927:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
928:       }
929:       PetscFree2(tmp,tmp);
930:       PetscLogFlops(A->cmap->n*A->rmap->n);
931:     } else if (type == NORM_INFINITY) { /* max row norm */
932:       PetscReal ntemp;
933:       MatNorm(mdn->A,type,&ntemp);
934:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
935:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
936:   }
937:   return(0);
938: }

942: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
943: {
944:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
945:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
946:   Mat            B;
947:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
949:   PetscInt       j,i;
950:   PetscScalar    *v;

953:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
954:   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
955:     MatCreate(PetscObjectComm((PetscObject)A),&B);
956:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
957:     MatSetType(B,((PetscObject)A)->type_name);
958:     MatMPIDenseSetPreallocation(B,NULL);
959:   } else {
960:     B = *matout;
961:   }

963:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
964:   PetscMalloc1(m,&rwork);
965:   for (i=0; i<m; i++) rwork[i] = rstart + i;
966:   for (j=0; j<n; j++) {
967:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
968:     v   += m;
969:   }
970:   PetscFree(rwork);
971:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
972:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
973:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
974:     *matout = B;
975:   } else {
976:     MatHeaderMerge(A,B);
977:   }
978:   return(0);
979: }


982: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
983: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

987: PetscErrorCode MatSetUp_MPIDense(Mat A)
988: {

992:    MatMPIDenseSetPreallocation(A,0);
993:   return(0);
994: }

998: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
999: {
1001:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

1004:   MatAXPY(A->A,alpha,B->A,str);
1005:   PetscObjectStateIncrease((PetscObject)Y);
1006:   return(0);
1007: }

1011: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1012: {
1013:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

1017:   MatConjugate(a->A);
1018:   return(0);
1019: }

1023: PetscErrorCode MatRealPart_MPIDense(Mat A)
1024: {
1025:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1029:   MatRealPart(a->A);
1030:   return(0);
1031: }

1035: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1036: {
1037:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1041:   MatImaginaryPart(a->A);
1042:   return(0);
1043: }

1045: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1048: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1049: {
1051:   PetscInt       i,n;
1052:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1053:   PetscReal      *work;

1056:   MatGetSize(A,NULL,&n);
1057:   PetscMalloc1(n,&work);
1058:   MatGetColumnNorms_SeqDense(a->A,type,work);
1059:   if (type == NORM_2) {
1060:     for (i=0; i<n; i++) work[i] *= work[i];
1061:   }
1062:   if (type == NORM_INFINITY) {
1063:     MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1064:   } else {
1065:     MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1066:   }
1067:   PetscFree(work);
1068:   if (type == NORM_2) {
1069:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1070:   }
1071:   return(0);
1072: }

1076: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1077: {
1078:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1080:   PetscScalar    *a;
1081:   PetscInt       m,n,i;

1084:   MatGetSize(d->A,&m,&n);
1085:   MatDenseGetArray(d->A,&a);
1086:   for (i=0; i<m*n; i++) {
1087:     PetscRandomGetValue(rctx,a+i);
1088:   }
1089:   MatDenseRestoreArray(d->A,&a);
1090:   return(0);
1091: }

1093: /* -------------------------------------------------------------------*/
1094: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1095:                                         MatGetRow_MPIDense,
1096:                                         MatRestoreRow_MPIDense,
1097:                                         MatMult_MPIDense,
1098:                                 /*  4*/ MatMultAdd_MPIDense,
1099:                                         MatMultTranspose_MPIDense,
1100:                                         MatMultTransposeAdd_MPIDense,
1101:                                         0,
1102:                                         0,
1103:                                         0,
1104:                                 /* 10*/ 0,
1105:                                         0,
1106:                                         0,
1107:                                         0,
1108:                                         MatTranspose_MPIDense,
1109:                                 /* 15*/ MatGetInfo_MPIDense,
1110:                                         MatEqual_MPIDense,
1111:                                         MatGetDiagonal_MPIDense,
1112:                                         MatDiagonalScale_MPIDense,
1113:                                         MatNorm_MPIDense,
1114:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1115:                                         MatAssemblyEnd_MPIDense,
1116:                                         MatSetOption_MPIDense,
1117:                                         MatZeroEntries_MPIDense,
1118:                                 /* 24*/ MatZeroRows_MPIDense,
1119:                                         0,
1120:                                         0,
1121:                                         0,
1122:                                         0,
1123:                                 /* 29*/ MatSetUp_MPIDense,
1124:                                         0,
1125:                                         0,
1126:                                         0,
1127:                                         0,
1128:                                 /* 34*/ MatDuplicate_MPIDense,
1129:                                         0,
1130:                                         0,
1131:                                         0,
1132:                                         0,
1133:                                 /* 39*/ MatAXPY_MPIDense,
1134:                                         MatGetSubMatrices_MPIDense,
1135:                                         0,
1136:                                         MatGetValues_MPIDense,
1137:                                         0,
1138:                                 /* 44*/ 0,
1139:                                         MatScale_MPIDense,
1140:                                         0,
1141:                                         0,
1142:                                         0,
1143:                                 /* 49*/ MatSetRandom_MPIDense,
1144:                                         0,
1145:                                         0,
1146:                                         0,
1147:                                         0,
1148:                                 /* 54*/ 0,
1149:                                         0,
1150:                                         0,
1151:                                         0,
1152:                                         0,
1153:                                 /* 59*/ MatGetSubMatrix_MPIDense,
1154:                                         MatDestroy_MPIDense,
1155:                                         MatView_MPIDense,
1156:                                         0,
1157:                                         0,
1158:                                 /* 64*/ 0,
1159:                                         0,
1160:                                         0,
1161:                                         0,
1162:                                         0,
1163:                                 /* 69*/ 0,
1164:                                         0,
1165:                                         0,
1166:                                         0,
1167:                                         0,
1168:                                 /* 74*/ 0,
1169:                                         0,
1170:                                         0,
1171:                                         0,
1172:                                         0,
1173:                                 /* 79*/ 0,
1174:                                         0,
1175:                                         0,
1176:                                         0,
1177:                                 /* 83*/ MatLoad_MPIDense,
1178:                                         0,
1179:                                         0,
1180:                                         0,
1181:                                         0,
1182:                                         0,
1183:                                 /* 89*/
1184:                                         0,
1185:                                         0,
1186:                                         0,
1187:                                         0,
1188:                                         0,
1189:                                 /* 94*/ 0,
1190:                                         0,
1191:                                         0,
1192:                                         0,
1193:                                         0,
1194:                                 /* 99*/ 0,
1195:                                         0,
1196:                                         0,
1197:                                         MatConjugate_MPIDense,
1198:                                         0,
1199:                                 /*104*/ 0,
1200:                                         MatRealPart_MPIDense,
1201:                                         MatImaginaryPart_MPIDense,
1202:                                         0,
1203:                                         0,
1204:                                 /*109*/ 0,
1205:                                         0,
1206:                                         0,
1207:                                         0,
1208:                                         0,
1209:                                 /*114*/ 0,
1210:                                         0,
1211:                                         0,
1212:                                         0,
1213:                                         0,
1214:                                 /*119*/ 0,
1215:                                         0,
1216:                                         0,
1217:                                         0,
1218:                                         0,
1219:                                 /*124*/ 0,
1220:                                         MatGetColumnNorms_MPIDense,
1221:                                         0,
1222:                                         0,
1223:                                         0,
1224:                                 /*129*/ 0,
1225:                                         0,
1226:                                         0,
1227:                                         0,
1228:                                         0,
1229:                                 /*134*/ 0,
1230:                                         0,
1231:                                         0,
1232:                                         0,
1233:                                         0,
1234:                                 /*139*/ 0,
1235:                                         0,
1236:                                         0
1237: };

1241: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1242: {
1243:   Mat_MPIDense   *a;

1247:   mat->preallocated = PETSC_TRUE;
1248:   /* Note:  For now, when data is specified above, this assumes the user correctly
1249:    allocates the local dense storage space.  We should add error checking. */

1251:   a       = (Mat_MPIDense*)mat->data;
1252:   PetscLayoutSetUp(mat->rmap);
1253:   PetscLayoutSetUp(mat->cmap);
1254:   a->nvec = mat->cmap->n;

1256:   MatCreate(PETSC_COMM_SELF,&a->A);
1257:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1258:   MatSetType(a->A,MATSEQDENSE);
1259:   MatSeqDenseSetPreallocation(a->A,data);
1260:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1261:   return(0);
1262: }

1266: PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1267: {
1269:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for requested operation");
1270:   return(0);
1271: }

1275: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1276: {
1277:   Mat_MPIDense   *a;

1281:   PetscNewLog(mat,&a);
1282:   mat->data = (void*)a;
1283:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1285:   mat->insertmode = NOT_SET_VALUES;
1286:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1287:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

1289:   /* build cache for off array entries formed */
1290:   a->donotstash = PETSC_FALSE;

1292:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1294:   /* stuff used for matrix vector multiply */
1295:   a->lvec        = 0;
1296:   a->Mvctx       = 0;
1297:   a->roworiented = PETSC_TRUE;

1299:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1300:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1301: #if defined(PETSC_HAVE_ELEMENTAL)
1302:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1303: #endif
1304:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);
1305:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1306:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1307:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1308:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);

1310:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1311:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1312:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1313:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1314:   return(0);
1315: }

1317: /*MC
1318:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1320:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1321:    and MATMPIDENSE otherwise.

1323:    Options Database Keys:
1324: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1326:   Level: beginner


1329: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1330: M*/

1334: /*@C
1335:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1337:    Not collective

1339:    Input Parameters:
1340: .  B - the matrix
1341: -  data - optional location of matrix data.  Set data=NULL for PETSc
1342:    to control all matrix memory allocation.

1344:    Notes:
1345:    The dense format is fully compatible with standard Fortran 77
1346:    storage by columns.

1348:    The data input variable is intended primarily for Fortran programmers
1349:    who wish to allocate their own matrix memory space.  Most users should
1350:    set data=NULL.

1352:    Level: intermediate

1354: .keywords: matrix,dense, parallel

1356: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1357: @*/
1358: PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1359: {

1363:   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1364:   return(0);
1365: }

1369: /*@C
1370:    MatCreateDense - Creates a parallel matrix in dense format.

1372:    Collective on MPI_Comm

1374:    Input Parameters:
1375: +  comm - MPI communicator
1376: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1377: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1378: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1379: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1380: -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1381:    to control all matrix memory allocation.

1383:    Output Parameter:
1384: .  A - the matrix

1386:    Notes:
1387:    The dense format is fully compatible with standard Fortran 77
1388:    storage by columns.

1390:    The data input variable is intended primarily for Fortran programmers
1391:    who wish to allocate their own matrix memory space.  Most users should
1392:    set data=NULL (NULL_SCALAR for Fortran users).

1394:    The user MUST specify either the local or global matrix dimensions
1395:    (possibly both).

1397:    Level: intermediate

1399: .keywords: matrix,dense, parallel

1401: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1402: @*/
1403: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1404: {
1406:   PetscMPIInt    size;

1409:   MatCreate(comm,A);
1410:   MatSetSizes(*A,m,n,M,N);
1411:   MPI_Comm_size(comm,&size);
1412:   if (size > 1) {
1413:     MatSetType(*A,MATMPIDENSE);
1414:     MatMPIDenseSetPreallocation(*A,data);
1415:   } else {
1416:     MatSetType(*A,MATSEQDENSE);
1417:     MatSeqDenseSetPreallocation(*A,data);
1418:   }
1419:   return(0);
1420: }

1424: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1425: {
1426:   Mat            mat;
1427:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1431:   *newmat = 0;
1432:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1433:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1434:   MatSetType(mat,((PetscObject)A)->type_name);
1435:   a       = (Mat_MPIDense*)mat->data;
1436:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));

1438:   mat->factortype   = A->factortype;
1439:   mat->assembled    = PETSC_TRUE;
1440:   mat->preallocated = PETSC_TRUE;

1442:   a->size         = oldmat->size;
1443:   a->rank         = oldmat->rank;
1444:   mat->insertmode = NOT_SET_VALUES;
1445:   a->nvec         = oldmat->nvec;
1446:   a->donotstash   = oldmat->donotstash;

1448:   PetscLayoutReference(A->rmap,&mat->rmap);
1449:   PetscLayoutReference(A->cmap,&mat->cmap);

1451:   MatSetUpMultiply_MPIDense(mat);
1452:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1453:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);

1455:   *newmat = mat;
1456:   return(0);
1457: }

1461: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1462: {
1464:   PetscMPIInt    rank,size;
1465:   PetscInt       *rowners,i,m,nz,j;
1466:   PetscScalar    *array,*vals,*vals_ptr;

1469:   MPI_Comm_rank(comm,&rank);
1470:   MPI_Comm_size(comm,&size);

1472:   /* determine ownership of all rows */
1473:   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1474:   else m = newmat->rmap->n;
1475:   PetscMalloc1((size+2),&rowners);
1476:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1477:   rowners[0] = 0;
1478:   for (i=2; i<=size; i++) {
1479:     rowners[i] += rowners[i-1];
1480:   }

1482:   if (!sizesset) {
1483:     MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1484:   }
1485:   MatMPIDenseSetPreallocation(newmat,NULL);
1486:   MatDenseGetArray(newmat,&array);

1488:   if (!rank) {
1489:     PetscMalloc1(m*N,&vals);

1491:     /* read in my part of the matrix numerical values  */
1492:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);

1494:     /* insert into matrix-by row (this is why cannot directly read into array */
1495:     vals_ptr = vals;
1496:     for (i=0; i<m; i++) {
1497:       for (j=0; j<N; j++) {
1498:         array[i + j*m] = *vals_ptr++;
1499:       }
1500:     }

1502:     /* read in other processors and ship out */
1503:     for (i=1; i<size; i++) {
1504:       nz   = (rowners[i+1] - rowners[i])*N;
1505:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1506:       MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1507:     }
1508:   } else {
1509:     /* receive numeric values */
1510:     PetscMalloc1(m*N,&vals);

1512:     /* receive message of values*/
1513:     MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);

1515:     /* insert into matrix-by row (this is why cannot directly read into array */
1516:     vals_ptr = vals;
1517:     for (i=0; i<m; i++) {
1518:       for (j=0; j<N; j++) {
1519:         array[i + j*m] = *vals_ptr++;
1520:       }
1521:     }
1522:   }
1523:   MatDenseRestoreArray(newmat,&array);
1524:   PetscFree(rowners);
1525:   PetscFree(vals);
1526:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1527:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1528:   return(0);
1529: }

1533: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1534: {
1535:   PetscScalar    *vals,*svals;
1536:   MPI_Comm       comm;
1537:   MPI_Status     status;
1538:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1539:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1540:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1541:   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1542:   int            fd;

1546:   PetscObjectGetComm((PetscObject)viewer,&comm);
1547:   MPI_Comm_size(comm,&size);
1548:   MPI_Comm_rank(comm,&rank);
1549:   if (!rank) {
1550:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1551:     PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1552:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1553:   }
1554:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

1556:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1557:   M    = header[1]; N = header[2]; nz = header[3];

1559:   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1560:   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1561:   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

1563:   /* If global sizes are set, check if they are consistent with that given in the file */
1564:   if (sizesset) {
1565:     MatGetSize(newmat,&grows,&gcols);
1566:   }
1567:   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1568:   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

1570:   /*
1571:        Handle case where matrix is stored on disk as a dense matrix
1572:   */
1573:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1574:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);
1575:     return(0);
1576:   }

1578:   /* determine ownership of all rows */
1579:   if (newmat->rmap->n < 0) {
1580:     PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1581:   } else {
1582:     PetscMPIIntCast(newmat->rmap->n,&m);
1583:   }
1584:   PetscMalloc1((size+2),&rowners);
1585:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1586:   rowners[0] = 0;
1587:   for (i=2; i<=size; i++) {
1588:     rowners[i] += rowners[i-1];
1589:   }
1590:   rstart = rowners[rank];
1591:   rend   = rowners[rank+1];

1593:   /* distribute row lengths to all processors */
1594:   PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);
1595:   if (!rank) {
1596:     PetscMalloc1(M,&rowlengths);
1597:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1598:     PetscMalloc1(size,&sndcounts);
1599:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1600:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1601:     PetscFree(sndcounts);
1602:   } else {
1603:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1604:   }

1606:   if (!rank) {
1607:     /* calculate the number of nonzeros on each processor */
1608:     PetscMalloc1(size,&procsnz);
1609:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1610:     for (i=0; i<size; i++) {
1611:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1612:         procsnz[i] += rowlengths[j];
1613:       }
1614:     }
1615:     PetscFree(rowlengths);

1617:     /* determine max buffer needed and allocate it */
1618:     maxnz = 0;
1619:     for (i=0; i<size; i++) {
1620:       maxnz = PetscMax(maxnz,procsnz[i]);
1621:     }
1622:     PetscMalloc1(maxnz,&cols);

1624:     /* read in my part of the matrix column indices  */
1625:     nz   = procsnz[0];
1626:     PetscMalloc1(nz,&mycols);
1627:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1629:     /* read in every one elses and ship off */
1630:     for (i=1; i<size; i++) {
1631:       nz   = procsnz[i];
1632:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1633:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1634:     }
1635:     PetscFree(cols);
1636:   } else {
1637:     /* determine buffer space needed for message */
1638:     nz = 0;
1639:     for (i=0; i<m; i++) {
1640:       nz += ourlens[i];
1641:     }
1642:     PetscMalloc1((nz+1),&mycols);

1644:     /* receive message of column indices*/
1645:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1646:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1647:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1648:   }

1650:   /* loop over local rows, determining number of off diagonal entries */
1651:   PetscMemzero(offlens,m*sizeof(PetscInt));
1652:   jj   = 0;
1653:   for (i=0; i<m; i++) {
1654:     for (j=0; j<ourlens[i]; j++) {
1655:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1656:       jj++;
1657:     }
1658:   }

1660:   /* create our matrix */
1661:   for (i=0; i<m; i++) ourlens[i] -= offlens[i];

1663:   if (!sizesset) {
1664:     MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1665:   }
1666:   MatMPIDenseSetPreallocation(newmat,NULL);
1667:   for (i=0; i<m; i++) ourlens[i] += offlens[i];

1669:   if (!rank) {
1670:     PetscMalloc1(maxnz,&vals);

1672:     /* read in my part of the matrix numerical values  */
1673:     nz   = procsnz[0];
1674:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);

1676:     /* insert into matrix */
1677:     jj      = rstart;
1678:     smycols = mycols;
1679:     svals   = vals;
1680:     for (i=0; i<m; i++) {
1681:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1682:       smycols += ourlens[i];
1683:       svals   += ourlens[i];
1684:       jj++;
1685:     }

1687:     /* read in other processors and ship out */
1688:     for (i=1; i<size; i++) {
1689:       nz   = procsnz[i];
1690:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1691:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1692:     }
1693:     PetscFree(procsnz);
1694:   } else {
1695:     /* receive numeric values */
1696:     PetscMalloc1((nz+1),&vals);

1698:     /* receive message of values*/
1699:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1700:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1701:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1703:     /* insert into matrix */
1704:     jj      = rstart;
1705:     smycols = mycols;
1706:     svals   = vals;
1707:     for (i=0; i<m; i++) {
1708:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1709:       smycols += ourlens[i];
1710:       svals   += ourlens[i];
1711:       jj++;
1712:     }
1713:   }
1714:   PetscFree2(ourlens,offlens);
1715:   PetscFree(vals);
1716:   PetscFree(mycols);
1717:   PetscFree(rowners);

1719:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1720:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1721:   return(0);
1722: }

1726: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1727: {
1728:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1729:   Mat            a,b;
1730:   PetscBool      flg;

1734:   a    = matA->A;
1735:   b    = matB->A;
1736:   MatEqual(a,b,&flg);
1737:   MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1738:   return(0);
1739: }