Actual source code: mpidense.c

petsc-master 2017-01-16

  2: /*
  3:    Basic functions for parallel dense matrices.
  4: */


  7:  #include <../src/mat/impls/dense/mpi/mpidense.h>
  8:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
  9:  #include <petscblaslapack.h>

 11: /*@

 13:       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
 14:               stores the locally owned rows of the operator; for a MATSEQDENSE matrix, returns the matrix itself.

 16:     Input Parameter:
 17: .      A - the Seq or MPI dense matrix

 19:     Output Parameter:
 20: .      B - the inner matrix

 22:     Level: intermediate

 24: @*/
 25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 26: {
 27:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 29:   PetscBool      flg;

 32:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 33:   if (flg) *B = mat->A;
 34:   else *B = A;
 35:   return(0);
 36: }
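/*
   Illustrative usage sketch (not part of mpidense.c): how a caller might obtain the
   sequential matrix holding the locally owned rows of a MATMPIDENSE matrix.  The
   matrix size and variable names below are hypothetical.

     Mat      A,Aloc;
     PetscInt N = 10;

     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);
     ... set values in A and assemble it ...
     MatDenseGetLocalMatrix(A,&Aloc);    Aloc is the MATSEQDENSE block of locally owned rows
     MatDestroy(&A);
*/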

 38: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 39: {
 40:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 42:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 45:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 46:   lrow = row - rstart;
 47:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 48:   return(0);
 49: }

 51: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 52: {

 56:   if (idx) {PetscFree(*idx);}
 57:   if (v) {PetscFree(*v);}
 58:   return(0);
 59: }

 61: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 62: {
 63:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 65:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 66:   PetscScalar    *array;
 67:   MPI_Comm       comm;
 68:   Mat            B;

 71:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 73:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 74:   if (!B) {
 75:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 76:     MatCreate(comm,&B);
 77:     MatSetSizes(B,m,m,m,m);
 78:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 79:     MatDenseGetArray(mdn->A,&array);
 80:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 81:     MatDenseRestoreArray(mdn->A,&array);
 82:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 83:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 84:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 85:     *a   = B;
 86:     MatDestroy(&B);
 87:   } else *a = B;
 88:   return(0);
 89: }

 91: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
 92: {
 93:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
 95:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
 96:   PetscBool      roworiented = A->roworiented;

 99:   for (i=0; i<m; i++) {
100:     if (idxm[i] < 0) continue;
101:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102:     if (idxm[i] >= rstart && idxm[i] < rend) {
103:       row = idxm[i] - rstart;
104:       if (roworiented) {
105:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
106:       } else {
107:         for (j=0; j<n; j++) {
108:           if (idxn[j] < 0) continue;
109:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
111:         }
112:       }
113:     } else if (!A->donotstash) {
114:       mat->assembled = PETSC_FALSE;
115:       if (roworiented) {
116:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
117:       } else {
118:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
119:       }
120:     }
121:   }
122:   return(0);
123: }

125: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126: {
127:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
129:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

132:   for (i=0; i<m; i++) {
133:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135:     if (idxm[i] >= rstart && idxm[i] < rend) {
136:       row = idxm[i] - rstart;
137:       for (j=0; j<n; j++) {
138:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
141:       }
142:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143:   }
144:   return(0);
145: }

147: PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148: {
149:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

153:   MatDenseGetArray(a->A,array);
154:   return(0);
155: }

157: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
158: {
159:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
160:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
162:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
163:   const PetscInt *irow,*icol;
164:   PetscScalar    *av,*bv,*v = lmat->v;
165:   Mat            newmat;
166:   IS             iscol_local;

169:   ISAllGather(iscol,&iscol_local);
170:   ISGetIndices(isrow,&irow);
171:   ISGetIndices(iscol_local,&icol);
172:   ISGetLocalSize(isrow,&nrows);
173:   ISGetLocalSize(iscol,&ncols);
174:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

176:   /* No parallel redistribution currently supported! Should really check each index set
177:      to confirm that it is OK.  ... Currently supports only a submatrix with the same row
178:      partitioning as the original matrix! */

180:   MatGetLocalSize(A,&nlrows,&nlcols);
181:   MatGetOwnershipRange(A,&rstart,&rend);

183:   /* Check submatrix call */
184:   if (scall == MAT_REUSE_MATRIX) {
185:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
186:     /* Really need to test rows and column sizes! */
187:     newmat = *B;
188:   } else {
189:     /* Create and fill new matrix */
190:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
191:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
192:     MatSetType(newmat,((PetscObject)A)->type_name);
193:     MatMPIDenseSetPreallocation(newmat,NULL);
194:   }

196:   /* Now extract the data pointers and do the copy, column at a time */
197:   newmatd = (Mat_MPIDense*)newmat->data;
198:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

200:   for (i=0; i<Ncols; i++) {
201:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
202:     for (j=0; j<nrows; j++) {
203:       *bv++ = av[irow[j] - rstart];
204:     }
205:   }

207:   /* Assemble the matrices so that the correct flags are set */
208:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
209:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

211:   /* Free work space */
212:   ISRestoreIndices(isrow,&irow);
213:   ISRestoreIndices(iscol_local,&icol);
214:   ISDestroy(&iscol_local);
215:   *B   = newmat;
216:   return(0);
217: }

219: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
220: {
221:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

225:   MatDenseRestoreArray(a->A,array);
226:   return(0);
227: }

229: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
230: {
231:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
232:   MPI_Comm       comm;
234:   PetscInt       nstash,reallocs;
235:   InsertMode     addv;

238:   PetscObjectGetComm((PetscObject)mat,&comm);
239:   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
240:   MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
241:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
242:   mat->insertmode = addv; /* in case this processor had no cache */

244:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
245:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
246:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
247:   return(0);
248: }

250: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
251: {
252:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
254:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
255:   PetscMPIInt    n;
256:   PetscScalar    *val;

259:   /*  wait on receives */
260:   while (1) {
261:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
262:     if (!flg) break;

264:     for (i=0; i<n;) {
265:       /* Now identify the consecutive vals belonging to the same row */
266:       for (j=i,rstart=row[j]; j<n; j++) {
267:         if (row[j] != rstart) break;
268:       }
269:       if (j < n) ncols = j-i;
270:       else       ncols = n-i;
271:       /* Now assemble all these values with a single function call */
272:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
273:       i    = j;
274:     }
275:   }
276:   MatStashScatterEnd_Private(&mat->stash);

278:   MatAssemblyBegin(mdn->A,mode);
279:   MatAssemblyEnd(mdn->A,mode);

281:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
282:     MatSetUpMultiply_MPIDense(mat);
283:   }
284:   return(0);
285: }

287: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
288: {
290:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

293:   MatZeroEntries(l->A);
294:   return(0);
295: }

297: /* the code does not do the diagonal entries correctly unless the
298:    matrix is square and the column and row ownerships are identical.
299:    This is a BUG. The only way to fix it seems to be to access
300:    mdn->A and mdn->B directly and not through the MatZeroRows()
301:    routine.
302: */
303: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
304: {
305:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
306:   PetscErrorCode    ierr;
307:   PetscInt          i,*owners = A->rmap->range;
308:   PetscInt          *sizes,j,idx,nsends;
309:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
310:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
311:   PetscInt          *lens,*lrows,*values;
312:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
313:   MPI_Comm          comm;
314:   MPI_Request       *send_waits,*recv_waits;
315:   MPI_Status        recv_status,*send_status;
316:   PetscBool         found;
317:   const PetscScalar *xx;
318:   PetscScalar       *bb;

321:   PetscObjectGetComm((PetscObject)A,&comm);
322:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
323:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
324:   /*  first count number of contributors to each processor */
325:   PetscCalloc1(2*size,&sizes);
326:   PetscMalloc1(N+1,&owner);  /* see note*/
327:   for (i=0; i<N; i++) {
328:     idx   = rows[i];
329:     found = PETSC_FALSE;
330:     for (j=0; j<size; j++) {
331:       if (idx >= owners[j] && idx < owners[j+1]) {
332:         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
333:       }
334:     }
335:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
336:   }
337:   nsends = 0;
338:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

340:   /* inform other processors of number of messages and max length*/
341:   PetscMaxSum(comm,sizes,&nmax,&nrecvs);

343:   /* post receives:   */
344:   PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
345:   PetscMalloc1(nrecvs+1,&recv_waits);
346:   for (i=0; i<nrecvs; i++) {
347:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
348:   }

350:   /* do sends:
351:       1) starts[i] gives the starting index in svalues for stuff going to
352:          the ith processor
353:   */
354:   PetscMalloc1(N+1,&svalues);
355:   PetscMalloc1(nsends+1,&send_waits);
356:   PetscMalloc1(size+1,&starts);

358:   starts[0] = 0;
359:   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
360:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

362:   starts[0] = 0;
363:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
364:   count = 0;
365:   for (i=0; i<size; i++) {
366:     if (sizes[2*i+1]) {
367:       MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
368:     }
369:   }
370:   PetscFree(starts);

372:   base = owners[rank];

374:   /*  wait on receives */
375:   PetscMalloc2(nrecvs,&lens,nrecvs,&source);
376:   count = nrecvs;
377:   slen  = 0;
378:   while (count) {
379:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
380:     /* unpack receives into our local space */
381:     MPI_Get_count(&recv_status,MPIU_INT,&n);

383:     source[imdex] = recv_status.MPI_SOURCE;
384:     lens[imdex]   = n;
385:     slen += n;
386:     count--;
387:   }
388:   PetscFree(recv_waits);

390:   /* move the data into the send scatter */
391:   PetscMalloc1(slen+1,&lrows);
392:   count = 0;
393:   for (i=0; i<nrecvs; i++) {
394:     values = rvalues + i*nmax;
395:     for (j=0; j<lens[i]; j++) {
396:       lrows[count++] = values[j] - base;
397:     }
398:   }
399:   PetscFree(rvalues);
400:   PetscFree2(lens,source);
401:   PetscFree(owner);
402:   PetscFree(sizes);

404:   /* fix right hand side if needed */
405:   if (x && b) {
406:     VecGetArrayRead(x,&xx);
407:     VecGetArray(b,&bb);
408:     for (i=0; i<slen; i++) {
409:       bb[lrows[i]] = diag*xx[lrows[i]];
410:     }
411:     VecRestoreArrayRead(x,&xx);
412:     VecRestoreArray(b,&bb);
413:   }

415:   /* actually zap the local rows */
416:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
417:   if (diag != 0.0) {
418:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
419:     PetscInt     m   = ll->lda, i;

421:     for (i=0; i<slen; i++) {
422:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
423:     }
424:   }
425:   PetscFree(lrows);

427:   /* wait on sends */
428:   if (nsends) {
429:     PetscMalloc1(nsends,&send_status);
430:     MPI_Waitall(nsends,send_waits,send_status);
431:     PetscFree(send_status);
432:   }
433:   PetscFree(send_waits);
434:   PetscFree(svalues);
435:   return(0);
436: }

438: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
439: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
440: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
441: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

443: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
444: {
445:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

449:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
450:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
451:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
452:   return(0);
453: }

455: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
456: {
457:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

461:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
462:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
463:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
464:   return(0);
465: }

467: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
468: {
469:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
471:   PetscScalar    zero = 0.0;

474:   VecSet(yy,zero);
475:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
476:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
477:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
478:   return(0);
479: }

481: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
482: {
483:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

487:   VecCopy(yy,zz);
488:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
489:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
490:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
491:   return(0);
492: }

494: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
495: {
496:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
497:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
499:   PetscInt       len,i,n,m = A->rmap->n,radd;
500:   PetscScalar    *x,zero = 0.0;

503:   VecSet(v,zero);
504:   VecGetArray(v,&x);
505:   VecGetSize(v,&n);
506:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
507:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
508:   radd = A->rmap->rstart*m;
509:   for (i=0; i<len; i++) {
510:     x[i] = aloc->v[radd + i*m + i];
511:   }
512:   VecRestoreArray(v,&x);
513:   return(0);
514: }

516: PetscErrorCode MatDestroy_MPIDense(Mat mat)
517: {
518:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

522: #if defined(PETSC_USE_LOG)
523:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
524: #endif
525:   MatStashDestroy_Private(&mat->stash);
526:   MatDestroy(&mdn->A);
527:   VecDestroy(&mdn->lvec);
528:   VecScatterDestroy(&mdn->Mvctx);

530:   PetscFree(mat->data);
531:   PetscObjectChangeTypeName((PetscObject)mat,0);

533:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
534:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
535: #if defined(PETSC_HAVE_ELEMENTAL)
536:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
537: #endif
538:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
539:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
540:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
541:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
542:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
543:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
544:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
545:   return(0);
546: }

548: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
549: {
550:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
551:   PetscErrorCode    ierr;
552:   PetscViewerFormat format;
553:   int               fd;
554:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
555:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
556:   PetscScalar       *work,*v,*vv;
557:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

560:   if (mdn->size == 1) {
561:     MatView(mdn->A,viewer);
562:   } else {
563:     PetscViewerBinaryGetDescriptor(viewer,&fd);
564:     MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
565:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);

567:     PetscViewerGetFormat(viewer,&format);
568:     if (format == PETSC_VIEWER_NATIVE) {

570:       if (!rank) {
571:         /* store the matrix as a dense matrix */
572:         header[0] = MAT_FILE_CLASSID;
573:         header[1] = mat->rmap->N;
574:         header[2] = N;
575:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
576:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

578:         /* get largest work array needed for transposing array */
579:         mmax = mat->rmap->n;
580:         for (i=1; i<size; i++) {
581:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
582:         }
583:         PetscMalloc1(mmax*N,&work);

585:         /* write out local array, by rows */
586:         m = mat->rmap->n;
587:         v = a->v;
588:         for (j=0; j<N; j++) {
589:           for (i=0; i<m; i++) {
590:             work[j + i*N] = *v++;
591:           }
592:         }
593:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
594:         /* get largest work array to receive messages from other processes, excludes process zero */
595:         mmax = 0;
596:         for (i=1; i<size; i++) {
597:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
598:         }
599:         PetscMalloc1(mmax*N,&vv);
600:         for (k = 1; k < size; k++) {
601:           v    = vv;
602:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
603:           MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));

605:           for (j = 0; j < N; j++) {
606:             for (i = 0; i < m; i++) {
607:               work[j + i*N] = *v++;
608:             }
609:           }
610:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
611:         }
612:         PetscFree(work);
613:         PetscFree(vv);
614:       } else {
615:         MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
616:       }
617:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
618:   }
619:   return(0);
620: }

622: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
623:  #include <petscdraw.h>
624: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
625: {
626:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
627:   PetscErrorCode    ierr;
628:   PetscMPIInt       rank = mdn->rank;
629:   PetscViewerType   vtype;
630:   PetscBool         iascii,isdraw;
631:   PetscViewer       sviewer;
632:   PetscViewerFormat format;

635:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
636:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
637:   if (iascii) {
638:     PetscViewerGetType(viewer,&vtype);
639:     PetscViewerGetFormat(viewer,&format);
640:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
641:       MatInfo info;
642:       MatGetInfo(mat,MAT_LOCAL,&info);
643:       PetscViewerASCIIPushSynchronized(viewer);
644:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
645:       PetscViewerFlush(viewer);
646:       PetscViewerASCIIPopSynchronized(viewer);
647:       VecScatterView(mdn->Mvctx,viewer);
648:       return(0);
649:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
650:       return(0);
651:     }
652:   } else if (isdraw) {
653:     PetscDraw draw;
654:     PetscBool isnull;

656:     PetscViewerDrawGetDraw(viewer,0,&draw);
657:     PetscDrawIsNull(draw,&isnull);
658:     if (isnull) return(0);
659:   }

661:   {
662:     /* assemble the entire matrix onto first processor. */
663:     Mat         A;
664:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
665:     PetscInt    *cols;
666:     PetscScalar *vals;

668:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
669:     if (!rank) {
670:       MatSetSizes(A,M,N,M,N);
671:     } else {
672:       MatSetSizes(A,0,0,M,N);
673:     }
674:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
675:     MatSetType(A,MATMPIDENSE);
676:     MatMPIDenseSetPreallocation(A,NULL);
677:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

679:     /* Copy the matrix ... This isn't the most efficient means,
680:        but it's quick for now */
681:     A->insertmode = INSERT_VALUES;

683:     row = mat->rmap->rstart;
684:     m   = mdn->A->rmap->n;
685:     for (i=0; i<m; i++) {
686:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
687:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
688:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
689:       row++;
690:     }

692:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
693:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
694:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
695:     if (!rank) {
696:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
697:       MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
698:     }
699:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
700:     PetscViewerFlush(viewer);
701:     MatDestroy(&A);
702:   }
703:   return(0);
704: }

706: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
707: {
709:   PetscBool      iascii,isbinary,isdraw,issocket;

712:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
713:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
714:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
715:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

717:   if (iascii || issocket || isdraw) {
718:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
719:   } else if (isbinary) {
720:     MatView_MPIDense_Binary(mat,viewer);
721:   }
722:   return(0);
723: }

725: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
726: {
727:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
728:   Mat            mdn  = mat->A;
730:   PetscReal      isend[5],irecv[5];

733:   info->block_size = 1.0;

735:   MatGetInfo(mdn,MAT_LOCAL,info);

737:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
738:   isend[3] = info->memory;  isend[4] = info->mallocs;
739:   if (flag == MAT_LOCAL) {
740:     info->nz_used      = isend[0];
741:     info->nz_allocated = isend[1];
742:     info->nz_unneeded  = isend[2];
743:     info->memory       = isend[3];
744:     info->mallocs      = isend[4];
745:   } else if (flag == MAT_GLOBAL_MAX) {
746:     MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));

748:     info->nz_used      = irecv[0];
749:     info->nz_allocated = irecv[1];
750:     info->nz_unneeded  = irecv[2];
751:     info->memory       = irecv[3];
752:     info->mallocs      = irecv[4];
753:   } else if (flag == MAT_GLOBAL_SUM) {
754:     MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));

756:     info->nz_used      = irecv[0];
757:     info->nz_allocated = irecv[1];
758:     info->nz_unneeded  = irecv[2];
759:     info->memory       = irecv[3];
760:     info->mallocs      = irecv[4];
761:   }
762:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
763:   info->fill_ratio_needed = 0;
764:   info->factor_mallocs    = 0;
765:   return(0);
766: }

768: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
769: {
770:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

774:   switch (op) {
775:   case MAT_NEW_NONZERO_LOCATIONS:
776:   case MAT_NEW_NONZERO_LOCATION_ERR:
777:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
778:     MatCheckPreallocated(A,1);
779:     MatSetOption(a->A,op,flg);
780:     break;
781:   case MAT_ROW_ORIENTED:
782:     MatCheckPreallocated(A,1);
783:     a->roworiented = flg;
784:     MatSetOption(a->A,op,flg);
785:     break;
786:   case MAT_NEW_DIAGONALS:
787:   case MAT_KEEP_NONZERO_PATTERN:
788:   case MAT_USE_HASH_TABLE:
789:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
790:     break;
791:   case MAT_IGNORE_OFF_PROC_ENTRIES:
792:     a->donotstash = flg;
793:     break;
794:   case MAT_SYMMETRIC:
795:   case MAT_STRUCTURALLY_SYMMETRIC:
796:   case MAT_HERMITIAN:
797:   case MAT_SYMMETRY_ETERNAL:
798:   case MAT_IGNORE_LOWER_TRIANGULAR:
799:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
800:     break;
801:   default:
802:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
803:   }
804:   return(0);
805: }


808: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
809: {
810:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
811:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
812:   PetscScalar    *l,*r,x,*v;
814:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

817:   MatGetLocalSize(A,&s2,&s3);
818:   if (ll) {
819:     VecGetLocalSize(ll,&s2a);
820:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
821:     VecGetArray(ll,&l);
822:     for (i=0; i<m; i++) {
823:       x = l[i];
824:       v = mat->v + i;
825:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
826:     }
827:     VecRestoreArray(ll,&l);
828:     PetscLogFlops(n*m);
829:   }
830:   if (rr) {
831:     VecGetLocalSize(rr,&s3a);
832:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
833:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
834:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
835:     VecGetArray(mdn->lvec,&r);
836:     for (i=0; i<n; i++) {
837:       x = r[i];
838:       v = mat->v + i*m;
839:       for (j=0; j<m; j++) (*v++) *= x;
840:     }
841:     VecRestoreArray(mdn->lvec,&r);
842:     PetscLogFlops(n*m);
843:   }
844:   return(0);
845: }

847: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
848: {
849:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
850:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852:   PetscInt       i,j;
853:   PetscReal      sum = 0.0;
854:   PetscScalar    *v  = mat->v;

857:   if (mdn->size == 1) {
858:      MatNorm(mdn->A,type,nrm);
859:   } else {
860:     if (type == NORM_FROBENIUS) {
861:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
862:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
863:       }
864:       MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
865:       *nrm = PetscSqrtReal(*nrm);
866:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
867:     } else if (type == NORM_1) {
868:       PetscReal *tmp,*tmp2;
869:       PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
870:       PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
871:       PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
872:       *nrm = 0.0;
873:       v    = mat->v;
874:       for (j=0; j<mdn->A->cmap->n; j++) {
875:         for (i=0; i<mdn->A->rmap->n; i++) {
876:           tmp[j] += PetscAbsScalar(*v);  v++;
877:         }
878:       }
879:       MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
880:       for (j=0; j<A->cmap->N; j++) {
881:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
882:       }
883:       PetscFree2(tmp,tmp2);
884:       PetscLogFlops(A->cmap->n*A->rmap->n);
885:     } else if (type == NORM_INFINITY) { /* max row norm */
886:       PetscReal ntemp;
887:       MatNorm(mdn->A,type,&ntemp);
888:       MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
889:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
890:   }
891:   return(0);
892: }

894: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
895: {
896:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
897:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
898:   Mat            B;
899:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
901:   PetscInt       j,i;
902:   PetscScalar    *v;

905:   if (reuse == MAT_INPLACE_MATRIX  && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
906:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
907:     MatCreate(PetscObjectComm((PetscObject)A),&B);
908:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
909:     MatSetType(B,((PetscObject)A)->type_name);
910:     MatMPIDenseSetPreallocation(B,NULL);
911:   } else {
912:     B = *matout;
913:   }

915:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
916:   PetscMalloc1(m,&rwork);
917:   for (i=0; i<m; i++) rwork[i] = rstart + i;
918:   for (j=0; j<n; j++) {
919:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
920:     v   += m;
921:   }
922:   PetscFree(rwork);
923:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
924:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
925:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
926:     *matout = B;
927:   } else {
928:     MatHeaderMerge(A,&B);
929:   }
930:   return(0);
931: }


934: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
935: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

937: PetscErrorCode MatSetUp_MPIDense(Mat A)
938: {

942:    MatMPIDenseSetPreallocation(A,0);
943:   return(0);
944: }

946: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
947: {
949:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

952:   MatAXPY(A->A,alpha,B->A,str);
953:   PetscObjectStateIncrease((PetscObject)Y);
954:   return(0);
955: }

957: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
958: {
959:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

963:   MatConjugate(a->A);
964:   return(0);
965: }

967: PetscErrorCode MatRealPart_MPIDense(Mat A)
968: {
969:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

973:   MatRealPart(a->A);
974:   return(0);
975: }

977: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
978: {
979:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

983:   MatImaginaryPart(a->A);
984:   return(0);
985: }

987: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
988: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
989: {
991:   PetscInt       i,n;
992:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
993:   PetscReal      *work;

996:   MatGetSize(A,NULL,&n);
997:   PetscMalloc1(n,&work);
998:   MatGetColumnNorms_SeqDense(a->A,type,work);
999:   if (type == NORM_2) {
1000:     for (i=0; i<n; i++) work[i] *= work[i];
1001:   }
1002:   if (type == NORM_INFINITY) {
1003:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1004:   } else {
1005:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1006:   }
1007:   PetscFree(work);
1008:   if (type == NORM_2) {
1009:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1010:   }
1011:   return(0);
1012: }

1014: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1015: {
1016:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1018:   PetscScalar    *a;
1019:   PetscInt       m,n,i;

1022:   MatGetSize(d->A,&m,&n);
1023:   MatDenseGetArray(d->A,&a);
1024:   for (i=0; i<m*n; i++) {
1025:     PetscRandomGetValue(rctx,a+i);
1026:   }
1027:   MatDenseRestoreArray(d->A,&a);
1028:   return(0);
1029: }

1031: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

1033: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1034: {
1036:   *missing = PETSC_FALSE;
1037:   return(0);
1038: }

1040: /* -------------------------------------------------------------------*/
1041: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1042:                                         MatGetRow_MPIDense,
1043:                                         MatRestoreRow_MPIDense,
1044:                                         MatMult_MPIDense,
1045:                                 /*  4*/ MatMultAdd_MPIDense,
1046:                                         MatMultTranspose_MPIDense,
1047:                                         MatMultTransposeAdd_MPIDense,
1048:                                         0,
1049:                                         0,
1050:                                         0,
1051:                                 /* 10*/ 0,
1052:                                         0,
1053:                                         0,
1054:                                         0,
1055:                                         MatTranspose_MPIDense,
1056:                                 /* 15*/ MatGetInfo_MPIDense,
1057:                                         MatEqual_MPIDense,
1058:                                         MatGetDiagonal_MPIDense,
1059:                                         MatDiagonalScale_MPIDense,
1060:                                         MatNorm_MPIDense,
1061:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1062:                                         MatAssemblyEnd_MPIDense,
1063:                                         MatSetOption_MPIDense,
1064:                                         MatZeroEntries_MPIDense,
1065:                                 /* 24*/ MatZeroRows_MPIDense,
1066:                                         0,
1067:                                         0,
1068:                                         0,
1069:                                         0,
1070:                                 /* 29*/ MatSetUp_MPIDense,
1071:                                         0,
1072:                                         0,
1073:                                         MatGetDiagonalBlock_MPIDense,
1074:                                         0,
1075:                                 /* 34*/ MatDuplicate_MPIDense,
1076:                                         0,
1077:                                         0,
1078:                                         0,
1079:                                         0,
1080:                                 /* 39*/ MatAXPY_MPIDense,
1081:                                         MatGetSubMatrices_MPIDense,
1082:                                         0,
1083:                                         MatGetValues_MPIDense,
1084:                                         0,
1085:                                 /* 44*/ 0,
1086:                                         MatScale_MPIDense,
1087:                                         MatShift_Basic,
1088:                                         0,
1089:                                         0,
1090:                                 /* 49*/ MatSetRandom_MPIDense,
1091:                                         0,
1092:                                         0,
1093:                                         0,
1094:                                         0,
1095:                                 /* 54*/ 0,
1096:                                         0,
1097:                                         0,
1098:                                         0,
1099:                                         0,
1100:                                 /* 59*/ MatGetSubMatrix_MPIDense,
1101:                                         MatDestroy_MPIDense,
1102:                                         MatView_MPIDense,
1103:                                         0,
1104:                                         0,
1105:                                 /* 64*/ 0,
1106:                                         0,
1107:                                         0,
1108:                                         0,
1109:                                         0,
1110:                                 /* 69*/ 0,
1111:                                         0,
1112:                                         0,
1113:                                         0,
1114:                                         0,
1115:                                 /* 74*/ 0,
1116:                                         0,
1117:                                         0,
1118:                                         0,
1119:                                         0,
1120:                                 /* 79*/ 0,
1121:                                         0,
1122:                                         0,
1123:                                         0,
1124:                                 /* 83*/ MatLoad_MPIDense,
1125:                                         0,
1126:                                         0,
1127:                                         0,
1128:                                         0,
1129:                                         0,
1130: #if defined(PETSC_HAVE_ELEMENTAL)
1131:                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1132:                                         MatMatMultSymbolic_MPIDense_MPIDense,
1133: #else
1134:                                 /* 89*/ 0,
1135:                                         0,
1136: #endif
1137:                                         MatMatMultNumeric_MPIDense,
1138:                                         0,
1139:                                         0,
1140:                                 /* 94*/ 0,
1141:                                         0,
1142:                                         0,
1143:                                         0,
1144:                                         0,
1145:                                 /* 99*/ 0,
1146:                                         0,
1147:                                         0,
1148:                                         MatConjugate_MPIDense,
1149:                                         0,
1150:                                 /*104*/ 0,
1151:                                         MatRealPart_MPIDense,
1152:                                         MatImaginaryPart_MPIDense,
1153:                                         0,
1154:                                         0,
1155:                                 /*109*/ 0,
1156:                                         0,
1157:                                         0,
1158:                                         0,
1159:                                         MatMissingDiagonal_MPIDense,
1160:                                 /*114*/ 0,
1161:                                         0,
1162:                                         0,
1163:                                         0,
1164:                                         0,
1165:                                 /*119*/ 0,
1166:                                         0,
1167:                                         0,
1168:                                         0,
1169:                                         0,
1170:                                 /*124*/ 0,
1171:                                         MatGetColumnNorms_MPIDense,
1172:                                         0,
1173:                                         0,
1174:                                         0,
1175:                                 /*129*/ 0,
1176:                                         MatTransposeMatMult_MPIDense_MPIDense,
1177:                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1178:                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1179:                                         0,
1180:                                 /*134*/ 0,
1181:                                         0,
1182:                                         0,
1183:                                         0,
1184:                                         0,
1185:                                 /*139*/ 0,
1186:                                         0,
1187:                                         0
1188: };

1190: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1191: {
1192:   Mat_MPIDense   *a;

1196:   mat->preallocated = PETSC_TRUE;
1197:   /* Note:  For now, when data is specified above, this assumes the user correctly
1198:    allocates the local dense storage space.  We should add error checking. */

1200:   a       = (Mat_MPIDense*)mat->data;
1201:   PetscLayoutSetUp(mat->rmap);
1202:   PetscLayoutSetUp(mat->cmap);
1203:   a->nvec = mat->cmap->n;

1205:   MatCreate(PETSC_COMM_SELF,&a->A);
1206:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1207:   MatSetType(a->A,MATSEQDENSE);
1208:   MatSeqDenseSetPreallocation(a->A,data);
1209:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1210:   return(0);
1211: }

1213: #if defined(PETSC_HAVE_ELEMENTAL)
1214: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1215: {
1216:   Mat            mat_elemental;
1218:   PetscScalar    *v;
1219:   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1220: 
1222:   if (reuse == MAT_REUSE_MATRIX) {
1223:     mat_elemental = *newmat;
1224:     MatZeroEntries(*newmat);
1225:   } else {
1226:     MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1227:     MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1228:     MatSetType(mat_elemental,MATELEMENTAL);
1229:     MatSetUp(mat_elemental);
1230:     MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1231:   }

1233:   PetscMalloc2(m,&rows,N,&cols);
1234:   for (i=0; i<N; i++) cols[i] = i;
1235:   for (i=0; i<m; i++) rows[i] = rstart + i;
1236: 
1237:   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1238:   MatDenseGetArray(A,&v);
1239:   MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1240:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1241:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1242:   MatDenseRestoreArray(A,&v);
1243:   PetscFree2(rows,cols);

1245:   if (reuse == MAT_INPLACE_MATRIX) {
1246:     MatHeaderReplace(A,&mat_elemental);
1247:   } else {
1248:     *newmat = mat_elemental;
1249:   }
1250:   return(0);
1251: }
1252: #endif

1254: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1255: {
1256:   Mat_MPIDense   *a;

1260:   PetscNewLog(mat,&a);
1261:   mat->data = (void*)a;
1262:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1264:   mat->insertmode = NOT_SET_VALUES;
1265:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1266:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

1268:   /* build cache (stash) for off-processor entries */
1269:   a->donotstash = PETSC_FALSE;

1271:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1273:   /* stuff used for matrix vector multiply */
1274:   a->lvec        = 0;
1275:   a->Mvctx       = 0;
1276:   a->roworiented = PETSC_TRUE;

1278:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1279:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1280: #if defined(PETSC_HAVE_ELEMENTAL)
1281:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1282: #endif
1283:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1284:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1285:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1286:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);

1288:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1289:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1290:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1291:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1292:   return(0);
1293: }

1295: /*MC
1296:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1298:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1299:    and MATMPIDENSE otherwise.

1301:    Options Database Keys:
1302: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1304:   Level: beginner


1307: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1308: M*/
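/*
   Illustrative sketch (not part of mpidense.c): selecting the dense format at run time.
   A minimal example assuming a standard PETSc program; run it with -mat_type dense to
   obtain MATSEQDENSE on one process or MATMPIDENSE on several.

     Mat A;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetFromOptions(A);
     MatSetUp(A);
     ... use A ...
     MatDestroy(&A);
*/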

1310: /*@C
1311:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1313:    Not collective

1315:    Input Parameters:
1316: +  B - the matrix
1317: -  data - optional location of matrix data.  Set data=NULL for PETSc
1318:    to control all matrix memory allocation.

1320:    Notes:
1321:    The dense format is fully compatible with standard Fortran 77
1322:    storage by columns.

1324:    The data input variable is intended primarily for Fortran programmers
1325:    who wish to allocate their own matrix memory space.  Most users should
1326:    set data=NULL.

1328:    Level: intermediate

1330: .keywords: matrix,dense, parallel

1332: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1333: @*/
1334: PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1335: {

1339:   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1340:   return(0);
1341: }
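/*
   Illustrative sketch (not part of mpidense.c): preallocating a MATMPIDENSE matrix.
   Passing data = NULL lets PETSc allocate the local storage; alternatively a user
   array of size (local rows) x (global columns), stored by columns, may be supplied.
   The sizes below are hypothetical.

     Mat      B;
     PetscInt m = 4,N = 8;    local rows and global columns

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,m,PETSC_DECIDE,PETSC_DETERMINE,N);
     MatSetType(B,MATMPIDENSE);
     MatMPIDenseSetPreallocation(B,NULL);
     ... set values, assemble, and use B ...
     MatDestroy(&B);
*/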

1343: /*@C
1344:    MatCreateDense - Creates a parallel matrix in dense format.

1346:    Collective on MPI_Comm

1348:    Input Parameters:
1349: +  comm - MPI communicator
1350: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1351: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1352: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1353: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1354: -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1355:    to control all matrix memory allocation.

1357:    Output Parameter:
1358: .  A - the matrix

1360:    Notes:
1361:    The dense format is fully compatible with standard Fortran 77
1362:    storage by columns.

1364:    The data input variable is intended primarily for Fortran programmers
1365:    who wish to allocate their own matrix memory space.  Most users should
1366:    set data=NULL (PETSC_NULL_SCALAR for Fortran users).

1368:    The user MUST specify either the local or global matrix dimensions
1369:    (possibly both).

1371:    Level: intermediate

1373: .keywords: matrix,dense, parallel

1375: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1376: @*/
1377: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1378: {
1380:   PetscMPIInt    size;

1383:   MatCreate(comm,A);
1384:   MatSetSizes(*A,m,n,M,N);
1385:   MPI_Comm_size(comm,&size);
1386:   if (size > 1) {
1387:     MatSetType(*A,MATMPIDENSE);
1388:     MatMPIDenseSetPreallocation(*A,data);
1389:     if (data) {  /* user provided data array, so no need to assemble */
1390:       MatSetUpMultiply_MPIDense(*A);
1391:       (*A)->assembled = PETSC_TRUE;
1392:     }
1393:   } else {
1394:     MatSetType(*A,MATSEQDENSE);
1395:     MatSeqDenseSetPreallocation(*A,data);
1396:   }
1397:   return(0);
1398: }
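/*
   Illustrative sketch (not part of mpidense.c): creating a parallel dense matrix with
   MatCreateDense(), filling the locally owned rows, and assembling.  The size and
   entry values are hypothetical.

     Mat         A;
     PetscInt    i,j,rstart,rend,N = 10;
     PetscScalar v;

     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         v = 1.0/(i+j+1);
         MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
       }
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/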

1400: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1401: {
1402:   Mat            mat;
1403:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1407:   *newmat = 0;
1408:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1409:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1410:   MatSetType(mat,((PetscObject)A)->type_name);
1411:   a       = (Mat_MPIDense*)mat->data;
1412:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));

1414:   mat->factortype   = A->factortype;
1415:   mat->assembled    = PETSC_TRUE;
1416:   mat->preallocated = PETSC_TRUE;

1418:   a->size         = oldmat->size;
1419:   a->rank         = oldmat->rank;
1420:   mat->insertmode = NOT_SET_VALUES;
1421:   a->nvec         = oldmat->nvec;
1422:   a->donotstash   = oldmat->donotstash;

1424:   PetscLayoutReference(A->rmap,&mat->rmap);
1425:   PetscLayoutReference(A->cmap,&mat->cmap);

1427:   MatSetUpMultiply_MPIDense(mat);
1428:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1429:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);

1431:   *newmat = mat;
1432:   return(0);
1433: }

1435: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1436: {
1438:   PetscMPIInt    rank,size;
1439:   const PetscInt *rowners;
1440:   PetscInt       i,m,n,nz,j,mMax;
1441:   PetscScalar    *array,*vals,*vals_ptr;
1442:   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

1445:   MPI_Comm_rank(comm,&rank);
1446:   MPI_Comm_size(comm,&size);

1448:   /* determine ownership of rows and columns */
1449:   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1450:   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

1452:   MatSetSizes(newmat,m,n,M,N);
1453:   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1454:     MatMPIDenseSetPreallocation(newmat,NULL);
1455:   }
1456:   MatDenseGetArray(newmat,&array);
1457:   MatGetLocalSize(newmat,&m,NULL);
1458:   MatGetOwnershipRanges(newmat,&rowners);
1459:   MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1460:   if (!rank) {
1461:     PetscMalloc1(mMax*N,&vals);

1463:     /* read in my part of the matrix numerical values  */
1464:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);

1466:     /* insert into matrix by row (this is why we cannot read directly into array) */
1467:     vals_ptr = vals;
1468:     for (i=0; i<m; i++) {
1469:       for (j=0; j<N; j++) {
1470:         array[i + j*m] = *vals_ptr++;
1471:       }
1472:     }

1474:     /* read in other processors and ship out */
1475:     for (i=1; i<size; i++) {
1476:       nz   = (rowners[i+1] - rowners[i])*N;
1477:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1478:       MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1479:     }
1480:   } else {
1481:     /* receive numeric values */
1482:     PetscMalloc1(m*N,&vals);

1484:     /* receive message of values*/
1485:     MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);

1487:     /* insert into matrix by row (this is why we cannot read directly into array) */
1488:     vals_ptr = vals;
1489:     for (i=0; i<m; i++) {
1490:       for (j=0; j<N; j++) {
1491:         array[i + j*m] = *vals_ptr++;
1492:       }
1493:     }
1494:   }
1495:   MatDenseRestoreArray(newmat,&array);
1496:   PetscFree(vals);
1497:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1498:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1499:   return(0);
1500: }

1502: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1503: {
1504:   Mat_MPIDense   *a;
1505:   PetscScalar    *vals,*svals;
1506:   MPI_Comm       comm;
1507:   MPI_Status     status;
1508:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1509:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1510:   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1511:   PetscInt       i,nz,j,rstart,rend;
1512:   int            fd;

1516:   /* force binary viewer to load .info file if it has not yet done so */
1517:   PetscViewerSetUp(viewer);
1518:   PetscObjectGetComm((PetscObject)viewer,&comm);
1519:   MPI_Comm_size(comm,&size);
1520:   MPI_Comm_rank(comm,&rank);
1521:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1522:   if (!rank) {
1523:     PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1524:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1525:   }
1526:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1527:   M    = header[1]; N = header[2]; nz = header[3];

1529:   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1530:   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1531:   if (newmat->cmap->N < 0) newmat->cmap->N = N;

1533:   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1534:   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

1536:   /*
1537:        Handle case where matrix is stored on disk as a dense matrix
1538:   */
1539:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1540:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1541:     return(0);
1542:   }

1544:   /* determine ownership of all rows */
1545:   if (newmat->rmap->n < 0) {
1546:     PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1547:   } else {
1548:     PetscMPIIntCast(newmat->rmap->n,&m);
1549:   }
1550:   if (newmat->cmap->n < 0) {
1551:     n = PETSC_DECIDE;
1552:   } else {
1553:     PetscMPIIntCast(newmat->cmap->n,&n);
1554:   }

1556:   PetscMalloc1(size+2,&rowners);
1557:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1558:   rowners[0] = 0;
1559:   for (i=2; i<=size; i++) {
1560:     rowners[i] += rowners[i-1];
1561:   }
1562:   rstart = rowners[rank];
1563:   rend   = rowners[rank+1];

1565:   /* distribute row lengths to all processors */
1566:   PetscMalloc1(rend-rstart,&ourlens);
1567:   if (!rank) {
1568:     PetscMalloc1(M,&rowlengths);
1569:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1570:     PetscMalloc1(size,&sndcounts);
1571:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1572:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1573:     PetscFree(sndcounts);
1574:   } else {
1575:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1576:   }

1578:   if (!rank) {
1579:     /* calculate the number of nonzeros on each processor */
1580:     PetscMalloc1(size,&procsnz);
1581:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1582:     for (i=0; i<size; i++) {
1583:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1584:         procsnz[i] += rowlengths[j];
1585:       }
1586:     }
1587:     PetscFree(rowlengths);

1589:     /* determine max buffer needed and allocate it */
1590:     maxnz = 0;
1591:     for (i=0; i<size; i++) {
1592:       maxnz = PetscMax(maxnz,procsnz[i]);
1593:     }
1594:     PetscMalloc1(maxnz,&cols);

1596:     /* read in my part of the matrix column indices  */
1597:     nz   = procsnz[0];
1598:     PetscMalloc1(nz,&mycols);
1599:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1601:     /* read in everyone else's part and ship it off */
1602:     for (i=1; i<size; i++) {
1603:       nz   = procsnz[i];
1604:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1605:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1606:     }
1607:     PetscFree(cols);
1608:   } else {
1609:     /* determine buffer space needed for message */
1610:     nz = 0;
1611:     for (i=0; i<m; i++) {
1612:       nz += ourlens[i];
1613:     }
1614:     PetscMalloc1(nz+1,&mycols);

1616:     /* receive message of column indices */
1617:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1618:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1619:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1620:   }

1622:   MatSetSizes(newmat,m,n,M,N);
1623:   a = (Mat_MPIDense*)newmat->data;
1624:   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1625:     MatMPIDenseSetPreallocation(newmat,NULL);
1626:   }

1628:   if (!rank) {
1629:     PetscMalloc1(maxnz,&vals);

1631:     /* read in my part of the matrix numerical values  */
1632:     nz   = procsnz[0];
1633:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);

1635:     /* insert into matrix */
1636:     jj      = rstart;
1637:     smycols = mycols;
1638:     svals   = vals;
1639:     for (i=0; i<m; i++) {
1640:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1641:       smycols += ourlens[i];
1642:       svals   += ourlens[i];
1643:       jj++;
1644:     }

1646:     /* read in the other processors' parts and ship them out */
1647:     for (i=1; i<size; i++) {
1648:       nz   = procsnz[i];
1649:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1650:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1651:     }
1652:     PetscFree(procsnz);
1653:   } else {
1654:     /* receive numeric values */
1655:     PetscMalloc1(nz+1,&vals);

1657:     /* receive message of values */
1658:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1659:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1660:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1662:     /* insert into matrix */
1663:     jj      = rstart;
1664:     smycols = mycols;
1665:     svals   = vals;
1666:     for (i=0; i<m; i++) {
1667:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1668:       smycols += ourlens[i];
1669:       svals   += ourlens[i];
1670:       jj++;
1671:     }
1672:   }
1673:   PetscFree(ourlens);
1674:   PetscFree(vals);
1675:   PetscFree(mycols);
1676:   PetscFree(rowners);

1678:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1679:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1680:   return(0);
1681: }
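/*
   A minimal usage sketch (not part of the original source), assuming a matrix file "dense.dat"
   in PETSc binary format: MatLoad() dispatches to the routine above for a parallel dense matrix,
   and to MatLoad_MPIDense_DenseInFile() when the file stores the values in dense format.

     Mat         A;
     PetscViewer viewer;

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATDENSE);            // resolves to MATMPIDENSE on more than one process
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/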

1683: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1684: {
1685:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1686:   Mat            a,b;
1687:   PetscBool      flg;

1691:   a    = matA->A;
1692:   b    = matB->A;
1693:   MatEqual(a,b,&flg);
1694:   MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1695:   return(0);
1696: }
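/*
   A minimal usage sketch (not part of the original source): the routine above compares the local
   blocks with MatEqual() on each rank and combines the per-rank results with a logical AND, so
   every rank receives the same answer.

     PetscBool equal;
     MatEqual(A,B,&equal);              // A and B are MATMPIDENSE with identical layouts
     if (!equal) PetscPrintf(PETSC_COMM_WORLD,"A and B differ\n");
*/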

1698: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1699: {
1700:   PetscErrorCode        ierr;
1701:   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1702:   Mat_TransMatMultDense *atb = a->atbdense;

1705:   PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1706:   (atb->destroy)(A);
1707:   PetscFree(atb);
1708:   return(0);
1709: }

1711: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1712: {
1713:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1714:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1715:   Mat_TransMatMultDense *atb = c->atbdense;
1717:   MPI_Comm       comm;
1718:   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1719:   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1720:   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1721:   PetscScalar    _DOne=1.0,_DZero=0.0;
1722:   PetscBLASInt   am,an,bn,aN;
1723:   const PetscInt *ranges;

1726:   PetscObjectGetComm((PetscObject)A,&comm);
1727:   MPI_Comm_rank(comm,&rank);
1728:   MPI_Comm_size(comm,&size);

1730:   /* compute atbarray = aseq^T * bseq */
1731:   PetscBLASIntCast(a->A->cmap->n,&an);
1732:   PetscBLASIntCast(b->A->cmap->n,&bn);
1733:   PetscBLASIntCast(a->A->rmap->n,&am);
1734:   PetscBLASIntCast(A->cmap->N,&aN);
1735:   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1736: 
1737:   MatGetOwnershipRanges(C,&ranges);
1738:   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1739: 
1740:   /* arrange atbarray into sendbuf */
1741:   k = 0;
1742:   for (proc=0; proc<size; proc++) {
1743:     for (j=0; j<cN; j++) {
1744:       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1745:     }
1746:   }
1747:   /* sum the atbarray contributions into the local values of C */
1748:   MatDenseGetArray(c->A,&carray);
1749:   MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1750:   MatDenseRestoreArray(c->A,&carray);
1751:   return(0);
1752: }
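/*
   A sketch (not part of the original source) of the communication step above: every rank holds a
   full cM-by-cN contribution to C = A^T * B, and MPI_Reduce_scatter() both sums those
   contributions and leaves each rank with only its own row block of C. It behaves like an
   MPI_Reduce of sendbuf to a root followed by an MPI_Scatterv of the summed result according to
   recvcounts; the two-step formulation below, with a hypothetical root buffer tmp of length cM*cN
   and displacements displs built as prefix sums of recvcounts, is equivalent:

     MPI_Reduce(sendbuf,tmp,cM*cN,MPIU_SCALAR,MPIU_SUM,0,comm);
     MPI_Scatterv(tmp,recvcounts,displs,MPIU_SCALAR,carray,recvcounts[rank],MPIU_SCALAR,0,comm);
*/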

1754: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1755: {
1756:   PetscErrorCode        ierr;
1757:   Mat                   Cdense;
1758:   MPI_Comm              comm;
1759:   PetscMPIInt           size;
1760:   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1761:   Mat_MPIDense          *c;
1762:   Mat_TransMatMultDense *atb;

1765:   PetscObjectGetComm((PetscObject)A,&comm);
1766:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1767:     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1768:   }

1770:   /* create matrix product Cdense */
1771:   MatCreate(comm,&Cdense);
1772:   MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1773:   MatSetType(Cdense,MATMPIDENSE);
1774:   MatMPIDenseSetPreallocation(Cdense,NULL);
1775:   MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
1776:   MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
1777:   *C   = Cdense;

1779:   /* create data structure for reusing Cdense */
1780:   MPI_Comm_size(comm,&size);
1781:   PetscNew(&atb);
1782:   cM = Cdense->rmap->N;
1783:   PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1784: 
1785:   c                    = (Mat_MPIDense*)Cdense->data;
1786:   c->atbdense          = atb;
1787:   atb->destroy         = Cdense->ops->destroy;
1788:   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1789:   return(0);
1790: }

1792: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1793: {

1797:   if (scall == MAT_INITIAL_MATRIX) {
1798:     MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1799:   }
1800:   MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1801:   return(0);
1802: }
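/*
   A minimal usage sketch (not part of the original source): with the symbolic and numeric kernels
   above, C = A^T * B for two MATMPIDENSE matrices with the same row layout can be formed once and
   then recomputed in place after the entries of A or B change:

     Mat C;
     MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);   // symbolic + numeric
     // ... update values of A or B ...
     MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);     // numeric only, reuses C
     MatDestroy(&C);
*/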

1804: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1805: {
1806:   PetscErrorCode   ierr;
1807:   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1808:   Mat_MatMultDense *ab = a->abdense;

1811:   MatDestroy(&ab->Ce);
1812:   MatDestroy(&ab->Ae);
1813:   MatDestroy(&ab->Be);

1815:   (ab->destroy)(A);
1816:   PetscFree(ab);
1817:   return(0);
1818: }

1820: #if defined(PETSC_HAVE_ELEMENTAL)
1821: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1822: {
1823:   PetscErrorCode   ierr;
1824:   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1825:   Mat_MatMultDense *ab=c->abdense;

1828:   MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1829:   MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1830:   MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
1831:   MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1832:   return(0);
1833: }

1835: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1836: {
1837:   PetscErrorCode   ierr;
1838:   Mat              Ae,Be,Ce;
1839:   Mat_MPIDense     *c;
1840:   Mat_MatMultDense *ab;

1843:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1844:     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1845:   }

1847:   /* convert A and B to Elemental matrices Ae and Be */
1848:   MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
1849:   MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);

1851:   /* Ce = Ae*Be */
1852:   MatMatMultSymbolic(Ae,Be,fill,&Ce);
1853:   MatMatMultNumeric(Ae,Be,Ce);
1854: 
1855:   /* convert Ce to C */
1856:   MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);

1858:   /* create data structure for reusing C */
1859:   PetscNew(&ab);
1860:   c                  = (Mat_MPIDense*)(*C)->data;
1861:   c->abdense         = ab;

1863:   ab->Ae             = Ae;
1864:   ab->Be             = Be;
1865:   ab->Ce             = Ce;
1866:   ab->destroy        = (*C)->ops->destroy;
1867:   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1868:   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1869:   return(0);
1870: }

1872: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1873: {

1877:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
1878:     PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
1879:     MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1880:     PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
1881:   } else {
1882:     PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
1883:     MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1884:     PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
1885:   }
1886:   return(0);
1887: }
1888: #endif
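/*
   A minimal usage sketch (not part of the original source): the MatMatMult path above is compiled
   only when PETSc is configured with Elemental (e.g. --download-elemental); A and B are converted
   to MATELEMENTAL, multiplied there, and the product is converted back to MATMPIDENSE:

     Mat C;
     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);   // symbolic (includes numeric) product
     MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);     // numeric product only, reuses C
     MatDestroy(&C);
*/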