Actual source code: mpidense.c

petsc-master 2019-11-13

  2: /*
  3:    Basic functions for parallel dense matrices.
  4: */


  7:  #include <../src/mat/impls/dense/mpi/mpidense.h>
  8:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
  9:  #include <petscblaslapack.h>

 11: /*@

 13:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
 14:               matrix that represents the operator. For a sequential dense matrix it returns the matrix itself.

 16:     Input Parameter:
 17: .      A - the Seq or MPI dense matrix

 19:     Output Parameter:
 20: .      B - the inner matrix

 22:     Level: intermediate

 24: @*/
 25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 26: {
 27:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 29:   PetscBool      flg;

 32:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 33:   if (flg) *B = mat->A;
 34:   else *B = A;
 35:   return(0);
 36: }
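
/*
   A minimal usage sketch for MatDenseGetLocalMatrix(), assuming a parallel dense matrix that has
   already been assembled; the names A and Aloc and the 100 x 100 size are illustrative only.
   No copy is made: Aloc is the per-process MATSEQDENSE block stored inside A (or A itself on one process).

     Mat A,Aloc;
     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDenseGetLocalMatrix(A,&Aloc);
     MatView(Aloc,PETSC_VIEWER_STDOUT_SELF);
     MatDestroy(&A);
*/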

 38: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 39: {
 40:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 42:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 45:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 46:   lrow = row - rstart;
 47:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 48:   return(0);
 49: }

 51: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 52: {

 56:   if (idx) {PetscFree(*idx);}
 57:   if (v) {PetscFree(*v);}
 58:   return(0);
 59: }

 61: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 62: {
 63:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 65:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 66:   PetscScalar    *array;
 67:   MPI_Comm       comm;
 68:   Mat            B;

 71:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 73:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 74:   if (!B) {
 75:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 76:     MatCreate(comm,&B);
 77:     MatSetSizes(B,m,m,m,m);
 78:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 79:     MatDenseGetArray(mdn->A,&array);
 80:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 81:     MatDenseRestoreArray(mdn->A,&array);
 82:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 83:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 84:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 85:     *a   = B;
 86:     MatDestroy(&B);
 87:   } else *a = B;
 88:   return(0);
 89: }

 91: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
 92: {
 93:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
 95:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
 96:   PetscBool      roworiented = A->roworiented;

 99:   for (i=0; i<m; i++) {
100:     if (idxm[i] < 0) continue;
101:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102:     if (idxm[i] >= rstart && idxm[i] < rend) {
103:       row = idxm[i] - rstart;
104:       if (roworiented) {
105:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
106:       } else {
107:         for (j=0; j<n; j++) {
108:           if (idxn[j] < 0) continue;
109:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
111:         }
112:       }
113:     } else if (!A->donotstash) {
114:       mat->assembled = PETSC_FALSE;
115:       if (roworiented) {
116:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
117:       } else {
118:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
119:       }
120:     }
121:   }
122:   return(0);
123: }

125: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126: {
127:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
129:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

132:   for (i=0; i<m; i++) {
133:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135:     if (idxm[i] >= rstart && idxm[i] < rend) {
136:       row = idxm[i] - rstart;
137:       for (j=0; j<n; j++) {
138:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
141:       }
142:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143:   }
144:   return(0);
145: }

147: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
148: {
149:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

153:   MatDenseGetLDA(a->A,lda);
154:   return(0);
155: }

157: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
158: {
159:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

163:   MatDenseGetArray(a->A,array);
164:   return(0);
165: }

167: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
168: {
169:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

173:   MatDenseGetArrayRead(a->A,array);
174:   return(0);
175: }

177: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
178: {
179:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

183:   MatDensePlaceArray(a->A,array);
184:   return(0);
185: }

187: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
188: {
189:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

193:   MatDenseResetArray(a->A);
194:   return(0);
195: }

197: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
198: {
199:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
200:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
202:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
203:   const PetscInt *irow,*icol;
204:   PetscScalar    *av,*bv,*v = lmat->v;
205:   Mat            newmat;
206:   IS             iscol_local;
207:   MPI_Comm       comm_is,comm_mat;

210:   PetscObjectGetComm((PetscObject)A,&comm_mat);
211:   PetscObjectGetComm((PetscObject)iscol,&comm_is);
212:   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

214:   ISAllGather(iscol,&iscol_local);
215:   ISGetIndices(isrow,&irow);
216:   ISGetIndices(iscol_local,&icol);
217:   ISGetLocalSize(isrow,&nrows);
218:   ISGetLocalSize(iscol,&ncols);
219:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

221:   /* No parallel redistribution currently supported! Should really check each index set
 222:      to confirm that it is OK.  ... Currently supports only a submatrix with the same partitioning as the
 223:      original matrix! */

225:   MatGetLocalSize(A,&nlrows,&nlcols);
226:   MatGetOwnershipRange(A,&rstart,&rend);

228:   /* Check submatrix call */
229:   if (scall == MAT_REUSE_MATRIX) {
230:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
231:     /* Really need to test rows and column sizes! */
232:     newmat = *B;
233:   } else {
234:     /* Create and fill new matrix */
235:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
236:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
237:     MatSetType(newmat,((PetscObject)A)->type_name);
238:     MatMPIDenseSetPreallocation(newmat,NULL);
239:   }

241:   /* Now extract the data pointers and do the copy, column at a time */
242:   newmatd = (Mat_MPIDense*)newmat->data;
243:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

245:   for (i=0; i<Ncols; i++) {
246:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
247:     for (j=0; j<nrows; j++) {
248:       *bv++ = av[irow[j] - rstart];
249:     }
250:   }

252:   /* Assemble the matrices so that the correct flags are set */
253:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
254:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

256:   /* Free work space */
257:   ISRestoreIndices(isrow,&irow);
258:   ISRestoreIndices(iscol_local,&icol);
259:   ISDestroy(&iscol_local);
260:   *B   = newmat;
261:   return(0);
262: }

264: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
265: {
266:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

270:   MatDenseRestoreArray(a->A,array);
271:   return(0);
272: }

274: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
275: {
276:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

280:   MatDenseRestoreArrayRead(a->A,array);
281:   return(0);
282: }

284: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
285: {
286:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
287:   MPI_Comm       comm;
289:   PetscInt       nstash,reallocs;
290:   InsertMode     addv;

293:   PetscObjectGetComm((PetscObject)mat,&comm);
 294:   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
295:   MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
296:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
297:   mat->insertmode = addv; /* in case this processor had no cache */

299:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
300:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
301:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
302:   return(0);
303: }

305: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
306: {
307:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
309:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
310:   PetscMPIInt    n;
311:   PetscScalar    *val;

314:   /*  wait on receives */
315:   while (1) {
316:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
317:     if (!flg) break;

319:     for (i=0; i<n;) {
320:       /* Now identify the consecutive vals belonging to the same row */
321:       for (j=i,rstart=row[j]; j<n; j++) {
322:         if (row[j] != rstart) break;
323:       }
324:       if (j < n) ncols = j-i;
325:       else       ncols = n-i;
326:       /* Now assemble all these values with a single function call */
327:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
328:       i    = j;
329:     }
330:   }
331:   MatStashScatterEnd_Private(&mat->stash);

333:   MatAssemblyBegin(mdn->A,mode);
334:   MatAssemblyEnd(mdn->A,mode);

336:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
337:     MatSetUpMultiply_MPIDense(mat);
338:   }
339:   return(0);
340: }

342: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
343: {
345:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

348:   MatZeroEntries(l->A);
349:   return(0);
350: }

352: /* the code does not do the diagonal entries correctly unless the
 353:    matrix is square and the column and row ownerships are identical.
354:    This is a BUG. The only way to fix it seems to be to access
355:    mdn->A and mdn->B directly and not through the MatZeroRows()
356:    routine.
357: */
358: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
359: {
360:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
361:   PetscErrorCode    ierr;
362:   PetscInt          i,*owners = A->rmap->range;
363:   PetscInt          *sizes,j,idx,nsends;
364:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
365:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
366:   PetscInt          *lens,*lrows,*values;
367:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
368:   MPI_Comm          comm;
369:   MPI_Request       *send_waits,*recv_waits;
370:   MPI_Status        recv_status,*send_status;
371:   PetscBool         found;
372:   const PetscScalar *xx;
373:   PetscScalar       *bb;

376:   PetscObjectGetComm((PetscObject)A,&comm);
377:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
378:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
379:   /*  first count number of contributors to each processor */
380:   PetscCalloc1(2*size,&sizes);
381:   PetscMalloc1(N+1,&owner);  /* see note*/
382:   for (i=0; i<N; i++) {
383:     idx   = rows[i];
384:     found = PETSC_FALSE;
385:     for (j=0; j<size; j++) {
386:       if (idx >= owners[j] && idx < owners[j+1]) {
387:         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
388:       }
389:     }
390:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
391:   }
392:   nsends = 0;
393:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

395:   /* inform other processors of number of messages and max length*/
396:   PetscMaxSum(comm,sizes,&nmax,&nrecvs);

398:   /* post receives:   */
399:   PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
400:   PetscMalloc1(nrecvs+1,&recv_waits);
401:   for (i=0; i<nrecvs; i++) {
402:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
403:   }

405:   /* do sends:
406:       1) starts[i] gives the starting index in svalues for stuff going to
407:          the ith processor
408:   */
409:   PetscMalloc1(N+1,&svalues);
410:   PetscMalloc1(nsends+1,&send_waits);
411:   PetscMalloc1(size+1,&starts);

413:   starts[0] = 0;
414:   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
415:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

417:   starts[0] = 0;
418:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
419:   count = 0;
420:   for (i=0; i<size; i++) {
421:     if (sizes[2*i+1]) {
422:       MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
423:     }
424:   }
425:   PetscFree(starts);

427:   base = owners[rank];

429:   /*  wait on receives */
430:   PetscMalloc2(nrecvs,&lens,nrecvs,&source);
431:   count = nrecvs;
432:   slen  = 0;
433:   while (count) {
434:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
435:     /* unpack receives into our local space */
436:     MPI_Get_count(&recv_status,MPIU_INT,&n);

438:     source[imdex] = recv_status.MPI_SOURCE;
439:     lens[imdex]   = n;
440:     slen += n;
441:     count--;
442:   }
443:   PetscFree(recv_waits);

445:   /* move the data into the send scatter */
446:   PetscMalloc1(slen+1,&lrows);
447:   count = 0;
448:   for (i=0; i<nrecvs; i++) {
449:     values = rvalues + i*nmax;
450:     for (j=0; j<lens[i]; j++) {
451:       lrows[count++] = values[j] - base;
452:     }
453:   }
454:   PetscFree(rvalues);
455:   PetscFree2(lens,source);
456:   PetscFree(owner);
457:   PetscFree(sizes);

459:   /* fix right hand side if needed */
460:   if (x && b) {
461:     VecGetArrayRead(x,&xx);
462:     VecGetArray(b,&bb);
463:     for (i=0; i<slen; i++) {
464:       bb[lrows[i]] = diag*xx[lrows[i]];
465:     }
466:     VecRestoreArrayRead(x,&xx);
467:     VecRestoreArray(b,&bb);
468:   }

470:   /* actually zap the local rows */
471:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
472:   if (diag != 0.0) {
473:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
474:     PetscInt     m   = ll->lda, i;

476:     for (i=0; i<slen; i++) {
477:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
478:     }
479:   }
480:   PetscFree(lrows);

482:   /* wait on sends */
483:   if (nsends) {
484:     PetscMalloc1(nsends,&send_status);
485:     MPI_Waitall(nsends,send_waits,send_status);
486:     PetscFree(send_status);
487:   }
488:   PetscFree(send_waits);
489:   PetscFree(svalues);
490:   return(0);
491: }

493: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
494: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
495: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
496: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

498: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
499: {
500:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

504:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
505:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
506:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
507:   return(0);
508: }

510: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
511: {
512:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

516:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
517:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
518:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
519:   return(0);
520: }

522: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
523: {
524:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
526:   PetscScalar    zero = 0.0;

529:   VecSet(yy,zero);
530:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
531:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
532:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
533:   return(0);
534: }

536: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
537: {
538:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

542:   VecCopy(yy,zz);
543:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
544:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
545:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
546:   return(0);
547: }

549: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
550: {
551:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
552:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
554:   PetscInt       len,i,n,m = A->rmap->n,radd;
555:   PetscScalar    *x,zero = 0.0;

558:   VecSet(v,zero);
559:   VecGetArray(v,&x);
560:   VecGetSize(v,&n);
561:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
562:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
563:   radd = A->rmap->rstart*m;
564:   for (i=0; i<len; i++) {
565:     x[i] = aloc->v[radd + i*m + i];
566:   }
567:   VecRestoreArray(v,&x);
568:   return(0);
569: }

571: PetscErrorCode MatDestroy_MPIDense(Mat mat)
572: {
573:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

577: #if defined(PETSC_USE_LOG)
578:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
579: #endif
580:   MatStashDestroy_Private(&mat->stash);
581:   MatDestroy(&mdn->A);
582:   VecDestroy(&mdn->lvec);
583:   VecScatterDestroy(&mdn->Mvctx);

585:   PetscFree(mat->data);
586:   PetscObjectChangeTypeName((PetscObject)mat,0);

588:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);
589:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
590:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
591:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
592:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
593:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
594:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
595: #if defined(PETSC_HAVE_ELEMENTAL)
596:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
597: #endif
598:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
599:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
600:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
601:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
602:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",NULL);
603:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);
604:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);
605:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
606:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
607:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
608:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
609:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
610:   return(0);
611: }

613: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
614: {
615:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
616:   PetscErrorCode    ierr;
617:   PetscViewerFormat format;
618:   int               fd;
619:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
620:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
621:   PetscScalar       *work,*v,*vv;
622:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

625:   if (mdn->size == 1) {
626:     MatView(mdn->A,viewer);
627:   } else {
628:     PetscViewerBinaryGetDescriptor(viewer,&fd);
629:     MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
630:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);

632:     PetscViewerGetFormat(viewer,&format);
633:     if (format == PETSC_VIEWER_NATIVE) {

635:       if (!rank) {
636:         /* store the matrix as a dense matrix */
637:         header[0] = MAT_FILE_CLASSID;
638:         header[1] = mat->rmap->N;
639:         header[2] = N;
640:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
641:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

643:         /* get largest work array needed for transposing array */
644:         mmax = mat->rmap->n;
645:         for (i=1; i<size; i++) {
646:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
647:         }
648:         PetscMalloc1(mmax*N,&work);

650:         /* write out local array, by rows */
651:         m = mat->rmap->n;
652:         v = a->v;
653:         for (j=0; j<N; j++) {
654:           for (i=0; i<m; i++) {
655:             work[j + i*N] = *v++;
656:           }
657:         }
658:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
659:         /* get largest work array to receive messages from other processes, excludes process zero */
660:         mmax = 0;
661:         for (i=1; i<size; i++) {
662:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
663:         }
664:         PetscMalloc1(mmax*N,&vv);
665:         for (k = 1; k < size; k++) {
666:           v    = vv;
667:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
668:           MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));

670:           for (j = 0; j < N; j++) {
671:             for (i = 0; i < m; i++) {
672:               work[j + i*N] = *v++;
673:             }
674:           }
675:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
676:         }
677:         PetscFree(work);
678:         PetscFree(vv);
679:       } else {
680:         MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
681:       }
682:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
683:   }
684:   return(0);
685: }

687: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

689:  #include <petscdraw.h>
690: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
691: {
692:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
693:   PetscErrorCode    ierr;
694:   PetscMPIInt       rank = mdn->rank;
695:   PetscViewerType   vtype;
696:   PetscBool         iascii,isdraw;
697:   PetscViewer       sviewer;
698:   PetscViewerFormat format;

701:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
702:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
703:   if (iascii) {
704:     PetscViewerGetType(viewer,&vtype);
705:     PetscViewerGetFormat(viewer,&format);
706:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
707:       MatInfo info;
708:       MatGetInfo(mat,MAT_LOCAL,&info);
709:       PetscViewerASCIIPushSynchronized(viewer);
710:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
711:       PetscViewerFlush(viewer);
712:       PetscViewerASCIIPopSynchronized(viewer);
713:       VecScatterView(mdn->Mvctx,viewer);
714:       return(0);
715:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
716:       return(0);
717:     }
718:   } else if (isdraw) {
719:     PetscDraw draw;
720:     PetscBool isnull;

722:     PetscViewerDrawGetDraw(viewer,0,&draw);
723:     PetscDrawIsNull(draw,&isnull);
724:     if (isnull) return(0);
725:   }

727:   {
728:     /* assemble the entire matrix onto first processor. */
729:     Mat         A;
730:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
731:     PetscInt    *cols;
732:     PetscScalar *vals;

734:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
735:     if (!rank) {
736:       MatSetSizes(A,M,N,M,N);
737:     } else {
738:       MatSetSizes(A,0,0,M,N);
739:     }
740:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
741:     MatSetType(A,MATMPIDENSE);
742:     MatMPIDenseSetPreallocation(A,NULL);
743:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

745:     /* Copy the matrix ... This isn't the most efficient means,
746:        but it's quick for now */
747:     A->insertmode = INSERT_VALUES;

749:     row = mat->rmap->rstart;
750:     m   = mdn->A->rmap->n;
751:     for (i=0; i<m; i++) {
752:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
753:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
754:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
755:       row++;
756:     }

758:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
759:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
760:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
761:     if (!rank) {
762:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
763:       MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
764:     }
765:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
766:     PetscViewerFlush(viewer);
767:     MatDestroy(&A);
768:   }
769:   return(0);
770: }

772: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
773: {
775:   PetscBool      iascii,isbinary,isdraw,issocket;

778:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
779:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
780:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
781:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

783:   if (iascii || issocket || isdraw) {
784:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
785:   } else if (isbinary) {
786:     MatView_MPIDense_Binary(mat,viewer);
787:   }
788:   return(0);
789: }

791: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
792: {
793:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
794:   Mat            mdn  = mat->A;
796:   PetscLogDouble isend[5],irecv[5];

799:   info->block_size = 1.0;

801:   MatGetInfo(mdn,MAT_LOCAL,info);

803:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
804:   isend[3] = info->memory;  isend[4] = info->mallocs;
805:   if (flag == MAT_LOCAL) {
806:     info->nz_used      = isend[0];
807:     info->nz_allocated = isend[1];
808:     info->nz_unneeded  = isend[2];
809:     info->memory       = isend[3];
810:     info->mallocs      = isend[4];
811:   } else if (flag == MAT_GLOBAL_MAX) {
812:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));

814:     info->nz_used      = irecv[0];
815:     info->nz_allocated = irecv[1];
816:     info->nz_unneeded  = irecv[2];
817:     info->memory       = irecv[3];
818:     info->mallocs      = irecv[4];
819:   } else if (flag == MAT_GLOBAL_SUM) {
820:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));

822:     info->nz_used      = irecv[0];
823:     info->nz_allocated = irecv[1];
824:     info->nz_unneeded  = irecv[2];
825:     info->memory       = irecv[3];
826:     info->mallocs      = irecv[4];
827:   }
828:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
829:   info->fill_ratio_needed = 0;
830:   info->factor_mallocs    = 0;
831:   return(0);
832: }

834: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
835: {
836:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

840:   switch (op) {
841:   case MAT_NEW_NONZERO_LOCATIONS:
842:   case MAT_NEW_NONZERO_LOCATION_ERR:
843:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
844:     MatCheckPreallocated(A,1);
845:     MatSetOption(a->A,op,flg);
846:     break;
847:   case MAT_ROW_ORIENTED:
848:     MatCheckPreallocated(A,1);
849:     a->roworiented = flg;
850:     MatSetOption(a->A,op,flg);
851:     break;
852:   case MAT_NEW_DIAGONALS:
853:   case MAT_KEEP_NONZERO_PATTERN:
854:   case MAT_USE_HASH_TABLE:
855:   case MAT_SORTED_FULL:
856:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
857:     break;
858:   case MAT_IGNORE_OFF_PROC_ENTRIES:
859:     a->donotstash = flg;
860:     break;
861:   case MAT_SYMMETRIC:
862:   case MAT_STRUCTURALLY_SYMMETRIC:
863:   case MAT_HERMITIAN:
864:   case MAT_SYMMETRY_ETERNAL:
865:   case MAT_IGNORE_LOWER_TRIANGULAR:
866:   case MAT_IGNORE_ZERO_ENTRIES:
867:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
868:     break;
869:   default:
870:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
871:   }
872:   return(0);
873: }


876: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
877: {
878:   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
879:   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
880:   const PetscScalar *l,*r;
881:   PetscScalar       x,*v;
882:   PetscErrorCode    ierr;
883:   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

886:   MatGetLocalSize(A,&s2,&s3);
887:   if (ll) {
888:     VecGetLocalSize(ll,&s2a);
889:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
890:     VecGetArrayRead(ll,&l);
891:     for (i=0; i<m; i++) {
892:       x = l[i];
893:       v = mat->v + i;
894:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
895:     }
896:     VecRestoreArrayRead(ll,&l);
897:     PetscLogFlops(n*m);
898:   }
899:   if (rr) {
900:     VecGetLocalSize(rr,&s3a);
901:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
902:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
903:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
904:     VecGetArrayRead(mdn->lvec,&r);
905:     for (i=0; i<n; i++) {
906:       x = r[i];
907:       v = mat->v + i*m;
908:       for (j=0; j<m; j++) (*v++) *= x;
909:     }
910:     VecRestoreArrayRead(mdn->lvec,&r);
911:     PetscLogFlops(n*m);
912:   }
913:   return(0);
914: }

916: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
917: {
918:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
919:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
921:   PetscInt       i,j;
922:   PetscReal      sum = 0.0;
923:   PetscScalar    *v  = mat->v;

926:   if (mdn->size == 1) {
927:      MatNorm(mdn->A,type,nrm);
928:   } else {
929:     if (type == NORM_FROBENIUS) {
930:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
931:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
932:       }
933:       MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
934:       *nrm = PetscSqrtReal(*nrm);
935:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
936:     } else if (type == NORM_1) {
937:       PetscReal *tmp,*tmp2;
938:       PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
939:       *nrm = 0.0;
940:       v    = mat->v;
941:       for (j=0; j<mdn->A->cmap->n; j++) {
942:         for (i=0; i<mdn->A->rmap->n; i++) {
943:           tmp[j] += PetscAbsScalar(*v);  v++;
944:         }
945:       }
946:       MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
947:       for (j=0; j<A->cmap->N; j++) {
948:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
949:       }
950:       PetscFree2(tmp,tmp2);
951:       PetscLogFlops(A->cmap->n*A->rmap->n);
952:     } else if (type == NORM_INFINITY) { /* max row norm */
953:       PetscReal ntemp;
954:       MatNorm(mdn->A,type,&ntemp);
955:       MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
956:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
957:   }
958:   return(0);
959: }

961: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
962: {
963:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
964:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
965:   Mat            B;
966:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
968:   PetscInt       j,i;
969:   PetscScalar    *v;

972:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
973:     MatCreate(PetscObjectComm((PetscObject)A),&B);
974:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
975:     MatSetType(B,((PetscObject)A)->type_name);
976:     MatMPIDenseSetPreallocation(B,NULL);
977:   } else {
978:     B = *matout;
979:   }

981:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
982:   PetscMalloc1(m,&rwork);
983:   for (i=0; i<m; i++) rwork[i] = rstart + i;
984:   for (j=0; j<n; j++) {
985:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
986:     v   += m;
987:   }
988:   PetscFree(rwork);
989:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
990:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
991:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
992:     *matout = B;
993:   } else {
994:     MatHeaderMerge(A,&B);
995:   }
996:   return(0);
997: }

999: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
1000: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

1002: PetscErrorCode MatSetUp_MPIDense(Mat A)
1003: {

1007:   MatMPIDenseSetPreallocation(A,0);
1008:   return(0);
1009: }

1011: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1012: {
1014:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

1017:   MatAXPY(A->A,alpha,B->A,str);
1018:   PetscObjectStateIncrease((PetscObject)Y);
1019:   return(0);
1020: }

1022: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1023: {
1024:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

1028:   MatConjugate(a->A);
1029:   return(0);
1030: }

1032: PetscErrorCode MatRealPart_MPIDense(Mat A)
1033: {
1034:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1038:   MatRealPart(a->A);
1039:   return(0);
1040: }

1042: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1043: {
1044:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1048:   MatImaginaryPart(a->A);
1049:   return(0);
1050: }

1052: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1053: {
1054:   PetscErrorCode    ierr;
1055:   PetscScalar       *x;
1056:   const PetscScalar *a;
1057:   PetscInt          lda;

1060:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1061:   MatDenseGetLDA(A,&lda);
1062:   MatDenseGetArrayRead(A,&a);
1063:   VecGetArray(v,&x);
1064:   PetscArraycpy(x,a+col*lda,A->rmap->n);
1065:   VecRestoreArray(v,&x);
1066:   MatDenseRestoreArrayRead(A,&a);
1067:   return(0);
1068: }

1070: PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

1072: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1073: {
1075:   PetscInt       i,n;
1076:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1077:   PetscReal      *work;

1080:   MatGetSize(A,NULL,&n);
1081:   PetscMalloc1(n,&work);
1082:   MatGetColumnNorms_SeqDense(a->A,type,work);
1083:   if (type == NORM_2) {
1084:     for (i=0; i<n; i++) work[i] *= work[i];
1085:   }
1086:   if (type == NORM_INFINITY) {
1087:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1088:   } else {
1089:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1090:   }
1091:   PetscFree(work);
1092:   if (type == NORM_2) {
1093:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1094:   }
1095:   return(0);
1096: }

1098: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1099: {
1100:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1102:   PetscScalar    *a;
1103:   PetscInt       m,n,i;

1106:   MatGetSize(d->A,&m,&n);
1107:   MatDenseGetArray(d->A,&a);
1108:   for (i=0; i<m*n; i++) {
1109:     PetscRandomGetValue(rctx,a+i);
1110:   }
1111:   MatDenseRestoreArray(d->A,&a);
1112:   return(0);
1113: }

1115: PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

1117: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1118: {
1120:   *missing = PETSC_FALSE;
1121:   return(0);
1122: }

1124: static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1125: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1126: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

1128: /* -------------------------------------------------------------------*/
1129: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1130:                                         MatGetRow_MPIDense,
1131:                                         MatRestoreRow_MPIDense,
1132:                                         MatMult_MPIDense,
1133:                                 /*  4*/ MatMultAdd_MPIDense,
1134:                                         MatMultTranspose_MPIDense,
1135:                                         MatMultTransposeAdd_MPIDense,
1136:                                         0,
1137:                                         0,
1138:                                         0,
1139:                                 /* 10*/ 0,
1140:                                         0,
1141:                                         0,
1142:                                         0,
1143:                                         MatTranspose_MPIDense,
1144:                                 /* 15*/ MatGetInfo_MPIDense,
1145:                                         MatEqual_MPIDense,
1146:                                         MatGetDiagonal_MPIDense,
1147:                                         MatDiagonalScale_MPIDense,
1148:                                         MatNorm_MPIDense,
1149:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1150:                                         MatAssemblyEnd_MPIDense,
1151:                                         MatSetOption_MPIDense,
1152:                                         MatZeroEntries_MPIDense,
1153:                                 /* 24*/ MatZeroRows_MPIDense,
1154:                                         0,
1155:                                         0,
1156:                                         0,
1157:                                         0,
1158:                                 /* 29*/ MatSetUp_MPIDense,
1159:                                         0,
1160:                                         0,
1161:                                         MatGetDiagonalBlock_MPIDense,
1162:                                         0,
1163:                                 /* 34*/ MatDuplicate_MPIDense,
1164:                                         0,
1165:                                         0,
1166:                                         0,
1167:                                         0,
1168:                                 /* 39*/ MatAXPY_MPIDense,
1169:                                         MatCreateSubMatrices_MPIDense,
1170:                                         0,
1171:                                         MatGetValues_MPIDense,
1172:                                         0,
1173:                                 /* 44*/ 0,
1174:                                         MatScale_MPIDense,
1175:                                         MatShift_Basic,
1176:                                         0,
1177:                                         0,
1178:                                 /* 49*/ MatSetRandom_MPIDense,
1179:                                         0,
1180:                                         0,
1181:                                         0,
1182:                                         0,
1183:                                 /* 54*/ 0,
1184:                                         0,
1185:                                         0,
1186:                                         0,
1187:                                         0,
1188:                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1189:                                         MatDestroy_MPIDense,
1190:                                         MatView_MPIDense,
1191:                                         0,
1192:                                         0,
1193:                                 /* 64*/ 0,
1194:                                         0,
1195:                                         0,
1196:                                         0,
1197:                                         0,
1198:                                 /* 69*/ 0,
1199:                                         0,
1200:                                         0,
1201:                                         0,
1202:                                         0,
1203:                                 /* 74*/ 0,
1204:                                         0,
1205:                                         0,
1206:                                         0,
1207:                                         0,
1208:                                 /* 79*/ 0,
1209:                                         0,
1210:                                         0,
1211:                                         0,
1212:                                 /* 83*/ MatLoad_MPIDense,
1213:                                         0,
1214:                                         0,
1215:                                         0,
1216:                                         0,
1217:                                         0,
1218: #if defined(PETSC_HAVE_ELEMENTAL)
1219:                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1220:                                         MatMatMultSymbolic_MPIDense_MPIDense,
1221: #else
1222:                                 /* 89*/ 0,
1223:                                         0,
1224: #endif
1225:                                         MatMatMultNumeric_MPIDense,
1226:                                         0,
1227:                                         0,
1228:                                 /* 94*/ 0,
1229:                                         MatMatTransposeMult_MPIDense_MPIDense,
1230:                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1231:                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1232:                                         0,
1233:                                 /* 99*/ 0,
1234:                                         0,
1235:                                         0,
1236:                                         MatConjugate_MPIDense,
1237:                                         0,
1238:                                 /*104*/ 0,
1239:                                         MatRealPart_MPIDense,
1240:                                         MatImaginaryPart_MPIDense,
1241:                                         0,
1242:                                         0,
1243:                                 /*109*/ 0,
1244:                                         0,
1245:                                         0,
1246:                                         MatGetColumnVector_MPIDense,
1247:                                         MatMissingDiagonal_MPIDense,
1248:                                 /*114*/ 0,
1249:                                         0,
1250:                                         0,
1251:                                         0,
1252:                                         0,
1253:                                 /*119*/ 0,
1254:                                         0,
1255:                                         0,
1256:                                         0,
1257:                                         0,
1258:                                 /*124*/ 0,
1259:                                         MatGetColumnNorms_MPIDense,
1260:                                         0,
1261:                                         0,
1262:                                         0,
1263:                                 /*129*/ 0,
1264:                                         MatTransposeMatMult_MPIDense_MPIDense,
1265:                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1266:                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1267:                                         0,
1268:                                 /*134*/ 0,
1269:                                         0,
1270:                                         0,
1271:                                         0,
1272:                                         0,
1273:                                 /*139*/ 0,
1274:                                         0,
1275:                                         0,
1276:                                         0,
1277:                                         0,
1278:                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1279: };

1281: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1282: {
1283:   Mat_MPIDense   *a;

1287:   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1288:   mat->preallocated = PETSC_TRUE;
1289:   /* Note:  For now, when data is specified above, this assumes the user correctly
1290:    allocates the local dense storage space.  We should add error checking. */

1292:   a       = (Mat_MPIDense*)mat->data;
1293:   PetscLayoutSetUp(mat->rmap);
1294:   PetscLayoutSetUp(mat->cmap);
1295:   a->nvec = mat->cmap->n;

1297:   MatCreate(PETSC_COMM_SELF,&a->A);
1298:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1299:   MatSetType(a->A,MATSEQDENSE);
1300:   MatSeqDenseSetPreallocation(a->A,data);
1301:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1302:   return(0);
1303: }

1305: #if defined(PETSC_HAVE_ELEMENTAL)
1306: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1307: {
1308:   Mat            mat_elemental;
1310:   PetscScalar    *v;
1311:   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1312: 
1314:   if (reuse == MAT_REUSE_MATRIX) {
1315:     mat_elemental = *newmat;
1316:     MatZeroEntries(*newmat);
1317:   } else {
1318:     MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1319:     MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1320:     MatSetType(mat_elemental,MATELEMENTAL);
1321:     MatSetUp(mat_elemental);
1322:     MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1323:   }

1325:   PetscMalloc2(m,&rows,N,&cols);
1326:   for (i=0; i<N; i++) cols[i] = i;
1327:   for (i=0; i<m; i++) rows[i] = rstart + i;
1328: 
1329:   /* The PETSc-Elemental interface uses axpy for setting off-processor entries; only ADD_VALUES is allowed */
1330:   MatDenseGetArray(A,&v);
1331:   MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1332:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1333:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1334:   MatDenseRestoreArray(A,&v);
1335:   PetscFree2(rows,cols);

1337:   if (reuse == MAT_INPLACE_MATRIX) {
1338:     MatHeaderReplace(A,&mat_elemental);
1339:   } else {
1340:     *newmat = mat_elemental;
1341:   }
1342:   return(0);
1343: }
1344: #endif

1346: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1347: {
1348:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1352:   MatDenseGetColumn(mat->A,col,vals);
1353:   return(0);
1354: }

1356: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1357: {
1358:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1362:   MatDenseRestoreColumn(mat->A,vals);
1363:   return(0);
1364: }

1366: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1367: {
1369:   Mat_MPIDense   *mat;
1370:   PetscInt       m,nloc,N;

1373:   MatGetSize(inmat,&m,&N);
1374:   MatGetLocalSize(inmat,NULL,&nloc);
1375:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1376:     PetscInt sum;

1378:     if (n == PETSC_DECIDE) {
1379:       PetscSplitOwnership(comm,&n,&N);
1380:     }
1381:     /* Check sum(n) = N */
1382:     MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1383:     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

1385:     MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1386:   }

1388:   /* numeric phase */
1389:   mat = (Mat_MPIDense*)(*outmat)->data;
1390:   MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1391:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1392:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1393:   return(0);
1394: }

1396: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1397: {
1398:   Mat_MPIDense   *a;

1402:   PetscNewLog(mat,&a);
1403:   mat->data = (void*)a;
1404:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1406:   mat->insertmode = NOT_SET_VALUES;
1407:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1408:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

1410:   /* build cache for off-process entries formed during assembly */
1411:   a->donotstash = PETSC_FALSE;

1413:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1415:   /* stuff used for matrix vector multiply */
1416:   a->lvec        = 0;
1417:   a->Mvctx       = 0;
1418:   a->roworiented = PETSC_TRUE;

1420:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);
1421:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1422:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1423:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1424:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1425:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1426:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1427: #if defined(PETSC_HAVE_ELEMENTAL)
1428:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1429: #endif
1430:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1431:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1432:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1433:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1434:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",MatMatMult_Nest_Dense);
1435:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);
1436:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);

1438:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1439:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1440:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1441:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1442:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1443:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1444:   return(0);
1445: }

1447: /*MC
1448:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1450:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1451:    and MATMPIDENSE otherwise.

1453:    Options Database Keys:
1454: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1456:   Level: beginner


1459: .seealso: MATSEQDENSE,MATMPIDENSE
1460: M*/
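
/*
   A small usage sketch for selecting this type from the options database; the matrix name A and
   the 100 x 100 global size are illustrative. Run with -mat_type dense; on one process the matrix
   becomes MATSEQDENSE, on several processes it becomes MATMPIDENSE.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetFromOptions(A);
     MatSetUp(A);
*/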

1462: /*@C
1463:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1465:    Not collective

1467:    Input Parameters:
1468: +  B - the matrix
1469: -  data - optional location of matrix data.  Set data=NULL for PETSc
1470:    to control all matrix memory allocation.

1472:    Notes:
1473:    The dense format is fully compatible with standard Fortran 77
1474:    storage by columns.

1476:    The data input variable is intended primarily for Fortran programmers
1477:    who wish to allocate their own matrix memory space.  Most users should
1478:    set data=NULL.

1480:    Level: intermediate

1482: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1483: @*/
1484: PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1485: {

1489:   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1490:   return(0);
1491: }
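
/*
   A minimal sketch of preallocating a MATMPIDENSE matrix with PETSc-managed storage (data = NULL);
   the name B and the 100 x 100 size are illustrative. PETSc then allocates the local block of rows
   as a column-major array.

     Mat B;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(B,MATMPIDENSE);
     MatMPIDenseSetPreallocation(B,NULL);
*/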

1493: /*@
1494:    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1495:    array provided by the user. This is useful to avoid copying an array
1496:    into a matrix.

1498:    Not Collective

1500:    Input Parameters:
1501: +  mat - the matrix
1502: -  array - the array in column major order

1504:    Notes:
1505:    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1506:    freed when the matrix is destroyed.

1508:    Level: developer

1510: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1512: @*/
1513: PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1514: {
1517:   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1518:   PetscObjectStateIncrease((PetscObject)mat);
1519:   return(0);
1520: }

1522: /*@
1523:    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()

1525:    Not Collective

1527:    Input Parameters:
1528: .  mat - the matrix

1530:    Notes:
1531:    You can only call this after a call to MatDensePlaceArray()

1533:    Level: developer

1535: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1537: @*/
1538: PetscErrorCode  MatDenseResetArray(Mat mat)
1539: {
1542:   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1543:   PetscObjectStateIncrease((PetscObject)mat);
1544:   return(0);
1545: }

1547: /*@C
1548:    MatCreateDense - Creates a parallel matrix in dense format.

1550:    Collective

1552:    Input Parameters:
1553: +  comm - MPI communicator
1554: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1555: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1556: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1557: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1558: -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1559:    to control all matrix memory allocation.

1561:    Output Parameter:
1562: .  A - the matrix

1564:    Notes:
1565:    The dense format is fully compatible with standard Fortran 77
1566:    storage by columns.

1568:    The data input variable is intended primarily for Fortran programmers
1569:    who wish to allocate their own matrix memory space.  Most users should
1570:    set data=NULL (PETSC_NULL_SCALAR for Fortran users).

1572:    The user MUST specify either the local or global matrix dimensions
1573:    (possibly both).

1575:    Level: intermediate

1577: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1578: @*/
1579: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1580: {
1582:   PetscMPIInt    size;

1585:   MatCreate(comm,A);
1586:   MatSetSizes(*A,m,n,M,N);
1587:   MPI_Comm_size(comm,&size);
1588:   if (size > 1) {
1589:     MatSetType(*A,MATMPIDENSE);
1590:     MatMPIDenseSetPreallocation(*A,data);
1591:     if (data) {  /* user provided data array, so no need to assemble */
1592:       MatSetUpMultiply_MPIDense(*A);
1593:       (*A)->assembled = PETSC_TRUE;
1594:     }
1595:   } else {
1596:     MatSetType(*A,MATSEQDENSE);
1597:     MatSeqDenseSetPreallocation(*A,data);
1598:   }
1599:   return(0);
1600: }
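/*
   A minimal usage sketch (not part of the original source; M and N are assumed to be set by the
   caller). PETSc chooses the local sizes and allocates the storage:

      Mat A;
      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
      ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
      MatDestroy(&A);
*/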

1602: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1603: {
1604:   Mat            mat;
1605:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1609:   *newmat = 0;
1610:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1611:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1612:   MatSetType(mat,((PetscObject)A)->type_name);
1613:   a       = (Mat_MPIDense*)mat->data;

1615:   mat->factortype   = A->factortype;
1616:   mat->assembled    = PETSC_TRUE;
1617:   mat->preallocated = PETSC_TRUE;

1619:   a->size         = oldmat->size;
1620:   a->rank         = oldmat->rank;
1621:   mat->insertmode = NOT_SET_VALUES;
1622:   a->nvec         = oldmat->nvec;
1623:   a->donotstash   = oldmat->donotstash;

1625:   PetscLayoutReference(A->rmap,&mat->rmap);
1626:   PetscLayoutReference(A->cmap,&mat->cmap);

1628:   MatSetUpMultiply_MPIDense(mat);
1629:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1630:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);

1632:   *newmat = mat;
1633:   return(0);
1634: }
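/*
   MatLoad_MPIDense_DenseInFile: rank 0 reads the dense values from the binary file one process row
   block at a time (the file stores the matrix by rows) and ships each remote block with
   MPIULong_Send(); every process then transposes its block into the column-major local array.
*/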

1636: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1637: {
1639:   PetscMPIInt    rank,size;
1640:   const PetscInt *rowners;
1641:   PetscInt       i,m,n,nz,j,mMax;
1642:   PetscScalar    *array,*vals,*vals_ptr;
1643:   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

1646:   MPI_Comm_rank(comm,&rank);
1647:   MPI_Comm_size(comm,&size);

1649:   /* determine ownership of rows and columns */
1650:   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1651:   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

1653:   MatSetSizes(newmat,m,n,M,N);
1654:   if (!a->A) {
1655:     MatMPIDenseSetPreallocation(newmat,NULL);
1656:   }
1657:   MatDenseGetArray(newmat,&array);
1658:   MatGetLocalSize(newmat,&m,NULL);
1659:   MatGetOwnershipRanges(newmat,&rowners);
1660:   MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1661:   if (!rank) {
1662:     PetscMalloc1(mMax*N,&vals);

1664:     /* read in my part of the matrix numerical values  */
1665:     PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);

1667:     /* insert into the matrix by row (this is why we cannot read directly into array) */
1668:     vals_ptr = vals;
1669:     for (i=0; i<m; i++) {
1670:       for (j=0; j<N; j++) {
1671:         array[i + j*m] = *vals_ptr++;
1672:       }
1673:     }

1675:     /* read in the other processes' parts and ship them out */
1676:     for (i=1; i<size; i++) {
1677:       nz   = (rowners[i+1] - rowners[i])*N;
1678:       PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
1679:       MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1680:     }
1681:   } else {
1682:     /* receive numeric values */
1683:     PetscMalloc1(m*N,&vals);

1685:     /* receive message of values */
1686:     MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);

1688:     /* insert into the matrix by row (this is why we cannot read directly into array) */
1689:     vals_ptr = vals;
1690:     for (i=0; i<m; i++) {
1691:       for (j=0; j<N; j++) {
1692:         array[i + j*m] = *vals_ptr++;
1693:       }
1694:     }
1695:   }
1696:   MatDenseRestoreArray(newmat,&array);
1697:   PetscFree(vals);
1698:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1699:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1700:   return(0);
1701: }
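/*
   MatLoad_MPIDense_Binary: rank 0 reads the binary header, row lengths, column indices and numerical
   values and scatters/sends each process its share; entries are then inserted with MatSetValues() and
   the matrix is assembled. Files written in the dense format (nz == MATRIX_BINARY_FORMAT_DENSE) are
   handled by MatLoad_MPIDense_DenseInFile() instead.
*/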

1703: static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
1704: {
1705:   Mat_MPIDense   *a;
1706:   PetscScalar    *vals,*svals;
1707:   MPI_Comm       comm;
1708:   MPI_Status     status;
1709:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1710:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1711:   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1712:   PetscInt       i,nz,j,rstart,rend;
1713:   int            fd;

1717:   PetscObjectGetComm((PetscObject)viewer,&comm);
1718:   MPI_Comm_size(comm,&size);
1719:   MPI_Comm_rank(comm,&rank);
1720:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1721:   if (!rank) {
1722:     PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);
1723:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1724:   }
1725:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1726:   M    = header[1]; N = header[2]; nz = header[3];

1728:   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1729:   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1730:   if (newmat->cmap->N < 0) newmat->cmap->N = N;

1732:   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1733:   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

1735:   /*
1736:        Handle case where matrix is stored on disk as a dense matrix
1737:   */
1738:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1739:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1740:     return(0);
1741:   }

1743:   /* determine ownership of all rows */
1744:   if (newmat->rmap->n < 0) {
1745:     PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1746:   } else {
1747:     PetscMPIIntCast(newmat->rmap->n,&m);
1748:   }
1749:   if (newmat->cmap->n < 0) {
1750:     n = PETSC_DECIDE;
1751:   } else {
1752:     PetscMPIIntCast(newmat->cmap->n,&n);
1753:   }

1755:   PetscMalloc1(size+2,&rowners);
1756:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1757:   rowners[0] = 0;
1758:   for (i=2; i<=size; i++) {
1759:     rowners[i] += rowners[i-1];
1760:   }
1761:   rstart = rowners[rank];
1762:   rend   = rowners[rank+1];

1764:   /* distribute row lengths to all processors */
1765:   PetscMalloc1(rend-rstart,&ourlens);
1766:   if (!rank) {
1767:     PetscMalloc1(M,&rowlengths);
1768:     PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);
1769:     PetscMalloc1(size,&sndcounts);
1770:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1771:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1772:     PetscFree(sndcounts);
1773:   } else {
1774:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1775:   }

1777:   if (!rank) {
1778:     /* calculate the number of nonzeros on each processor */
1779:     PetscCalloc1(size,&procsnz);
1780:     for (i=0; i<size; i++) {
1781:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1782:         procsnz[i] += rowlengths[j];
1783:       }
1784:     }
1785:     PetscFree(rowlengths);

1787:     /* determine max buffer needed and allocate it */
1788:     maxnz = 0;
1789:     for (i=0; i<size; i++) {
1790:       maxnz = PetscMax(maxnz,procsnz[i]);
1791:     }
1792:     PetscMalloc1(maxnz,&cols);

1794:     /* read in my part of the matrix column indices  */
1795:     nz   = procsnz[0];
1796:     PetscMalloc1(nz,&mycols);
1797:     PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);

1799:     /* read in everyone else's and ship it off */
1800:     for (i=1; i<size; i++) {
1801:       nz   = procsnz[i];
1802:       PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);
1803:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1804:     }
1805:     PetscFree(cols);
1806:   } else {
1807:     /* determine buffer space needed for message */
1808:     nz = 0;
1809:     for (i=0; i<m; i++) {
1810:       nz += ourlens[i];
1811:     }
1812:     PetscMalloc1(nz+1,&mycols);

1814:     /* receive message of column indices */
1815:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1816:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1817:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1818:   }

1820:   MatSetSizes(newmat,m,n,M,N);
1821:   a = (Mat_MPIDense*)newmat->data;
1822:   if (!a->A) {
1823:     MatMPIDenseSetPreallocation(newmat,NULL);
1824:   }

1826:   if (!rank) {
1827:     PetscMalloc1(maxnz,&vals);

1829:     /* read in my part of the matrix numerical values  */
1830:     nz   = procsnz[0];
1831:     PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);

1833:     /* insert into matrix */
1834:     jj      = rstart;
1835:     smycols = mycols;
1836:     svals   = vals;
1837:     for (i=0; i<m; i++) {
1838:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1839:       smycols += ourlens[i];
1840:       svals   += ourlens[i];
1841:       jj++;
1842:     }

1844:     /* read in the other processes' parts and ship them out */
1845:     for (i=1; i<size; i++) {
1846:       nz   = procsnz[i];
1847:       PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
1848:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1849:     }
1850:     PetscFree(procsnz);
1851:   } else {
1852:     /* receive numeric values */
1853:     PetscMalloc1(nz+1,&vals);

1855:     /* receive message of values */
1856:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1857:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1858:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1860:     /* insert into matrix */
1861:     jj      = rstart;
1862:     smycols = mycols;
1863:     svals   = vals;
1864:     for (i=0; i<m; i++) {
1865:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1866:       smycols += ourlens[i];
1867:       svals   += ourlens[i];
1868:       jj++;
1869:     }
1870:   }
1871:   PetscFree(ourlens);
1872:   PetscFree(vals);
1873:   PetscFree(mycols);
1874:   PetscFree(rowners);

1876:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1877:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1878:   return(0);
1879: }

1881: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1882: {
1884:   PetscBool      isbinary;
1885: #if defined(PETSC_HAVE_HDF5)
1886:   PetscBool      ishdf5;
1887: #endif

1892:   /* force binary viewer to load .info file if it has not yet done so */
1893:   PetscViewerSetUp(viewer);
1894:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1895: #if defined(PETSC_HAVE_HDF5)
1896:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);
1897: #endif
1898:   if (isbinary) {
1899:     MatLoad_MPIDense_Binary(newMat,viewer);
1900: #if defined(PETSC_HAVE_HDF5)
1901:   } else if (ishdf5) {
1902:     MatLoad_Dense_HDF5(newMat,viewer);
1903: #endif
1904:   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1905:   return(0);
1906: }

1908: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1909: {
1910:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1911:   Mat            a,b;
1912:   PetscBool      flg;

1916:   a    = matA->A;
1917:   b    = matB->A;
1918:   MatEqual(a,b,&flg);
1919:   MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1920:   return(0);
1921: }

1923: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1924: {
1925:   PetscErrorCode        ierr;
1926:   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1927:   Mat_TransMatMultDense *atb = a->atbdense;

1930:   PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1931:   (atb->destroy)(A);
1932:   PetscFree(atb);
1933:   return(0);
1934: }

1936: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1937: {
1938:   PetscErrorCode        ierr;
1939:   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1940:   Mat_MatTransMultDense *abt = a->abtdense;

1943:   PetscFree2(abt->buf[0],abt->buf[1]);
1944:   PetscFree2(abt->recvcounts,abt->recvdispls);
1945:   (abt->destroy)(A);
1946:   PetscFree(abt);
1947:   return(0);
1948: }

1950: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1951: {
1952:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1953:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1954:   Mat_TransMatMultDense *atb = c->atbdense;
1956:   MPI_Comm       comm;
1957:   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1958:   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1959:   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1960:   PetscScalar    _DOne=1.0,_DZero=0.0;
1961:   PetscBLASInt   am,an,bn,aN;
1962:   const PetscInt *ranges;

1965:   PetscObjectGetComm((PetscObject)A,&comm);
1966:   MPI_Comm_rank(comm,&rank);
1967:   MPI_Comm_size(comm,&size);
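  /* Each process computes its full cM x cN contribution (A_loc)^T * B_loc with one GEMM, packs it into
     sendbuf ordered by destination process, and MPI_Reduce_scatter() then both sums the contributions
     over all processes and leaves each process with only its owned row block of C. */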

1969:   /* compute atbarray = aseq^T * bseq */
1970:   PetscBLASIntCast(a->A->cmap->n,&an);
1971:   PetscBLASIntCast(b->A->cmap->n,&bn);
1972:   PetscBLASIntCast(a->A->rmap->n,&am);
1973:   PetscBLASIntCast(A->cmap->N,&aN);
1974:   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

1976:   MatGetOwnershipRanges(C,&ranges);
1977:   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

1979:   /* pack atbarray into sendbuf, ordered by destination process */
1980:   k = 0;
1981:   for (proc=0; proc<size; proc++) {
1982:     for (j=0; j<cN; j++) {
1983:       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1984:     }
1985:   }
1986:   /* sum the atbarray contributions into the local values of C */
1987:   MatDenseGetArray(c->A,&carray);
1988:   MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1989:   MatDenseRestoreArray(c->A,&carray);
1990:   return(0);
1991: }

1993: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1994: {
1995:   PetscErrorCode        ierr;
1996:   Mat                   Cdense;
1997:   MPI_Comm              comm;
1998:   PetscMPIInt           size;
1999:   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2000:   Mat_MPIDense          *c;
2001:   Mat_TransMatMultDense *atb;

2004:   PetscObjectGetComm((PetscObject)A,&comm);
2005:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2006:     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2007:   }

2009:   /* create matrix product Cdense */
2010:   MatCreate(comm,&Cdense);
2011:   MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
2012:   MatSetType(Cdense,MATMPIDENSE);
2013:   MatMPIDenseSetPreallocation(Cdense,NULL);
2014:   MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
2015:   MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
2016:   *C   = Cdense;

2018:   /* create data structure for reuse of Cdense */
2019:   MPI_Comm_size(comm,&size);
2020:   PetscNew(&atb);
2021:   cM = Cdense->rmap->N;
2022:   PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
2023: 
2024:   c                    = (Mat_MPIDense*)Cdense->data;
2025:   c->atbdense          = atb;
2026:   atb->destroy         = Cdense->ops->destroy;
2027:   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2028:   return(0);
2029: }

2031: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2032: {

2036:   if (scall == MAT_INITIAL_MATRIX) {
2037:     MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2038:   }
2039:   MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
2040:   return(0);
2041: }

2043: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2044: {
2045:   PetscErrorCode        ierr;
2046:   Mat                   Cdense;
2047:   MPI_Comm              comm;
2048:   PetscMPIInt           i, size;
2049:   PetscInt              maxRows, bufsiz;
2050:   Mat_MPIDense          *c;
2051:   PetscMPIInt           tag;
2052:   const char            *algTypes[2] = {"allgatherv","cyclic"};
2053:   PetscInt              alg, nalg = 2;
2054:   Mat_MatTransMultDense *abt;

2057:   PetscObjectGetComm((PetscObject)A,&comm);
2058:   alg = 0; /* default is allgatherv */
2059:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");
2060:   PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);
2061:   PetscOptionsEnd();
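  /* alg == 0 selects the allgatherv algorithm (the default), alg == 1 the cyclic one; the choice can be
     made at run time with -matmattransmult_mpidense_mpidense_via allgatherv|cyclic */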
2062:   if (A->cmap->N != B->cmap->N) {
2063:     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2064:   }

2066:   /* create matrix product Cdense */
2067:   MatCreate(comm,&Cdense);
2068:   MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);
2069:   MatSetType(Cdense,MATMPIDENSE);
2070:   MatMPIDenseSetPreallocation(Cdense,NULL);
2071:   PetscObjectGetNewTag((PetscObject)Cdense, &tag);
2072:   MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
2073:   MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
2074:   *C   = Cdense;

2076:   /* create data structure for reuse of Cdense */
2077:   MPI_Comm_size(comm,&size);
2078:   PetscNew(&abt);
2079:   abt->tag = tag;
2080:   abt->alg = alg;
2081:   switch (alg) {
2082:   case 1:
2083:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2084:     bufsiz = A->cmap->N * maxRows;
2085:     PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));
2086:     break;
2087:   default:
2088:     PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
2089:     PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));
2090:     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2091:     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2092:     break;
2093:   }

2095:   c                    = (Mat_MPIDense*)Cdense->data;
2096:   c->abtdense          = abt;
2097:   abt->destroy         = Cdense->ops->destroy;
2098:   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2099:   return(0);
2100: }

2102: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2103: {
2104:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2105:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2106:   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2107:   Mat_MatTransMultDense *abt = c->abtdense;
2109:   MPI_Comm       comm;
2110:   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2111:   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2112:   PetscInt       i,cK=A->cmap->N,k,j,bn;
2113:   PetscScalar    _DOne=1.0,_DZero=0.0;
2114:   PetscBLASInt   cm, cn, ck;
2115:   MPI_Request    reqs[2];
2116:   const PetscInt *ranges;

2119:   PetscObjectGetComm((PetscObject)A,&comm);
2120:   MPI_Comm_rank(comm,&rank);
2121:   MPI_Comm_size(comm,&size);
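  /* Cyclic algorithm: the row blocks of B circulate around a ring of processes; in each round the local
     part of A is multiplied against the transpose of the block currently held in sendbuf (the block
     owned by process recvisfrom), filling the matching column block of the local part of C, while the
     next block is already in flight. */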

2123:   MatGetOwnershipRanges(B,&ranges);
2124:   bn = B->rmap->n;
2125:   if (bseq->lda == bn) {
2126:     sendbuf = bseq->v;
2127:   } else {
2128:     sendbuf = abt->buf[0];
2129:     for (k = 0, i = 0; i < cK; i++) {
2130:       for (j = 0; j < bn; j++, k++) {
2131:         sendbuf[k] = bseq->v[i * bseq->lda + j];
2132:       }
2133:     }
2134:   }
2135:   if (size > 1) {
2136:     sendto = (rank + size - 1) % size;
2137:     recvfrom = (rank + size + 1) % size;
2138:   } else {
2139:     sendto = recvfrom = 0;
2140:   }
2141:   PetscBLASIntCast(cK,&ck);
2142:   PetscBLASIntCast(c->A->rmap->n,&cm);
2143:   recvisfrom = rank;
2144:   for (i = 0; i < size; i++) {
2145:     /* we have finished receiving and sending, so the buffers can be read/modified */
2146:     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2147:     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

2149:     if (nextrecvisfrom != rank) {
2150:       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2151:       sendsiz = cK * bn;
2152:       recvsiz = cK * nextbn;
2153:       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2154:       MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
2155:       MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
2156:     }

2158:     /* local aseq * sendbuf^T */
2159:     PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
2160:     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2161:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

2163:     if (nextrecvisfrom != rank) {
2164:       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2165:       MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
2166:     }
2167:     bn = nextbn;
2168:     recvisfrom = nextrecvisfrom;
2169:     sendbuf = recvbuf;
2170:   }
2171:   return(0);
2172: }

2174: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2175: {
2176:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2177:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2178:   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2179:   Mat_MatTransMultDense *abt = c->abtdense;
2181:   MPI_Comm       comm;
2182:   PetscMPIInt    rank,size;
2183:   PetscScalar    *sendbuf, *recvbuf;
2184:   PetscInt       i,cK=A->cmap->N,k,j,bn;
2185:   PetscScalar    _DOne=1.0,_DZero=0.0;
2186:   PetscBLASInt   cm, cn, ck;

2189:   PetscObjectGetComm((PetscObject)A,&comm);
2190:   MPI_Comm_rank(comm,&rank);
2191:   MPI_Comm_size(comm,&size);
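  /* Allgatherv algorithm: every process packs the transpose of its local rows of B into buf[0],
     MPI_Allgatherv() assembles the complete B^T on every process, and a single GEMM
     A_loc * B^T then produces the local rows of C. */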

2193:   /* copy transpose of B into buf[0] */
2194:   bn      = B->rmap->n;
2195:   sendbuf = abt->buf[0];
2196:   recvbuf = abt->buf[1];
2197:   for (k = 0, j = 0; j < bn; j++) {
2198:     for (i = 0; i < cK; i++, k++) {
2199:       sendbuf[k] = bseq->v[i * bseq->lda + j];
2200:     }
2201:   }
2202:   MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
2203:   PetscBLASIntCast(cK,&ck);
2204:   PetscBLASIntCast(c->A->rmap->n,&cm);
2205:   PetscBLASIntCast(c->A->cmap->n,&cn);
2206:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2207:   return(0);
2208: }

2210: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2211: {
2212:   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2213:   Mat_MatTransMultDense *abt = c->abtdense;

2217:   switch (abt->alg) {
2218:   case 1:
2219:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
2220:     break;
2221:   default:
2222:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
2223:     break;
2224:   }
2225:   return(0);
2226: }

2228: static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2229: {

2233:   if (scall == MAT_INITIAL_MATRIX) {
2234:     MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2235:   }
2236:   MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);
2237:   return(0);
2238: }

2240: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2241: {
2242:   PetscErrorCode   ierr;
2243:   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2244:   Mat_MatMultDense *ab = a->abdense;

2247:   MatDestroy(&ab->Ce);
2248:   MatDestroy(&ab->Ae);
2249:   MatDestroy(&ab->Be);

2251:   (ab->destroy)(A);
2252:   PetscFree(ab);
2253:   return(0);
2254: }

2256: #if defined(PETSC_HAVE_ELEMENTAL)
2257: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2258: {
2259:   PetscErrorCode   ierr;
2260:   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2261:   Mat_MatMultDense *ab=c->abdense;

2264:   MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
2265:   MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
2266:   MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
2267:   MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
2268:   return(0);
2269: }

2271: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2272: {
2273:   PetscErrorCode   ierr;
2274:   Mat              Ae,Be,Ce;
2275:   Mat_MPIDense     *c;
2276:   Mat_MatMultDense *ab;

2279:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2280:     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2281:   }

2283:   /* convert A and B to Elemental matrices Ae and Be */
2284:   MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
2285:   MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);

2287:   /* Ce = Ae*Be */
2288:   MatMatMultSymbolic(Ae,Be,fill,&Ce);
2289:   MatMatMultNumeric(Ae,Be,Ce);
2290: 
2291:   /* convert Ce to C */
2292:   MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);

2294:   /* create data structure for reuse of Cdense */
2295:   PetscNew(&ab);
2296:   c                  = (Mat_MPIDense*)(*C)->data;
2297:   c->abdense         = ab;

2299:   ab->Ae             = Ae;
2300:   ab->Be             = Be;
2301:   ab->Ce             = Ce;
2302:   ab->destroy        = (*C)->ops->destroy;
2303:   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2304:   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2305:   return(0);
2306: }

2308: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2309: {

2313:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2314:     PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
2315:     MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2316:     PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
2317:   } else {
2318:     PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
2319:     MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
2320:     PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
2321:   }
2322:   return(0);
2323: }
2324: #endif