Actual source code: mpidense.c

  1: /*
  2:    Basic functions for parallel dense matrices.
  3:    Portions of this code are under:
  4:    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
  5: */

  7: #include <../src/mat/impls/dense/mpi/mpidense.h>
  8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  9: #include <petscblaslapack.h>

 11: /*@
 12:   MatDenseGetLocalMatrix - Returns the sequential matrix that represents the operator of a
 13:   `MATMPIDENSE` or `MATSEQDENSE` matrix; for a sequential matrix it returns the matrix itself.

 15:   Input Parameter:
 16: . A - the sequential or MPI `MATDENSE` matrix

 18:   Output Parameter:
 19: . B - the inner matrix
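
  Example Usage:
.vb
  // a minimal illustrative sketch; the global sizes (10 x 10) are placeholders
  Mat A, Aloc;
  MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 10, 10, NULL, &A);
  MatDenseGetLocalMatrix(A, &Aloc); // Aloc is the per-process MATSEQDENSE block
  MatZeroEntries(Aloc);             // operations on Aloc act directly on the storage of A
.ve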

 21:   Level: intermediate

 23: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
 24: @*/
 25: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
 26: {
 27:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 28:   PetscBool     flg;

 30:   PetscFunctionBegin;
 32:   PetscAssertPointer(B, 2);
 33:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
 34:   if (flg) *B = mat->A;
 35:   else {
 36:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
 37:     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
 38:     *B = A;
 39:   }
 40:   PetscFunctionReturn(PETSC_SUCCESS);
 41: }

 43: static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
 44: {
 45:   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
 46:   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

 48:   PetscFunctionBegin;
 49:   PetscCall(MatCopy(Amat->A, Bmat->A, s));
 50:   PetscFunctionReturn(PETSC_SUCCESS);
 51: }

 53: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
 54: {
 55:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 56:   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
 57:   PetscScalar  *v;

 59:   PetscFunctionBegin;
 60:   PetscCall(MatDenseGetArray(mat->A, &v));
 61:   PetscCall(MatDenseGetLDA(mat->A, &lda));
 62:   rend2 = PetscMin(rend, A->cmap->N);
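  /* column-major local storage: global diagonal entry (j, j) is local row j - rstart of local column j, i.e. v[(j - rstart) + j * lda] */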
 63:   if (rend2 > rstart) {
 64:     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
 65:     PetscCall(PetscLogFlops(rend2 - rstart));
 66:   }
 67:   PetscCall(MatDenseRestoreArray(mat->A, &v));
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 72: {
 73:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 74:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 76:   PetscFunctionBegin;
 77:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
 78:   lrow = row - rstart;
 79:   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }

 83: static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 84: {
 85:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 86:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 88:   PetscFunctionBegin;
 89:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
 90:   lrow = row - rstart;
 91:   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 92:   PetscFunctionReturn(PETSC_SUCCESS);
 93: }

 95: static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
 96: {
 97:   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
 98:   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
 99:   PetscScalar  *array;
100:   MPI_Comm      comm;
101:   PetscBool     flg;
102:   Mat           B;

104:   PetscFunctionBegin;
105:   PetscCall(MatHasCongruentLayouts(A, &flg));
106:   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
107:   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
108:   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
109: #if PetscDefined(HAVE_CUDA)
110:     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
111:     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
112: #elif PetscDefined(HAVE_HIP)
113:     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
114:     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
115: #endif
116:     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
117:     PetscCall(MatCreate(comm, &B));
118:     PetscCall(MatSetSizes(B, m, m, m, m));
119:     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
120:     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
121:     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
122:     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
123:     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
124:     *a = B;
125:     PetscCall(MatDestroy(&B));
126:   } else *a = B;
127:   PetscFunctionReturn(PETSC_SUCCESS);
128: }

130: static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
131: {
132:   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
133:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
134:   PetscBool     roworiented = A->roworiented;

136:   PetscFunctionBegin;
137:   for (i = 0; i < m; i++) {
138:     if (idxm[i] < 0) continue;
139:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
140:     if (idxm[i] >= rstart && idxm[i] < rend) {
141:       row = idxm[i] - rstart;
142:       if (roworiented) {
143:         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
144:       } else {
145:         for (j = 0; j < n; j++) {
146:           if (idxn[j] < 0) continue;
147:           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
148:           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
149:         }
150:       }
151:     } else if (!A->donotstash) {
152:       mat->assembled = PETSC_FALSE;
153:       if (roworiented) {
154:         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
155:       } else {
156:         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
157:       }
158:     }
159:   }
160:   PetscFunctionReturn(PETSC_SUCCESS);
161: }
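
/*
   Caller-side usage sketch (a minimal illustration, not part of this routine): entries destined for rows
   owned by other processes are stashed above rather than sent immediately, so a sequence of
   MatSetValues() calls must be completed with

     MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);

   before the matrix is used, e.g. in MatMult(). MatAssemblyEnd_MPIDense() below drains the stash and
   applies the received values through MatSetValues_MPIDense().
*/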

163: static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164: {
165:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

168:   PetscFunctionBegin;
169:   for (i = 0; i < m; i++) {
170:     if (idxm[i] < 0) continue; /* negative row */
171:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172:     if (idxm[i] >= rstart && idxm[i] < rend) {
173:       row = idxm[i] - rstart;
174:       for (j = 0; j < n; j++) {
175:         if (idxn[j] < 0) continue; /* negative column */
176:         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177:         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178:       }
179:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180:   }
181:   PetscFunctionReturn(PETSC_SUCCESS);
182: }

184: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185: {
186:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

188:   PetscFunctionBegin;
189:   PetscCall(MatDenseGetLDA(a->A, lda));
190:   PetscFunctionReturn(PETSC_SUCCESS);
191: }

193: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
194: {
195:   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
196:   MatType       mtype = MATSEQDENSE;

198:   PetscFunctionBegin;
199:   if (!a->A) {
200:     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
201:     PetscCall(PetscLayoutSetUp(A->rmap));
202:     PetscCall(PetscLayoutSetUp(A->cmap));
203:     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
204:     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
205: #if PetscDefined(HAVE_CUDA)
206:     PetscBool iscuda;
207:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
208:     if (iscuda) mtype = MATSEQDENSECUDA;
209: #elif PetscDefined(HAVE_HIP)
210:     PetscBool iship;
211:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
212:     if (iship) mtype = MATSEQDENSEHIP;
213: #endif
214:     PetscCall(MatSetType(a->A, mtype));
215:   }
216:   PetscCall(MatDenseSetLDA(a->A, lda));
217:   PetscFunctionReturn(PETSC_SUCCESS);
218: }

220: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221: {
222:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

224:   PetscFunctionBegin;
225:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226:   PetscCall(MatDenseGetArray(a->A, array));
227:   PetscFunctionReturn(PETSC_SUCCESS);
228: }

230: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
231: {
232:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

234:   PetscFunctionBegin;
235:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236:   PetscCall(MatDenseGetArrayRead(a->A, array));
237:   PetscFunctionReturn(PETSC_SUCCESS);
238: }

240: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241: {
242:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

244:   PetscFunctionBegin;
245:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246:   PetscCall(MatDenseGetArrayWrite(a->A, array));
247:   PetscFunctionReturn(PETSC_SUCCESS);
248: }

250: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251: {
252:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

254:   PetscFunctionBegin;
255:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257:   PetscCall(MatDensePlaceArray(a->A, array));
258:   PetscFunctionReturn(PETSC_SUCCESS);
259: }

261: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262: {
263:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

265:   PetscFunctionBegin;
266:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268:   PetscCall(MatDenseResetArray(a->A));
269:   PetscFunctionReturn(PETSC_SUCCESS);
270: }

272: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273: {
274:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

276:   PetscFunctionBegin;
277:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279:   PetscCall(MatDenseReplaceArray(a->A, array));
280:   PetscFunctionReturn(PETSC_SUCCESS);
281: }

283: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
284: {
285:   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
286:   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
287:   const PetscInt    *irow, *icol;
288:   const PetscScalar *v;
289:   PetscScalar       *bv;
290:   Mat                newmat;
291:   IS                 iscol_local;
292:   MPI_Comm           comm_is, comm_mat;

294:   PetscFunctionBegin;
295:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
296:   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
297:   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

299:   PetscCall(ISAllGather(iscol, &iscol_local));
300:   PetscCall(ISGetIndices(isrow, &irow));
301:   PetscCall(ISGetIndices(iscol_local, &icol));
302:   PetscCall(ISGetLocalSize(isrow, &nrows));
303:   PetscCall(ISGetLocalSize(iscol, &ncols));
304:   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

306:   /* No parallel redistribution is currently supported! Each index set should really be checked
307:      to confirm that it is valid. Currently only a submatrix with the same row partitioning as the
308:      original matrix is supported! */
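  /* A sketch of a call this routine does handle (caller side; A is the MATMPIDENSE matrix, B the result,
     and ncols_local / first_col are placeholder values): isrow may contain only locally owned rows,
     distributed like the rows of A, while iscol lists the columns this process is to own in the
     submatrix (it is gathered onto every process above with ISAllGather()):

       PetscInt rstart, rend;
       IS       isrow, iscol;
       MatGetOwnershipRange(A, &rstart, &rend);
       ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow);
       ISCreateStride(PETSC_COMM_WORLD, ncols_local, first_col, 1, &iscol);
       MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &B);
  */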

310:   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
311:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

313:   /* Check submatrix call */
314:   if (scall == MAT_REUSE_MATRIX) {
315:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
316:     /* Really need to check the row and column sizes here! */
317:     newmat = *B;
318:   } else {
319:     /* Create and fill new matrix */
320:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
321:     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
322:     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
323:     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
324:   }

326:   /* Now extract the data pointers and do the copy, column at a time */
327:   newmatd = (Mat_MPIDense *)newmat->data;
328:   PetscCall(MatDenseGetArray(newmatd->A, &bv));
329:   PetscCall(MatDenseGetArrayRead(mat->A, &v));
330:   PetscCall(MatDenseGetLDA(mat->A, &lda));
331:   for (i = 0; i < Ncols; i++) {
332:     const PetscScalar *av = v + lda * icol[i];
333:     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
334:   }
335:   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
336:   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

338:   /* Assemble the matrices so that the correct flags are set */
339:   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
340:   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

342:   /* Free work space */
343:   PetscCall(ISRestoreIndices(isrow, &irow));
344:   PetscCall(ISRestoreIndices(iscol_local, &icol));
345:   PetscCall(ISDestroy(&iscol_local));
346:   *B = newmat;
347:   PetscFunctionReturn(PETSC_SUCCESS);
348: }

350: static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351: {
352:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

354:   PetscFunctionBegin;
355:   PetscCall(MatDenseRestoreArray(a->A, array));
356:   PetscFunctionReturn(PETSC_SUCCESS);
357: }

359: static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
360: {
361:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

363:   PetscFunctionBegin;
364:   PetscCall(MatDenseRestoreArrayRead(a->A, array));
365:   PetscFunctionReturn(PETSC_SUCCESS);
366: }

368: static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369: {
370:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

372:   PetscFunctionBegin;
373:   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374:   PetscFunctionReturn(PETSC_SUCCESS);
375: }

377: static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378: {
379:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380:   PetscInt      nstash, reallocs;

382:   PetscFunctionBegin;
383:   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

385:   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386:   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387:   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388:   PetscFunctionReturn(PETSC_SUCCESS);
389: }

391: static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
392: {
393:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
394:   PetscInt      i, *row, *col, flg, j, rstart, ncols;
395:   PetscMPIInt   n;
396:   PetscScalar  *val;

398:   PetscFunctionBegin;
399:   if (!mdn->donotstash && !mat->nooffprocentries) {
400:     /*  wait on receives */
401:     while (1) {
402:       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
403:       if (!flg) break;

405:       for (i = 0; i < n;) {
406:         /* Now identify the consecutive values belonging to the same row */
407:         for (j = i, rstart = row[j]; j < n; j++) {
408:           if (row[j] != rstart) break;
409:         }
410:         if (j < n) ncols = j - i;
411:         else ncols = n - i;
412:         /* Now assemble all these values with a single function call */
413:         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
414:         i = j;
415:       }
416:     }
417:     PetscCall(MatStashScatterEnd_Private(&mat->stash));
418:   }

420:   PetscCall(MatAssemblyBegin(mdn->A, mode));
421:   PetscCall(MatAssemblyEnd(mdn->A, mode));
422:   PetscFunctionReturn(PETSC_SUCCESS);
423: }

425: static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426: {
427:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

429:   PetscFunctionBegin;
430:   PetscCall(MatZeroEntries(l->A));
431:   PetscFunctionReturn(PETSC_SUCCESS);
432: }

434: static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
435: {
436:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
437:   PetscInt      i, len, *lrows;

439:   PetscFunctionBegin;
440:   /* get locally owned rows */
441:   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
442:   /* fix right hand side if needed */
443:   if (x && b) {
444:     const PetscScalar *xx;
445:     PetscScalar       *bb;

447:     PetscCall(VecGetArrayRead(x, &xx));
448:     PetscCall(VecGetArrayWrite(b, &bb));
449:     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
450:     PetscCall(VecRestoreArrayRead(x, &xx));
451:     PetscCall(VecRestoreArrayWrite(b, &bb));
452:   }
453:   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
454:   if (diag != 0.0) {
455:     Vec d;

457:     PetscCall(MatCreateVecs(A, NULL, &d));
458:     PetscCall(VecSet(d, diag));
459:     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
460:     PetscCall(VecDestroy(&d));
461:   }
462:   PetscCall(PetscFree(lrows));
463:   PetscFunctionReturn(PETSC_SUCCESS);
464: }

466: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
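
/* The Mult kernels below follow the usual MPIDENSE pattern: each process stores its local rows as one
   sequential dense (m x N) block, so y = A*x first broadcasts the entire distributed x into the
   sequential work vector lvec through the PetscSF Mvctx and then applies the local dense kernel, while
   y = A^T*x applies the local transpose kernel to the locally owned part of x and then reduce-sums the
   partial results into y through the same PetscSF. */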

471: static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
472: {
473:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
474:   const PetscScalar *ax;
475:   PetscScalar       *ay;
476:   PetscMemType       axmtype, aymtype;

478:   PetscFunctionBegin;
479:   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
480:   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
481:   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
482:   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
483:   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
484:   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
485:   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
486:   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
487:   PetscFunctionReturn(PETSC_SUCCESS);
488: }

490: static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
491: {
492:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
493:   const PetscScalar *ax;
494:   PetscScalar       *ay;
495:   PetscMemType       axmtype, aymtype;

497:   PetscFunctionBegin;
498:   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
499:   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
500:   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
501:   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
502:   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
503:   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
504:   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
505:   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
506:   PetscFunctionReturn(PETSC_SUCCESS);
507: }

509: static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
510: {
511:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
512:   const PetscScalar *ax;
513:   PetscScalar       *ay;
514:   PetscMemType       axmtype, aymtype;

516:   PetscFunctionBegin;
517:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
518:   PetscCall(VecSet(yy, 0.0));
519:   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
520:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
521:   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
522:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
523:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
524:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
525:   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
526:   PetscFunctionReturn(PETSC_SUCCESS);
527: }

529: static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
530: {
531:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
532:   const PetscScalar *ax;
533:   PetscScalar       *ay;
534:   PetscMemType       axmtype, aymtype;

536:   PetscFunctionBegin;
537:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
538:   PetscCall(VecCopy(yy, zz));
539:   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
540:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
541:   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
542:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
543:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
544:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
545:   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
546:   PetscFunctionReturn(PETSC_SUCCESS);
547: }

549: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
550: {
551:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552:   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
553:   PetscScalar       *x;
554:   const PetscScalar *av;

556:   PetscFunctionBegin;
557:   PetscCall(VecGetArray(v, &x));
558:   PetscCall(VecGetSize(v, &ng));
559:   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
560:   PetscCall(VecGetLocalSize(v, &nl));
561:   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
562:   radd = A->rmap->rstart * m;
563:   PetscCall(MatDenseGetArrayRead(a->A, &av));
564:   PetscCall(MatDenseGetLDA(a->A, &lda));
565:   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
566:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
567:   PetscCall(PetscArrayzero(x + i, nl - i));
568:   PetscCall(VecRestoreArray(v, &x));
569:   PetscFunctionReturn(PETSC_SUCCESS);
570: }

572: static PetscErrorCode MatDestroy_MPIDense(Mat mat)
573: {
574:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

576:   PetscFunctionBegin;
577:   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
578:   PetscCall(MatStashDestroy_Private(&mat->stash));
579:   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
580:   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
581:   PetscCall(MatDestroy(&mdn->A));
582:   PetscCall(VecDestroy(&mdn->lvec));
583:   PetscCall(PetscSFDestroy(&mdn->Mvctx));
584:   PetscCall(VecDestroy(&mdn->cvec));
585:   PetscCall(MatDestroy(&mdn->cmat));

587:   PetscCall(PetscFree(mat->data));
588:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

590:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
591:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
592:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
593:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
594:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
595:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
596:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
597:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
598:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
599:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
600:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
601:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
602:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
603: #if defined(PETSC_HAVE_ELEMENTAL)
604:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
605: #endif
606: #if defined(PETSC_HAVE_SCALAPACK)
607:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
608: #endif
609:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
610:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
611:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
612: #if defined(PETSC_HAVE_CUDA)
613:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
614:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
615:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
616:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
617:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
618:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
619:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
620:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
621:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
622:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
623:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
624:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
625:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
626:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
627:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
628:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
629:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
630:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
631: #endif
632: #if defined(PETSC_HAVE_HIP)
633:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
634:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
635:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
636:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
637:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
638:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
639:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
640:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
641:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
642:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
643:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
644:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
645:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
646:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
647:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
648:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
649:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
650:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
651: #endif
652:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

663:   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664:   PetscFunctionReturn(PETSC_SUCCESS);
665: }

667: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

669: #include <petscdraw.h>
670: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671: {
672:   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
673:   PetscMPIInt       rank;
674:   PetscViewerType   vtype;
675:   PetscBool         iascii, isdraw;
676:   PetscViewer       sviewer;
677:   PetscViewerFormat format;

679:   PetscFunctionBegin;
680:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683:   if (iascii) {
684:     PetscCall(PetscViewerGetType(viewer, &vtype));
685:     PetscCall(PetscViewerGetFormat(viewer, &format));
686:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687:       MatInfo info;
688:       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691:                                                    (PetscInt)info.memory));
692:       PetscCall(PetscViewerFlush(viewer));
693:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694:       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695:       PetscFunctionReturn(PETSC_SUCCESS);
696:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
697:       PetscFunctionReturn(PETSC_SUCCESS);
698:     }
699:   } else if (isdraw) {
700:     PetscDraw draw;
701:     PetscBool isnull;

703:     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704:     PetscCall(PetscDrawIsNull(draw, &isnull));
705:     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706:   }

708:   {
709:     /* assemble the entire matrix onto the first processor. */
710:     Mat          A;
711:     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712:     PetscInt    *cols;
713:     PetscScalar *vals;

715:     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716:     if (rank == 0) {
717:       PetscCall(MatSetSizes(A, M, N, M, N));
718:     } else {
719:       PetscCall(MatSetSizes(A, 0, 0, M, N));
720:     }
721:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722:     PetscCall(MatSetType(A, MATMPIDENSE));
723:     PetscCall(MatMPIDenseSetPreallocation(A, NULL));

725:     /* Copy the matrix ... This isn't the most efficient means,
726:        but it's quick for now */
727:     A->insertmode = INSERT_VALUES;

729:     row = mat->rmap->rstart;
730:     m   = mdn->A->rmap->n;
731:     for (i = 0; i < m; i++) {
732:       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733:       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734:       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735:       row++;
736:     }

738:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740:     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741:     if (rank == 0) {
742:       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743:       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744:     }
745:     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746:     PetscCall(PetscViewerFlush(viewer));
747:     PetscCall(MatDestroy(&A));
748:   }
749:   PetscFunctionReturn(PETSC_SUCCESS);
750: }

752: static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753: {
754:   PetscBool iascii, isbinary, isdraw, issocket;

756:   PetscFunctionBegin;
757:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

762:   if (iascii || issocket || isdraw) {
763:     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764:   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765:   PetscFunctionReturn(PETSC_SUCCESS);
766: }

768: static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769: {
770:   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
771:   Mat            mdn = mat->A;
772:   PetscLogDouble isend[5], irecv[5];

774:   PetscFunctionBegin;
775:   info->block_size = 1.0;

777:   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

779:   isend[0] = info->nz_used;
780:   isend[1] = info->nz_allocated;
781:   isend[2] = info->nz_unneeded;
782:   isend[3] = info->memory;
783:   isend[4] = info->mallocs;
784:   if (flag == MAT_LOCAL) {
785:     info->nz_used      = isend[0];
786:     info->nz_allocated = isend[1];
787:     info->nz_unneeded  = isend[2];
788:     info->memory       = isend[3];
789:     info->mallocs      = isend[4];
790:   } else if (flag == MAT_GLOBAL_MAX) {
791:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

793:     info->nz_used      = irecv[0];
794:     info->nz_allocated = irecv[1];
795:     info->nz_unneeded  = irecv[2];
796:     info->memory       = irecv[3];
797:     info->mallocs      = irecv[4];
798:   } else if (flag == MAT_GLOBAL_SUM) {
799:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

801:     info->nz_used      = irecv[0];
802:     info->nz_allocated = irecv[1];
803:     info->nz_unneeded  = irecv[2];
804:     info->memory       = irecv[3];
805:     info->mallocs      = irecv[4];
806:   }
807:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
808:   info->fill_ratio_needed = 0;
809:   info->factor_mallocs    = 0;
810:   PetscFunctionReturn(PETSC_SUCCESS);
811: }

813: static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814: {
815:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

817:   PetscFunctionBegin;
818:   switch (op) {
819:   case MAT_NEW_NONZERO_LOCATIONS:
820:   case MAT_NEW_NONZERO_LOCATION_ERR:
821:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822:     MatCheckPreallocated(A, 1);
823:     PetscCall(MatSetOption(a->A, op, flg));
824:     break;
825:   case MAT_ROW_ORIENTED:
826:     MatCheckPreallocated(A, 1);
827:     a->roworiented = flg;
828:     PetscCall(MatSetOption(a->A, op, flg));
829:     break;
830:   case MAT_FORCE_DIAGONAL_ENTRIES:
831:   case MAT_KEEP_NONZERO_PATTERN:
832:   case MAT_USE_HASH_TABLE:
833:   case MAT_SORTED_FULL:
834:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835:     break;
836:   case MAT_IGNORE_OFF_PROC_ENTRIES:
837:     a->donotstash = flg;
838:     break;
839:   case MAT_SYMMETRIC:
840:   case MAT_STRUCTURALLY_SYMMETRIC:
841:   case MAT_HERMITIAN:
842:   case MAT_SYMMETRY_ETERNAL:
843:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844:   case MAT_SPD:
845:   case MAT_IGNORE_LOWER_TRIANGULAR:
846:   case MAT_IGNORE_ZERO_ENTRIES:
847:   case MAT_SPD_ETERNAL:
848:     /* if the diagonal matrix is square it inherits some of the properties above */
849:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850:     break;
851:   default:
852:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853:   }
854:   PetscFunctionReturn(PETSC_SUCCESS);
855: }

857: static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858: {
859:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
860:   const PetscScalar *l;
861:   PetscScalar        x, *v, *vv, *r;
862:   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

864:   PetscFunctionBegin;
865:   PetscCall(MatDenseGetArray(mdn->A, &vv));
866:   PetscCall(MatDenseGetLDA(mdn->A, &lda));
867:   PetscCall(MatGetLocalSize(A, &s2, &s3));
868:   if (ll) {
869:     PetscCall(VecGetLocalSize(ll, &s2a));
870:     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871:     PetscCall(VecGetArrayRead(ll, &l));
872:     for (i = 0; i < m; i++) {
873:       x = l[i];
874:       v = vv + i;
875:       for (j = 0; j < n; j++) {
876:         (*v) *= x;
877:         v += lda;
878:       }
879:     }
880:     PetscCall(VecRestoreArrayRead(ll, &l));
881:     PetscCall(PetscLogFlops(1.0 * n * m));
882:   }
883:   if (rr) {
884:     const PetscScalar *ar;

886:     PetscCall(VecGetLocalSize(rr, &s3a));
887:     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
888:     PetscCall(VecGetArrayRead(rr, &ar));
889:     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890:     PetscCall(VecGetArray(mdn->lvec, &r));
891:     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892:     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893:     PetscCall(VecRestoreArrayRead(rr, &ar));
894:     for (i = 0; i < n; i++) {
895:       x = r[i];
896:       v = vv + i * lda;
897:       for (j = 0; j < m; j++) (*v++) *= x;
898:     }
899:     PetscCall(VecRestoreArray(mdn->lvec, &r));
900:     PetscCall(PetscLogFlops(1.0 * n * m));
901:   }
902:   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903:   PetscFunctionReturn(PETSC_SUCCESS);
904: }

906: static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907: {
908:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
909:   PetscInt           i, j;
910:   PetscMPIInt        size;
911:   PetscReal          sum = 0.0;
912:   const PetscScalar *av, *v;

914:   PetscFunctionBegin;
915:   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916:   v = av;
917:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918:   if (size == 1) {
919:     PetscCall(MatNorm(mdn->A, type, nrm));
920:   } else {
921:     if (type == NORM_FROBENIUS) {
922:       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923:         sum += PetscRealPart(PetscConj(*v) * (*v));
924:         v++;
925:       }
926:       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927:       *nrm = PetscSqrtReal(*nrm);
928:       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929:     } else if (type == NORM_1) {
930:       PetscReal *tmp, *tmp2;
931:       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932:       *nrm = 0.0;
933:       v    = av;
934:       for (j = 0; j < mdn->A->cmap->n; j++) {
935:         for (i = 0; i < mdn->A->rmap->n; i++) {
936:           tmp[j] += PetscAbsScalar(*v);
937:           v++;
938:         }
939:       }
940:       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941:       for (j = 0; j < A->cmap->N; j++) {
942:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
943:       }
944:       PetscCall(PetscFree2(tmp, tmp2));
945:       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946:     } else if (type == NORM_INFINITY) { /* max row norm */
947:       PetscReal ntemp;
948:       PetscCall(MatNorm(mdn->A, type, &ntemp));
949:       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950:     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951:   }
952:   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953:   PetscFunctionReturn(PETSC_SUCCESS);
954: }

956: static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957: {
958:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959:   Mat           B;
960:   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961:   PetscInt      j, i, lda;
962:   PetscScalar  *v;

964:   PetscFunctionBegin;
965:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968:     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969:     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970:     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971:   } else B = *matout;

973:   m = a->A->rmap->n;
974:   n = a->A->cmap->n;
975:   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976:   PetscCall(MatDenseGetLDA(a->A, &lda));
977:   PetscCall(PetscMalloc1(m, &rwork));
978:   for (i = 0; i < m; i++) rwork[i] = rstart + i;
979:   for (j = 0; j < n; j++) {
980:     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981:     v += lda;
982:   }
983:   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984:   PetscCall(PetscFree(rwork));
985:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988:     *matout = B;
989:   } else {
990:     PetscCall(MatHeaderMerge(A, &B));
991:   }
992:   PetscFunctionReturn(PETSC_SUCCESS);
993: }

995: static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

998: static PetscErrorCode MatSetUp_MPIDense(Mat A)
999: {
1000:   PetscFunctionBegin;
1001:   PetscCall(PetscLayoutSetUp(A->rmap));
1002:   PetscCall(PetscLayoutSetUp(A->cmap));
1003:   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004:   PetscFunctionReturn(PETSC_SUCCESS);
1005: }

1007: static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008: {
1009:   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

1011:   PetscFunctionBegin;
1012:   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013:   PetscFunctionReturn(PETSC_SUCCESS);
1014: }

1016: static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017: {
1018:   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

1020:   PetscFunctionBegin;
1021:   PetscCall(MatConjugate(a->A));
1022:   PetscFunctionReturn(PETSC_SUCCESS);
1023: }

1025: static PetscErrorCode MatRealPart_MPIDense(Mat A)
1026: {
1027:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1029:   PetscFunctionBegin;
1030:   PetscCall(MatRealPart(a->A));
1031:   PetscFunctionReturn(PETSC_SUCCESS);
1032: }

1034: static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035: {
1036:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1038:   PetscFunctionBegin;
1039:   PetscCall(MatImaginaryPart(a->A));
1040:   PetscFunctionReturn(PETSC_SUCCESS);
1041: }

1043: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044: {
1045:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1047:   PetscFunctionBegin;
1048:   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049:   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050:   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051:   PetscFunctionReturn(PETSC_SUCCESS);
1052: }

1054: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

1056: static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057: {
1058:   PetscInt      i, m, n;
1059:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060:   PetscReal    *work;

1062:   PetscFunctionBegin;
1063:   PetscCall(MatGetSize(A, &m, &n));
1064:   PetscCall(PetscMalloc1(n, &work));
1065:   if (type == REDUCTION_MEAN_REALPART) {
1066:     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067:   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068:     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069:   } else {
1070:     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071:   }
1072:   if (type == NORM_2) {
1073:     for (i = 0; i < n; i++) work[i] *= work[i];
1074:   }
1075:   if (type == NORM_INFINITY) {
1076:     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077:   } else {
1078:     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079:   }
1080:   PetscCall(PetscFree(work));
1081:   if (type == NORM_2) {
1082:     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084:     for (i = 0; i < n; i++) reductions[i] /= m;
1085:   }
1086:   PetscFunctionReturn(PETSC_SUCCESS);
1087: }

1089: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1090: {
1091:   Mat_MPIDense *d = (Mat_MPIDense *)x->data;

1093:   PetscFunctionBegin;
1094:   PetscCall(MatSetRandom(d->A, rctx));
1095: #if defined(PETSC_HAVE_DEVICE)
1096:   x->offloadmask = d->A->offloadmask;
1097: #endif
1098:   PetscFunctionReturn(PETSC_SUCCESS);
1099: }

1101: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1102: {
1103:   PetscFunctionBegin;
1104:   *missing = PETSC_FALSE;
1105:   PetscFunctionReturn(PETSC_SUCCESS);
1106: }

1108: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1109: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1110: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1111: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1112: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1113: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

1115: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1116:                                        MatGetRow_MPIDense,
1117:                                        MatRestoreRow_MPIDense,
1118:                                        MatMult_MPIDense,
1119:                                        /*  4*/ MatMultAdd_MPIDense,
1120:                                        MatMultTranspose_MPIDense,
1121:                                        MatMultTransposeAdd_MPIDense,
1122:                                        NULL,
1123:                                        NULL,
1124:                                        NULL,
1125:                                        /* 10*/ NULL,
1126:                                        NULL,
1127:                                        NULL,
1128:                                        NULL,
1129:                                        MatTranspose_MPIDense,
1130:                                        /* 15*/ MatGetInfo_MPIDense,
1131:                                        MatEqual_MPIDense,
1132:                                        MatGetDiagonal_MPIDense,
1133:                                        MatDiagonalScale_MPIDense,
1134:                                        MatNorm_MPIDense,
1135:                                        /* 20*/ MatAssemblyBegin_MPIDense,
1136:                                        MatAssemblyEnd_MPIDense,
1137:                                        MatSetOption_MPIDense,
1138:                                        MatZeroEntries_MPIDense,
1139:                                        /* 24*/ MatZeroRows_MPIDense,
1140:                                        NULL,
1141:                                        NULL,
1142:                                        NULL,
1143:                                        NULL,
1144:                                        /* 29*/ MatSetUp_MPIDense,
1145:                                        NULL,
1146:                                        NULL,
1147:                                        MatGetDiagonalBlock_MPIDense,
1148:                                        NULL,
1149:                                        /* 34*/ MatDuplicate_MPIDense,
1150:                                        NULL,
1151:                                        NULL,
1152:                                        NULL,
1153:                                        NULL,
1154:                                        /* 39*/ MatAXPY_MPIDense,
1155:                                        MatCreateSubMatrices_MPIDense,
1156:                                        NULL,
1157:                                        MatGetValues_MPIDense,
1158:                                        MatCopy_MPIDense,
1159:                                        /* 44*/ NULL,
1160:                                        MatScale_MPIDense,
1161:                                        MatShift_MPIDense,
1162:                                        NULL,
1163:                                        NULL,
1164:                                        /* 49*/ MatSetRandom_MPIDense,
1165:                                        NULL,
1166:                                        NULL,
1167:                                        NULL,
1168:                                        NULL,
1169:                                        /* 54*/ NULL,
1170:                                        NULL,
1171:                                        NULL,
1172:                                        NULL,
1173:                                        NULL,
1174:                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1175:                                        MatDestroy_MPIDense,
1176:                                        MatView_MPIDense,
1177:                                        NULL,
1178:                                        NULL,
1179:                                        /* 64*/ NULL,
1180:                                        NULL,
1181:                                        NULL,
1182:                                        NULL,
1183:                                        NULL,
1184:                                        /* 69*/ NULL,
1185:                                        NULL,
1186:                                        NULL,
1187:                                        NULL,
1188:                                        NULL,
1189:                                        /* 74*/ NULL,
1190:                                        NULL,
1191:                                        NULL,
1192:                                        NULL,
1193:                                        NULL,
1194:                                        /* 79*/ NULL,
1195:                                        NULL,
1196:                                        NULL,
1197:                                        NULL,
1198:                                        /* 83*/ MatLoad_MPIDense,
1199:                                        NULL,
1200:                                        NULL,
1201:                                        NULL,
1202:                                        NULL,
1203:                                        NULL,
1204:                                        /* 89*/ NULL,
1205:                                        NULL,
1206:                                        NULL,
1207:                                        NULL,
1208:                                        NULL,
1209:                                        /* 94*/ NULL,
1210:                                        NULL,
1211:                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1212:                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1213:                                        NULL,
1214:                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1215:                                        NULL,
1216:                                        NULL,
1217:                                        MatConjugate_MPIDense,
1218:                                        NULL,
1219:                                        /*104*/ NULL,
1220:                                        MatRealPart_MPIDense,
1221:                                        MatImaginaryPart_MPIDense,
1222:                                        NULL,
1223:                                        NULL,
1224:                                        /*109*/ NULL,
1225:                                        NULL,
1226:                                        NULL,
1227:                                        MatGetColumnVector_MPIDense,
1228:                                        MatMissingDiagonal_MPIDense,
1229:                                        /*114*/ NULL,
1230:                                        NULL,
1231:                                        NULL,
1232:                                        NULL,
1233:                                        NULL,
1234:                                        /*119*/ NULL,
1235:                                        NULL,
1236:                                        NULL,
1237:                                        NULL,
1238:                                        NULL,
1239:                                        /*124*/ NULL,
1240:                                        MatGetColumnReductions_MPIDense,
1241:                                        NULL,
1242:                                        NULL,
1243:                                        NULL,
1244:                                        /*129*/ NULL,
1245:                                        NULL,
1246:                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1247:                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1248:                                        NULL,
1249:                                        /*134*/ NULL,
1250:                                        NULL,
1251:                                        NULL,
1252:                                        NULL,
1253:                                        NULL,
1254:                                        /*139*/ NULL,
1255:                                        NULL,
1256:                                        NULL,
1257:                                        NULL,
1258:                                        NULL,
1259:                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1260:                                        /*145*/ NULL,
1261:                                        NULL,
1262:                                        NULL,
1263:                                        NULL,
1264:                                        NULL,
1265:                                        /*150*/ NULL,
1266:                                        NULL};

1268: static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1269: {
1270:   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1271:   MatType       mtype = MATSEQDENSE;

1273:   PetscFunctionBegin;
1274:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1275:   PetscCall(PetscLayoutSetUp(mat->rmap));
1276:   PetscCall(PetscLayoutSetUp(mat->cmap));
1277:   if (!a->A) {
1278:     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1279:     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1280:   }
1281: #if defined(PETSC_HAVE_CUDA)
1282:   PetscBool iscuda;
1283:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1284:   if (iscuda) mtype = MATSEQDENSECUDA;
1285: #endif
1286: #if defined(PETSC_HAVE_HIP)
1287:   PetscBool iship;
1288:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1289:   if (iship) mtype = MATSEQDENSEHIP;
1290: #endif
1291:   PetscCall(MatSetType(a->A, mtype));
1292:   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1293: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1294:   mat->offloadmask = a->A->offloadmask;
1295: #endif
1296:   mat->preallocated = PETSC_TRUE;
1297:   mat->assembled    = PETSC_TRUE;
1298:   PetscFunctionReturn(PETSC_SUCCESS);
1299: }

1301: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1302: {
1303:   Mat B, C;

1305:   PetscFunctionBegin;
1306:   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1307:   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1308:   PetscCall(MatDestroy(&C));
1309:   if (reuse == MAT_REUSE_MATRIX) {
1310:     C = *newmat;
1311:   } else C = NULL;
1312:   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1313:   PetscCall(MatDestroy(&B));
1314:   if (reuse == MAT_INPLACE_MATRIX) {
1315:     PetscCall(MatHeaderReplace(A, &C));
1316:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1317:   PetscFunctionReturn(PETSC_SUCCESS);
1318: }

1320: static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1321: {
1322:   Mat B, C;

1324:   PetscFunctionBegin;
1325:   PetscCall(MatDenseGetLocalMatrix(A, &C));
1326:   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1327:   if (reuse == MAT_REUSE_MATRIX) {
1328:     C = *newmat;
1329:   } else C = NULL;
1330:   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1331:   PetscCall(MatDestroy(&B));
1332:   if (reuse == MAT_INPLACE_MATRIX) {
1333:     PetscCall(MatHeaderReplace(A, &C));
1334:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1335:   PetscFunctionReturn(PETSC_SUCCESS);
1336: }
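
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): the two conversion routines
   above are normally reached through the generic MatConvert() interface:

     Mat A, B;   // A is an assembled MATMPIAIJ matrix
     PetscCall(MatConvert(A, MATMPIDENSE, MAT_INITIAL_MATRIX, &B));   // B is a dense copy of A
     // ...
     PetscCall(MatConvert(B, MATMPIAIJ, MAT_INPLACE_MATRIX, &B));     // convert B back in place
     PetscCall(MatDestroy(&B));
*/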

1338: #if defined(PETSC_HAVE_ELEMENTAL)
1339: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1340: {
1341:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1342:   Mat           mat_elemental;
1343:   PetscScalar  *v;
1344:   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

1346:   PetscFunctionBegin;
1347:   if (reuse == MAT_REUSE_MATRIX) {
1348:     mat_elemental = *newmat;
1349:     PetscCall(MatZeroEntries(*newmat));
1350:   } else {
1351:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1352:     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1353:     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1354:     PetscCall(MatSetUp(mat_elemental));
1355:     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1356:   }

1358:   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1359:   for (i = 0; i < N; i++) cols[i] = i;
1360:   for (i = 0; i < m; i++) rows[i] = rstart + i;

1362:   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1363:   PetscCall(MatDenseGetArray(A, &v));
1364:   PetscCall(MatDenseGetLDA(a->A, &lda));
1365:   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1366:   else {
1367:     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1368:   }
1369:   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1370:   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1371:   PetscCall(MatDenseRestoreArray(A, &v));
1372:   PetscCall(PetscFree2(rows, cols));

1374:   if (reuse == MAT_INPLACE_MATRIX) {
1375:     PetscCall(MatHeaderReplace(A, &mat_elemental));
1376:   } else {
1377:     *newmat = mat_elemental;
1378:   }
1379:   PetscFunctionReturn(PETSC_SUCCESS);
1380: }
1381: #endif

1383: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1384: {
1385:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1387:   PetscFunctionBegin;
1388:   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1389:   PetscFunctionReturn(PETSC_SUCCESS);
1390: }

1392: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1393: {
1394:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1396:   PetscFunctionBegin;
1397:   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1398:   PetscFunctionReturn(PETSC_SUCCESS);
1399: }

1401: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1402: {
1403:   Mat_MPIDense *mat;
1404:   PetscInt      m, nloc, N;

1406:   PetscFunctionBegin;
1407:   PetscCall(MatGetSize(inmat, &m, &N));
1408:   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1409:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1410:     PetscInt sum;

1412:     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1413:     /* Check sum(n) = N */
1414:     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1415:     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

1417:     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1418:     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1419:   }

1421:   /* numeric phase */
1422:   mat = (Mat_MPIDense *)(*outmat)->data;
1423:   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1424:   PetscFunctionReturn(PETSC_SUCCESS);
1425: }
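
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): the routine above is the
   MATMPIDENSE kernel behind MatCreateMPIMatConcatenateSeqMat(), which stacks one sequential matrix
   per rank into a parallel matrix:

     Mat seqblock, Apar;   // seqblock holds this rank's rows as a MATSEQDENSE matrix
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seqblock, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Apar));
     // ... use Apar ...
     PetscCall(MatDestroy(&Apar));
*/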

1427: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1428: {
1429:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1430:   PetscInt      lda;

1432:   PetscFunctionBegin;
1433:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1434:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1435:   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1436:   a->vecinuse = col + 1;
1437:   PetscCall(MatDenseGetLDA(a->A, &lda));
1438:   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1439:   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1440:   *v = a->cvec;
1441:   PetscFunctionReturn(PETSC_SUCCESS);
1442: }

1444: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1445: {
1446:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1448:   PetscFunctionBegin;
1449:   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1450:   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1451:   a->vecinuse = 0;
1452:   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1453:   PetscCall(VecResetArray(a->cvec));
1454:   if (v) *v = NULL;
1455:   PetscFunctionReturn(PETSC_SUCCESS);
1456: }

1458: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1459: {
1460:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1461:   PetscInt      lda;

1463:   PetscFunctionBegin;
1464:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1465:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1466:   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1467:   a->vecinuse = col + 1;
1468:   PetscCall(MatDenseGetLDA(a->A, &lda));
1469:   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1470:   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1471:   PetscCall(VecLockReadPush(a->cvec));
1472:   *v = a->cvec;
1473:   PetscFunctionReturn(PETSC_SUCCESS);
1474: }

1476: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1477: {
1478:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1480:   PetscFunctionBegin;
1481:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1482:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1483:   a->vecinuse = 0;
1484:   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1485:   PetscCall(VecLockReadPop(a->cvec));
1486:   PetscCall(VecResetArray(a->cvec));
1487:   if (v) *v = NULL;
1488:   PetscFunctionReturn(PETSC_SUCCESS);
1489: }

1491: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1492: {
1493:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1494:   PetscInt      lda;

1496:   PetscFunctionBegin;
1497:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1498:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1499:   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1500:   a->vecinuse = col + 1;
1501:   PetscCall(MatDenseGetLDA(a->A, &lda));
1502:   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1503:   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1504:   *v = a->cvec;
1505:   PetscFunctionReturn(PETSC_SUCCESS);
1506: }

1508: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1509: {
1510:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1512:   PetscFunctionBegin;
1513:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1514:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1515:   a->vecinuse = 0;
1516:   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1517:   PetscCall(VecResetArray(a->cvec));
1518:   if (v) *v = NULL;
1519:   PetscFunctionReturn(PETSC_SUCCESS);
1520: }
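
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): the Get/Restore pairs above
   implement the public MatDenseGetColumnVec() family. Only one column vector may be outstanding at a
   time, and each Get must be matched by the corresponding Restore before the matrix is used otherwise:

     Mat       A;        // an assembled MATMPIDENSE matrix
     Vec       col;
     PetscReal nrm;
     PetscCall(MatDenseGetColumnVecRead(A, 0, &col));   // read-only view of column 0
     PetscCall(VecNorm(col, NORM_2, &nrm));
     PetscCall(MatDenseRestoreColumnVecRead(A, 0, &col));
*/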

1522: static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1523: {
1524:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1525:   Mat_MPIDense *c;
1526:   MPI_Comm      comm;
1527:   PetscInt      pbegin, pend;

1529:   PetscFunctionBegin;
1530:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1531:   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1532:   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1533:   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1534:   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1535:   if (!a->cmat) {
1536:     PetscCall(MatCreate(comm, &a->cmat));
1537:     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1538:     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1539:     else {
1540:       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1541:       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1542:       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1543:     }
1544:     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1545:     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1546:   } else {
1547:     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1548:     if (same && a->cmat->rmap->N != A->rmap->N) {
1549:       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1550:       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1551:     }
1552:     if (!same) {
1553:       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1554:       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1555:       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1556:       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1557:       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1558:     }
1559:     if (cend - cbegin != a->cmat->cmap->N) {
1560:       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1561:       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1562:       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1563:       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1564:     }
1565:   }
1566:   c = (Mat_MPIDense *)a->cmat->data;
1567:   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1568:   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

1570:   a->cmat->preallocated = PETSC_TRUE;
1571:   a->cmat->assembled    = PETSC_TRUE;
1572: #if defined(PETSC_HAVE_DEVICE)
1573:   a->cmat->offloadmask = c->A->offloadmask;
1574: #endif
1575:   a->matinuse = cbegin + 1;
1576:   *v          = a->cmat;
1577:   PetscFunctionReturn(PETSC_SUCCESS);
1578: }

1580: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1581: {
1582:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1583:   Mat_MPIDense *c;

1585:   PetscFunctionBegin;
1586:   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1587:   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1588:   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1589:   a->matinuse = 0;
1590:   c           = (Mat_MPIDense *)a->cmat->data;
1591:   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1592:   if (v) *v = NULL;
1593: #if defined(PETSC_HAVE_DEVICE)
1594:   A->offloadmask = a->A->offloadmask;
1595: #endif
1596:   PetscFunctionReturn(PETSC_SUCCESS);
1597: }
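
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): the Get/Restore pair above
   backs the public MatDenseGetSubMatrix() interface, which exposes a contiguous block of rows and
   columns as another dense matrix viewing the same storage:

     Mat      A, Asub;                     // A is an assembled MATMPIDENSE matrix
     PetscInt rbegin, rend, cbegin, cend;  // global bounds chosen by the caller
     PetscCall(MatDenseGetSubMatrix(A, rbegin, rend, cbegin, cend, &Asub));
     PetscCall(MatScale(Asub, 2.0));       // operate on the block in place
     PetscCall(MatDenseRestoreSubMatrix(A, &Asub));
*/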

1599: /*MC
1600:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1602:    Options Database Key:
1603: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

1605:   Level: beginner

1607: .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1608: M*/
1609: PetscErrorCode MatCreate_MPIDense(Mat mat)
1610: {
1611:   Mat_MPIDense *a;

1613:   PetscFunctionBegin;
1614:   PetscCall(PetscNew(&a));
1615:   mat->data   = (void *)a;
1616:   mat->ops[0] = MatOps_Values;

1618:   mat->insertmode = NOT_SET_VALUES;

1620:   /* build cache for off-process entries formed during assembly */
1621:   a->donotstash = PETSC_FALSE;

1623:   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

1625:   /* data used for matrix-vector multiplication */
1626:   a->lvec        = NULL;
1627:   a->Mvctx       = NULL;
1628:   a->roworiented = PETSC_TRUE;

1630:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1631:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1632:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1633:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1634:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1635:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1636:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1637:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1638:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1639:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1640:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1641:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1642:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1643:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1644:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1645:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1646:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1647:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1648:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1649:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1650:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1651: #if defined(PETSC_HAVE_ELEMENTAL)
1652:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1653: #endif
1654: #if defined(PETSC_HAVE_SCALAPACK)
1655:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1656: #endif
1657: #if defined(PETSC_HAVE_CUDA)
1658:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1659: #endif
1660:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1661:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1662:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1663: #if defined(PETSC_HAVE_CUDA)
1664:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1665:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1666: #endif
1667: #if defined(PETSC_HAVE_HIP)
1668:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1669:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1670:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1671: #endif
1672:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1673:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1674:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1675:   PetscFunctionReturn(PETSC_SUCCESS);
1676: }
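
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): the constructor above is
   normally reached through the generic creation path; -mat_type mpidense (or dense) selects it at
   runtime when MatSetFromOptions() is used instead of MatSetType():

     Mat      A;
     PetscInt M = 100, N = 50;   // placeholder global dimensions
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatSetUp(A));
     // ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ...
     PetscCall(MatDestroy(&A));
*/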

1678: /*MC
1679:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1681:    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1682:    and `MATMPIDENSE` otherwise.

1684:    Options Database Key:
1685: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

1687:   Level: beginner

1689: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1690: M*/

1692: /*@C
1693:   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1695:   Collective

1697:   Input Parameters:
1698: + B    - the matrix
1699: - data - optional location of matrix data.  Set to `NULL` for PETSc
1700:    to control all matrix memory allocation.

1702:   Level: intermediate

1704:   Notes:
1705:   The dense format is fully compatible with standard Fortran
1706:   storage by columns.

1708:   The data input variable is intended primarily for Fortran programmers
1709:   who wish to allocate their own matrix memory space.  Most users should
1710:   set `data` to `NULL`.

1712: .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1713: @*/
1714: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1715: {
1716:   PetscFunctionBegin;
1718:   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1719:   PetscFunctionReturn(PETSC_SUCCESS);
1720: }
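
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): supplying user-allocated
   storage through the `data` argument. The buffer is assumed to hold this rank's m local rows of all
   N global columns in column-major order and to remain valid, and owned by the caller, for the life
   of the matrix:

     Mat          B;
     PetscInt     m = 4, N = 10;   // placeholder local row / global column counts
     PetscScalar *mydata;
     PetscCall(PetscCalloc1((size_t)m * N, &mydata));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetSizes(B, m, PETSC_DECIDE, PETSC_DETERMINE, N));
     PetscCall(MatSetType(B, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(B, mydata));
     // ... use B ...
*/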

1722: /*@
1723:   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1724:   array provided by the user. This is useful to avoid copying an array
1725:   into a matrix.

1727:   Not Collective

1729:   Input Parameters:
1730: + mat   - the matrix
1731: - array - the array in column major order

1733:   Level: developer

1735:   Note:
1736:   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1737:   freed when the matrix is destroyed.

1739: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1740:           `MatDenseReplaceArray()`
1741: @*/
1742: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1743: {
1744:   PetscFunctionBegin;
1746:   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1747:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1748: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1749:   mat->offloadmask = PETSC_OFFLOAD_CPU;
1750: #endif
1751:   PetscFunctionReturn(PETSC_SUCCESS);
1752: }
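
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): temporarily substituting
   a caller-owned buffer and then returning to the original storage. `myarray` is assumed to be a
   column-major buffer matching the matrix's local storage layout:

     Mat                A;         // an existing MATDENSE matrix
     const PetscScalar *myarray;   // caller-provided storage; PETSc never frees it
     PetscCall(MatDensePlaceArray(A, myarray));
     // ... operations on A now read and write myarray ...
     PetscCall(MatDenseResetArray(A));  // A uses its original array again
*/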

1754: /*@
1755:   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`

1757:   Not Collective

1759:   Input Parameter:
1760: . mat - the matrix

1762:   Level: developer

1764:   Note:
1765:   You can only call this after a call to `MatDensePlaceArray()`

1767: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1768: @*/
1769: PetscErrorCode MatDenseResetArray(Mat mat)
1770: {
1771:   PetscFunctionBegin;
1773:   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1774:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1775:   PetscFunctionReturn(PETSC_SUCCESS);
1776: }

1778: /*@
1779:   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1780:   array provided by the user. This is useful to avoid copying an array
1781:   into a matrix.

1783:   Not Collective

1785:   Input Parameters:
1786: + mat   - the matrix
1787: - array - the array in column major order

1789:   Level: developer

1791:   Note:
1792:   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1793:   freed by the user. It will be freed when the matrix is destroyed.

1795: .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1796: @*/
1797: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1798: {
1799:   PetscFunctionBegin;
1801:   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1802:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1803: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1804:   mat->offloadmask = PETSC_OFFLOAD_CPU;
1805: #endif
1806:   PetscFunctionReturn(PETSC_SUCCESS);
1807: }
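
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): handing a PetscMalloc()'d
   buffer to the matrix, which then owns it and frees it when the matrix is destroyed. The size
   m * N (local rows times global columns, column-major) is an assumption about the local storage:

     Mat          A;   // an existing MATMPIDENSE matrix
     PetscInt     m, N;
     PetscScalar *newvals;
     PetscCall(MatGetLocalSize(A, &m, NULL));
     PetscCall(MatGetSize(A, NULL, &N));
     PetscCall(PetscMalloc1((size_t)m * N, &newvals));
     // ... fill newvals in column-major order ...
     PetscCall(MatDenseReplaceArray(A, newvals));   // do NOT PetscFree(newvals) afterwards
*/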

1809: /*@C
1810:   MatCreateDense - Creates a matrix in `MATDENSE` format.

1812:   Collective

1814:   Input Parameters:
1815: + comm - MPI communicator
1816: . m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1817: . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1818: . M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1819: . N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1820: - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1821:    to control all matrix memory allocation.

1823:   Output Parameter:
1824: . A - the matrix

1826:   Level: intermediate

1828:   Notes:
1829:   The dense format is fully compatible with standard Fortran
1830:   storage by columns.

1832:   Although local portions of the matrix are stored in column-major
1833:   order, the matrix is partitioned across MPI ranks by row.

1835:   The data input variable is intended primarily for Fortran programmers
1836:   who wish to allocate their own matrix memory space.  Most users should
1837:   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

1839:   The user MUST specify either the local or global matrix dimensions
1840:   (possibly both).

1842: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1843: @*/
1844: PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1845: {
1846:   PetscFunctionBegin;
1847:   PetscCall(MatCreate(comm, A));
1848:   PetscCall(MatSetSizes(*A, m, n, M, N));
1849:   PetscCall(MatSetType(*A, MATDENSE));
1850:   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1851:   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1852:   PetscFunctionReturn(PETSC_SUCCESS);
1853: }
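
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): the most common use lets
   PETSc choose the row distribution and allocate the storage:

     Mat      A;
     PetscInt M = 100, N = 50;   // placeholder global dimensions
     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
     // ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ...
     PetscCall(MatDestroy(&A));
*/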

1855: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1856: {
1857:   Mat           mat;
1858:   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

1860:   PetscFunctionBegin;
1861:   *newmat = NULL;
1862:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1863:   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1864:   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1865:   a = (Mat_MPIDense *)mat->data;

1867:   mat->factortype   = A->factortype;
1868:   mat->assembled    = PETSC_TRUE;
1869:   mat->preallocated = PETSC_TRUE;

1871:   mat->insertmode = NOT_SET_VALUES;
1872:   a->donotstash   = oldmat->donotstash;

1874:   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1875:   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

1877:   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

1879:   *newmat = mat;
1880:   PetscFunctionReturn(PETSC_SUCCESS);
1881: }

1883: static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1884: {
1885:   PetscBool isbinary;
1886: #if defined(PETSC_HAVE_HDF5)
1887:   PetscBool ishdf5;
1888: #endif

1890:   PetscFunctionBegin;
1893:   /* force binary viewer to load .info file if it has not yet done so */
1894:   PetscCall(PetscViewerSetUp(viewer));
1895:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1896: #if defined(PETSC_HAVE_HDF5)
1897:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1898: #endif
1899:   if (isbinary) {
1900:     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1901: #if defined(PETSC_HAVE_HDF5)
1902:   } else if (ishdf5) {
1903:     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
1904: #endif
1905:   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1906:   PetscFunctionReturn(PETSC_SUCCESS);
1907: }
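
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): loading a dense matrix
   from a PETSc binary file; the file name "matrix.dat" is a placeholder:

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/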

1909: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1910: {
1911:   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1912:   Mat           a, b;

1914:   PetscFunctionBegin;
1915:   a = matA->A;
1916:   b = matB->A;
1917:   PetscCall(MatEqual(a, b, flag));
1918:   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1919:   PetscFunctionReturn(PETSC_SUCCESS);
1920: }

1922: static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1923: {
1924:   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

1926:   PetscFunctionBegin;
1927:   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1928:   PetscCall(MatDestroy(&atb->atb));
1929:   PetscCall(PetscFree(atb));
1930:   PetscFunctionReturn(PETSC_SUCCESS);
1931: }

1933: static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1934: {
1935:   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

1937:   PetscFunctionBegin;
1938:   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1939:   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1940:   PetscCall(PetscFree(abt));
1941:   PetscFunctionReturn(PETSC_SUCCESS);
1942: }

1944: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1945: {
1946:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
1947:   Mat_TransMatMultDense *atb;
1948:   MPI_Comm               comm;
1949:   PetscMPIInt            size, *recvcounts;
1950:   PetscScalar           *carray, *sendbuf;
1951:   const PetscScalar     *atbarray;
1952:   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
1953:   const PetscInt        *ranges;

1955:   PetscFunctionBegin;
1956:   MatCheckProduct(C, 3);
1957:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
1958:   atb        = (Mat_TransMatMultDense *)C->product->data;
1959:   recvcounts = atb->recvcounts;
1960:   sendbuf    = atb->sendbuf;

1962:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1963:   PetscCallMPI(MPI_Comm_size(comm, &size));

1965:   /* compute atbarray = aseq^T * bseq */
1966:   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

1968:   PetscCall(MatGetOwnershipRanges(C, &ranges));

1970:   /* arrange atbarray into sendbuf */
1971:   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
1972:   PetscCall(MatDenseGetLDA(atb->atb, &lda));
1973:   for (proc = 0, k = 0; proc < size; proc++) {
1974:     for (j = 0; j < cN; j++) {
1975:       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
1976:     }
1977:   }
1978:   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

1980:   /* sum the contributions from all ranks and scatter to each rank its local rows of C */
1981:   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
1982:   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
1983:   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
1984:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1985:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1986:   PetscFunctionReturn(PETSC_SUCCESS);
1987: }

1989: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1990: {
1991:   MPI_Comm               comm;
1992:   PetscMPIInt            size;
1993:   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1994:   Mat_TransMatMultDense *atb;
1995:   PetscBool              cisdense = PETSC_FALSE;
1996:   PetscInt               i;
1997:   const PetscInt        *ranges;

1999:   PetscFunctionBegin;
2000:   MatCheckProduct(C, 4);
2001:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2002:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2003:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2004:     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2005:   }

2007:   /* create matrix product C */
2008:   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2009: #if defined(PETSC_HAVE_CUDA)
2010:   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2011: #endif
2012: #if defined(PETSC_HAVE_HIP)
2013:   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2014: #endif
2015:   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2016:   PetscCall(MatSetUp(C));

2018:   /* create data structure for reuse C */
2019:   PetscCallMPI(MPI_Comm_size(comm, &size));
2020:   PetscCall(PetscNew(&atb));
2021:   cM = C->rmap->N;
2022:   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2023:   PetscCall(MatGetOwnershipRanges(C, &ranges));
2024:   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

2026:   C->product->data    = atb;
2027:   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2028:   PetscFunctionReturn(PETSC_SUCCESS);
2029: }
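
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): the symbolic/numeric pair
   above runs underneath MatTransposeMatMult() (equivalently a MATPRODUCT_AtB product) for two
   MATMPIDENSE matrices that share the same row distribution:

     Mat A, B, C;   // assembled MATMPIDENSE matrices with identical row layouts
     PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));   // C = A^T * B
     // ... change entries of A or B, then reuse the symbolic data:
     PetscCall(MatTransposeMatMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, &C));
     PetscCall(MatDestroy(&C));
*/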

2031: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2032: {
2033:   MPI_Comm               comm;
2034:   PetscMPIInt            i, size;
2035:   PetscInt               maxRows, bufsiz;
2036:   PetscMPIInt            tag;
2037:   PetscInt               alg;
2038:   Mat_MatTransMultDense *abt;
2039:   Mat_Product           *product = C->product;
2040:   PetscBool              flg;

2042:   PetscFunctionBegin;
2043:   MatCheckProduct(C, 4);
2044:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2045:   /* check local size of A and B */
2046:   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

2048:   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2049:   alg = flg ? 0 : 1;

2051:   /* setup matrix product C */
2052:   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2053:   PetscCall(MatSetType(C, MATMPIDENSE));
2054:   PetscCall(MatSetUp(C));
2055:   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

2057:   /* create data structure for reuse C */
2058:   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2059:   PetscCallMPI(MPI_Comm_size(comm, &size));
2060:   PetscCall(PetscNew(&abt));
2061:   abt->tag = tag;
2062:   abt->alg = alg;
2063:   switch (alg) {
2064:   case 1: /* alg: "cyclic" */
2065:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2066:     bufsiz = A->cmap->N * maxRows;
2067:     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
2068:     break;
2069:   default: /* alg: "allgatherv" */
2070:     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2071:     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
2072:     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2073:     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2074:     break;
2075:   }

2077:   C->product->data                = abt;
2078:   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2079:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2080:   PetscFunctionReturn(PETSC_SUCCESS);
2081: }

2083: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2084: {
2085:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2086:   Mat_MatTransMultDense *abt;
2087:   MPI_Comm               comm;
2088:   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2089:   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2090:   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2091:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2092:   const PetscScalar     *av, *bv;
2093:   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2094:   MPI_Request            reqs[2];
2095:   const PetscInt        *ranges;

2097:   PetscFunctionBegin;
2098:   MatCheckProduct(C, 3);
2099:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2100:   abt = (Mat_MatTransMultDense *)C->product->data;
2101:   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2102:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2103:   PetscCallMPI(MPI_Comm_size(comm, &size));
2104:   PetscCall(MatDenseGetArrayRead(a->A, &av));
2105:   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2106:   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2107:   PetscCall(MatDenseGetLDA(a->A, &i));
2108:   PetscCall(PetscBLASIntCast(i, &alda));
2109:   PetscCall(MatDenseGetLDA(b->A, &i));
2110:   PetscCall(PetscBLASIntCast(i, &blda));
2111:   PetscCall(MatDenseGetLDA(c->A, &i));
2112:   PetscCall(PetscBLASIntCast(i, &clda));
2113:   PetscCall(MatGetOwnershipRanges(B, &ranges));
2114:   bn = B->rmap->n;
2115:   if (blda == bn) {
2116:     sendbuf = (PetscScalar *)bv;
2117:   } else {
2118:     sendbuf = abt->buf[0];
2119:     for (k = 0, i = 0; i < cK; i++) {
2120:       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2121:     }
2122:   }
2123:   if (size > 1) {
2124:     sendto   = (rank + size - 1) % size;
2125:     recvfrom = (rank + size + 1) % size;
2126:   } else {
2127:     sendto = recvfrom = 0;
2128:   }
2129:   PetscCall(PetscBLASIntCast(cK, &ck));
2130:   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2131:   recvisfrom = rank;
2132:   for (i = 0; i < size; i++) {
2133:     /* the previous receive and send have completed, so the buffers can be read/modified */
2134:     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
2135:     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

2137:     if (nextrecvisfrom != rank) {
2138:       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2139:       sendsiz = cK * bn;
2140:       recvsiz = cK * nextbn;
2141:       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2142:       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2143:       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2144:     }

2146:     /* local aseq * sendbuf^T */
2147:     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2148:     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

2150:     if (nextrecvisfrom != rank) {
2151:       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2152:       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2153:     }
2154:     bn         = nextbn;
2155:     recvisfrom = nextrecvisfrom;
2156:     sendbuf    = recvbuf;
2157:   }
2158:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2159:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2160:   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2161:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2162:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2163:   PetscFunctionReturn(PETSC_SUCCESS);
2164: }

2166: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2167: {
2168:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2169:   Mat_MatTransMultDense *abt;
2170:   MPI_Comm               comm;
2171:   PetscMPIInt            size;
2172:   PetscScalar           *cv, *sendbuf, *recvbuf;
2173:   const PetscScalar     *av, *bv;
2174:   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2175:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2176:   PetscBLASInt           cm, cn, ck, alda, clda;

2178:   PetscFunctionBegin;
2179:   MatCheckProduct(C, 3);
2180:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2181:   abt = (Mat_MatTransMultDense *)C->product->data;
2182:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2183:   PetscCallMPI(MPI_Comm_size(comm, &size));
2184:   PetscCall(MatDenseGetArrayRead(a->A, &av));
2185:   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2186:   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2187:   PetscCall(MatDenseGetLDA(a->A, &i));
2188:   PetscCall(PetscBLASIntCast(i, &alda));
2189:   PetscCall(MatDenseGetLDA(b->A, &blda));
2190:   PetscCall(MatDenseGetLDA(c->A, &i));
2191:   PetscCall(PetscBLASIntCast(i, &clda));
2192:   /* copy transpose of B into buf[0] */
2193:   bn      = B->rmap->n;
2194:   sendbuf = abt->buf[0];
2195:   recvbuf = abt->buf[1];
2196:   for (k = 0, j = 0; j < bn; j++) {
2197:     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2198:   }
2199:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2200:   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2201:   PetscCall(PetscBLASIntCast(cK, &ck));
2202:   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2203:   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2204:   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2205:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2207:   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2208:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2209:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2210:   PetscFunctionReturn(PETSC_SUCCESS);
2211: }

2213: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2214: {
2215:   Mat_MatTransMultDense *abt;

2217:   PetscFunctionBegin;
2218:   MatCheckProduct(C, 3);
2219:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2220:   abt = (Mat_MatTransMultDense *)C->product->data;
2221:   switch (abt->alg) {
2222:   case 1:
2223:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2224:     break;
2225:   default:
2226:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2227:     break;
2228:   }
2229:   PetscFunctionReturn(PETSC_SUCCESS);
2230: }

2232: #if defined(PETSC_HAVE_ELEMENTAL)
2233: static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2234: {
2235:   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

2237:   PetscFunctionBegin;
2238:   PetscCall(MatDestroy(&ab->Ce));
2239:   PetscCall(MatDestroy(&ab->Ae));
2240:   PetscCall(MatDestroy(&ab->Be));
2241:   PetscCall(PetscFree(ab));
2242:   PetscFunctionReturn(PETSC_SUCCESS);
2243: }

2245: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2246: {
2247:   Mat_MatMultDense *ab;

2249:   PetscFunctionBegin;
2250:   MatCheckProduct(C, 3);
2251:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2252:   ab = (Mat_MatMultDense *)C->product->data;
2253:   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2254:   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2255:   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2256:   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2257:   PetscFunctionReturn(PETSC_SUCCESS);
2258: }

2260: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2261: {
2262:   Mat               Ae, Be, Ce;
2263:   Mat_MatMultDense *ab;

2265:   PetscFunctionBegin;
2266:   MatCheckProduct(C, 4);
2267:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2268:   /* check local size of A and B */
2269:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2270:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
2271:   }

2273:   /* create elemental matrices Ae and Be */
2274:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2275:   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2276:   PetscCall(MatSetType(Ae, MATELEMENTAL));
2277:   PetscCall(MatSetUp(Ae));
2278:   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

2280:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2281:   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2282:   PetscCall(MatSetType(Be, MATELEMENTAL));
2283:   PetscCall(MatSetUp(Be));
2284:   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

2286:   /* compute symbolic Ce = Ae*Be */
2287:   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2288:   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

2290:   /* setup C */
2291:   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2292:   PetscCall(MatSetType(C, MATDENSE));
2293:   PetscCall(MatSetUp(C));

2295:   /* create data structure for reuse Cdense */
2296:   PetscCall(PetscNew(&ab));
2297:   ab->Ae = Ae;
2298:   ab->Be = Be;
2299:   ab->Ce = Ce;

2301:   C->product->data       = ab;
2302:   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2303:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2304:   PetscFunctionReturn(PETSC_SUCCESS);
2305: }
2306: #endif

2308: #if defined(PETSC_HAVE_ELEMENTAL)
2309: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2310: {
2311:   PetscFunctionBegin;
2312:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2313:   C->ops->productsymbolic = MatProductSymbolic_AB;
2314:   PetscFunctionReturn(PETSC_SUCCESS);
2315: }
2316: #endif

2318: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2319: {
2320:   Mat_Product *product = C->product;
2321:   Mat          A = product->A, B = product->B;

2323:   PetscFunctionBegin;
2324:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2325:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2326:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2327:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2328:   PetscFunctionReturn(PETSC_SUCCESS);
2329: }

2331: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2332: {
2333:   Mat_Product *product     = C->product;
2334:   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2335:   PetscInt     alg, nalg = 2;
2336:   PetscBool    flg = PETSC_FALSE;

2338:   PetscFunctionBegin;
2339:   /* Set default algorithm */
2340:   alg = 0; /* default is allgatherv */
2341:   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2342:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

2344:   /* Get runtime option */
2345:   if (product->api_user) {
2346:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2347:     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2348:     PetscOptionsEnd();
2349:   } else {
2350:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2351:     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2352:     PetscOptionsEnd();
2353:   }
2354:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

2356:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2357:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2358:   PetscFunctionReturn(PETSC_SUCCESS);
2359: }
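
/*
   Illustrative sketch (editorial addition, not part of the PETSc sources): driving C = A*B^T through
   the MatProduct interface; the algorithm can be chosen in code or with -mat_product_algorithm
   (allgatherv or cyclic) at runtime:

     Mat A, B, C;   // assembled MATMPIDENSE matrices with matching local column sizes
     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C, "cyclic"));   // optional; the default is "allgatherv"
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/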

2361: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2362: {
2363:   Mat_Product *product = C->product;

2365:   PetscFunctionBegin;
2366:   switch (product->type) {
2367: #if defined(PETSC_HAVE_ELEMENTAL)
2368:   case MATPRODUCT_AB:
2369:     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2370:     break;
2371: #endif
2372:   case MATPRODUCT_AtB:
2373:     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2374:     break;
2375:   case MATPRODUCT_ABt:
2376:     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2377:     break;
2378:   default:
2379:     break;
2380:   }
2381:   PetscFunctionReturn(PETSC_SUCCESS);
2382: }