Actual source code: blockmat.c

  1: /*
  2:    This provides a matrix that consists of Mats
  3: */

  5: #include <petsc/private/matimpl.h>
  6: #include <../src/mat/impls/baij/seq/baij.h>

  8: typedef struct {
  9:   SEQAIJHEADER(Mat);
 10:   SEQBAIJHEADER;
 11:   Mat *diags;

 13:   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
 14: } Mat_BlockMat;

 16: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
 17: {
 18:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
 19:   PetscScalar       *x;
 20:   const Mat         *v;
 21:   const PetscScalar *b;
 22:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
 23:   const PetscInt    *idx;
 24:   IS                 row, col;
 25:   MatFactorInfo      info;
 26:   Vec                left = a->left, right = a->right, middle = a->middle;
 27:   Mat               *diag;

 29:   PetscFunctionBegin;
 30:   its = its * lits;
 31:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
 32:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
 33:   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
 34:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
 35:   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

 37:   if (!a->diags) {
 38:     PetscCall(PetscMalloc1(mbs, &a->diags));
 39:     PetscCall(MatFactorInfoInitialize(&info));
 40:     for (i = 0; i < mbs; i++) {
 41:       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
 42:       PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
 43:       PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
 44:       PetscCall(ISDestroy(&row));
 45:       PetscCall(ISDestroy(&col));
 46:     }
 47:     PetscCall(VecDuplicate(bb, &a->workb));
 48:   }
 49:   diag = a->diags;

 51:   PetscCall(VecSet(xx, 0.0));
 52:   PetscCall(VecGetArray(xx, &x));
 53:   /* copy right hand side because it must be modified during iteration */
 54:   PetscCall(VecCopy(bb, a->workb));
 55:   PetscCall(VecGetArrayRead(a->workb, &b));

 57:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 58:   while (its--) {
 59:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
 60:       for (i = 0; i < mbs; i++) {
 61:         n   = a->i[i + 1] - a->i[i] - 1;
 62:         idx = a->j + a->i[i] + 1;
 63:         v   = a->a + a->i[i] + 1;

 65:         PetscCall(VecSet(left, 0.0));
 66:         for (j = 0; j < n; j++) {
 67:           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
 68:           PetscCall(MatMultAdd(v[j], right, left, left));
 69:           PetscCall(VecResetArray(right));
 70:         }
 71:         PetscCall(VecPlaceArray(right, b + i * bs));
 72:         PetscCall(VecAYPX(left, -1.0, right));
 73:         PetscCall(VecResetArray(right));

 75:         PetscCall(VecPlaceArray(right, x + i * bs));
 76:         PetscCall(MatSolve(diag[i], left, right));

 78:         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
 79:         for (j = 0; j < n; j++) {
 80:           PetscCall(MatMultTranspose(v[j], right, left));
 81:           PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
 82:           PetscCall(VecAXPY(middle, -1.0, left));
 83:           PetscCall(VecResetArray(middle));
 84:         }
 85:         PetscCall(VecResetArray(right));
 86:       }
 87:     }
 88:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
 89:       for (i = mbs - 1; i >= 0; i--) {
 90:         n   = a->i[i + 1] - a->i[i] - 1;
 91:         idx = a->j + a->i[i] + 1;
 92:         v   = a->a + a->i[i] + 1;

 94:         PetscCall(VecSet(left, 0.0));
 95:         for (j = 0; j < n; j++) {
 96:           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
 97:           PetscCall(MatMultAdd(v[j], right, left, left));
 98:           PetscCall(VecResetArray(right));
 99:         }
100:         PetscCall(VecPlaceArray(right, b + i * bs));
101:         PetscCall(VecAYPX(left, -1.0, right));
102:         PetscCall(VecResetArray(right));

104:         PetscCall(VecPlaceArray(right, x + i * bs));
105:         PetscCall(MatSolve(diag[i], left, right));
106:         PetscCall(VecResetArray(right));
107:       }
108:     }
109:   }
110:   PetscCall(VecRestoreArray(xx, &x));
111:   PetscCall(VecRestoreArrayRead(a->workb, &b));
112:   PetscFunctionReturn(PETSC_SUCCESS);
113: }

115: static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
116: {
117:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
118:   PetscScalar       *x;
119:   const Mat         *v;
120:   const PetscScalar *b;
121:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
122:   const PetscInt    *idx;
123:   IS                 row, col;
124:   MatFactorInfo      info;
125:   Vec                left = a->left, right = a->right;
126:   Mat               *diag;

128:   PetscFunctionBegin;
129:   its = its * lits;
130:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
131:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
132:   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
133:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

135:   if (!a->diags) {
136:     PetscCall(PetscMalloc1(mbs, &a->diags));
137:     PetscCall(MatFactorInfoInitialize(&info));
138:     for (i = 0; i < mbs; i++) {
139:       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
140:       PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
141:       PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
142:       PetscCall(ISDestroy(&row));
143:       PetscCall(ISDestroy(&col));
144:     }
145:   }
146:   diag = a->diags;

148:   PetscCall(VecSet(xx, 0.0));
149:   PetscCall(VecGetArray(xx, &x));
150:   PetscCall(VecGetArrayRead(bb, &b));

152:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
153:   while (its--) {
154:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
155:       for (i = 0; i < mbs; i++) {
156:         n   = a->i[i + 1] - a->i[i];
157:         idx = a->j + a->i[i];
158:         v   = a->a + a->i[i];

160:         PetscCall(VecSet(left, 0.0));
161:         for (j = 0; j < n; j++) {
162:           if (idx[j] != i) {
163:             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
164:             PetscCall(MatMultAdd(v[j], right, left, left));
165:             PetscCall(VecResetArray(right));
166:           }
167:         }
168:         PetscCall(VecPlaceArray(right, b + i * bs));
169:         PetscCall(VecAYPX(left, -1.0, right));
170:         PetscCall(VecResetArray(right));

172:         PetscCall(VecPlaceArray(right, x + i * bs));
173:         PetscCall(MatSolve(diag[i], left, right));
174:         PetscCall(VecResetArray(right));
175:       }
176:     }
177:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178:       for (i = mbs - 1; i >= 0; i--) {
179:         n   = a->i[i + 1] - a->i[i];
180:         idx = a->j + a->i[i];
181:         v   = a->a + a->i[i];

183:         PetscCall(VecSet(left, 0.0));
184:         for (j = 0; j < n; j++) {
185:           if (idx[j] != i) {
186:             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
187:             PetscCall(MatMultAdd(v[j], right, left, left));
188:             PetscCall(VecResetArray(right));
189:           }
190:         }
191:         PetscCall(VecPlaceArray(right, b + i * bs));
192:         PetscCall(VecAYPX(left, -1.0, right));
193:         PetscCall(VecResetArray(right));

195:         PetscCall(VecPlaceArray(right, x + i * bs));
196:         PetscCall(MatSolve(diag[i], left, right));
197:         PetscCall(VecResetArray(right));
198:       }
199:     }
200:   }
201:   PetscCall(VecRestoreArray(xx, &x));
202:   PetscCall(VecRestoreArrayRead(bb, &b));
203:   PetscFunctionReturn(PETSC_SUCCESS);
204: }

206: static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
207: {
208:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
209:   PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
210:   PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
211:   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
212:   PetscInt      ridx, cidx;
213:   PetscBool     roworiented = a->roworiented;
214:   MatScalar     value;
215:   Mat          *ap, *aa = a->a;

217:   PetscFunctionBegin;
218:   for (k = 0; k < m; k++) { /* loop over added rows */
219:     row  = im[k];
220:     brow = row / bs;
221:     if (row < 0) continue;
222:     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
223:     rp   = aj + ai[brow];
224:     ap   = aa + ai[brow];
225:     rmax = imax[brow];
226:     nrow = ailen[brow];
227:     low  = 0;
228:     high = nrow;
229:     for (l = 0; l < n; l++) { /* loop over added columns */
230:       if (in[l] < 0) continue;
231:       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
232:       col  = in[l];
233:       bcol = col / bs;
234:       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
235:       ridx = row % bs;
236:       cidx = col % bs;
237:       if (roworiented) value = v[l + k * n];
238:       else value = v[k + l * m];

240:       if (col <= lastcol) low = 0;
241:       else high = nrow;
242:       lastcol = col;
243:       while (high - low > 7) {
244:         t = (low + high) / 2;
245:         if (rp[t] > bcol) high = t;
246:         else low = t;
247:       }
248:       for (i = low; i < high; i++) {
249:         if (rp[i] > bcol) break;
250:         if (rp[i] == bcol) goto noinsert1;
251:       }
252:       if (nonew == 1) goto noinsert1;
253:       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
254:       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
255:       N = nrow++ - 1;
256:       high++;
257:       /* shift up all the later entries in this row */
258:       for (ii = N; ii >= i; ii--) {
259:         rp[ii + 1] = rp[ii];
260:         ap[ii + 1] = ap[ii];
261:       }
262:       if (N >= i) ap[i] = NULL;
263:       rp[i] = bcol;
264:       a->nz++;
265:       A->nonzerostate++;
266:     noinsert1:;
267:       if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
268:       PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
269:       low = i;
270:     }
271:     ailen[brow] = nrow;
272:   }
273:   PetscFunctionReturn(PETSC_SUCCESS);
274: }

276: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
277: {
278:   Mat                tmpA;
279:   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
280:   const PetscInt    *cols;
281:   const PetscScalar *values;
282:   PetscBool          flg = PETSC_FALSE, notdone;
283:   Mat_SeqAIJ        *a;
284:   Mat_BlockMat      *amat;

286:   PetscFunctionBegin;
287:   /* force binary viewer to load .info file if it has not yet done so */
288:   PetscCall(PetscViewerSetUp(viewer));
289:   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
290:   PetscCall(MatSetType(tmpA, MATSEQAIJ));
291:   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));

293:   PetscCall(MatGetLocalSize(tmpA, &m, &n));
294:   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
295:   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
296:   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
297:   PetscOptionsEnd();

299:   /* Determine number of nonzero blocks for each block row */
300:   a   = (Mat_SeqAIJ *)tmpA->data;
301:   mbs = m / bs;
302:   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
303:   PetscCall(PetscArrayzero(lens, mbs));

305:   for (i = 0; i < mbs; i++) {
306:     for (j = 0; j < bs; j++) {
307:       ii[j]    = a->j + a->i[i * bs + j];
308:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
309:     }

311:     currentcol = -1;
312:     while (PETSC_TRUE) {
313:       notdone = PETSC_FALSE;
314:       nextcol = 1000000000;
315:       for (j = 0; j < bs; j++) {
316:         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
317:           ii[j]++;
318:           ilens[j]--;
319:         }
320:         if (ilens[j] > 0) {
321:           notdone = PETSC_TRUE;
322:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
323:         }
324:       }
325:       if (!notdone) break;
326:       if (!flg || (nextcol >= i)) lens[i]++;
327:       currentcol = nextcol;
328:     }
329:   }

331:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
332:   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
333:   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
334:   amat = (Mat_BlockMat *)(newmat)->data;

336:   /* preallocate the submatrices */
337:   PetscCall(PetscMalloc1(bs, &llens));
338:   for (i = 0; i < mbs; i++) { /* loops for block rows */
339:     for (j = 0; j < bs; j++) {
340:       ii[j]    = a->j + a->i[i * bs + j];
341:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
342:     }

344:     currentcol = 1000000000;
345:     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
346:       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
347:     }

349:     while (PETSC_TRUE) { /* loops over blocks in block row */
350:       notdone = PETSC_FALSE;
351:       nextcol = 1000000000;
352:       PetscCall(PetscArrayzero(llens, bs));
353:       for (j = 0; j < bs; j++) {                              /* loop over rows in block */
354:         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
355:           ii[j]++;
356:           ilens[j]--;
357:           llens[j]++;
358:         }
359:         if (ilens[j] > 0) {
360:           notdone = PETSC_TRUE;
361:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
362:         }
363:       }
364:       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
365:       if (!flg || currentcol >= i) {
366:         amat->j[cnt] = currentcol;
367:         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
368:       }

370:       if (!notdone) break;
371:       currentcol = nextcol;
372:     }
373:     amat->ilen[i] = lens[i];
374:   }

376:   PetscCall(PetscFree3(lens, ii, ilens));
377:   PetscCall(PetscFree(llens));

379:   /* copy over the matrix, one row at a time */
380:   for (i = 0; i < m; i++) {
381:     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
382:     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
383:     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
384:   }
385:   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
386:   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
387:   PetscFunctionReturn(PETSC_SUCCESS);
388: }

390: static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
391: {
392:   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
393:   const char       *name;
394:   PetscViewerFormat format;

396:   PetscFunctionBegin;
397:   PetscCall(PetscObjectGetName((PetscObject)A, &name));
398:   PetscCall(PetscViewerGetFormat(viewer, &format));
399:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
400:     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
401:     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
402:   }
403:   PetscFunctionReturn(PETSC_SUCCESS);
404: }

406: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
407: {
408:   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
409:   PetscInt      i;

411:   PetscFunctionBegin;
412:   PetscCall(VecDestroy(&bmat->right));
413:   PetscCall(VecDestroy(&bmat->left));
414:   PetscCall(VecDestroy(&bmat->middle));
415:   PetscCall(VecDestroy(&bmat->workb));
416:   if (bmat->diags) {
417:     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
418:   }
419:   if (bmat->a) {
420:     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
421:   }
422:   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
423:   PetscCall(PetscFree(mat->data));
424:   PetscFunctionReturn(PETSC_SUCCESS);
425: }

427: static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
428: {
429:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
430:   PetscScalar  *xx, *yy;
431:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
432:   Mat          *aa;

434:   PetscFunctionBegin;
435:   /*
436:      Standard CSR multiply except each entry is a Mat
437:   */
438:   PetscCall(VecGetArray(x, &xx));

440:   PetscCall(VecSet(y, 0.0));
441:   PetscCall(VecGetArray(y, &yy));
442:   aj = bmat->j;
443:   aa = bmat->a;
444:   ii = bmat->i;
445:   for (i = 0; i < m; i++) {
446:     jrow = ii[i];
447:     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
448:     n = ii[i + 1] - jrow;
449:     for (j = 0; j < n; j++) {
450:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
451:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
452:       PetscCall(VecResetArray(bmat->right));
453:       jrow++;
454:     }
455:     PetscCall(VecResetArray(bmat->left));
456:   }
457:   PetscCall(VecRestoreArray(x, &xx));
458:   PetscCall(VecRestoreArray(y, &yy));
459:   PetscFunctionReturn(PETSC_SUCCESS);
460: }

462: static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
463: {
464:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
465:   PetscScalar  *xx, *yy;
466:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
467:   Mat          *aa;

469:   PetscFunctionBegin;
470:   /*
471:      Standard CSR multiply except each entry is a Mat
472:   */
473:   PetscCall(VecGetArray(x, &xx));

475:   PetscCall(VecSet(y, 0.0));
476:   PetscCall(VecGetArray(y, &yy));
477:   aj = bmat->j;
478:   aa = bmat->a;
479:   ii = bmat->i;
480:   for (i = 0; i < m; i++) {
481:     jrow = ii[i];
482:     n    = ii[i + 1] - jrow;
483:     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
484:     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
485:     /* if we ALWAYS required a diagonal entry then could remove this if test */
486:     if (aj[jrow] == i) {
487:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
488:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
489:       PetscCall(VecResetArray(bmat->right));
490:       jrow++;
491:       n--;
492:     }
493:     for (j = 0; j < n; j++) {
494:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
495:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
496:       PetscCall(VecResetArray(bmat->right));

498:       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
499:       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
500:       PetscCall(VecResetArray(bmat->right));
501:       jrow++;
502:     }
503:     PetscCall(VecResetArray(bmat->left));
504:     PetscCall(VecResetArray(bmat->middle));
505:   }
506:   PetscCall(VecRestoreArray(x, &xx));
507:   PetscCall(VecRestoreArray(y, &yy));
508:   PetscFunctionReturn(PETSC_SUCCESS);
509: }

511: static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
512: {
513:   PetscFunctionBegin;
514:   PetscFunctionReturn(PETSC_SUCCESS);
515: }

517: static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
518: {
519:   PetscFunctionBegin;
520:   PetscFunctionReturn(PETSC_SUCCESS);
521: }

523: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
524: {
525:   PetscFunctionBegin;
526:   PetscFunctionReturn(PETSC_SUCCESS);
527: }

529: /*
530:      Adds diagonal pointers to sparse matrix structure.
531: */
532: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
533: {
534:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
535:   PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

537:   PetscFunctionBegin;
538:   if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
539:   for (i = 0; i < mbs; i++) {
540:     a->diag[i] = a->i[i + 1];
541:     for (j = a->i[i]; j < a->i[i + 1]; j++) {
542:       if (a->j[j] == i) {
543:         a->diag[i] = j;
544:         break;
545:       }
546:     }
547:   }
548:   PetscFunctionReturn(PETSC_SUCCESS);
549: }

551: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
552: {
553:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
554:   Mat_SeqAIJ   *c;
555:   PetscInt      i, k, first, step, lensi, nrows, ncols;
556:   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
557:   PetscScalar  *a_new;
558:   Mat           C, *aa = a->a;
559:   PetscBool     stride, equal;

561:   PetscFunctionBegin;
562:   PetscCall(ISEqual(isrow, iscol, &equal));
563:   PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
564:   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
565:   PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
566:   PetscCall(ISStrideGetInfo(iscol, &first, &step));
567:   PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

569:   PetscCall(ISGetLocalSize(isrow, &nrows));
570:   ncols = nrows;

572:   /* create submatrix */
573:   if (scall == MAT_REUSE_MATRIX) {
574:     PetscInt n_cols, n_rows;
575:     C = *B;
576:     PetscCall(MatGetSize(C, &n_rows, &n_cols));
577:     PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
578:     PetscCall(MatZeroEntries(C));
579:   } else {
580:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
581:     PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
582:     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
583:     else PetscCall(MatSetType(C, MATSEQAIJ));
584:     PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
585:     PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
586:   }
587:   c = (Mat_SeqAIJ *)C->data;

589:   /* loop over rows inserting into submatrix */
590:   a_new = c->a;
591:   j_new = c->j;
592:   i_new = c->i;

594:   for (i = 0; i < nrows; i++) {
595:     lensi = ailen[i];
596:     for (k = 0; k < lensi; k++) {
597:       *j_new++ = *aj++;
598:       PetscCall(MatGetValue(*aa++, first, first, a_new++));
599:     }
600:     i_new[i + 1] = i_new[i] + lensi;
601:     c->ilen[i]   = lensi;
602:   }

604:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
605:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
606:   *B = C;
607:   PetscFunctionReturn(PETSC_SUCCESS);
608: }

610: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
611: {
612:   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
613:   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
614:   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
615:   Mat          *aa = a->a, *ap;

617:   PetscFunctionBegin;
618:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

620:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
621:   for (i = 1; i < m; i++) {
622:     /* move each row back by the amount of empty slots (fshift) before it*/
623:     fshift += imax[i - 1] - ailen[i - 1];
624:     rmax = PetscMax(rmax, ailen[i]);
625:     if (fshift) {
626:       ip = aj + ai[i];
627:       ap = aa + ai[i];
628:       N  = ailen[i];
629:       for (j = 0; j < N; j++) {
630:         ip[j - fshift] = ip[j];
631:         ap[j - fshift] = ap[j];
632:       }
633:     }
634:     ai[i] = ai[i - 1] + ailen[i - 1];
635:   }
636:   if (m) {
637:     fshift += imax[m - 1] - ailen[m - 1];
638:     ai[m] = ai[m - 1] + ailen[m - 1];
639:   }
640:   /* reset ilen and imax for each row */
641:   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
642:   a->nz = ai[m];
643:   for (i = 0; i < a->nz; i++) {
644:     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
645:     PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
646:     PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
647:   }
648:   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
649:   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
650:   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

652:   A->info.mallocs += a->reallocs;
653:   a->reallocs         = 0;
654:   A->info.nz_unneeded = (double)fshift;
655:   a->rmax             = rmax;
656:   PetscCall(MatMarkDiagonal_BlockMat(A));
657:   PetscFunctionReturn(PETSC_SUCCESS);
658: }

660: static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
661: {
662:   PetscFunctionBegin;
663:   if (opt == MAT_SYMMETRIC && flg) {
664:     A->ops->sor  = MatSOR_BlockMat_Symmetric;
665:     A->ops->mult = MatMult_BlockMat_Symmetric;
666:   } else {
667:     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
668:   }
669:   PetscFunctionReturn(PETSC_SUCCESS);
670: }

672: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
673:                                        NULL,
674:                                        NULL,
675:                                        MatMult_BlockMat,
676:                                        /*  4*/ MatMultAdd_BlockMat,
677:                                        MatMultTranspose_BlockMat,
678:                                        MatMultTransposeAdd_BlockMat,
679:                                        NULL,
680:                                        NULL,
681:                                        NULL,
682:                                        /* 10*/ NULL,
683:                                        NULL,
684:                                        NULL,
685:                                        MatSOR_BlockMat,
686:                                        NULL,
687:                                        /* 15*/ NULL,
688:                                        NULL,
689:                                        NULL,
690:                                        NULL,
691:                                        NULL,
692:                                        /* 20*/ NULL,
693:                                        MatAssemblyEnd_BlockMat,
694:                                        MatSetOption_BlockMat,
695:                                        NULL,
696:                                        /* 24*/ NULL,
697:                                        NULL,
698:                                        NULL,
699:                                        NULL,
700:                                        NULL,
701:                                        /* 29*/ NULL,
702:                                        NULL,
703:                                        NULL,
704:                                        NULL,
705:                                        NULL,
706:                                        /* 34*/ NULL,
707:                                        NULL,
708:                                        NULL,
709:                                        NULL,
710:                                        NULL,
711:                                        /* 39*/ NULL,
712:                                        NULL,
713:                                        NULL,
714:                                        NULL,
715:                                        NULL,
716:                                        /* 44*/ NULL,
717:                                        NULL,
718:                                        MatShift_Basic,
719:                                        NULL,
720:                                        NULL,
721:                                        /* 49*/ NULL,
722:                                        NULL,
723:                                        NULL,
724:                                        NULL,
725:                                        NULL,
726:                                        /* 54*/ NULL,
727:                                        NULL,
728:                                        NULL,
729:                                        NULL,
730:                                        NULL,
731:                                        /* 59*/ MatCreateSubMatrix_BlockMat,
732:                                        MatDestroy_BlockMat,
733:                                        MatView_BlockMat,
734:                                        NULL,
735:                                        NULL,
736:                                        /* 64*/ NULL,
737:                                        NULL,
738:                                        NULL,
739:                                        NULL,
740:                                        NULL,
741:                                        /* 69*/ NULL,
742:                                        NULL,
743:                                        NULL,
744:                                        NULL,
745:                                        NULL,
746:                                        /* 74*/ NULL,
747:                                        NULL,
748:                                        NULL,
749:                                        NULL,
750:                                        NULL,
751:                                        /* 79*/ NULL,
752:                                        NULL,
753:                                        NULL,
754:                                        NULL,
755:                                        MatLoad_BlockMat,
756:                                        /* 84*/ NULL,
757:                                        NULL,
758:                                        NULL,
759:                                        NULL,
760:                                        NULL,
761:                                        /* 89*/ NULL,
762:                                        NULL,
763:                                        NULL,
764:                                        NULL,
765:                                        NULL,
766:                                        /* 94*/ NULL,
767:                                        NULL,
768:                                        NULL,
769:                                        NULL,
770:                                        NULL,
771:                                        /* 99*/ NULL,
772:                                        NULL,
773:                                        NULL,
774:                                        NULL,
775:                                        NULL,
776:                                        /*104*/ NULL,
777:                                        NULL,
778:                                        NULL,
779:                                        NULL,
780:                                        NULL,
781:                                        /*109*/ NULL,
782:                                        NULL,
783:                                        NULL,
784:                                        NULL,
785:                                        NULL,
786:                                        /*114*/ NULL,
787:                                        NULL,
788:                                        NULL,
789:                                        NULL,
790:                                        NULL,
791:                                        /*119*/ NULL,
792:                                        NULL,
793:                                        NULL,
794:                                        NULL,
795:                                        NULL,
796:                                        /*124*/ NULL,
797:                                        NULL,
798:                                        NULL,
799:                                        NULL,
800:                                        NULL,
801:                                        /*129*/ NULL,
802:                                        NULL,
803:                                        NULL,
804:                                        NULL,
805:                                        NULL,
806:                                        /*134*/ NULL,
807:                                        NULL,
808:                                        NULL,
809:                                        NULL,
810:                                        NULL,
811:                                        /*139*/ NULL,
812:                                        NULL,
813:                                        NULL,
814:                                        NULL,
815:                                        NULL,
816:                                        /*144*/ NULL,
817:                                        NULL,
818:                                        NULL,
819:                                        NULL,
820:                                        NULL,
821:                                        NULL,
822:                                        /*150*/ NULL,
823:                                        NULL,
824:                                        NULL};

826: /*@C
827:   MatBlockMatSetPreallocation - For good matrix assembly performance
828:   the user should preallocate the matrix storage by setting the parameter nz
829:   (or the array nnz).  By setting these parameters accurately, performance
830:   during matrix assembly can be increased by more than a factor of 50.

832:   Collective

834:   Input Parameters:
835: + B   - The matrix
836: . bs  - size of each block in matrix
837: . nz  - number of nonzeros per block row (same for all rows)
838: - nnz - array containing the number of nonzeros in the various block rows
839:          (possibly different for each row) or `NULL`

841:   Level: intermediate

843:   Notes:
844:   If `nnz` is given then `nz` is ignored

846:   Specify the preallocated storage with either `nz` or `nnz` (not both).
847:   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
848:   allocation.

850: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
851: @*/
852: PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
853: {
854:   PetscFunctionBegin;
855:   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
856:   PetscFunctionReturn(PETSC_SUCCESS);
857: }

859: static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
860: {
861:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
862:   PetscInt      i;

864:   PetscFunctionBegin;
865:   PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
866:   PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
867:   PetscCall(PetscLayoutSetUp(A->rmap));
868:   PetscCall(PetscLayoutSetUp(A->cmap));
869:   PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

871:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
872:   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
873:   if (nnz) {
874:     for (i = 0; i < A->rmap->n / bs; i++) {
875:       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
876:       PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
877:     }
878:   }
879:   bmat->mbs = A->rmap->n / bs;

881:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
882:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
883:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

885:   if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
886:   if (PetscLikely(nnz)) {
887:     nz = 0;
888:     for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
889:       bmat->imax[i] = nnz[i];
890:       nz += nnz[i];
891:     }
892:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");

894:   /* bmat->ilen will count nonzeros in each row so far. */
895:   PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

897:   /* allocate the matrix space */
898:   PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
899:   PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
900:   bmat->i[0] = 0;
901:   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
902:   bmat->singlemalloc = PETSC_TRUE;
903:   bmat->free_a       = PETSC_TRUE;
904:   bmat->free_ij      = PETSC_TRUE;

906:   bmat->nz            = 0;
907:   bmat->maxnz         = nz;
908:   A->info.nz_unneeded = (double)bmat->maxnz;
909:   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
910:   PetscFunctionReturn(PETSC_SUCCESS);
911: }

913: /*MC
914:    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
915:                  consisting of (usually) sparse blocks.

917:   Level: advanced

919: .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
920: M*/

922: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
923: {
924:   Mat_BlockMat *b;

926:   PetscFunctionBegin;
927:   PetscCall(PetscNew(&b));
928:   A->data         = (void *)b;
929:   A->ops[0]       = MatOps_Values;
930:   A->assembled    = PETSC_TRUE;
931:   A->preallocated = PETSC_FALSE;
932:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

934:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
935:   PetscFunctionReturn(PETSC_SUCCESS);
936: }

938: /*@C
939:   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

941:   Collective

943:   Input Parameters:
944: + comm - MPI communicator
945: . m    - number of rows
946: . n    - number of columns
947: . bs   - size of each submatrix
948: . nz   - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
949: - nnz  - expected number of nonzers per block row if known (use `NULL` otherwise)

951:   Output Parameter:
952: . A - the matrix

954:   Level: intermediate

956:   Notes:
957:   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
958:   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

960:   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

962:   Developer Notes:
963:   I don't like the name, it is not `MATNESTMAT`

965: .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
966: @*/
967: PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
968: {
969:   PetscFunctionBegin;
970:   PetscCall(MatCreate(comm, A));
971:   PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
972:   PetscCall(MatSetType(*A, MATBLOCKMAT));
973:   PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
974:   PetscFunctionReturn(PETSC_SUCCESS);
975: }