Actual source code: matrart.c

  1: /*
  2:   Defines projective product routines where A is a SeqAIJ matrix
  3:           C = R * A * R^T
  4: */

  6: #include <../src/mat/impls/aij/seq/aij.h>
  7: #include <../src/mat/utils/freespace.h>
  8: #include <../src/mat/impls/dense/seq/dense.h>

 10: static PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
 11: {
 12:   Mat_RARt *rart = (Mat_RARt *)data;

 14:   PetscFunctionBegin;
 15:   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
 16:   PetscCall(MatDestroy(&rart->Rt));
 17:   PetscCall(MatDestroy(&rart->RARt));
 18:   PetscCall(MatDestroy(&rart->ARt));
 19:   PetscCall(PetscFree(rart->work));
 20:   if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
 21:   PetscCall(PetscFree(rart));
 22:   PetscFunctionReturn(PETSC_SUCCESS);
 23: }

 25: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
 26: {
 27:   Mat                  P;
 28:   Mat_RARt            *rart;
 29:   MatColoring          coloring;
 30:   MatTransposeColoring matcoloring;
 31:   ISColoring           iscoloring;
 32:   Mat                  Rt_dense, RARt_dense;

 34:   PetscFunctionBegin;
 35:   MatCheckProduct(C, 4);
 36:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
 37:   /* create symbolic P=Rt */
 38:   PetscCall(MatTransposeSymbolic(R, &P));

 40:   /* get symbolic C=Pt*A*P */
 41:   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
 42:   PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs)));
 43:   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;

 45:   /* create a supporting struct */
 46:   PetscCall(PetscNew(&rart));
 47:   C->product->data    = rart;
 48:   C->product->destroy = MatDestroy_SeqAIJ_RARt;

 50:   /* Use coloring  */
 51:   /* inode causes memory problem */
 52:   PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));

 54:   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
 55:   PetscCall(MatColoringCreate(C, &coloring));
 56:   PetscCall(MatColoringSetDistance(coloring, 2));
 57:   PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
 58:   PetscCall(MatColoringSetFromOptions(coloring));
 59:   PetscCall(MatColoringApply(coloring, &iscoloring));
 60:   PetscCall(MatColoringDestroy(&coloring));
 61:   PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));

 63:   rart->matcoloring = matcoloring;
 64:   PetscCall(ISColoringDestroy(&iscoloring));

 66:   /* Create Rt_dense */
 67:   PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
 68:   PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
 69:   PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
 70:   PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));

 72:   Rt_dense->assembled = PETSC_TRUE;
 73:   rart->Rt            = Rt_dense;

 75:   /* Create RARt_dense = R*A*Rt_dense */
 76:   PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
 77:   PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
 78:   PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
 79:   PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));

 81:   rart->RARt = RARt_dense;

 83:   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
 84:   PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));

 86:   /* clean up */
 87:   PetscCall(MatDestroy(&P));

 89: #if defined(PETSC_USE_INFO)
 90:   {
 91:     Mat_SeqAIJ *c       = (Mat_SeqAIJ *)C->data;
 92:     PetscReal   density = (PetscReal)c->nz / (RARt_dense->rmap->n * RARt_dense->cmap->n);
 93:     PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
 94:     PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
 95:   }
 96: #endif
 97:   PetscFunctionReturn(PETSC_SUCCESS);
 98: }

100: /*
101:  RAB = R * A * B, R and A in seqaij format, B in dense format;
102: */
103: static PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
104: {
105:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
106:   PetscScalar        r1, r2, r3, r4;
107:   const PetscScalar *b, *b1, *b2, *b3, *b4;
108:   MatScalar         *aa, *ra;
109:   PetscInt           cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
110:   PetscInt           am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
111:   PetscScalar       *d, *c, *c2, *c3, *c4;
112:   PetscInt          *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
113:   PetscInt           rm2 = 2 * rm, rm3 = 3 * rm, colrm;

115:   PetscFunctionBegin;
116:   if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS);
117:   PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
118:   PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
119:   PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
120:   PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);

122:   { /*
123:      This approach is not as good as original ones (will be removed later), but it reveals that
124:      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
125:      */
126:     PetscBool via_matmatmult = PETSC_FALSE;
127:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
128:     if (via_matmatmult) {
129:       Mat AB_den = NULL;
130:       PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
131:       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
132:       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
133:       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
134:       PetscCall(MatDestroy(&AB_den));
135:       PetscFunctionReturn(PETSC_SUCCESS);
136:     }
137:   }

139:   PetscCall(MatDenseGetArrayRead(B, &b));
140:   PetscCall(MatDenseGetArray(RAB, &d));
141:   b1 = b;
142:   b2 = b1 + bm;
143:   b3 = b2 + bm;
144:   b4 = b3 + bm;
145:   c  = work;
146:   c2 = c + am;
147:   c3 = c2 + am;
148:   c4 = c3 + am;
149:   for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
150:     for (i = 0; i < am; i++) {            /* over rows of A in those columns */
151:       r1 = r2 = r3 = r4 = 0.0;
152:       n                 = ai[i + 1] - ai[i];
153:       aj                = a->j + ai[i];
154:       aa                = a->a + ai[i];
155:       for (j = 0; j < n; j++) {
156:         r1 += (*aa) * b1[*aj];
157:         r2 += (*aa) * b2[*aj];
158:         r3 += (*aa) * b3[*aj];
159:         r4 += (*aa++) * b4[*aj++];
160:       }
161:       c[i]       = r1;
162:       c[am + i]  = r2;
163:       c[am2 + i] = r3;
164:       c[am3 + i] = r4;
165:     }
166:     b1 += bm4;
167:     b2 += bm4;
168:     b3 += bm4;
169:     b4 += bm4;

171:     /* RAB[:,col] = R*C[:,col] */
172:     colrm = col * rm;
173:     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
174:       r1 = r2 = r3 = r4 = 0.0;
175:       n                 = r->i[i + 1] - r->i[i];
176:       rj                = r->j + r->i[i];
177:       ra                = r->a + r->i[i];
178:       for (j = 0; j < n; j++) {
179:         r1 += (*ra) * c[*rj];
180:         r2 += (*ra) * c2[*rj];
181:         r3 += (*ra) * c3[*rj];
182:         r4 += (*ra++) * c4[*rj++];
183:       }
184:       d[colrm + i]       = r1;
185:       d[colrm + rm + i]  = r2;
186:       d[colrm + rm2 + i] = r3;
187:       d[colrm + rm3 + i] = r4;
188:     }
189:   }
190:   for (; col < cn; col++) {    /* over extra columns of C */
191:     for (i = 0; i < am; i++) { /* over rows of A in those columns */
192:       r1 = 0.0;
193:       n  = a->i[i + 1] - a->i[i];
194:       aj = a->j + a->i[i];
195:       aa = a->a + a->i[i];
196:       for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
197:       c[i] = r1;
198:     }
199:     b1 += bm;

201:     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
202:       r1 = 0.0;
203:       n  = r->i[i + 1] - r->i[i];
204:       rj = r->j + r->i[i];
205:       ra = r->a + r->i[i];
206:       for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
207:       d[col * rm + i] = r1;
208:     }
209:   }
210:   PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));

212:   PetscCall(MatDenseRestoreArrayRead(B, &b));
213:   PetscCall(MatDenseRestoreArray(RAB, &d));
214:   PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
215:   PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
216:   PetscFunctionReturn(PETSC_SUCCESS);
217: }

219: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
220: {
221:   Mat_RARt            *rart;
222:   MatTransposeColoring matcoloring;
223:   Mat                  Rt, RARt;

225:   PetscFunctionBegin;
226:   MatCheckProduct(C, 3);
227:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
228:   rart = (Mat_RARt *)C->product->data;

230:   /* Get dense Rt by Apply MatTransposeColoring to R */
231:   matcoloring = rart->matcoloring;
232:   Rt          = rart->Rt;
233:   PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));

235:   /* Get dense RARt = R*A*Rt -- dominates! */
236:   RARt = rart->RARt;
237:   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));

239:   /* Recover C from C_dense */
240:   PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
241:   PetscFunctionReturn(PETSC_SUCCESS);
242: }

244: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
245: {
246:   Mat       ARt;
247:   Mat_RARt *rart;
248:   char     *alg;

250:   PetscFunctionBegin;
251:   MatCheckProduct(C, 4);
252:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
253:   /* create symbolic ARt = A*R^T  */
254:   PetscCall(MatProductCreate(A, R, NULL, &ARt));
255:   PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
256:   PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
257:   PetscCall(MatProductSetFill(ARt, fill));
258:   PetscCall(MatProductSetFromOptions(ARt));
259:   PetscCall(MatProductSymbolic(ARt));

261:   /* compute symbolic C = R*ARt */
262:   /* set algorithm for C = R*ARt */
263:   PetscCall(PetscStrallocpy(C->product->alg, &alg));
264:   PetscCall(MatProductSetAlgorithm(C, "sorted"));
265:   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
266:   /* resume original algorithm for C */
267:   PetscCall(MatProductSetAlgorithm(C, alg));
268:   PetscCall(PetscFree(alg));

270:   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;

272:   PetscCall(PetscNew(&rart));
273:   rart->ARt           = ARt;
274:   C->product->data    = rart;
275:   C->product->destroy = MatDestroy_SeqAIJ_RARt;
276:   PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
277:   PetscFunctionReturn(PETSC_SUCCESS);
278: }

280: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
281: {
282:   Mat_RARt *rart;

284:   PetscFunctionBegin;
285:   MatCheckProduct(C, 3);
286:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
287:   rart = (Mat_RARt *)C->product->data;
288:   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
289:   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
290:   PetscFunctionReturn(PETSC_SUCCESS);
291: }

293: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
294: {
295:   Mat       Rt;
296:   Mat_RARt *rart;

298:   PetscFunctionBegin;
299:   MatCheckProduct(C, 4);
300:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
301:   PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
302:   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));

304:   PetscCall(PetscNew(&rart));
305:   rart->data          = C->product->data;
306:   rart->destroy       = C->product->destroy;
307:   rart->Rt            = Rt;
308:   C->product->data    = rart;
309:   C->product->destroy = MatDestroy_SeqAIJ_RARt;
310:   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
311:   PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
312:   PetscFunctionReturn(PETSC_SUCCESS);
313: }

315: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
316: {
317:   Mat_RARt *rart;

319:   PetscFunctionBegin;
320:   MatCheckProduct(C, 3);
321:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
322:   rart = (Mat_RARt *)C->product->data;
323:   PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
324:   /* MatMatMatMultSymbolic used a different data */
325:   C->product->data = rart->data;
326:   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
327:   C->product->data = rart;
328:   PetscFunctionReturn(PETSC_SUCCESS);
329: }

331: PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
332: {
333:   const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
334:   PetscInt    alg         = 0; /* set default algorithm */

336:   PetscFunctionBegin;
337:   if (scall == MAT_INITIAL_MATRIX) {
338:     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
339:     PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
340:     PetscOptionsEnd();

342:     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
343:     PetscCall(MatCreate(PETSC_COMM_SELF, C));
344:     switch (alg) {
345:     case 1:
346:       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
347:       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
348:       break;
349:     case 2:
350:       /* via coloring_rart: apply coloring C = R*A*R^T                          */
351:       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
352:       break;
353:     default:
354:       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
355:       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
356:       break;
357:     }
358:     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
359:   }

361:   PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
362:   PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
363:   PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
364:   PetscFunctionReturn(PETSC_SUCCESS);
365: }

367: PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
368: {
369:   Mat_Product        *product = C->product;
370:   Mat                 A = product->A, R = product->B;
371:   MatProductAlgorithm alg  = product->alg;
372:   PetscReal           fill = product->fill;
373:   PetscBool           flg;

375:   PetscFunctionBegin;
376:   PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
377:   if (flg) {
378:     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
379:     goto next;
380:   }

382:   PetscCall(PetscStrcmp(alg, "r*art", &flg));
383:   if (flg) {
384:     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
385:     goto next;
386:   }

388:   PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
389:   if (flg) {
390:     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
391:     goto next;
392:   }

394:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");

396: next:
397:   C->ops->productnumeric = MatProductNumeric_RARt;
398:   PetscFunctionReturn(PETSC_SUCCESS);
399: }