Actual source code: matrart.c
1: /*
2: Defines projective product routines where A is a SeqAIJ matrix
3: C = R * A * R^T
4: */
6: #include <../src/mat/impls/aij/seq/aij.h>
7: #include <../src/mat/utils/freespace.h>
8: #include <../src/mat/impls/dense/seq/dense.h>
10: static PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
11: {
12: Mat_RARt *rart = (Mat_RARt *)data;
14: PetscFunctionBegin;
15: PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
16: PetscCall(MatDestroy(&rart->Rt));
17: PetscCall(MatDestroy(&rart->RARt));
18: PetscCall(MatDestroy(&rart->ARt));
19: PetscCall(PetscFree(rart->work));
20: if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
21: PetscCall(PetscFree(rart));
22: PetscFunctionReturn(PETSC_SUCCESS);
23: }
25: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
26: {
27: Mat P;
28: Mat_RARt *rart;
29: MatColoring coloring;
30: MatTransposeColoring matcoloring;
31: ISColoring iscoloring;
32: Mat Rt_dense, RARt_dense;
34: PetscFunctionBegin;
35: MatCheckProduct(C, 4);
36: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
37: /* create symbolic P=Rt */
38: PetscCall(MatTransposeSymbolic(R, &P));
40: /* get symbolic C=Pt*A*P */
41: PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
42: PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs)));
43: C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
45: /* create a supporting struct */
46: PetscCall(PetscNew(&rart));
47: C->product->data = rart;
48: C->product->destroy = MatDestroy_SeqAIJ_RARt;
50: /* Use coloring */
51: /* inode causes memory problem */
52: PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
54: /* Create MatTransposeColoring from symbolic C=R*A*R^T */
55: PetscCall(MatColoringCreate(C, &coloring));
56: PetscCall(MatColoringSetDistance(coloring, 2));
57: PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
58: PetscCall(MatColoringSetFromOptions(coloring));
59: PetscCall(MatColoringApply(coloring, &iscoloring));
60: PetscCall(MatColoringDestroy(&coloring));
61: PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
63: rart->matcoloring = matcoloring;
64: PetscCall(ISColoringDestroy(&iscoloring));
66: /* Create Rt_dense */
67: PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
68: PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
69: PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
70: PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
72: Rt_dense->assembled = PETSC_TRUE;
73: rart->Rt = Rt_dense;
75: /* Create RARt_dense = R*A*Rt_dense */
76: PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
77: PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
78: PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
79: PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
81: rart->RARt = RARt_dense;
83: /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
84: PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
86: /* clean up */
87: PetscCall(MatDestroy(&P));
89: #if defined(PETSC_USE_INFO)
90: {
91: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
92: PetscReal density = (PetscReal)c->nz / (RARt_dense->rmap->n * RARt_dense->cmap->n);
93: PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
94: PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
95: }
96: #endif
97: PetscFunctionReturn(PETSC_SUCCESS);
98: }
100: /*
101: RAB = R * A * B, R and A in seqaij format, B in dense format;
102: */
103: static PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
104: {
105: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
106: PetscScalar r1, r2, r3, r4;
107: const PetscScalar *b, *b1, *b2, *b3, *b4;
108: MatScalar *aa, *ra;
109: PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
110: PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
111: PetscScalar *d, *c, *c2, *c3, *c4;
112: PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
113: PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm;
115: PetscFunctionBegin;
116: if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS);
117: PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
118: PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
119: PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
120: PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
122: { /*
123: This approach is not as good as original ones (will be removed later), but it reveals that
124: AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
125: */
126: PetscBool via_matmatmult = PETSC_FALSE;
127: PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
128: if (via_matmatmult) {
129: Mat AB_den = NULL;
130: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
131: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
132: PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
133: PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
134: PetscCall(MatDestroy(&AB_den));
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
137: }
139: PetscCall(MatDenseGetArrayRead(B, &b));
140: PetscCall(MatDenseGetArray(RAB, &d));
141: b1 = b;
142: b2 = b1 + bm;
143: b3 = b2 + bm;
144: b4 = b3 + bm;
145: c = work;
146: c2 = c + am;
147: c3 = c2 + am;
148: c4 = c3 + am;
149: for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
150: for (i = 0; i < am; i++) { /* over rows of A in those columns */
151: r1 = r2 = r3 = r4 = 0.0;
152: n = ai[i + 1] - ai[i];
153: aj = a->j + ai[i];
154: aa = a->a + ai[i];
155: for (j = 0; j < n; j++) {
156: r1 += (*aa) * b1[*aj];
157: r2 += (*aa) * b2[*aj];
158: r3 += (*aa) * b3[*aj];
159: r4 += (*aa++) * b4[*aj++];
160: }
161: c[i] = r1;
162: c[am + i] = r2;
163: c[am2 + i] = r3;
164: c[am3 + i] = r4;
165: }
166: b1 += bm4;
167: b2 += bm4;
168: b3 += bm4;
169: b4 += bm4;
171: /* RAB[:,col] = R*C[:,col] */
172: colrm = col * rm;
173: for (i = 0; i < rm; i++) { /* over rows of R in those columns */
174: r1 = r2 = r3 = r4 = 0.0;
175: n = r->i[i + 1] - r->i[i];
176: rj = r->j + r->i[i];
177: ra = r->a + r->i[i];
178: for (j = 0; j < n; j++) {
179: r1 += (*ra) * c[*rj];
180: r2 += (*ra) * c2[*rj];
181: r3 += (*ra) * c3[*rj];
182: r4 += (*ra++) * c4[*rj++];
183: }
184: d[colrm + i] = r1;
185: d[colrm + rm + i] = r2;
186: d[colrm + rm2 + i] = r3;
187: d[colrm + rm3 + i] = r4;
188: }
189: }
190: for (; col < cn; col++) { /* over extra columns of C */
191: for (i = 0; i < am; i++) { /* over rows of A in those columns */
192: r1 = 0.0;
193: n = a->i[i + 1] - a->i[i];
194: aj = a->j + a->i[i];
195: aa = a->a + a->i[i];
196: for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
197: c[i] = r1;
198: }
199: b1 += bm;
201: for (i = 0; i < rm; i++) { /* over rows of R in those columns */
202: r1 = 0.0;
203: n = r->i[i + 1] - r->i[i];
204: rj = r->j + r->i[i];
205: ra = r->a + r->i[i];
206: for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
207: d[col * rm + i] = r1;
208: }
209: }
210: PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
212: PetscCall(MatDenseRestoreArrayRead(B, &b));
213: PetscCall(MatDenseRestoreArray(RAB, &d));
214: PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
215: PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
216: PetscFunctionReturn(PETSC_SUCCESS);
217: }
219: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
220: {
221: Mat_RARt *rart;
222: MatTransposeColoring matcoloring;
223: Mat Rt, RARt;
225: PetscFunctionBegin;
226: MatCheckProduct(C, 3);
227: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
228: rart = (Mat_RARt *)C->product->data;
230: /* Get dense Rt by Apply MatTransposeColoring to R */
231: matcoloring = rart->matcoloring;
232: Rt = rart->Rt;
233: PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
235: /* Get dense RARt = R*A*Rt -- dominates! */
236: RARt = rart->RARt;
237: PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
239: /* Recover C from C_dense */
240: PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
241: PetscFunctionReturn(PETSC_SUCCESS);
242: }
244: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
245: {
246: Mat ARt;
247: Mat_RARt *rart;
248: char *alg;
250: PetscFunctionBegin;
251: MatCheckProduct(C, 4);
252: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
253: /* create symbolic ARt = A*R^T */
254: PetscCall(MatProductCreate(A, R, NULL, &ARt));
255: PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
256: PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
257: PetscCall(MatProductSetFill(ARt, fill));
258: PetscCall(MatProductSetFromOptions(ARt));
259: PetscCall(MatProductSymbolic(ARt));
261: /* compute symbolic C = R*ARt */
262: /* set algorithm for C = R*ARt */
263: PetscCall(PetscStrallocpy(C->product->alg, &alg));
264: PetscCall(MatProductSetAlgorithm(C, "sorted"));
265: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
266: /* resume original algorithm for C */
267: PetscCall(MatProductSetAlgorithm(C, alg));
268: PetscCall(PetscFree(alg));
270: C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
272: PetscCall(PetscNew(&rart));
273: rart->ARt = ARt;
274: C->product->data = rart;
275: C->product->destroy = MatDestroy_SeqAIJ_RARt;
276: PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
277: PetscFunctionReturn(PETSC_SUCCESS);
278: }
280: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
281: {
282: Mat_RARt *rart;
284: PetscFunctionBegin;
285: MatCheckProduct(C, 3);
286: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
287: rart = (Mat_RARt *)C->product->data;
288: PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
289: PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
293: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
294: {
295: Mat Rt;
296: Mat_RARt *rart;
298: PetscFunctionBegin;
299: MatCheckProduct(C, 4);
300: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
301: PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
302: PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
304: PetscCall(PetscNew(&rart));
305: rart->data = C->product->data;
306: rart->destroy = C->product->destroy;
307: rart->Rt = Rt;
308: C->product->data = rart;
309: C->product->destroy = MatDestroy_SeqAIJ_RARt;
310: C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
311: PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
312: PetscFunctionReturn(PETSC_SUCCESS);
313: }
315: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
316: {
317: Mat_RARt *rart;
319: PetscFunctionBegin;
320: MatCheckProduct(C, 3);
321: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
322: rart = (Mat_RARt *)C->product->data;
323: PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
324: /* MatMatMatMultSymbolic used a different data */
325: C->product->data = rart->data;
326: PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
327: C->product->data = rart;
328: PetscFunctionReturn(PETSC_SUCCESS);
329: }
331: PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
332: {
333: const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
334: PetscInt alg = 0; /* set default algorithm */
336: PetscFunctionBegin;
337: if (scall == MAT_INITIAL_MATRIX) {
338: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
339: PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
340: PetscOptionsEnd();
342: PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
343: PetscCall(MatCreate(PETSC_COMM_SELF, C));
344: switch (alg) {
345: case 1:
346: /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
347: PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
348: break;
349: case 2:
350: /* via coloring_rart: apply coloring C = R*A*R^T */
351: PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
352: break;
353: default:
354: /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
355: PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
356: break;
357: }
358: PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
359: }
361: PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
362: PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
363: PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
364: PetscFunctionReturn(PETSC_SUCCESS);
365: }
367: PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
368: {
369: Mat_Product *product = C->product;
370: Mat A = product->A, R = product->B;
371: MatProductAlgorithm alg = product->alg;
372: PetscReal fill = product->fill;
373: PetscBool flg;
375: PetscFunctionBegin;
376: PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
377: if (flg) {
378: PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
379: goto next;
380: }
382: PetscCall(PetscStrcmp(alg, "r*art", &flg));
383: if (flg) {
384: PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
385: goto next;
386: }
388: PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
389: if (flg) {
390: PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
391: goto next;
392: }
394: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
396: next:
397: C->ops->productnumeric = MatProductNumeric_RARt;
398: PetscFunctionReturn(PETSC_SUCCESS);
399: }