Actual source code: bddcscalingbasic.c
1: #include <petsc/private/pcbddcimpl.h>
2: #include <petsc/private/pcbddcprivateimpl.h>
3: #include <petscblaslapack.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
6: /* prototypes for deluxe functions */
7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X)
14: {
15: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
16: const PetscScalar *b;
17: PetscScalar *x;
18: PetscInt n;
19: PetscBLASInt nrhs, info, m;
20: PetscBool flg;
22: PetscFunctionBegin;
23: PetscCall(PetscBLASIntCast(A->rmap->n, &m));
24: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
25: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
26: PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
27: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
29: PetscCall(MatGetSize(B, NULL, &n));
30: PetscCall(PetscBLASIntCast(n, &nrhs));
31: PetscCall(MatDenseGetArrayRead(B, &b));
32: PetscCall(MatDenseGetArray(X, &x));
33: PetscCall(PetscArraycpy(x, b, m * nrhs));
34: PetscCall(MatDenseRestoreArrayRead(B, &b));
36: PetscCheck(A->factortype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported");
37: PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
38: PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve");
40: PetscCall(MatDenseRestoreArray(X, &x));
41: PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
42: PetscFunctionReturn(PETSC_SUCCESS);
43: }
45: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
46: {
47: PC_IS *pcis = (PC_IS *)pc->data;
48: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
50: PetscFunctionBegin;
51: /* Apply partition of unity */
52: PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector));
53: PetscCall(VecSet(global_vector, 0.0));
54: PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
55: PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
56: PetscFunctionReturn(PETSC_SUCCESS);
57: }
59: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
60: {
61: PC_IS *pcis = (PC_IS *)pc->data;
62: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
63: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
65: PetscFunctionBegin;
66: PetscCall(VecSet(pcbddc->work_scaling, 0.0));
67: PetscCall(VecSet(y, 0.0));
68: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
69: PetscInt i;
70: const PetscScalar *array_x, *array_D;
71: PetscScalar *array;
72: PetscCall(VecGetArrayRead(x, &array_x));
73: PetscCall(VecGetArrayRead(pcis->D, &array_D));
74: PetscCall(VecGetArray(pcbddc->work_scaling, &array));
75: for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]];
76: PetscCall(VecRestoreArray(pcbddc->work_scaling, &array));
77: PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
78: PetscCall(VecRestoreArrayRead(x, &array_x));
79: }
80: /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
81: if (deluxe_ctx->seq_mat) {
82: PetscInt i;
83: for (i = 0; i < deluxe_ctx->seq_n; i++) {
84: if (deluxe_ctx->change) {
85: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
86: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
87: if (deluxe_ctx->change_with_qr) {
88: Mat change;
90: PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
91: PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
92: } else {
93: PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
94: }
95: } else {
96: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
97: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
98: }
99: PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
100: if (deluxe_ctx->seq_mat_inv_sum[i]) {
101: PetscScalar *x;
103: PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x));
104: PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x));
105: PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x));
106: PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
107: PetscCall(VecResetArray(deluxe_ctx->seq_work1[i]));
108: }
109: if (deluxe_ctx->change) {
110: Mat change;
112: PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
113: PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
114: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
115: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
116: } else {
117: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
118: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
119: }
120: }
121: }
122: /* put local boundary part in global vector */
123: PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
124: PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
125: PetscFunctionReturn(PETSC_SUCCESS);
126: }
128: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
129: {
130: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
132: PetscFunctionBegin;
136: PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
137: PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector));
138: PetscFunctionReturn(PETSC_SUCCESS);
139: }
141: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
142: {
143: PC_IS *pcis = (PC_IS *)pc->data;
145: PetscFunctionBegin;
146: PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
147: PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
148: /* Apply partition of unity */
149: PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector));
150: PetscFunctionReturn(PETSC_SUCCESS);
151: }
153: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
154: {
155: PC_IS *pcis = (PC_IS *)pc->data;
156: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
157: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
159: PetscFunctionBegin;
160: /* get local boundary part of global vector */
161: PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
162: PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
163: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
164: PetscInt i;
165: PetscScalar *array_y;
166: const PetscScalar *array_D;
167: PetscCall(VecGetArray(y, &array_y));
168: PetscCall(VecGetArrayRead(pcis->D, &array_D));
169: for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
170: PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
171: PetscCall(VecRestoreArray(y, &array_y));
172: }
173: /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
174: if (deluxe_ctx->seq_mat) {
175: PetscInt i;
176: for (i = 0; i < deluxe_ctx->seq_n; i++) {
177: if (deluxe_ctx->change) {
178: Mat change;
180: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
181: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
182: PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
183: PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
184: } else {
185: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
186: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
187: }
188: if (deluxe_ctx->seq_mat_inv_sum[i]) {
189: PetscScalar *x;
191: PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x));
192: PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x));
193: PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x));
194: PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
195: PetscCall(VecResetArray(deluxe_ctx->seq_work2[i]));
196: }
197: PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
198: if (deluxe_ctx->change) {
199: if (deluxe_ctx->change_with_qr) {
200: Mat change;
202: PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
203: PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
204: } else {
205: PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
206: }
207: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
208: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
209: } else {
210: PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
211: PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
212: }
213: }
214: }
215: PetscFunctionReturn(PETSC_SUCCESS);
216: }
218: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
219: {
220: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
222: PetscFunctionBegin;
226: PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
227: PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector));
228: PetscFunctionReturn(PETSC_SUCCESS);
229: }
231: PetscErrorCode PCBDDCScalingSetUp(PC pc)
232: {
233: PC_IS *pcis = (PC_IS *)pc->data;
234: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
236: PetscFunctionBegin;
238: PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
239: /* create work vector for the operator */
240: PetscCall(VecDestroy(&pcbddc->work_scaling));
241: PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling));
242: /* always rebuild pcis->D */
243: if (pcis->use_stiffness_scaling) {
244: PetscScalar *a;
245: PetscInt i, n;
247: PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N));
248: PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
249: PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
250: PetscCall(VecAbs(pcis->D));
251: PetscCall(VecGetLocalSize(pcis->D, &n));
252: PetscCall(VecGetArray(pcis->D, &a));
253: for (i = 0; i < n; i++)
254: if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0;
255: PetscCall(VecRestoreArray(pcis->D, &a));
256: }
257: PetscCall(VecSet(pcis->vec1_global, 0.0));
258: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
259: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
260: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
261: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
262: PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B));
263: /* now setup */
264: if (pcbddc->use_deluxe_scaling) {
265: if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc));
266: PetscCall(PCBDDCScalingSetUp_Deluxe(pc));
267: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe));
268: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe));
269: } else {
270: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic));
271: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic));
272: }
273: PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
275: /* test */
276: if (pcbddc->dbg_flag) {
277: Mat B0_B = NULL;
278: Vec B0_Bv = NULL, B0_Bv2 = NULL;
279: Vec vec2_global;
280: PetscViewer viewer = pcbddc->dbg_viewer;
281: PetscReal error;
283: /* extension -> from local to parallel */
284: PetscCall(VecSet(pcis->vec1_global, 0.0));
285: PetscCall(VecSetRandom(pcis->vec1_B, NULL));
286: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
287: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
288: PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global));
289: PetscCall(VecCopy(pcis->vec1_global, vec2_global));
290: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
291: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
292: if (pcbddc->benign_n) {
293: IS is_dummy;
295: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
296: PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
297: PetscCall(ISDestroy(&is_dummy));
298: PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv));
299: PetscCall(VecDuplicate(B0_Bv, &B0_Bv2));
300: PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv));
301: }
302: PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global));
303: if (pcbddc->benign_saddle_point) {
304: PetscReal errorl = 0.;
305: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
306: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
307: if (pcbddc->benign_n) {
308: PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2));
309: PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2));
310: PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl));
311: }
312: PetscCall(MPIU_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc)));
313: PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error));
314: }
315: PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global));
316: PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
317: PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error));
318: PetscCall(VecDestroy(&vec2_global));
320: /* restriction -> from parallel to local */
321: PetscCall(VecSet(pcis->vec1_global, 0.0));
322: PetscCall(VecSetRandom(pcis->vec1_B, NULL));
323: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
324: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
325: PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B));
326: PetscCall(VecScale(pcis->vec1_B, -1.0));
327: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
328: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
329: PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
330: PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error));
331: PetscCall(MatDestroy(&B0_B));
332: PetscCall(VecDestroy(&B0_Bv));
333: PetscCall(VecDestroy(&B0_Bv2));
334: }
335: PetscFunctionReturn(PETSC_SUCCESS);
336: }
338: PetscErrorCode PCBDDCScalingDestroy(PC pc)
339: {
340: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
342: PetscFunctionBegin;
343: if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc));
344: PetscCall(VecDestroy(&pcbddc->work_scaling));
345: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL));
346: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL));
347: PetscFunctionReturn(PETSC_SUCCESS);
348: }
350: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
351: {
352: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
353: PCBDDCDeluxeScaling deluxe_ctx;
355: PetscFunctionBegin;
356: PetscCall(PetscNew(&deluxe_ctx));
357: pcbddc->deluxe_ctx = deluxe_ctx;
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
361: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
362: {
363: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
365: PetscFunctionBegin;
366: PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx));
367: PetscCall(PetscFree(pcbddc->deluxe_ctx));
368: PetscFunctionReturn(PETSC_SUCCESS);
369: }
371: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
372: {
373: PetscInt i;
375: PetscFunctionBegin;
376: PetscCall(PetscFree(deluxe_ctx->idx_simple_B));
377: deluxe_ctx->n_simple = 0;
378: for (i = 0; i < deluxe_ctx->seq_n; i++) {
379: PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i]));
380: PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i]));
381: PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i]));
382: PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
383: PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
384: }
385: PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum));
386: PetscCall(PetscFree(deluxe_ctx->workspace));
387: deluxe_ctx->seq_n = 0;
388: PetscFunctionReturn(PETSC_SUCCESS);
389: }
391: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
392: {
393: PC_IS *pcis = (PC_IS *)pc->data;
394: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
395: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
396: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
398: PetscFunctionBegin;
399: /* reset data structures if the topology has changed */
400: if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx));
402: /* Compute data structures to solve sequential problems */
403: PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc));
405: /* diagonal scaling on interface dofs not contained in cc */
406: if (sub_schurs->is_vertices || sub_schurs->is_dir) {
407: PetscInt n_com, n_dir;
408: n_com = 0;
409: if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com));
410: n_dir = 0;
411: if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
412: if (!deluxe_ctx->n_simple) {
413: deluxe_ctx->n_simple = n_dir + n_com;
414: PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B));
415: if (sub_schurs->is_vertices) {
416: PetscInt nmap;
417: const PetscInt *idxs;
419: PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
420: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B));
421: PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com);
422: PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
423: }
424: if (sub_schurs->is_dir) {
425: PetscInt nmap;
426: const PetscInt *idxs;
428: PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
429: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com));
430: PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir);
431: PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
432: }
433: PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B));
434: } else {
435: PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple);
436: }
437: } else {
438: deluxe_ctx->n_simple = 0;
439: deluxe_ctx->idx_simple_B = NULL;
440: }
441: PetscFunctionReturn(PETSC_SUCCESS);
442: }
444: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
445: {
446: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
447: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
448: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
449: PetscScalar *matdata, *matdata2;
450: PetscInt i, max_subset_size, cum, cum2;
451: const PetscInt *idxs;
452: PetscBool newsetup = PETSC_FALSE;
454: PetscFunctionBegin;
455: PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs");
456: if (!sub_schurs->n_subs) PetscFunctionReturn(PETSC_SUCCESS);
458: /* Allocate arrays for subproblems */
459: if (!deluxe_ctx->seq_n) {
460: deluxe_ctx->seq_n = sub_schurs->n_subs;
461: PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum));
462: newsetup = PETSC_TRUE;
463: } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs);
465: /* the change of basis is just a reference to sub_schurs->change (if any) */
466: deluxe_ctx->change = sub_schurs->change;
467: deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
469: /* Create objects for deluxe */
470: max_subset_size = 0;
471: for (i = 0; i < sub_schurs->n_subs; i++) {
472: PetscInt subset_size;
473: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
474: max_subset_size = PetscMax(subset_size, max_subset_size);
475: }
476: if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace));
477: cum = cum2 = 0;
478: PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
479: PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata));
480: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2));
481: for (i = 0; i < deluxe_ctx->seq_n; i++) {
482: PetscInt subset_size;
484: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
485: if (newsetup) {
486: IS sub;
487: /* work vectors */
488: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i]));
489: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i]));
491: /* scatters */
492: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub));
493: PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i]));
494: PetscCall(ISDestroy(&sub));
495: }
497: /* S_E_j */
498: PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
499: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i]));
501: /* \sum_k S^k_E_j */
502: PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
503: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i]));
504: PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef));
505: PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian));
506: if (sub_schurs->is_hermitian) {
507: PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL));
508: } else {
509: PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL));
510: }
511: if (pcbddc->deluxe_singlemat) {
512: Mat X, Y;
513: if (!sub_schurs->is_hermitian) {
514: PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X));
515: } else {
516: PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]));
517: X = deluxe_ctx->seq_mat[i];
518: }
519: PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y));
520: if (!sub_schurs->is_hermitian) {
521: PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
522: } else {
523: PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
524: }
526: PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
527: PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
528: PetscCall(MatDestroy(&X));
529: if (deluxe_ctx->change) {
530: Mat C, CY;
531: PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis");
532: PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL));
533: PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY));
534: PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
535: PetscCall(MatDestroy(&CY));
536: PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */
537: }
538: PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y));
539: deluxe_ctx->seq_mat[i] = Y;
540: }
541: cum += subset_size;
542: cum2 += subset_size * subset_size;
543: }
544: PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
545: PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata));
546: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2));
547: if (pcbddc->deluxe_singlemat) {
548: deluxe_ctx->change = NULL;
549: deluxe_ctx->change_with_qr = PETSC_FALSE;
550: }
552: if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
553: for (i = 0; i < deluxe_ctx->seq_n; i++) {
554: if (newsetup) {
555: PC pc;
557: PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc));
558: PetscCall(PCSetType(pc, PCLU));
559: PetscCall(KSPSetFromOptions(deluxe_ctx->change[i]));
560: }
561: PetscCall(KSPSetUp(deluxe_ctx->change[i]));
562: }
563: }
564: PetscFunctionReturn(PETSC_SUCCESS);
565: }