Actual source code: bddcscalingbasic.c

  1: #include <petsc/private/pcbddcimpl.h>
  2: #include <petsc/private/pcbddcprivateimpl.h>
  3: #include <petscblaslapack.h>
  4: #include <../src/mat/impls/dense/seq/dense.h>

  6: /* prototypes for deluxe functions */
  7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
  8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
  9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
 10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
 11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);

 13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X)
 14: {
 15:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
 16:   const PetscScalar *b;
 17:   PetscScalar       *x;
 18:   PetscInt           n;
 19:   PetscBLASInt       nrhs, info, m;
 20:   PetscBool          flg;

 22:   PetscFunctionBegin;
 23:   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
 24:   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
 25:   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
 26:   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
 27:   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

 29:   PetscCall(MatGetSize(B, NULL, &n));
 30:   PetscCall(PetscBLASIntCast(n, &nrhs));
 31:   PetscCall(MatDenseGetArrayRead(B, &b));
 32:   PetscCall(MatDenseGetArray(X, &x));
 33:   PetscCall(PetscArraycpy(x, b, m * nrhs));
 34:   PetscCall(MatDenseRestoreArrayRead(B, &b));

 36:   PetscCheck(A->factortype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported");
 37:   PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
 38:   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve");

 40:   PetscCall(MatDenseRestoreArray(X, &x));
 41:   PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
 42:   PetscFunctionReturn(PETSC_SUCCESS);
 43: }

 45: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
 46: {
 47:   PC_IS   *pcis   = (PC_IS *)pc->data;
 48:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

 50:   PetscFunctionBegin;
 51:   /* Apply partition of unity */
 52:   PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector));
 53:   PetscCall(VecSet(global_vector, 0.0));
 54:   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
 55:   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
 56:   PetscFunctionReturn(PETSC_SUCCESS);
 57: }

 59: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
 60: {
 61:   PC_IS              *pcis       = (PC_IS *)pc->data;
 62:   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
 63:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;

 65:   PetscFunctionBegin;
 66:   PetscCall(VecSet(pcbddc->work_scaling, 0.0));
 67:   PetscCall(VecSet(y, 0.0));
 68:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
 69:     PetscInt           i;
 70:     const PetscScalar *array_x, *array_D;
 71:     PetscScalar       *array;
 72:     PetscCall(VecGetArrayRead(x, &array_x));
 73:     PetscCall(VecGetArrayRead(pcis->D, &array_D));
 74:     PetscCall(VecGetArray(pcbddc->work_scaling, &array));
 75:     for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]];
 76:     PetscCall(VecRestoreArray(pcbddc->work_scaling, &array));
 77:     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
 78:     PetscCall(VecRestoreArrayRead(x, &array_x));
 79:   }
 80:   /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
 81:   if (deluxe_ctx->seq_mat) {
 82:     PetscInt i;
 83:     for (i = 0; i < deluxe_ctx->seq_n; i++) {
 84:       if (deluxe_ctx->change) {
 85:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
 86:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
 87:         if (deluxe_ctx->change_with_qr) {
 88:           Mat change;

 90:           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
 91:           PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
 92:         } else {
 93:           PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
 94:         }
 95:       } else {
 96:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
 97:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
 98:       }
 99:       PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
100:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
101:         PetscScalar *x;

103:         PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x));
104:         PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x));
105:         PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x));
106:         PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
107:         PetscCall(VecResetArray(deluxe_ctx->seq_work1[i]));
108:       }
109:       if (deluxe_ctx->change) {
110:         Mat change;

112:         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
113:         PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
114:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
115:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
116:       } else {
117:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
118:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
119:       }
120:     }
121:   }
122:   /* put local boundary part in global vector */
123:   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
124:   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
125:   PetscFunctionReturn(PETSC_SUCCESS);
126: }

128: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
129: {
130:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

132:   PetscFunctionBegin;
136:   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
137:   PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector));
138:   PetscFunctionReturn(PETSC_SUCCESS);
139: }

141: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
142: {
143:   PC_IS *pcis = (PC_IS *)pc->data;

145:   PetscFunctionBegin;
146:   PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
147:   PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
148:   /* Apply partition of unity */
149:   PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector));
150:   PetscFunctionReturn(PETSC_SUCCESS);
151: }

153: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
154: {
155:   PC_IS              *pcis       = (PC_IS *)pc->data;
156:   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
157:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;

159:   PetscFunctionBegin;
160:   /* get local boundary part of global vector */
161:   PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
162:   PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
163:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
164:     PetscInt           i;
165:     PetscScalar       *array_y;
166:     const PetscScalar *array_D;
167:     PetscCall(VecGetArray(y, &array_y));
168:     PetscCall(VecGetArrayRead(pcis->D, &array_D));
169:     for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
170:     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
171:     PetscCall(VecRestoreArray(y, &array_y));
172:   }
173:   /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
174:   if (deluxe_ctx->seq_mat) {
175:     PetscInt i;
176:     for (i = 0; i < deluxe_ctx->seq_n; i++) {
177:       if (deluxe_ctx->change) {
178:         Mat change;

180:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
181:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
182:         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
183:         PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
184:       } else {
185:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
186:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
187:       }
188:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
189:         PetscScalar *x;

191:         PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x));
192:         PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x));
193:         PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x));
194:         PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
195:         PetscCall(VecResetArray(deluxe_ctx->seq_work2[i]));
196:       }
197:       PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
198:       if (deluxe_ctx->change) {
199:         if (deluxe_ctx->change_with_qr) {
200:           Mat change;

202:           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
203:           PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
204:         } else {
205:           PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
206:         }
207:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
208:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
209:       } else {
210:         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
211:         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
212:       }
213:     }
214:   }
215:   PetscFunctionReturn(PETSC_SUCCESS);
216: }

218: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
219: {
220:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

222:   PetscFunctionBegin;
226:   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
227:   PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector));
228:   PetscFunctionReturn(PETSC_SUCCESS);
229: }

231: PetscErrorCode PCBDDCScalingSetUp(PC pc)
232: {
233:   PC_IS   *pcis   = (PC_IS *)pc->data;
234:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

236:   PetscFunctionBegin;
238:   PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
239:   /* create work vector for the operator */
240:   PetscCall(VecDestroy(&pcbddc->work_scaling));
241:   PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling));
242:   /* always rebuild pcis->D */
243:   if (pcis->use_stiffness_scaling) {
244:     PetscScalar *a;
245:     PetscInt     i, n;

247:     PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N));
248:     PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
249:     PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
250:     PetscCall(VecAbs(pcis->D));
251:     PetscCall(VecGetLocalSize(pcis->D, &n));
252:     PetscCall(VecGetArray(pcis->D, &a));
253:     for (i = 0; i < n; i++)
254:       if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0;
255:     PetscCall(VecRestoreArray(pcis->D, &a));
256:   }
257:   PetscCall(VecSet(pcis->vec1_global, 0.0));
258:   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
259:   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
260:   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
261:   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
262:   PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B));
263:   /* now setup */
264:   if (pcbddc->use_deluxe_scaling) {
265:     if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc));
266:     PetscCall(PCBDDCScalingSetUp_Deluxe(pc));
267:     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe));
268:     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe));
269:   } else {
270:     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic));
271:     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic));
272:   }
273:   PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));

275:   /* test */
276:   if (pcbddc->dbg_flag) {
277:     Mat         B0_B  = NULL;
278:     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
279:     Vec         vec2_global;
280:     PetscViewer viewer = pcbddc->dbg_viewer;
281:     PetscReal   error;

283:     /* extension -> from local to parallel */
284:     PetscCall(VecSet(pcis->vec1_global, 0.0));
285:     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
286:     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
287:     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
288:     PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global));
289:     PetscCall(VecCopy(pcis->vec1_global, vec2_global));
290:     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
291:     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
292:     if (pcbddc->benign_n) {
293:       IS is_dummy;

295:       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
296:       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
297:       PetscCall(ISDestroy(&is_dummy));
298:       PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv));
299:       PetscCall(VecDuplicate(B0_Bv, &B0_Bv2));
300:       PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv));
301:     }
302:     PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global));
303:     if (pcbddc->benign_saddle_point) {
304:       PetscReal errorl = 0.;
305:       PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
306:       PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
307:       if (pcbddc->benign_n) {
308:         PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2));
309:         PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2));
310:         PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl));
311:       }
312:       PetscCall(MPIU_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc)));
313:       PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error));
314:     }
315:     PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global));
316:     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
317:     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error));
318:     PetscCall(VecDestroy(&vec2_global));

320:     /* restriction -> from parallel to local */
321:     PetscCall(VecSet(pcis->vec1_global, 0.0));
322:     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
323:     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
324:     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
325:     PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B));
326:     PetscCall(VecScale(pcis->vec1_B, -1.0));
327:     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
328:     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
329:     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
330:     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error));
331:     PetscCall(MatDestroy(&B0_B));
332:     PetscCall(VecDestroy(&B0_Bv));
333:     PetscCall(VecDestroy(&B0_Bv2));
334:   }
335:   PetscFunctionReturn(PETSC_SUCCESS);
336: }

338: PetscErrorCode PCBDDCScalingDestroy(PC pc)
339: {
340:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

342:   PetscFunctionBegin;
343:   if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc));
344:   PetscCall(VecDestroy(&pcbddc->work_scaling));
345:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL));
346:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL));
347:   PetscFunctionReturn(PETSC_SUCCESS);
348: }

350: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
351: {
352:   PC_BDDC            *pcbddc = (PC_BDDC *)pc->data;
353:   PCBDDCDeluxeScaling deluxe_ctx;

355:   PetscFunctionBegin;
356:   PetscCall(PetscNew(&deluxe_ctx));
357:   pcbddc->deluxe_ctx = deluxe_ctx;
358:   PetscFunctionReturn(PETSC_SUCCESS);
359: }

361: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
362: {
363:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

365:   PetscFunctionBegin;
366:   PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx));
367:   PetscCall(PetscFree(pcbddc->deluxe_ctx));
368:   PetscFunctionReturn(PETSC_SUCCESS);
369: }

371: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
372: {
373:   PetscInt i;

375:   PetscFunctionBegin;
376:   PetscCall(PetscFree(deluxe_ctx->idx_simple_B));
377:   deluxe_ctx->n_simple = 0;
378:   for (i = 0; i < deluxe_ctx->seq_n; i++) {
379:     PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i]));
380:     PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i]));
381:     PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i]));
382:     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
383:     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
384:   }
385:   PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum));
386:   PetscCall(PetscFree(deluxe_ctx->workspace));
387:   deluxe_ctx->seq_n = 0;
388:   PetscFunctionReturn(PETSC_SUCCESS);
389: }

391: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
392: {
393:   PC_IS              *pcis       = (PC_IS *)pc->data;
394:   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
395:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
396:   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;

398:   PetscFunctionBegin;
399:   /* reset data structures if the topology has changed */
400:   if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx));

402:   /* Compute data structures to solve sequential problems */
403:   PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc));

405:   /* diagonal scaling on interface dofs not contained in cc */
406:   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
407:     PetscInt n_com, n_dir;
408:     n_com = 0;
409:     if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com));
410:     n_dir = 0;
411:     if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
412:     if (!deluxe_ctx->n_simple) {
413:       deluxe_ctx->n_simple = n_dir + n_com;
414:       PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B));
415:       if (sub_schurs->is_vertices) {
416:         PetscInt        nmap;
417:         const PetscInt *idxs;

419:         PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
420:         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B));
421:         PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com);
422:         PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
423:       }
424:       if (sub_schurs->is_dir) {
425:         PetscInt        nmap;
426:         const PetscInt *idxs;

428:         PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
429:         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com));
430:         PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir);
431:         PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
432:       }
433:       PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B));
434:     } else {
435:       PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple);
436:     }
437:   } else {
438:     deluxe_ctx->n_simple     = 0;
439:     deluxe_ctx->idx_simple_B = NULL;
440:   }
441:   PetscFunctionReturn(PETSC_SUCCESS);
442: }

444: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
445: {
446:   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
447:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
448:   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
449:   PetscScalar        *matdata, *matdata2;
450:   PetscInt            i, max_subset_size, cum, cum2;
451:   const PetscInt     *idxs;
452:   PetscBool           newsetup = PETSC_FALSE;

454:   PetscFunctionBegin;
455:   PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs");
456:   if (!sub_schurs->n_subs) PetscFunctionReturn(PETSC_SUCCESS);

458:   /* Allocate arrays for subproblems */
459:   if (!deluxe_ctx->seq_n) {
460:     deluxe_ctx->seq_n = sub_schurs->n_subs;
461:     PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum));
462:     newsetup = PETSC_TRUE;
463:   } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs);

465:   /* the change of basis is just a reference to sub_schurs->change (if any) */
466:   deluxe_ctx->change         = sub_schurs->change;
467:   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

469:   /* Create objects for deluxe */
470:   max_subset_size = 0;
471:   for (i = 0; i < sub_schurs->n_subs; i++) {
472:     PetscInt subset_size;
473:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
474:     max_subset_size = PetscMax(subset_size, max_subset_size);
475:   }
476:   if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace));
477:   cum = cum2 = 0;
478:   PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
479:   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata));
480:   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2));
481:   for (i = 0; i < deluxe_ctx->seq_n; i++) {
482:     PetscInt subset_size;

484:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
485:     if (newsetup) {
486:       IS sub;
487:       /* work vectors */
488:       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i]));
489:       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i]));

491:       /* scatters */
492:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub));
493:       PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i]));
494:       PetscCall(ISDestroy(&sub));
495:     }

497:     /* S_E_j */
498:     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
499:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i]));

501:     /* \sum_k S^k_E_j */
502:     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
503:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i]));
504:     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef));
505:     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian));
506:     if (sub_schurs->is_hermitian) {
507:       PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL));
508:     } else {
509:       PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL));
510:     }
511:     if (pcbddc->deluxe_singlemat) {
512:       Mat X, Y;
513:       if (!sub_schurs->is_hermitian) {
514:         PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X));
515:       } else {
516:         PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]));
517:         X = deluxe_ctx->seq_mat[i];
518:       }
519:       PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y));
520:       if (!sub_schurs->is_hermitian) {
521:         PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
522:       } else {
523:         PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
524:       }

526:       PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
527:       PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
528:       PetscCall(MatDestroy(&X));
529:       if (deluxe_ctx->change) {
530:         Mat C, CY;
531:         PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis");
532:         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL));
533:         PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY));
534:         PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
535:         PetscCall(MatDestroy(&CY));
536:         PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */
537:       }
538:       PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y));
539:       deluxe_ctx->seq_mat[i] = Y;
540:     }
541:     cum += subset_size;
542:     cum2 += subset_size * subset_size;
543:   }
544:   PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
545:   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata));
546:   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2));
547:   if (pcbddc->deluxe_singlemat) {
548:     deluxe_ctx->change         = NULL;
549:     deluxe_ctx->change_with_qr = PETSC_FALSE;
550:   }

552:   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
553:     for (i = 0; i < deluxe_ctx->seq_n; i++) {
554:       if (newsetup) {
555:         PC pc;

557:         PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc));
558:         PetscCall(PCSetType(pc, PCLU));
559:         PetscCall(KSPSetFromOptions(deluxe_ctx->change[i]));
560:       }
561:       PetscCall(KSPSetUp(deluxe_ctx->change[i]));
562:     }
563:   }
564:   PetscFunctionReturn(PETSC_SUCCESS);
565: }