Actual source code: bddcschurs.c

  1: #include <petsc/private/pcbddcimpl.h>
  2: #include <petsc/private/pcbddcprivateimpl.h>
  3: #include <../src/mat/impls/dense/seq/dense.h>
  4: #include <petscblaslapack.h>

  6: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
  7: static PetscErrorCode        PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
  8: static PetscErrorCode        PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
  9: static PetscErrorCode        PCBDDCReuseSolvers_Correction(PC, Vec, Vec);

 11: /* if v2 is not present, correction is done in-place */
 12: PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
 13: {
 14:   PetscScalar *array;
 15:   PetscScalar *array2;

 17:   PetscFunctionBegin;
 18:   if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
 19:   if (sol && full) {
 20:     PetscInt n_I, size_schur;

 22:     /* get sizes */
 23:     PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
 24:     PetscCall(VecGetSize(v, &n_I));
 25:     n_I = n_I - size_schur;
 26:     /* get schur sol from array */
 27:     PetscCall(VecGetArray(v, &array));
 28:     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
 29:     PetscCall(VecRestoreArray(v, &array));
 30:     /* apply interior sol correction */
 31:     PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work));
 32:     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
 33:     PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v));
 34:   }
 35:   if (v2) {
 36:     PetscInt nl;

 38:     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
 39:     PetscCall(VecGetLocalSize(v2, &nl));
 40:     PetscCall(VecGetArray(v2, &array2));
 41:     PetscCall(PetscArraycpy(array2, array, nl));
 42:   } else {
 43:     PetscCall(VecGetArray(v, &array));
 44:     array2 = array;
 45:   }
 46:   if (!sol) { /* change rhs */
 47:     PetscInt n;
 48:     for (n = 0; n < ctx->benign_n; n++) {
 49:       PetscScalar     sum = 0.;
 50:       const PetscInt *cols;
 51:       PetscInt        nz, i;

 53:       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
 54:       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
 55:       for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
 56: #if defined(PETSC_USE_COMPLEX)
 57:       sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
 58: #else
 59:       sum = -sum / nz;
 60: #endif
 61:       for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
 62:       ctx->benign_save_vals[n] = array2[cols[nz - 1]];
 63:       array2[cols[nz - 1]]     = sum;
 64:       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
 65:     }
 66:   } else {
 67:     PetscInt n;
 68:     for (n = 0; n < ctx->benign_n; n++) {
 69:       PetscScalar     sum = 0.;
 70:       const PetscInt *cols;
 71:       PetscInt        nz, i;
 72:       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
 73:       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
 74:       for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
 75: #if defined(PETSC_USE_COMPLEX)
 76:       sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
 77: #else
 78:       sum = -sum / nz;
 79: #endif
 80:       for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
 81:       array2[cols[nz - 1]] = ctx->benign_save_vals[n];
 82:       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
 83:     }
 84:   }
 85:   if (v2) {
 86:     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
 87:     PetscCall(VecRestoreArray(v2, &array2));
 88:   } else {
 89:     PetscCall(VecRestoreArray(v, &array));
 90:   }
 91:   if (!sol && full) {
 92:     Vec      usedv;
 93:     PetscInt n_I, size_schur;

 95:     /* get sizes */
 96:     PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
 97:     PetscCall(VecGetSize(v, &n_I));
 98:     n_I = n_I - size_schur;
 99:     /* compute schur rhs correction */
100:     if (v2) {
101:       usedv = v2;
102:     } else {
103:       usedv = v;
104:     }
105:     /* apply schur rhs correction */
106:     PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work));
107:     PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array));
108:     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
109:     PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array));
110:     PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec));
111:     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
112:   }
113:   PetscFunctionReturn(PETSC_SUCCESS);
114: }

116: static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117: {
118:   PCBDDCReuseSolvers ctx;
119:   PetscBool          copy = PETSC_FALSE;

121:   PetscFunctionBegin;
122:   PetscCall(PCShellGetContext(pc, &ctx));
123:   if (full) {
124: #if defined(PETSC_HAVE_MUMPS)
125:     PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
126: #endif
127: #if defined(PETSC_HAVE_MKL_PARDISO)
128:     PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
129: #endif
130:     copy = ctx->has_vertices;
131:   } else { /* interior solver */
132: #if defined(PETSC_HAVE_MUMPS)
133:     PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0));
134: #endif
135: #if defined(PETSC_HAVE_MKL_PARDISO)
136:     PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1));
137: #endif
138:     copy = PETSC_TRUE;
139:   }
140:   /* copy rhs into factored matrix workspace */
141:   if (copy) {
142:     PetscInt     n;
143:     PetscScalar *array, *array_solver;

145:     PetscCall(VecGetLocalSize(rhs, &n));
146:     PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array));
147:     PetscCall(VecGetArray(ctx->rhs, &array_solver));
148:     PetscCall(PetscArraycpy(array_solver, array, n));
149:     PetscCall(VecRestoreArray(ctx->rhs, &array_solver));
150:     PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array));

152:     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full));
153:     if (transpose) {
154:       PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol));
155:     } else {
156:       PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol));
157:     }
158:     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full));

160:     /* get back data to caller worskpace */
161:     PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
162:     PetscCall(VecGetArray(sol, &array));
163:     PetscCall(PetscArraycpy(array, array_solver, n));
164:     PetscCall(VecRestoreArray(sol, &array));
165:     PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
166:   } else {
167:     if (ctx->benign_n) {
168:       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full));
169:       if (transpose) {
170:         PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol));
171:       } else {
172:         PetscCall(MatSolve(ctx->F, ctx->rhs, sol));
173:       }
174:       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full));
175:     } else {
176:       if (transpose) {
177:         PetscCall(MatSolveTranspose(ctx->F, rhs, sol));
178:       } else {
179:         PetscCall(MatSolve(ctx->F, rhs, sol));
180:       }
181:     }
182:   }
183:   /* restore defaults */
184: #if defined(PETSC_HAVE_MUMPS)
185:   PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
186: #endif
187: #if defined(PETSC_HAVE_MKL_PARDISO)
188:   PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
189: #endif
190:   PetscFunctionReturn(PETSC_SUCCESS);
191: }

193: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
194: {
195:   PetscFunctionBegin;
196:   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE));
197:   PetscFunctionReturn(PETSC_SUCCESS);
198: }

200: static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
201: {
202:   PetscFunctionBegin;
203:   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE));
204:   PetscFunctionReturn(PETSC_SUCCESS);
205: }

207: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
208: {
209:   PetscFunctionBegin;
210:   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE));
211:   PetscFunctionReturn(PETSC_SUCCESS);
212: }

214: static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
215: {
216:   PetscFunctionBegin;
217:   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE));
218:   PetscFunctionReturn(PETSC_SUCCESS);
219: }

221: static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
222: {
223:   PCBDDCReuseSolvers ctx;
224:   PetscBool          iascii;

226:   PetscFunctionBegin;
227:   PetscCall(PCShellGetContext(pc, &ctx));
228:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
229:   if (iascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
230:   PetscCall(MatView(ctx->F, viewer));
231:   if (iascii) PetscCall(PetscViewerPopFormat(viewer));
232:   PetscFunctionReturn(PETSC_SUCCESS);
233: }

235: static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
236: {
237:   PetscInt i;

239:   PetscFunctionBegin;
240:   PetscCall(MatDestroy(&reuse->F));
241:   PetscCall(VecDestroy(&reuse->sol));
242:   PetscCall(VecDestroy(&reuse->rhs));
243:   PetscCall(PCDestroy(&reuse->interior_solver));
244:   PetscCall(PCDestroy(&reuse->correction_solver));
245:   PetscCall(ISDestroy(&reuse->is_R));
246:   PetscCall(ISDestroy(&reuse->is_B));
247:   PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
248:   PetscCall(VecDestroy(&reuse->sol_B));
249:   PetscCall(VecDestroy(&reuse->rhs_B));
250:   for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
251:   PetscCall(PetscFree(reuse->benign_zerodiag_subs));
252:   PetscCall(PetscFree(reuse->benign_save_vals));
253:   PetscCall(MatDestroy(&reuse->benign_csAIB));
254:   PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
255:   PetscCall(VecDestroy(&reuse->benign_corr_work));
256:   PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
257:   PetscFunctionReturn(PETSC_SUCCESS);
258: }

260: static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
261: {
262:   PCBDDCReuseSolvers ctx;

264:   PetscFunctionBegin;
265:   PetscCall(PCShellGetContext(pc, &ctx));
266:   PetscCall(PCBDDCReuseSolversReset(ctx));
267:   PetscCall(PetscFree(ctx));
268:   PetscCall(PCShellSetContext(pc, NULL));
269:   PetscFunctionReturn(PETSC_SUCCESS);
270: }

272: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
273: {
274:   Mat         B, C, D, Bd, Cd, AinvBd;
275:   KSP         ksp;
276:   PC          pc;
277:   PetscBool   isLU, isILU, isCHOL, Bdense, Cdense;
278:   PetscReal   fill = 2.0;
279:   PetscInt    n_I;
280:   PetscMPIInt size;

282:   PetscFunctionBegin;
283:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size));
284:   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
285:   if (reuse == MAT_REUSE_MATRIX) {
286:     PetscBool Sdense;

288:     PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
289:     PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense");
290:   }
291:   PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
292:   PetscCall(MatSchurComplementGetKSP(M, &ksp));
293:   PetscCall(KSPGetPC(ksp, &pc));
294:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU));
295:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU));
296:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL));
297:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense));
298:   PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense));
299:   PetscCall(MatGetSize(B, &n_I, NULL));
300:   if (n_I) {
301:     if (!Bdense) {
302:       PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
303:     } else {
304:       Bd = B;
305:     }

307:     if (isLU || isILU || isCHOL) {
308:       Mat fact;
309:       PetscCall(KSPSetUp(ksp));
310:       PetscCall(PCFactorGetMatrix(pc, &fact));
311:       PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
312:       PetscCall(MatMatSolve(fact, Bd, AinvBd));
313:     } else {
314:       PetscBool ex = PETSC_TRUE;

316:       if (ex) {
317:         Mat Ainvd;

319:         PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
320:         PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
321:         PetscCall(MatDestroy(&Ainvd));
322:       } else {
323:         Vec          sol, rhs;
324:         PetscScalar *arrayrhs, *arraysol;
325:         PetscInt     i, nrhs, n;

327:         PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
328:         PetscCall(MatGetSize(Bd, &n, &nrhs));
329:         PetscCall(MatDenseGetArray(Bd, &arrayrhs));
330:         PetscCall(MatDenseGetArray(AinvBd, &arraysol));
331:         PetscCall(KSPGetSolution(ksp, &sol));
332:         PetscCall(KSPGetRhs(ksp, &rhs));
333:         for (i = 0; i < nrhs; i++) {
334:           PetscCall(VecPlaceArray(rhs, arrayrhs + i * n));
335:           PetscCall(VecPlaceArray(sol, arraysol + i * n));
336:           PetscCall(KSPSolve(ksp, rhs, sol));
337:           PetscCall(VecResetArray(rhs));
338:           PetscCall(VecResetArray(sol));
339:         }
340:         PetscCall(MatDenseRestoreArray(Bd, &arrayrhs));
341:         PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs));
342:       }
343:     }
344:     if (!Bdense & !issym) PetscCall(MatDestroy(&Bd));

346:     if (!issym) {
347:       if (!Cdense) {
348:         PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
349:       } else {
350:         Cd = C;
351:       }
352:       PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
353:       if (!Cdense) PetscCall(MatDestroy(&Cd));
354:     } else {
355:       PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
356:       if (!Bdense) PetscCall(MatDestroy(&Bd));
357:     }
358:     PetscCall(MatDestroy(&AinvBd));
359:   }

361:   if (D) {
362:     Mat       Dd;
363:     PetscBool Ddense;

365:     PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense));
366:     if (!Ddense) {
367:       PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
368:     } else {
369:       Dd = D;
370:     }
371:     if (n_I) {
372:       PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN));
373:     } else {
374:       if (reuse == MAT_INITIAL_MATRIX) {
375:         PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S));
376:       } else {
377:         PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN));
378:       }
379:     }
380:     if (!Ddense) PetscCall(MatDestroy(&Dd));
381:   } else {
382:     PetscCall(MatScale(*S, -1.0));
383:   }
384:   PetscFunctionReturn(PETSC_SUCCESS);
385: }

387: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
388: {
389:   Mat          F, A_II, A_IB, A_BI, A_BB, AE_II;
390:   Mat          S_all;
391:   Vec          gstash, lstash;
392:   VecScatter   sstash;
393:   IS           is_I, is_I_layer;
394:   IS           all_subsets, all_subsets_mult, all_subsets_n;
395:   PetscScalar *stasharray, *Bwork;
396:   PetscInt    *nnz, *all_local_idx_N;
397:   PetscInt    *auxnum1, *auxnum2;
398:   PetscInt     i, subset_size, max_subset_size;
399:   PetscInt     n_B, extra, local_size, global_size;
400:   PetscInt     local_stash_size;
401:   PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
402:   MPI_Comm     comm_n;
403:   PetscBool    deluxe   = PETSC_TRUE;
404:   PetscBool    use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
405:   PetscViewer  matl_dbg_viewer = NULL;
406:   PetscBool    flg;

408:   PetscFunctionBegin;
409:   PetscCall(MatDestroy(&sub_schurs->A));
410:   PetscCall(MatDestroy(&sub_schurs->S));
411:   if (Ain) {
412:     PetscCall(PetscObjectReference((PetscObject)Ain));
413:     sub_schurs->A = Ain;
414:   }

416:   PetscCall(PetscObjectReference((PetscObject)Sin));
417:   sub_schurs->S = Sin;
418:   if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);

420:   /* preliminary checks */
421:   PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");

423:   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;

425:   /* debug (MATLAB) */
426:   if (sub_schurs->debug) {
427:     PetscMPIInt size, rank;
428:     PetscInt    nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;

430:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size));
431:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
432:     nr = size;
433:     PetscCall(PetscMalloc1(nr, &print_schurs_ranks));
434:     PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
435:     PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg));
436:     if (!flg) print_schurs = PETSC_TRUE;
437:     else {
438:       print_schurs = PETSC_FALSE;
439:       for (i = 0; i < nr; i++)
440:         if (print_schurs_ranks[i] == (PetscInt)rank) {
441:           print_schurs = PETSC_TRUE;
442:           break;
443:         }
444:     }
445:     PetscOptionsEnd();
446:     PetscCall(PetscFree(print_schurs_ranks));
447:     if (print_schurs) {
448:       char filename[256];

450:       PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank));
451:       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer));
452:       PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB));
453:     }
454:   }

456:   /* restrict work on active processes */
457:   if (sub_schurs->restrict_comm) {
458:     PetscSubcomm subcomm;
459:     PetscMPIInt  color, rank;

461:     color = 0;
462:     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
463:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
464:     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm));
465:     PetscCall(PetscSubcommSetNumber(subcomm, 2));
466:     PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
467:     PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL));
468:     PetscCall(PetscSubcommDestroy(&subcomm));
469:     if (!sub_schurs->n_subs) {
470:       PetscCall(PetscCommDestroy(&comm_n));
471:       PetscFunctionReturn(PETSC_SUCCESS);
472:     }
473:   } else {
474:     PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL));
475:   }

477:   /* get Schur complement matrices */
478:   if (!sub_schurs->schur_explicit) {
479:     Mat       tA_IB, tA_BI, tA_BB;
480:     PetscBool isseqsbaij;
481:     PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB));
482:     PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij));
483:     if (isseqsbaij) {
484:       PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB));
485:       PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB));
486:       PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI));
487:     } else {
488:       PetscCall(PetscObjectReference((PetscObject)tA_BB));
489:       A_BB = tA_BB;
490:       PetscCall(PetscObjectReference((PetscObject)tA_IB));
491:       A_IB = tA_IB;
492:       PetscCall(PetscObjectReference((PetscObject)tA_BI));
493:       A_BI = tA_BI;
494:     }
495:   } else {
496:     A_II = NULL;
497:     A_IB = NULL;
498:     A_BI = NULL;
499:     A_BB = NULL;
500:   }
501:   S_all = NULL;

503:   /* determine interior problems */
504:   PetscCall(ISGetLocalSize(sub_schurs->is_I, &i));
505:   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
506:     PetscBT         touched;
507:     const PetscInt *idx_B;
508:     PetscInt        n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;

510:     PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency");
511:     /* get sizes */
512:     PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I));
513:     PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));

515:     PetscCall(PetscMalloc1(n_I + n_B, &local_numbering));
516:     PetscCall(PetscBTCreate(n_I + n_B, &touched));
517:     PetscCall(PetscBTMemzero(n_I + n_B, touched));

519:     /* all boundary dofs must be skipped when adding layers */
520:     PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B));
521:     for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j]));
522:     PetscCall(PetscArraycpy(local_numbering, idx_B, n_B));
523:     PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B));

525:     /* add prescribed number of layers of dofs */
526:     n_local_dofs = n_B;
527:     n_prev_added = n_B;
528:     for (layer = 0; layer < nlayers; layer++) {
529:       PetscInt n_added = 0;
530:       if (n_local_dofs == n_I + n_B) break;
531:       PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B);
532:       PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added));
533:       n_prev_added = n_added;
534:       n_local_dofs += n_added;
535:       if (!n_added) break;
536:     }
537:     PetscCall(PetscBTDestroy(&touched));

539:     /* IS for I layer dofs in original numbering */
540:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer));
541:     PetscCall(PetscFree(local_numbering));
542:     PetscCall(ISSort(is_I_layer));
543:     /* IS for I layer dofs in I numbering */
544:     if (!sub_schurs->schur_explicit) {
545:       ISLocalToGlobalMapping ItoNmap;
546:       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap));
547:       PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I));
548:       PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));

550:       /* II block */
551:       PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II));
552:     }
553:   } else {
554:     PetscInt n_I;

556:     /* IS for I dofs in original numbering */
557:     PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
558:     is_I_layer = sub_schurs->is_I;

560:     /* IS for I dofs in I numbering (strided 1) */
561:     if (!sub_schurs->schur_explicit) {
562:       PetscCall(ISGetSize(sub_schurs->is_I, &n_I));
563:       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I));

565:       /* II block is the same */
566:       PetscCall(PetscObjectReference((PetscObject)A_II));
567:       AE_II = A_II;
568:     }
569:   }

571:   /* Get info on subset sizes and sum of all subsets sizes */
572:   max_subset_size = 0;
573:   local_size      = 0;
574:   for (i = 0; i < sub_schurs->n_subs; i++) {
575:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
576:     max_subset_size = PetscMax(subset_size, max_subset_size);
577:     local_size += subset_size;
578:   }

580:   /* Work arrays for local indices */
581:   extra = 0;
582:   PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
583:   if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra));
584:   PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N));
585:   if (extra) {
586:     const PetscInt *idxs;
587:     PetscCall(ISGetIndices(is_I_layer, &idxs));
588:     PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra));
589:     PetscCall(ISRestoreIndices(is_I_layer, &idxs));
590:   }
591:   PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1));
592:   PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2));

594:   /* Get local indices in local numbering */
595:   local_size       = 0;
596:   local_stash_size = 0;
597:   for (i = 0; i < sub_schurs->n_subs; i++) {
598:     const PetscInt *idxs;

600:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
601:     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
602:     /* start (smallest in global ordering) and multiplicity */
603:     auxnum1[i] = idxs[0];
604:     auxnum2[i] = subset_size * subset_size;
605:     /* subset indices in local numbering */
606:     PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size));
607:     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
608:     local_size += subset_size;
609:     local_stash_size += subset_size * subset_size;
610:   }

612:   /* allocate extra workspace needed only for GETRI or SYTRF */
613:   use_potr = use_sytr = PETSC_FALSE;
614:   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
615:     use_potr = PETSC_TRUE;
616:   } else if (sub_schurs->is_symmetric) {
617:     use_sytr = PETSC_TRUE;
618:   }
619:   if (local_size && !use_potr) {
620:     PetscScalar  lwork, dummyscalar = 0.;
621:     PetscBLASInt dummyint = 0;

623:     B_lwork = -1;
624:     PetscCall(PetscBLASIntCast(local_size, &B_N));
625:     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
626:     if (use_sytr) {
627:       PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
628:       PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %d", (int)B_ierr);
629:     } else {
630:       PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
631:       PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %d", (int)B_ierr);
632:     }
633:     PetscCall(PetscFPTrapPop());
634:     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
635:     PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots));
636:   } else {
637:     Bwork  = NULL;
638:     pivots = NULL;
639:   }

641:   /* prepare data for summing up properly schurs on subsets */
642:   PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n));
643:   PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets));
644:   PetscCall(ISDestroy(&all_subsets_n));
645:   PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult));
646:   PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n));
647:   PetscCall(ISDestroy(&all_subsets));
648:   PetscCall(ISDestroy(&all_subsets_mult));
649:   PetscCall(ISGetLocalSize(all_subsets_n, &i));
650:   PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
651:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash));
652:   PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash));
653:   PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash));
654:   PetscCall(ISDestroy(&all_subsets_n));

656:   /* subset indices in local boundary numbering */
657:   if (!sub_schurs->is_Ej_all) {
658:     PetscInt *all_local_idx_B;

660:     PetscCall(PetscMalloc1(local_size, &all_local_idx_B));
661:     PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B));
662:     PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size);
663:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all));
664:   }

666:   if (change) {
667:     ISLocalToGlobalMapping BtoS;
668:     IS                     change_primal_B;
669:     IS                     change_primal_all;

671:     PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
672:     PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
673:     PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub));
674:     for (i = 0; i < sub_schurs->n_subs; i++) {
675:       ISLocalToGlobalMapping NtoS;
676:       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS));
677:       PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]));
678:       PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
679:     }
680:     PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B));
681:     PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS));
682:     PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all));
683:     PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
684:     PetscCall(ISDestroy(&change_primal_B));
685:     PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change));
686:     for (i = 0; i < sub_schurs->n_subs; i++) {
687:       Mat change_sub;

689:       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
690:       PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]));
691:       PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */
692:       PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY));
693:       if (!sub_schurs->change_with_qr) {
694:         PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub));
695:       } else {
696:         Mat change_subt;
697:         PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt));
698:         PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub));
699:         PetscCall(MatDestroy(&change_subt));
700:       }
701:       PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub));
702:       PetscCall(MatDestroy(&change_sub));
703:       PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix));
704:       PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_"));
705:     }
706:     PetscCall(ISDestroy(&change_primal_all));
707:   }

709:   /* Local matrix of all local Schur on subsets (transposed) */
710:   if (!sub_schurs->S_Ej_all) {
711:     Mat          T;
712:     PetscScalar *v;
713:     PetscInt    *ii, *jj;
714:     PetscInt     cum, i, j, k;

716:     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
717:        Allocate properly a representative matrix and duplicate */
718:     PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v));
719:     ii[0] = 0;
720:     cum   = 0;
721:     for (i = 0; i < sub_schurs->n_subs; i++) {
722:       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
723:       for (j = 0; j < subset_size; j++) {
724:         const PetscInt row = cum + j;
725:         PetscInt       col = cum;

727:         ii[row + 1] = ii[row] + subset_size;
728:         for (k = ii[row]; k < ii[row + 1]; k++) {
729:           jj[k] = col;
730:           col++;
731:         }
732:       }
733:       cum += subset_size;
734:     }
735:     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T));
736:     PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all));
737:     PetscCall(MatDestroy(&T));
738:     PetscCall(PetscFree3(ii, jj, v));
739:   }
740:   /* matrices for deluxe scaling and adaptive selection */
741:   if (compute_Stilda) {
742:     if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all));
743:     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all));
744:   }

746:   /* Compute Schur complements explicitly */
747:   F = NULL;
748:   if (!sub_schurs->schur_explicit) {
749:     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
750:        it is not efficient, unless the economic version of the scaling is used */
751:     Mat          S_Ej_expl;
752:     PetscScalar *work;
753:     PetscInt     j, *dummy_idx;
754:     PetscBool    Sdense;

756:     PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work));
757:     local_size = 0;
758:     for (i = 0; i < sub_schurs->n_subs; i++) {
759:       IS  is_subset_B;
760:       Mat AE_EE, AE_IE, AE_EI, S_Ej;

762:       /* subsets in original and boundary numbering */
763:       PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B));
764:       /* EE block */
765:       PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE));
766:       /* IE block */
767:       PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE));
768:       /* EI block */
769:       if (sub_schurs->is_symmetric) {
770:         PetscCall(MatCreateTranspose(AE_IE, &AE_EI));
771:       } else if (sub_schurs->is_hermitian) {
772:         PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI));
773:       } else {
774:         PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI));
775:       }
776:       PetscCall(ISDestroy(&is_subset_B));
777:       PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej));
778:       PetscCall(MatDestroy(&AE_EE));
779:       PetscCall(MatDestroy(&AE_IE));
780:       PetscCall(MatDestroy(&AE_EI));
781:       if (AE_II == A_II) { /* we can reuse the same ksp */
782:         KSP ksp;
783:         PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp));
784:         PetscCall(MatSchurComplementSetKSP(S_Ej, ksp));
785:       } else { /* build new ksp object which inherits ksp and pc types from the original one */
786:         KSP       origksp, schurksp;
787:         PC        origpc, schurpc;
788:         KSPType   ksp_type;
789:         PetscInt  n_internal;
790:         PetscBool ispcnone;

792:         PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp));
793:         PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp));
794:         PetscCall(KSPGetType(origksp, &ksp_type));
795:         PetscCall(KSPSetType(schurksp, ksp_type));
796:         PetscCall(KSPGetPC(schurksp, &schurpc));
797:         PetscCall(KSPGetPC(origksp, &origpc));
798:         PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone));
799:         if (!ispcnone) {
800:           PCType pc_type;
801:           PetscCall(PCGetType(origpc, &pc_type));
802:           PetscCall(PCSetType(schurpc, pc_type));
803:         } else {
804:           PetscCall(PCSetType(schurpc, PCLU));
805:         }
806:         PetscCall(ISGetSize(is_I, &n_internal));
807:         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
808:           MatSolverType solver = NULL;
809:           PetscCall(PCFactorGetMatSolverType(origpc, (MatSolverType *)&solver));
810:           if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver));
811:         }
812:         PetscCall(KSPSetUp(schurksp));
813:       }
814:       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
815:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl));
816:       PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl));
817:       PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense));
818:       PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
819:       for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j;
820:       PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES));
821:       PetscCall(MatDestroy(&S_Ej));
822:       PetscCall(MatDestroy(&S_Ej_expl));
823:       local_size += subset_size;
824:     }
825:     PetscCall(PetscFree2(dummy_idx, work));
826:     /* free */
827:     PetscCall(ISDestroy(&is_I));
828:     PetscCall(MatDestroy(&AE_II));
829:     PetscCall(PetscFree(all_local_idx_N));
830:   } else {
831:     Mat                A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
832:     Mat               *gdswA;
833:     Vec                Dall = NULL;
834:     IS                 is_A_all, *is_p_r = NULL;
835:     MatType            Stype;
836:     PetscScalar       *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
837:     PetscScalar       *SEj_arr = NULL, *SEjinv_arr = NULL;
838:     const PetscScalar *rS_data;
839:     PetscInt           n, n_I, size_schur, size_active_schur, cum, cum2;
840:     PetscBool          economic, solver_S, S_lower_triangular = PETSC_FALSE;
841:     PetscBool          schur_has_vertices, factor_workaround;
842:     PetscBool          use_cholesky;
843: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
844:     PetscBool oldpin;
845: #endif

847:     /* get sizes */
848:     n_I = 0;
849:     if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I));
850:     economic = PETSC_FALSE;
851:     PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum));
852:     if (cum != n_I) economic = PETSC_TRUE;
853:     PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL));
854:     size_active_schur = local_size;

856:     /* import scaling vector (wrong formulation if we have 3D edges) */
857:     if (scaling && compute_Stilda) {
858:       const PetscScalar *array;
859:       PetscScalar       *array2;
860:       const PetscInt    *idxs;
861:       PetscInt           i;

863:       PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
864:       PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall));
865:       PetscCall(VecGetArrayRead(scaling, &array));
866:       PetscCall(VecGetArray(Dall, &array2));
867:       for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
868:       PetscCall(VecRestoreArray(Dall, &array2));
869:       PetscCall(VecRestoreArrayRead(scaling, &array));
870:       PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
871:       deluxe = PETSC_FALSE;
872:     }

874:     /* size active schurs does not count any dirichlet or vertex dof on the interface */
875:     factor_workaround  = PETSC_FALSE;
876:     schur_has_vertices = PETSC_FALSE;
877:     cum                = n_I + size_active_schur;
878:     if (sub_schurs->is_dir) {
879:       const PetscInt *idxs;
880:       PetscInt        n_dir;

882:       PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
883:       PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
884:       PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir));
885:       PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
886:       cum += n_dir;
887:       if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
888:     }
889:     /* include the primal vertices in the Schur complement */
890:     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
891:       PetscInt n_v;

893:       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
894:       if (n_v) {
895:         const PetscInt *idxs;

897:         PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
898:         PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v));
899:         PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
900:         cum += n_v;
901:         if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
902:         schur_has_vertices = PETSC_TRUE;
903:       }
904:     }
905:     size_schur = cum - n_I;
906:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all));
907: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
908:     oldpin = sub_schurs->A->boundtocpu;
909:     PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE));
910: #endif
911:     if (cum == n) {
912:       PetscCall(ISSetPermutation(is_A_all));
913:       PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A));
914:     } else {
915:       PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A));
916:     }
917: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
918:     PetscCall(MatBindToCPU(sub_schurs->A, oldpin));
919: #endif
920:     PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix));
921:     PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_"));

923:     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
924:        this is a workaround */
925:     if (benign_n) {
926:       Vec                    v, benign_AIIm1_ones;
927:       ISLocalToGlobalMapping N_to_reor;
928:       IS                     is_p0, is_p0_p;
929:       PetscScalar           *cs_AIB, *AIIm1_data;
930:       PetscInt               sizeA;

932:       PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor));
933:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
934:       PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p));
935:       PetscCall(ISDestroy(&is_p0));
936:       PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
937:       PetscCall(VecGetSize(v, &sizeA));
938:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat));
939:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat));
940:       PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB));
941:       PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
942:       PetscCall(PetscMalloc1(benign_n, &is_p_r));
943:       /* compute colsum of A_IB restricted to pressures */
944:       for (i = 0; i < benign_n; i++) {
945:         const PetscScalar *array;
946:         const PetscInt    *idxs;
947:         PetscInt           j, nz;

949:         PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]));
950:         PetscCall(ISGetLocalSize(is_p_r[i], &nz));
951:         PetscCall(ISGetIndices(is_p_r[i], &idxs));
952:         for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
953:         PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
954:         PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
955:         PetscCall(MatMult(A, benign_AIIm1_ones, v));
956:         PetscCall(VecResetArray(benign_AIIm1_ones));
957:         PetscCall(VecGetArrayRead(v, &array));
958:         for (j = 0; j < size_schur; j++) {
959: #if defined(PETSC_USE_COMPLEX)
960:           cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
961: #else
962:           cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
963: #endif
964:         }
965:         PetscCall(VecRestoreArrayRead(v, &array));
966:       }
967:       PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB));
968:       PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
969:       PetscCall(VecDestroy(&v));
970:       PetscCall(VecDestroy(&benign_AIIm1_ones));
971:       PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
972:       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
973:       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
974:       PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL));
975:       PetscCall(ISDestroy(&is_p0_p));
976:       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
977:     }
978:     PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric));
979:     PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian));
980:     PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef));

982:     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
983:     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);

985:     /* when using the benign subspace trick, the local Schur complements are SPD */
986:     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
987:        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
988:     if (benign_trick) {
989:       sub_schurs->is_posdef = PETSC_TRUE;
990:       PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg));
991:       if (flg) use_cholesky = PETSC_FALSE;
992:     }

994:     if (n_I) {
995:       IS        is_schur;
996:       char      stype[64];
997:       PetscBool gpu = PETSC_FALSE;

999:       if (use_cholesky) {
1000:         PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F));
1001:       } else {
1002:         PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F));
1003:       }
1004:       PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name);
1005:       PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE));
1006: #if defined(PETSC_HAVE_MKL_PARDISO)
1007:       if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1008: #endif
1009:       /* subsets ordered last */
1010:       PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur));
1011:       PetscCall(MatFactorSetSchurIS(F, is_schur));
1012:       PetscCall(ISDestroy(&is_schur));

1014:       /* factorization step */
1015:       if (use_cholesky) {
1016:         PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
1017: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1018:         PetscCall(MatMumpsSetIcntl(F, 19, 2));
1019: #endif
1020:         PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
1021:         S_lower_triangular = PETSC_TRUE;
1022:       } else {
1023:         PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
1024: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1025:         PetscCall(MatMumpsSetIcntl(F, 19, 3));
1026: #endif
1027:         PetscCall(MatLUFactorNumeric(F, A, NULL));
1028:       }
1029:       PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view"));

1031:       if (matl_dbg_viewer) {
1032:         Mat S;
1033:         IS  is;

1035:         PetscCall(PetscObjectSetName((PetscObject)A, "A"));
1036:         PetscCall(MatView(A, matl_dbg_viewer));
1037:         PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1038:         PetscCall(PetscObjectSetName((PetscObject)S, "S"));
1039:         PetscCall(MatView(S, matl_dbg_viewer));
1040:         PetscCall(MatDestroy(&S));
1041:         PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is));
1042:         PetscCall(PetscObjectSetName((PetscObject)is, "I"));
1043:         PetscCall(ISView(is, matl_dbg_viewer));
1044:         PetscCall(ISDestroy(&is));
1045:         PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is));
1046:         PetscCall(PetscObjectSetName((PetscObject)is, "B"));
1047:         PetscCall(ISView(is, matl_dbg_viewer));
1048:         PetscCall(ISDestroy(&is));
1049:         PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA"));
1050:         PetscCall(ISView(is_A_all, matl_dbg_viewer));
1051:         for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
1052:           IS   is;
1053:           char name[16];

1055:           PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i));
1056:           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1057:           PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is));
1058:           PetscCall(PetscObjectSetName((PetscObject)is, name));
1059:           PetscCall(ISView(is, matl_dbg_viewer));
1060:           PetscCall(ISDestroy(&is));
1061:           if (sub_schurs->change) {
1062:             Mat T;

1064:             PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i));
1065:             PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL));
1066:             PetscCall(PetscObjectSetName((PetscObject)T, name));
1067:             PetscCall(MatView(T, matl_dbg_viewer));
1068:             PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i));
1069:             PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name));
1070:             PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer));
1071:           }
1072:           cum += subset_size;
1073:         }
1074:         PetscCall(PetscViewerFlush(matl_dbg_viewer));
1075:       }

1077:       /* get explicit Schur Complement computed during numeric factorization */
1078:       PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1079:       PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype)));
1080: #if defined(PETSC_HAVE_CUDA)
1081:       PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, ""));
1082: #endif
1083:       if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype)));
1084:       PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL));
1085:       PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all));
1086:       PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1087:       PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1088:       PetscCall(MatGetType(S_all, &Stype));

1090:       /* we can reuse the solvers if we are not using the economic version */
1091:       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1092:       if (!sub_schurs->gdsw) {
1093:         factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1094:         if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
1095:       }
1096:       solver_S = PETSC_TRUE;

1098:       /* update the Schur complement with the change of basis on the pressures */
1099:       if (benign_n) {
1100:         const PetscScalar *cs_AIB;
1101:         PetscScalar       *S_data, *AIIm1_data;
1102:         Mat                S2 = NULL, S3 = NULL; /* dbg */
1103:         PetscScalar       *S2_data, *S3_data;    /* dbg */
1104:         Vec                v, benign_AIIm1_ones;
1105:         PetscInt           sizeA;

1107:         PetscCall(MatDenseGetArray(S_all, &S_data));
1108:         PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
1109:         PetscCall(VecGetSize(v, &sizeA));
1110: #if defined(PETSC_HAVE_MUMPS)
1111:         PetscCall(MatMumpsSetIcntl(F, 26, 0));
1112: #endif
1113: #if defined(PETSC_HAVE_MKL_PARDISO)
1114:         PetscCall(MatMkl_PardisoSetCntl(F, 70, 1));
1115: #endif
1116:         PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB));
1117:         PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
1118:         if (matl_dbg_viewer) {
1119:           PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2));
1120:           PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3));
1121:           PetscCall(MatDenseGetArray(S2, &S2_data));
1122:           PetscCall(MatDenseGetArray(S3, &S3_data));
1123:         }
1124:         for (i = 0; i < benign_n; i++) {
1125:           PetscScalar    *array, sum = 0., one = 1., *sums;
1126:           const PetscInt *idxs;
1127:           PetscInt        k, j, nz;
1128:           PetscBLASInt    B_k, B_n;

1130:           PetscCall(PetscCalloc1(benign_n, &sums));
1131:           PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
1132:           PetscCall(VecCopy(benign_AIIm1_ones, v));
1133:           PetscCall(MatSolve(F, v, benign_AIIm1_ones));
1134:           PetscCall(MatMult(A, benign_AIIm1_ones, v));
1135:           PetscCall(VecResetArray(benign_AIIm1_ones));
1136:           /* p0 dofs (eliminated) are excluded from the sums */
1137:           for (k = 0; k < benign_n; k++) {
1138:             PetscCall(ISGetLocalSize(is_p_r[k], &nz));
1139:             PetscCall(ISGetIndices(is_p_r[k], &idxs));
1140:             for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
1141:             PetscCall(ISRestoreIndices(is_p_r[k], &idxs));
1142:           }
1143:           PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
1144:           if (matl_dbg_viewer) {
1145:             Vec  vv;
1146:             char name[16];

1148:             PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv));
1149:             PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i));
1150:             PetscCall(PetscObjectSetName((PetscObject)vv, name));
1151:             PetscCall(VecView(vv, matl_dbg_viewer));
1152:           }
1153:           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1154:           /* cs_AIB already scaled by 1./nz */
1155:           B_k = 1;
1156:           PetscCall(PetscBLASIntCast(size_schur, &B_n));
1157:           for (k = 0; k < benign_n; k++) {
1158:             sum = sums[k];

1160:             if (PetscAbsScalar(sum) == 0.0) continue;
1161:             if (k == i) {
1162:               if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1163:               if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1164:             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1165:               sum /= 2.0;
1166:               if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1167:               if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1168:             }
1169:           }
1170:           sum = 1.;
1171:           if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1172:           if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n));
1173:           PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
1174:           /* set p0 entry of AIIm1_ones to zero */
1175:           PetscCall(ISGetLocalSize(is_p_r[i], &nz));
1176:           PetscCall(ISGetIndices(is_p_r[i], &idxs));
1177:           for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
1178:           PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
1179:           PetscCall(PetscFree(sums));
1180:         }
1181:         PetscCall(VecDestroy(&benign_AIIm1_ones));
1182:         if (matl_dbg_viewer) {
1183:           PetscCall(MatDenseRestoreArray(S2, &S2_data));
1184:           PetscCall(MatDenseRestoreArray(S3, &S3_data));
1185:         }
1186:         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1187:           PetscInt k, j;
1188:           for (k = 0; k < size_schur; k++) {
1189:             for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]);
1190:           }
1191:         }

1193:         /* restore defaults */
1194: #if defined(PETSC_HAVE_MUMPS)
1195:         PetscCall(MatMumpsSetIcntl(F, 26, -1));
1196: #endif
1197: #if defined(PETSC_HAVE_MKL_PARDISO)
1198:         PetscCall(MatMkl_PardisoSetCntl(F, 70, 0));
1199: #endif
1200:         PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB));
1201:         PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1202:         PetscCall(VecDestroy(&v));
1203:         PetscCall(MatDenseRestoreArray(S_all, &S_data));
1204:         if (matl_dbg_viewer) {
1205:           Mat S;

1207:           PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1208:           PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1209:           PetscCall(PetscObjectSetName((PetscObject)S, "Sb"));
1210:           PetscCall(MatView(S, matl_dbg_viewer));
1211:           PetscCall(MatDestroy(&S));
1212:           PetscCall(PetscObjectSetName((PetscObject)S2, "S2P"));
1213:           PetscCall(MatView(S2, matl_dbg_viewer));
1214:           PetscCall(PetscObjectSetName((PetscObject)S3, "S3P"));
1215:           PetscCall(MatView(S3, matl_dbg_viewer));
1216:           PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs"));
1217:           PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer));
1218:           PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1219:         }
1220:         PetscCall(MatDestroy(&S2));
1221:         PetscCall(MatDestroy(&S3));
1222:       }
1223:       if (!reuse_solvers) {
1224:         for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i]));
1225:         PetscCall(PetscFree(is_p_r));
1226:         PetscCall(MatDestroy(&cs_AIB_mat));
1227:         PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1228:       }
1229:     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1230:       PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all));
1231:       PetscCall(MatGetType(S_all, &Stype));
1232:       reuse_solvers     = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1233:       factor_workaround = PETSC_FALSE;
1234:       solver_S          = PETSC_FALSE;
1235:     }

1237:     if (reuse_solvers) {
1238:       Mat                A_II, pA_II, Afake;
1239:       Vec                vec1_B;
1240:       PCBDDCReuseSolvers msolv_ctx;
1241:       PetscInt           n_R;

1243:       if (sub_schurs->reuse_solver) {
1244:         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1245:       } else {
1246:         PetscCall(PetscNew(&sub_schurs->reuse_solver));
1247:       }
1248:       msolv_ctx = sub_schurs->reuse_solver;
1249:       PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL));
1250:       PetscCall(PetscObjectReference((PetscObject)F));
1251:       msolv_ctx->F = F;
1252:       PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL));
1253:       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1254:       {
1255:         PetscScalar *array;
1256:         PetscInt     n;

1258:         PetscCall(VecGetLocalSize(msolv_ctx->sol, &n));
1259:         PetscCall(VecGetArray(msolv_ctx->sol, &array));
1260:         PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs));
1261:         PetscCall(VecRestoreArray(msolv_ctx->sol, &array));
1262:       }
1263:       msolv_ctx->has_vertices = schur_has_vertices;

1265:       /* interior solver */
1266:       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver));
1267:       PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II));
1268:       PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL));
1269:       PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)"));
1270:       PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx));
1271:       PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1272:       PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior));
1273:       PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose));
1274:       if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy));

1276:       /* correction solver */
1277:       if (!sub_schurs->gdsw) {
1278:         PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver));
1279:         PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL));
1280:         PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)"));
1281:         PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx));
1282:         PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1283:         PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction));
1284:         PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose));

1286:         /* scatter and vecs for Schur complement solver */
1287:         PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B));
1288:         PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL));
1289:         if (!schur_has_vertices) {
1290:           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B));
1291:           PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B));
1292:           PetscCall(PetscObjectReference((PetscObject)is_A_all));
1293:           msolv_ctx->is_R = is_A_all;
1294:         } else {
1295:           IS              is_B_all;
1296:           const PetscInt *idxs;
1297:           PetscInt        dual, n_v, n;

1299:           PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
1300:           dual = size_schur - n_v;
1301:           PetscCall(ISGetLocalSize(is_A_all, &n));
1302:           PetscCall(ISGetIndices(is_A_all, &idxs));
1303:           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all));
1304:           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B));
1305:           PetscCall(ISDestroy(&is_B_all));
1306:           PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all));
1307:           PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B));
1308:           PetscCall(ISDestroy(&is_B_all));
1309:           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R));
1310:           PetscCall(ISRestoreIndices(is_A_all, &idxs));
1311:         }
1312:         PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R));
1313:         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake));
1314:         PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY));
1315:         PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY));
1316:         PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake));
1317:         PetscCall(MatDestroy(&Afake));
1318:         PetscCall(VecDestroy(&vec1_B));
1319:       }
1320:       /* communicate benign info to solver context */
1321:       if (benign_n) {
1322:         PetscScalar *array;

1324:         msolv_ctx->benign_n             = benign_n;
1325:         msolv_ctx->benign_zerodiag_subs = is_p_r;
1326:         PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals));
1327:         msolv_ctx->benign_csAIB = cs_AIB_mat;
1328:         PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL));
1329:         PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array));
1330:         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec));
1331:         PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array));
1332:         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1333:       }
1334:     } else {
1335:       if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1336:       PetscCall(PetscFree(sub_schurs->reuse_solver));
1337:     }
1338:     PetscCall(MatDestroy(&A));
1339:     PetscCall(ISDestroy(&is_A_all));

1341:     /* Work arrays */
1342:     PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work));

1344:     /* S_Ej_all */
1345:     cum = cum2 = 0;
1346:     PetscCall(MatDenseGetArrayRead(S_all, &rS_data));
1347:     PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr));
1348:     if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1349:     if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA));
1350:     for (i = 0; i < sub_schurs->n_subs; i++) {
1351:       PetscInt j;

1353:       /* get S_E (or K^i_EE for GDSW) */
1354:       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1355:       if (sub_schurs->gdsw) {
1356:         Mat T;

1358:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T));
1359:         PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T));
1360:         PetscCall(MatDestroy(&T));
1361:       } else {
1362:         if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1363:           PetscInt k;
1364:           for (k = 0; k < subset_size; k++) {
1365:             for (j = k; j < subset_size; j++) {
1366:               work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1367:               work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1368:             }
1369:           }
1370:         } else { /* just copy to workspace */
1371:           PetscInt k;
1372:           for (k = 0; k < subset_size; k++) {
1373:             for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1374:           }
1375:         }
1376:       }
1377:       /* insert S_E values */
1378:       if (sub_schurs->change) {
1379:         Mat change_sub, SEj, T;

1381:         /* change basis */
1382:         PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1383:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1384:         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1385:           Mat T2;
1386:           PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1387:           PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1388:           PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1389:           PetscCall(MatDestroy(&T2));
1390:         } else {
1391:           PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1392:         }
1393:         PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1394:         PetscCall(MatDestroy(&T));
1395:         PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL));
1396:         PetscCall(MatDestroy(&SEj));
1397:       }
1398:       PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1399:       if (compute_Stilda) {
1400:         if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1401:           Mat                M;
1402:           const PetscScalar *vals;
1403:           PetscBool          isdense, isdensecuda;

1405:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M));
1406:           PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1407:           PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1408:           if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype));
1409:           PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense));
1410:           PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda));
1411:           if (use_cholesky) {
1412:             PetscCall(MatCholeskyFactor(M, NULL, NULL));
1413:           } else {
1414:             PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1415:           }
1416:           if (isdense) {
1417:             PetscCall(MatSeqDenseInvertFactors_Private(M));
1418: #if defined(PETSC_HAVE_CUDA)
1419:           } else if (isdensecuda) {
1420:             PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M));
1421: #endif
1422:           } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
1423:           PetscCall(MatDenseGetArrayRead(M, &vals));
1424:           PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size));
1425:           PetscCall(MatDenseRestoreArrayRead(M, &vals));
1426:           PetscCall(MatDestroy(&M));
1427:         } else if (scaling) { /* not using deluxe */
1428:           Mat          SEj;
1429:           Vec          D;
1430:           PetscScalar *array;

1432:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1433:           PetscCall(VecGetArray(Dall, &array));
1434:           PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D));
1435:           PetscCall(VecRestoreArray(Dall, &array));
1436:           PetscCall(VecShift(D, -1.));
1437:           PetscCall(MatDiagonalScale(SEj, D, D));
1438:           PetscCall(MatDestroy(&SEj));
1439:           PetscCall(VecDestroy(&D));
1440:           PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1441:         }
1442:       }
1443:       cum += subset_size;
1444:       cum2 += subset_size * (size_schur + 1);
1445:       SEj_arr += subset_size * subset_size;
1446:       if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1447:     }
1448:     if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA));
1449:     PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data));
1450:     PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr));
1451:     if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1452:     if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));

1454:       /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1455:        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1456: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1457:     if (reuse_solvers) {
1458:       Mat                  St;
1459:       MatFactorSchurStatus st;

1461:       flg = PETSC_FALSE;
1462:       PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL));
1463:       PetscCall(MatFactorGetSchurComplement(F, &St, &st));
1464:       PetscCall(MatBindToCPU(St, flg));
1465:       PetscCall(MatFactorRestoreSchurComplement(F, &St, st));
1466:     }
1467: #endif

1469:     schur_factor = NULL;
1470:     if (compute_Stilda && size_active_schur) {
1471:       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1472:         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1473:         PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur));
1474:         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1475:       } else {
1476:         Mat S_all_inv = NULL;

1478:         if (solver_S && !sub_schurs->gdsw) {
1479:           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1480:              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1481:           if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1482:             PetscScalar *data;
1483:             PetscInt     nd = 0;

1485:             PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1486:             PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1487:             PetscCall(MatDenseGetArray(S_all_inv, &data));
1488:             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1489:               PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1490:             }

1492:             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1493:             if (schur_has_vertices) {
1494:               Mat          M;
1495:               PetscScalar *tdata;
1496:               PetscInt     nv = 0, news;

1498:               PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
1499:               news = size_active_schur + nv;
1500:               PetscCall(PetscCalloc1(news * news, &tdata));
1501:               for (i = 0; i < size_active_schur; i++) {
1502:                 PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i));
1503:                 PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv));
1504:               }
1505:               for (i = 0; i < nv; i++) {
1506:                 PetscInt k = i + size_active_schur;
1507:                 PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i));
1508:               }

1510:               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M));
1511:               PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1512:               PetscCall(MatCholeskyFactor(M, NULL, NULL));
1513:               /* save the factors */
1514:               cum = 0;
1515:               PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor));
1516:               for (i = 0; i < size_active_schur; i++) {
1517:                 PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i));
1518:                 cum += size_active_schur - i;
1519:               }
1520:               for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
1521:               PetscCall(MatSeqDenseInvertFactors_Private(M));
1522:               /* move back just the active dofs to the Schur complement */
1523:               for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur));
1524:               PetscCall(PetscFree(tdata));
1525:               PetscCall(MatDestroy(&M));
1526:             } else { /* we can factorize and invert just the activedofs */
1527:               Mat          M;
1528:               PetscScalar *aux;

1530:               PetscCall(PetscMalloc1(nd, &aux));
1531:               for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
1532:               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M));
1533:               PetscCall(MatDenseSetLDA(M, size_schur));
1534:               PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1535:               PetscCall(MatCholeskyFactor(M, NULL, NULL));
1536:               PetscCall(MatSeqDenseInvertFactors_Private(M));
1537:               PetscCall(MatDestroy(&M));
1538:               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M));
1539:               PetscCall(MatZeroEntries(M));
1540:               PetscCall(MatDestroy(&M));
1541:               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M));
1542:               PetscCall(MatDenseSetLDA(M, size_schur));
1543:               PetscCall(MatZeroEntries(M));
1544:               PetscCall(MatDestroy(&M));
1545:               for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
1546:               PetscCall(PetscFree(aux));
1547:             }
1548:             PetscCall(MatDenseRestoreArray(S_all_inv, &data));
1549:           } else { /* use MatFactor calls to invert S */
1550:             PetscCall(MatFactorInvertSchurComplement(F));
1551:             PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1552:           }
1553:         } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1554:           PetscCall(PetscObjectReference((PetscObject)S_all));
1555:           S_all_inv = S_all;
1556:           PetscCall(MatDenseGetArray(S_all_inv, &S_data));
1557:           PetscCall(PetscBLASIntCast(size_schur, &B_N));
1558:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1559:           if (use_potr) {
1560:             PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1561:             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1562:             PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1563:             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1564:           } else if (use_sytr) {
1565:             PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1566:             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1567:             PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1568:             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1569:           } else {
1570:             PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1571:             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1572:             PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1573:             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1574:           }
1575:           PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur));
1576:           PetscCall(PetscFPTrapPop());
1577:           PetscCall(MatDenseRestoreArray(S_all_inv, &S_data));
1578:         } else if (sub_schurs->gdsw) {
1579:           Mat      tS, tX, SEj, S_II, S_IE, S_EE;
1580:           KSP      pS_II;
1581:           PC       pS_II_pc;
1582:           IS       EE, II;
1583:           PetscInt nS;

1585:           PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL));
1586:           PetscCall(MatGetSize(tS, &nS, NULL));
1587:           PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1588:           for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
1589:             PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1590:             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj));

1592:             PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE));
1593:             PetscCall(ISComplement(EE, 0, nS, &II));
1594:             PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II));
1595:             PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE));
1596:             PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE));
1597:             PetscCall(ISDestroy(&II));
1598:             PetscCall(ISDestroy(&EE));

1600:             PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II));
1601:             PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */
1602:             PetscCall(KSPSetType(pS_II, KSPPREONLY));
1603:             PetscCall(KSPGetPC(pS_II, &pS_II_pc));
1604:             PetscCall(PCSetType(pS_II_pc, PCSVD));
1605:             PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix));
1606:             PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_"));
1607:             PetscCall(KSPSetOperators(pS_II, S_II, S_II));
1608:             PetscCall(MatDestroy(&S_II));
1609:             PetscCall(KSPSetFromOptions(pS_II));
1610:             PetscCall(KSPSetUp(pS_II));
1611:             PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX));
1612:             PetscCall(KSPMatSolve(pS_II, S_IE, tX));
1613:             PetscCall(KSPDestroy(&pS_II));

1615:             PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DEFAULT, &SEj));
1616:             PetscCall(MatDestroy(&S_IE));
1617:             PetscCall(MatDestroy(&tX));
1618:             PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN));
1619:             PetscCall(MatDestroy(&S_EE));

1621:             PetscCall(MatDestroy(&SEj));
1622:             cum += subset_size;
1623:             SEjinv_arr += subset_size * subset_size;
1624:           }
1625:           PetscCall(MatDestroy(&tS));
1626:           PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1627:         }
1628:         /* S_Ej_tilda_all */
1629:         cum = cum2 = 0;
1630:         rS_data    = NULL;
1631:         if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data));
1632:         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1633:         for (i = 0; i < sub_schurs->n_subs; i++) {
1634:           PetscInt j;

1636:           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1637:           /* get (St^-1)_E */
1638:           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1639:              will be properly accessed later during adaptive selection */
1640:           if (rS_data) {
1641:             if (S_lower_triangular) {
1642:               PetscInt k;
1643:               if (sub_schurs->change) {
1644:                 for (k = 0; k < subset_size; k++) {
1645:                   for (j = k; j < subset_size; j++) {
1646:                     work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1647:                     work[j * subset_size + k] = work[k * subset_size + j];
1648:                   }
1649:                 }
1650:               } else {
1651:                 for (k = 0; k < subset_size; k++) {
1652:                   for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1653:                 }
1654:               }
1655:             } else {
1656:               PetscInt k;
1657:               for (k = 0; k < subset_size; k++) {
1658:                 for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1659:               }
1660:             }
1661:           }
1662:           if (sub_schurs->change) {
1663:             Mat         change_sub, SEj, T;
1664:             PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;

1666:             /* change basis */
1667:             PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1668:             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj));
1669:             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1670:               Mat T2;
1671:               PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1672:               PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1673:               PetscCall(MatDestroy(&T2));
1674:               PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1675:             } else {
1676:               PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1677:             }
1678:             PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1679:             PetscCall(MatDestroy(&T));
1680:             PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL));
1681:             PetscCall(MatDestroy(&SEj));
1682:           }
1683:           if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size));
1684:           cum += subset_size;
1685:           cum2 += subset_size * (size_schur + 1);
1686:           SEjinv_arr += subset_size * subset_size;
1687:         }
1688:         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1689:         if (S_all_inv) {
1690:           PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data));
1691:           if (solver_S) {
1692:             if (schur_has_vertices) {
1693:               PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED));
1694:             } else {
1695:               PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED));
1696:             }
1697:           }
1698:         }
1699:         PetscCall(MatDestroy(&S_all_inv));
1700:       }

1702:       /* move back factors if needed */
1703:       if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1704:         Mat          S_tmp;
1705:         PetscInt     nd = 0;
1706:         PetscScalar *data;

1708:         PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1709:         PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
1710:         PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL));
1711:         PetscCall(MatDenseGetArray(S_tmp, &data));
1712:         PetscCall(PetscArrayzero(data, size_schur * size_schur));

1714:         if (S_lower_triangular) {
1715:           cum = 0;
1716:           for (i = 0; i < size_active_schur; i++) {
1717:             PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i));
1718:             cum += size_active_schur - i;
1719:           }
1720:         } else {
1721:           PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur));
1722:         }
1723:         if (sub_schurs->is_dir) {
1724:           PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1725:           for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i];
1726:         }
1727:         /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1728:              set the diagonal entry of the Schur factor to a very large value */
1729:         for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty;
1730:         PetscCall(MatDenseRestoreArray(S_tmp, &data));
1731:         PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED));
1732:       }
1733:     } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1734:       PetscScalar *data;
1735:       PetscInt     nd = 0;

1737:       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1738:         PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1739:       }
1740:       PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1741:       PetscCall(MatDenseGetArray(S_all, &data));
1742:       for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1743:       for (i = size_active_schur + nd; i < size_schur; i++) {
1744:         PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1745:         data[i * (size_schur + 1)] = infty;
1746:       }
1747:       PetscCall(MatDenseRestoreArray(S_all, &data));
1748:       PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1749:     }
1750:     PetscCall(PetscFree(work));
1751:     PetscCall(PetscFree(schur_factor));
1752:     PetscCall(VecDestroy(&Dall));
1753:   }
1754:   PetscCall(ISDestroy(&is_I_layer));
1755:   PetscCall(MatDestroy(&S_all));
1756:   PetscCall(MatDestroy(&A_BB));
1757:   PetscCall(MatDestroy(&A_IB));
1758:   PetscCall(MatDestroy(&A_BI));
1759:   PetscCall(MatDestroy(&F));

1761:   PetscCall(PetscMalloc1(sub_schurs->n_subs, &nnz));
1762:   for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i]));
1763:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer));
1764:   PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz));
1765:   PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1766:   PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1767:   if (compute_Stilda) {
1768:     PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz));
1769:     PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1770:     PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1771:     if (deluxe) {
1772:       PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz));
1773:       PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1774:       PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1775:     }
1776:   }
1777:   PetscCall(ISDestroy(&is_I_layer));

1779:   /* Get local part of (\sum_j S_Ej) */
1780:   if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all));
1781:   PetscCall(VecSet(gstash, 0.0));
1782:   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray));
1783:   PetscCall(VecPlaceArray(lstash, stasharray));
1784:   PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1785:   PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1786:   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray));
1787:   PetscCall(VecResetArray(lstash));
1788:   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray));
1789:   PetscCall(VecPlaceArray(lstash, stasharray));
1790:   PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1791:   PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1792:   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray));
1793:   PetscCall(VecResetArray(lstash));

1795:   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1796:   if (compute_Stilda) {
1797:     PetscCall(VecSet(gstash, 0.0));
1798:     PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1799:     PetscCall(VecPlaceArray(lstash, stasharray));
1800:     PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1801:     PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1802:     PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1803:     PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1804:     PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1805:     PetscCall(VecResetArray(lstash));
1806:     if (deluxe) {
1807:       PetscCall(VecSet(gstash, 0.0));
1808:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1809:       PetscCall(VecPlaceArray(lstash, stasharray));
1810:       PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1811:       PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1812:       PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1813:       PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1814:       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1815:       PetscCall(VecResetArray(lstash));
1816:     } else if (!sub_schurs->gdsw) {
1817:       PetscScalar *array;
1818:       PetscInt     cum;

1820:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1821:       cum = 0;
1822:       for (i = 0; i < sub_schurs->n_subs; i++) {
1823:         PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1824:         PetscCall(PetscBLASIntCast(subset_size, &B_N));
1825:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1826:         if (use_potr) {
1827:           PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
1828:           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1829:           PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
1830:           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1831:         } else if (use_sytr) {
1832:           PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1833:           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1834:           PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
1835:           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1836:         } else {
1837:           PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
1838:           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1839:           PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1840:           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1841:         }
1842:         PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size));
1843:         PetscCall(PetscFPTrapPop());
1844:         cum += subset_size * subset_size;
1845:       }
1846:       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1847:       PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1848:       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1849:       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1850:     }
1851:   }
1852:   PetscCall(VecDestroy(&lstash));
1853:   PetscCall(VecDestroy(&gstash));
1854:   PetscCall(VecScatterDestroy(&sstash));

1856:   if (matl_dbg_viewer) {
1857:     if (sub_schurs->S_Ej_all) {
1858:       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE"));
1859:       PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer));
1860:     }
1861:     if (sub_schurs->sum_S_Ej_all) {
1862:       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE"));
1863:       PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer));
1864:     }
1865:     if (sub_schurs->sum_S_Ej_inv_all) {
1866:       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm"));
1867:       PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer));
1868:     }
1869:     if (sub_schurs->sum_S_Ej_tilda_all) {
1870:       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt"));
1871:       PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer));
1872:     }
1873:   }

1875:   /* free workspace */
1876:   if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1877:   if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1878:   PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1879:   PetscCall(PetscFree2(Bwork, pivots));
1880:   PetscCall(PetscCommDestroy(&comm_n));
1881:   PetscFunctionReturn(PETSC_SUCCESS);
1882: }

1884: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
1885: {
1886:   IS       *faces, *edges, *all_cc, vertices;
1887:   PetscInt  s, i, n_faces, n_edges, n_all_cc;
1888:   PetscBool is_sorted, ispardiso, ismumps;

1890:   PetscFunctionBegin;
1891:   PetscCall(ISSorted(is_I, &is_sorted));
1892:   PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted");
1893:   PetscCall(ISSorted(is_B, &is_sorted));
1894:   PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted");

1896:   /* reset any previous data */
1897:   PetscCall(PCBDDCSubSchursReset(sub_schurs));

1899:   sub_schurs->gdsw = gdsw;

1901:   /* get index sets for faces and edges (already sorted by global ordering) */
1902:   PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1903:   n_all_cc = n_faces + n_edges;
1904:   PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge));
1905:   PetscCall(PetscMalloc1(n_all_cc, &all_cc));
1906:   n_all_cc = 0;
1907:   for (i = 0; i < n_faces; i++) {
1908:     PetscCall(ISGetSize(faces[i], &s));
1909:     if (!s) continue;
1910:     if (copycc) {
1911:       PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc]));
1912:     } else {
1913:       PetscCall(PetscObjectReference((PetscObject)faces[i]));
1914:       all_cc[n_all_cc] = faces[i];
1915:     }
1916:     n_all_cc++;
1917:   }
1918:   for (i = 0; i < n_edges; i++) {
1919:     PetscCall(ISGetSize(edges[i], &s));
1920:     if (!s) continue;
1921:     if (copycc) {
1922:       PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc]));
1923:     } else {
1924:       PetscCall(PetscObjectReference((PetscObject)edges[i]));
1925:       all_cc[n_all_cc] = edges[i];
1926:     }
1927:     PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc));
1928:     n_all_cc++;
1929:   }
1930:   PetscCall(PetscObjectReference((PetscObject)vertices));
1931:   sub_schurs->is_vertices = vertices;
1932:   PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1933:   sub_schurs->is_dir = NULL;
1934:   PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir));

1936:   /* Determine if MatFactor can be used */
1937:   PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix));
1938: #if defined(PETSC_HAVE_MUMPS)
1939:   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type)));
1940: #elif defined(PETSC_HAVE_MKL_PARDISO)
1941:   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type)));
1942: #else
1943:   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type)));
1944: #endif
1945: #if defined(PETSC_USE_COMPLEX)
1946:   sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1947: #else
1948:   sub_schurs->is_hermitian = PETSC_TRUE;
1949: #endif
1950:   sub_schurs->is_posdef     = PETSC_TRUE;
1951:   sub_schurs->is_symmetric  = PETSC_TRUE;
1952:   sub_schurs->debug         = PETSC_FALSE;
1953:   sub_schurs->restrict_comm = PETSC_FALSE;
1954:   PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
1955:   PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL));
1956:   PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL));
1957:   PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL));
1958:   PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL));
1959:   PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL));
1960:   PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL));
1961:   PetscOptionsEnd();
1962:   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps));
1963:   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso));
1964:   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);

1966:   /* for reals, symmetric and Hermitian are synonyms */
1967: #if !defined(PETSC_USE_COMPLEX)
1968:   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1969:   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1970: #endif

1972:   PetscCall(PetscObjectReference((PetscObject)is_I));
1973:   sub_schurs->is_I = is_I;
1974:   PetscCall(PetscObjectReference((PetscObject)is_B));
1975:   sub_schurs->is_B = is_B;
1976:   PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
1977:   sub_schurs->l2gmap = graph->l2gmap;
1978:   PetscCall(PetscObjectReference((PetscObject)BtoNmap));
1979:   sub_schurs->BtoNmap            = BtoNmap;
1980:   sub_schurs->n_subs             = n_all_cc;
1981:   sub_schurs->is_subs            = all_cc;
1982:   sub_schurs->S_Ej_all           = NULL;
1983:   sub_schurs->sum_S_Ej_all       = NULL;
1984:   sub_schurs->sum_S_Ej_inv_all   = NULL;
1985:   sub_schurs->sum_S_Ej_tilda_all = NULL;
1986:   sub_schurs->is_Ej_all          = NULL;
1987:   PetscFunctionReturn(PETSC_SUCCESS);
1988: }

1990: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1991: {
1992:   PCBDDCSubSchurs schurs_ctx;

1994:   PetscFunctionBegin;
1995:   PetscCall(PetscNew(&schurs_ctx));
1996:   schurs_ctx->n_subs = 0;
1997:   *sub_schurs        = schurs_ctx;
1998:   PetscFunctionReturn(PETSC_SUCCESS);
1999: }

2001: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
2002: {
2003:   PetscInt i;

2005:   PetscFunctionBegin;
2006:   if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS);
2007:   PetscCall(PetscFree(sub_schurs->prefix));
2008:   PetscCall(MatDestroy(&sub_schurs->A));
2009:   PetscCall(MatDestroy(&sub_schurs->S));
2010:   PetscCall(ISDestroy(&sub_schurs->is_I));
2011:   PetscCall(ISDestroy(&sub_schurs->is_B));
2012:   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
2013:   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
2014:   PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
2015:   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
2016:   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2017:   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
2018:   PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
2019:   PetscCall(ISDestroy(&sub_schurs->is_vertices));
2020:   PetscCall(ISDestroy(&sub_schurs->is_dir));
2021:   PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
2022:   for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
2023:   if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
2024:   if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2025:   PetscCall(PetscFree(sub_schurs->reuse_solver));
2026:   if (sub_schurs->change) {
2027:     for (i = 0; i < sub_schurs->n_subs; i++) {
2028:       PetscCall(KSPDestroy(&sub_schurs->change[i]));
2029:       PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2030:     }
2031:   }
2032:   PetscCall(PetscFree(sub_schurs->change));
2033:   PetscCall(PetscFree(sub_schurs->change_primal_sub));
2034:   sub_schurs->n_subs = 0;
2035:   PetscFunctionReturn(PETSC_SUCCESS);
2036: }

2038: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
2039: {
2040:   PetscFunctionBegin;
2041:   PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2042:   PetscCall(PetscFree(*sub_schurs));
2043:   PetscFunctionReturn(PETSC_SUCCESS);
2044: }

2046: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added)
2047: {
2048:   PetscInt i, j, n;

2050:   PetscFunctionBegin;
2051:   n = 0;
2052:   for (i = -n_prev; i < 0; i++) {
2053:     PetscInt start_dof = queue_tip[i];
2054:     for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
2055:       PetscInt dof = adjncy[j];
2056:       if (!PetscBTLookup(touched, dof)) {
2057:         PetscCall(PetscBTSet(touched, dof));
2058:         queue_tip[n] = dof;
2059:         n++;
2060:       }
2061:     }
2062:   }
2063:   *n_added = n;
2064:   PetscFunctionReturn(PETSC_SUCCESS);
2065: }