/* bddcschurs.c */
1: #include <petsc/private/pcbddcimpl.h>
2: #include <petsc/private/pcbddcprivateimpl.h>
3: #include <../src/mat/impls/dense/seq/dense.h>
4: #include <petscblaslapack.h>
6: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
7: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
8: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
9: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec);
/* Adapt a rhs or solution vector for the benign-subspace trick (change of basis
   on the zero-diagonal subsets).

   ctx  - reuse-solvers context holding the benign correction data
   v    - vector to adapt
   v2   - if present, the adapted values are written into v2 (v is only read);
          if v2 is not present, correction is done in-place on v
   sol  - PETSC_FALSE: adapt a right-hand side (the entry at the last index of
          each subset is saved in ctx->benign_save_vals and overwritten);
          PETSC_TRUE: adapt a solution (the saved entries are restored)
   full - also apply the interior/Schur coupling corrections through
          ctx->benign_csAIB and ctx->benign_AIIm1ones */
PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
{
  PetscScalar *array;
  PetscScalar *array2;

  PetscFunctionBegin;
  if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS); /* no benign subsets: nothing to do */
  if (sol && full) {
    PetscInt n_I, size_schur;

    /* get sizes: v is ordered interior dofs first, then the size_schur Schur dofs */
    PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
    PetscCall(VecGetSize(v, &n_I));
    n_I = n_I - size_schur;
    /* get schur sol from array: wrap the trailing Schur part of v without copying */
    PetscCall(VecGetArray(v, &array));
    PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
    PetscCall(VecRestoreArray(v, &array));
    /* apply interior sol correction */
    PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work));
    PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
    PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v));
  }
  if (v2) {
    PetscInt nl;

    /* out-of-place: copy v into v2, then update array2 (= v2's data) below */
    PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
    PetscCall(VecGetLocalSize(v2, &nl));
    PetscCall(VecGetArray(v2, &array2));
    PetscCall(PetscArraycpy(array2, array, nl));
  } else {
    /* in-place: array2 aliases v's data */
    PetscCall(VecGetArray(v, &array));
    array2 = array;
  }
  if (!sol) { /* change rhs */
    PetscInt n;
    for (n = 0; n < ctx->benign_n; n++) {
      PetscScalar     sum = 0.;
      const PetscInt *cols;
      PetscInt        nz, i;

      PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
      PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
      /* minus the average of the first nz-1 entries of the subset */
      for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
#if defined(PETSC_USE_COMPLEX)
      /* componentwise to avoid complex division */
      sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
#else
      sum = -sum / nz;
#endif
      for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
      /* save the last entry (restored in the sol pass) and overwrite it */
      ctx->benign_save_vals[n] = array2[cols[nz - 1]];
      array2[cols[nz - 1]]     = sum;
      PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
    }
  } else { /* change solution: same update, but restore the saved last entry */
    PetscInt n;
    for (n = 0; n < ctx->benign_n; n++) {
      PetscScalar     sum = 0.;
      const PetscInt *cols;
      PetscInt        nz, i;
      PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
      PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
      for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
#if defined(PETSC_USE_COMPLEX)
      sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
#else
      sum = -sum / nz;
#endif
      for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
      array2[cols[nz - 1]] = ctx->benign_save_vals[n];
      PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
    }
  }
  if (v2) {
    PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
    PetscCall(VecRestoreArray(v2, &array2));
  } else {
    PetscCall(VecRestoreArray(v, &array));
  }
  if (!sol && full) {
    Vec      usedv;
    PetscInt n_I, size_schur;

    /* get sizes */
    PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
    PetscCall(VecGetSize(v, &n_I));
    n_I = n_I - size_schur;
    /* compute schur rhs correction on the vector actually updated above */
    if (v2) {
      usedv = v2;
    } else {
      usedv = v;
    }
    /* apply schur rhs correction */
    PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work));
    PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array));
    PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
    PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array));
    PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec));
    PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Common driver for the solves that reuse the factored matrix ctx->F.

   transpose - use MatSolveTranspose instead of MatSolve
   full      - PETSC_TRUE: solve with the full factorization (correction solve);
               PETSC_FALSE: interior solve only, selected through the solver's
               partial-solution controls (MUMPS ICNTL(26), MKL PARDISO cntl 70
               as set below) */
static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
{
  PCBDDCReuseSolvers ctx;
  PetscBool          copy = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  if (full) {
#if defined(PETSC_HAVE_MUMPS)
    PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
#endif
#if defined(PETSC_HAVE_MKL_PARDISO)
    PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
#endif
    copy = ctx->has_vertices;
  } else { /* interior solver */
#if defined(PETSC_HAVE_MUMPS)
    PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0));
#endif
#if defined(PETSC_HAVE_MKL_PARDISO)
    PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1));
#endif
    copy = PETSC_TRUE;
  }
  /* copy rhs into factored matrix workspace (needed when the internal
     workspace vectors ctx->rhs/ctx->sol must be used for the solve) */
  if (copy) {
    PetscInt     n;
    PetscScalar *array, *array_solver;

    PetscCall(VecGetLocalSize(rhs, &n));
    PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array));
    PetscCall(VecGetArray(ctx->rhs, &array_solver));
    PetscCall(PetscArraycpy(array_solver, array, n));
    PetscCall(VecRestoreArray(ctx->rhs, &array_solver));
    PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array));

    /* benign trick: adapt rhs before, solution after the solve */
    PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full));
    if (transpose) {
      PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol));
    } else {
      PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol));
    }
    PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full));

    /* get back data to caller workspace */
    PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
    PetscCall(VecGetArray(sol, &array));
    PetscCall(PetscArraycpy(array, array_solver, n));
    PetscCall(VecRestoreArray(sol, &array));
    PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
  } else {
    if (ctx->benign_n) {
      /* benign trick with out-of-place rhs adaptation into ctx->rhs */
      PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full));
      if (transpose) {
        PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol));
      } else {
        PetscCall(MatSolve(ctx->F, ctx->rhs, sol));
      }
      PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full));
    } else {
      /* plain solve, no copies needed */
      if (transpose) {
        PetscCall(MatSolveTranspose(ctx->F, rhs, sol));
      } else {
        PetscCall(MatSolve(ctx->F, rhs, sol));
      }
    }
  }
  /* restore defaults */
#if defined(PETSC_HAVE_MUMPS)
  PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
#endif
#if defined(PETSC_HAVE_MKL_PARDISO)
  PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell apply: full (correction) solve, non-transposed */
static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell applytranspose: full (correction) solve, transposed */
static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell apply: interior solve only, non-transposed */
static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell applytranspose: interior solve only, transposed */
static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
221: static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
222: {
223: PCBDDCReuseSolvers ctx;
224: PetscBool iascii;
226: PetscFunctionBegin;
227: PetscCall(PCShellGetContext(pc, &ctx));
228: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
229: if (iascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
230: PetscCall(MatView(ctx->F, viewer));
231: if (iascii) PetscCall(PetscViewerPopFormat(viewer));
232: PetscFunctionReturn(PETSC_SUCCESS);
233: }
235: static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
236: {
237: PetscInt i;
239: PetscFunctionBegin;
240: PetscCall(MatDestroy(&reuse->F));
241: PetscCall(VecDestroy(&reuse->sol));
242: PetscCall(VecDestroy(&reuse->rhs));
243: PetscCall(PCDestroy(&reuse->interior_solver));
244: PetscCall(PCDestroy(&reuse->correction_solver));
245: PetscCall(ISDestroy(&reuse->is_R));
246: PetscCall(ISDestroy(&reuse->is_B));
247: PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
248: PetscCall(VecDestroy(&reuse->sol_B));
249: PetscCall(VecDestroy(&reuse->rhs_B));
250: for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
251: PetscCall(PetscFree(reuse->benign_zerodiag_subs));
252: PetscCall(PetscFree(reuse->benign_save_vals));
253: PetscCall(MatDestroy(&reuse->benign_csAIB));
254: PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
255: PetscCall(VecDestroy(&reuse->benign_corr_work));
256: PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
257: PetscFunctionReturn(PETSC_SUCCESS);
258: }
260: static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
261: {
262: PCBDDCReuseSolvers ctx;
264: PetscFunctionBegin;
265: PetscCall(PCShellGetContext(pc, &ctx));
266: PetscCall(PCBDDCReuseSolversReset(ctx));
267: PetscCall(PetscFree(ctx));
268: PetscCall(PCShellSetContext(pc, NULL));
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }
272: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
273: {
274: Mat B, C, D, Bd, Cd, AinvBd;
275: KSP ksp;
276: PC pc;
277: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
278: PetscReal fill = 2.0;
279: PetscInt n_I;
280: PetscMPIInt size;
282: PetscFunctionBegin;
283: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size));
284: PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
285: if (reuse == MAT_REUSE_MATRIX) {
286: PetscBool Sdense;
288: PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
289: PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense");
290: }
291: PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
292: PetscCall(MatSchurComplementGetKSP(M, &ksp));
293: PetscCall(KSPGetPC(ksp, &pc));
294: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU));
295: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU));
296: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL));
297: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense));
298: PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense));
299: PetscCall(MatGetSize(B, &n_I, NULL));
300: if (n_I) {
301: if (!Bdense) {
302: PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
303: } else {
304: Bd = B;
305: }
307: if (isLU || isILU || isCHOL) {
308: Mat fact;
309: PetscCall(KSPSetUp(ksp));
310: PetscCall(PCFactorGetMatrix(pc, &fact));
311: PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
312: PetscCall(MatMatSolve(fact, Bd, AinvBd));
313: } else {
314: PetscBool ex = PETSC_TRUE;
316: if (ex) {
317: Mat Ainvd;
319: PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
320: PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
321: PetscCall(MatDestroy(&Ainvd));
322: } else {
323: Vec sol, rhs;
324: PetscScalar *arrayrhs, *arraysol;
325: PetscInt i, nrhs, n;
327: PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
328: PetscCall(MatGetSize(Bd, &n, &nrhs));
329: PetscCall(MatDenseGetArray(Bd, &arrayrhs));
330: PetscCall(MatDenseGetArray(AinvBd, &arraysol));
331: PetscCall(KSPGetSolution(ksp, &sol));
332: PetscCall(KSPGetRhs(ksp, &rhs));
333: for (i = 0; i < nrhs; i++) {
334: PetscCall(VecPlaceArray(rhs, arrayrhs + i * n));
335: PetscCall(VecPlaceArray(sol, arraysol + i * n));
336: PetscCall(KSPSolve(ksp, rhs, sol));
337: PetscCall(VecResetArray(rhs));
338: PetscCall(VecResetArray(sol));
339: }
340: PetscCall(MatDenseRestoreArray(Bd, &arrayrhs));
341: PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs));
342: }
343: }
344: if (!Bdense & !issym) PetscCall(MatDestroy(&Bd));
346: if (!issym) {
347: if (!Cdense) {
348: PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
349: } else {
350: Cd = C;
351: }
352: PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
353: if (!Cdense) PetscCall(MatDestroy(&Cd));
354: } else {
355: PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
356: if (!Bdense) PetscCall(MatDestroy(&Bd));
357: }
358: PetscCall(MatDestroy(&AinvBd));
359: }
361: if (D) {
362: Mat Dd;
363: PetscBool Ddense;
365: PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense));
366: if (!Ddense) {
367: PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
368: } else {
369: Dd = D;
370: }
371: if (n_I) {
372: PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN));
373: } else {
374: if (reuse == MAT_INITIAL_MATRIX) {
375: PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S));
376: } else {
377: PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN));
378: }
379: }
380: if (!Ddense) PetscCall(MatDestroy(&Dd));
381: } else {
382: PetscCall(MatScale(*S, -1.0));
383: }
384: PetscFunctionReturn(PETSC_SUCCESS);
385: }
387: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
388: {
389: Mat F, A_II, A_IB, A_BI, A_BB, AE_II;
390: Mat S_all;
391: Vec gstash, lstash;
392: VecScatter sstash;
393: IS is_I, is_I_layer;
394: IS all_subsets, all_subsets_mult, all_subsets_n;
395: PetscScalar *stasharray, *Bwork;
396: PetscInt *nnz, *all_local_idx_N;
397: PetscInt *auxnum1, *auxnum2;
398: PetscInt i, subset_size, max_subset_size;
399: PetscInt n_B, extra, local_size, global_size;
400: PetscInt local_stash_size;
401: PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
402: MPI_Comm comm_n;
403: PetscBool deluxe = PETSC_TRUE;
404: PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
405: PetscViewer matl_dbg_viewer = NULL;
406: PetscBool flg;
408: PetscFunctionBegin;
409: PetscCall(MatDestroy(&sub_schurs->A));
410: PetscCall(MatDestroy(&sub_schurs->S));
411: if (Ain) {
412: PetscCall(PetscObjectReference((PetscObject)Ain));
413: sub_schurs->A = Ain;
414: }
416: PetscCall(PetscObjectReference((PetscObject)Sin));
417: sub_schurs->S = Sin;
418: if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
420: /* preliminary checks */
421: PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
423: if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
425: /* debug (MATLAB) */
426: if (sub_schurs->debug) {
427: PetscMPIInt size, rank;
428: PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;
430: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size));
431: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
432: nr = size;
433: PetscCall(PetscMalloc1(nr, &print_schurs_ranks));
434: PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
435: PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg));
436: if (!flg) print_schurs = PETSC_TRUE;
437: else {
438: print_schurs = PETSC_FALSE;
439: for (i = 0; i < nr; i++)
440: if (print_schurs_ranks[i] == (PetscInt)rank) {
441: print_schurs = PETSC_TRUE;
442: break;
443: }
444: }
445: PetscOptionsEnd();
446: PetscCall(PetscFree(print_schurs_ranks));
447: if (print_schurs) {
448: char filename[256];
450: PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank));
451: PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer));
452: PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB));
453: }
454: }
456: /* restrict work on active processes */
457: if (sub_schurs->restrict_comm) {
458: PetscSubcomm subcomm;
459: PetscMPIInt color, rank;
461: color = 0;
462: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
463: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
464: PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm));
465: PetscCall(PetscSubcommSetNumber(subcomm, 2));
466: PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
467: PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL));
468: PetscCall(PetscSubcommDestroy(&subcomm));
469: if (!sub_schurs->n_subs) {
470: PetscCall(PetscCommDestroy(&comm_n));
471: PetscFunctionReturn(PETSC_SUCCESS);
472: }
473: } else {
474: PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL));
475: }
477: /* get Schur complement matrices */
478: if (!sub_schurs->schur_explicit) {
479: Mat tA_IB, tA_BI, tA_BB;
480: PetscBool isseqsbaij;
481: PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB));
482: PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij));
483: if (isseqsbaij) {
484: PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB));
485: PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB));
486: PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI));
487: } else {
488: PetscCall(PetscObjectReference((PetscObject)tA_BB));
489: A_BB = tA_BB;
490: PetscCall(PetscObjectReference((PetscObject)tA_IB));
491: A_IB = tA_IB;
492: PetscCall(PetscObjectReference((PetscObject)tA_BI));
493: A_BI = tA_BI;
494: }
495: } else {
496: A_II = NULL;
497: A_IB = NULL;
498: A_BI = NULL;
499: A_BB = NULL;
500: }
501: S_all = NULL;
503: /* determine interior problems */
504: PetscCall(ISGetLocalSize(sub_schurs->is_I, &i));
505: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
506: PetscBT touched;
507: const PetscInt *idx_B;
508: PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;
510: PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency");
511: /* get sizes */
512: PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I));
513: PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
515: PetscCall(PetscMalloc1(n_I + n_B, &local_numbering));
516: PetscCall(PetscBTCreate(n_I + n_B, &touched));
517: PetscCall(PetscBTMemzero(n_I + n_B, touched));
519: /* all boundary dofs must be skipped when adding layers */
520: PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B));
521: for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j]));
522: PetscCall(PetscArraycpy(local_numbering, idx_B, n_B));
523: PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B));
525: /* add prescribed number of layers of dofs */
526: n_local_dofs = n_B;
527: n_prev_added = n_B;
528: for (layer = 0; layer < nlayers; layer++) {
529: PetscInt n_added = 0;
530: if (n_local_dofs == n_I + n_B) break;
531: PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B);
532: PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added));
533: n_prev_added = n_added;
534: n_local_dofs += n_added;
535: if (!n_added) break;
536: }
537: PetscCall(PetscBTDestroy(&touched));
539: /* IS for I layer dofs in original numbering */
540: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer));
541: PetscCall(PetscFree(local_numbering));
542: PetscCall(ISSort(is_I_layer));
543: /* IS for I layer dofs in I numbering */
544: if (!sub_schurs->schur_explicit) {
545: ISLocalToGlobalMapping ItoNmap;
546: PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap));
547: PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I));
548: PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
550: /* II block */
551: PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II));
552: }
553: } else {
554: PetscInt n_I;
556: /* IS for I dofs in original numbering */
557: PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
558: is_I_layer = sub_schurs->is_I;
560: /* IS for I dofs in I numbering (strided 1) */
561: if (!sub_schurs->schur_explicit) {
562: PetscCall(ISGetSize(sub_schurs->is_I, &n_I));
563: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I));
565: /* II block is the same */
566: PetscCall(PetscObjectReference((PetscObject)A_II));
567: AE_II = A_II;
568: }
569: }
571: /* Get info on subset sizes and sum of all subsets sizes */
572: max_subset_size = 0;
573: local_size = 0;
574: for (i = 0; i < sub_schurs->n_subs; i++) {
575: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
576: max_subset_size = PetscMax(subset_size, max_subset_size);
577: local_size += subset_size;
578: }
580: /* Work arrays for local indices */
581: extra = 0;
582: PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
583: if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra));
584: PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N));
585: if (extra) {
586: const PetscInt *idxs;
587: PetscCall(ISGetIndices(is_I_layer, &idxs));
588: PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra));
589: PetscCall(ISRestoreIndices(is_I_layer, &idxs));
590: }
591: PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1));
592: PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2));
594: /* Get local indices in local numbering */
595: local_size = 0;
596: local_stash_size = 0;
597: for (i = 0; i < sub_schurs->n_subs; i++) {
598: const PetscInt *idxs;
600: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
601: PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
602: /* start (smallest in global ordering) and multiplicity */
603: auxnum1[i] = idxs[0];
604: auxnum2[i] = subset_size * subset_size;
605: /* subset indices in local numbering */
606: PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size));
607: PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
608: local_size += subset_size;
609: local_stash_size += subset_size * subset_size;
610: }
612: /* allocate extra workspace needed only for GETRI or SYTRF */
613: use_potr = use_sytr = PETSC_FALSE;
614: if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
615: use_potr = PETSC_TRUE;
616: } else if (sub_schurs->is_symmetric) {
617: use_sytr = PETSC_TRUE;
618: }
619: if (local_size && !use_potr) {
620: PetscScalar lwork, dummyscalar = 0.;
621: PetscBLASInt dummyint = 0;
623: B_lwork = -1;
624: PetscCall(PetscBLASIntCast(local_size, &B_N));
625: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
626: if (use_sytr) {
627: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
628: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %d", (int)B_ierr);
629: } else {
630: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
631: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %d", (int)B_ierr);
632: }
633: PetscCall(PetscFPTrapPop());
634: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
635: PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots));
636: } else {
637: Bwork = NULL;
638: pivots = NULL;
639: }
641: /* prepare data for summing up properly schurs on subsets */
642: PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n));
643: PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets));
644: PetscCall(ISDestroy(&all_subsets_n));
645: PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult));
646: PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n));
647: PetscCall(ISDestroy(&all_subsets));
648: PetscCall(ISDestroy(&all_subsets_mult));
649: PetscCall(ISGetLocalSize(all_subsets_n, &i));
650: PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
651: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash));
652: PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash));
653: PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash));
654: PetscCall(ISDestroy(&all_subsets_n));
656: /* subset indices in local boundary numbering */
657: if (!sub_schurs->is_Ej_all) {
658: PetscInt *all_local_idx_B;
660: PetscCall(PetscMalloc1(local_size, &all_local_idx_B));
661: PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B));
662: PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size);
663: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all));
664: }
666: if (change) {
667: ISLocalToGlobalMapping BtoS;
668: IS change_primal_B;
669: IS change_primal_all;
671: PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
672: PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
673: PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub));
674: for (i = 0; i < sub_schurs->n_subs; i++) {
675: ISLocalToGlobalMapping NtoS;
676: PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS));
677: PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]));
678: PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
679: }
680: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B));
681: PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS));
682: PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all));
683: PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
684: PetscCall(ISDestroy(&change_primal_B));
685: PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change));
686: for (i = 0; i < sub_schurs->n_subs; i++) {
687: Mat change_sub;
689: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
690: PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]));
691: PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */
692: PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY));
693: if (!sub_schurs->change_with_qr) {
694: PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub));
695: } else {
696: Mat change_subt;
697: PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt));
698: PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub));
699: PetscCall(MatDestroy(&change_subt));
700: }
701: PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub));
702: PetscCall(MatDestroy(&change_sub));
703: PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix));
704: PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_"));
705: }
706: PetscCall(ISDestroy(&change_primal_all));
707: }
709: /* Local matrix of all local Schur on subsets (transposed) */
710: if (!sub_schurs->S_Ej_all) {
711: Mat T;
712: PetscScalar *v;
713: PetscInt *ii, *jj;
714: PetscInt cum, i, j, k;
716: /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
717: Allocate properly a representative matrix and duplicate */
718: PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v));
719: ii[0] = 0;
720: cum = 0;
721: for (i = 0; i < sub_schurs->n_subs; i++) {
722: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
723: for (j = 0; j < subset_size; j++) {
724: const PetscInt row = cum + j;
725: PetscInt col = cum;
727: ii[row + 1] = ii[row] + subset_size;
728: for (k = ii[row]; k < ii[row + 1]; k++) {
729: jj[k] = col;
730: col++;
731: }
732: }
733: cum += subset_size;
734: }
735: PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T));
736: PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all));
737: PetscCall(MatDestroy(&T));
738: PetscCall(PetscFree3(ii, jj, v));
739: }
740: /* matrices for deluxe scaling and adaptive selection */
741: if (compute_Stilda) {
742: if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all));
743: if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all));
744: }
746: /* Compute Schur complements explicitly */
747: F = NULL;
748: if (!sub_schurs->schur_explicit) {
749: /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
750: it is not efficient, unless the economic version of the scaling is used */
751: Mat S_Ej_expl;
752: PetscScalar *work;
753: PetscInt j, *dummy_idx;
754: PetscBool Sdense;
756: PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work));
757: local_size = 0;
758: for (i = 0; i < sub_schurs->n_subs; i++) {
759: IS is_subset_B;
760: Mat AE_EE, AE_IE, AE_EI, S_Ej;
762: /* subsets in original and boundary numbering */
763: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B));
764: /* EE block */
765: PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE));
766: /* IE block */
767: PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE));
768: /* EI block */
769: if (sub_schurs->is_symmetric) {
770: PetscCall(MatCreateTranspose(AE_IE, &AE_EI));
771: } else if (sub_schurs->is_hermitian) {
772: PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI));
773: } else {
774: PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI));
775: }
776: PetscCall(ISDestroy(&is_subset_B));
777: PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej));
778: PetscCall(MatDestroy(&AE_EE));
779: PetscCall(MatDestroy(&AE_IE));
780: PetscCall(MatDestroy(&AE_EI));
781: if (AE_II == A_II) { /* we can reuse the same ksp */
782: KSP ksp;
783: PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp));
784: PetscCall(MatSchurComplementSetKSP(S_Ej, ksp));
785: } else { /* build new ksp object which inherits ksp and pc types from the original one */
786: KSP origksp, schurksp;
787: PC origpc, schurpc;
788: KSPType ksp_type;
789: PetscInt n_internal;
790: PetscBool ispcnone;
792: PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp));
793: PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp));
794: PetscCall(KSPGetType(origksp, &ksp_type));
795: PetscCall(KSPSetType(schurksp, ksp_type));
796: PetscCall(KSPGetPC(schurksp, &schurpc));
797: PetscCall(KSPGetPC(origksp, &origpc));
798: PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone));
799: if (!ispcnone) {
800: PCType pc_type;
801: PetscCall(PCGetType(origpc, &pc_type));
802: PetscCall(PCSetType(schurpc, pc_type));
803: } else {
804: PetscCall(PCSetType(schurpc, PCLU));
805: }
806: PetscCall(ISGetSize(is_I, &n_internal));
807: if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
808: MatSolverType solver = NULL;
809: PetscCall(PCFactorGetMatSolverType(origpc, (MatSolverType *)&solver));
810: if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver));
811: }
812: PetscCall(KSPSetUp(schurksp));
813: }
814: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
815: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl));
816: PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl));
817: PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense));
818: PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
819: for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j;
820: PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES));
821: PetscCall(MatDestroy(&S_Ej));
822: PetscCall(MatDestroy(&S_Ej_expl));
823: local_size += subset_size;
824: }
825: PetscCall(PetscFree2(dummy_idx, work));
826: /* free */
827: PetscCall(ISDestroy(&is_I));
828: PetscCall(MatDestroy(&AE_II));
829: PetscCall(PetscFree(all_local_idx_N));
830: } else {
831: Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
832: Mat *gdswA;
833: Vec Dall = NULL;
834: IS is_A_all, *is_p_r = NULL;
835: MatType Stype;
836: PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
837: PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL;
838: const PetscScalar *rS_data;
839: PetscInt n, n_I, size_schur, size_active_schur, cum, cum2;
840: PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE;
841: PetscBool schur_has_vertices, factor_workaround;
842: PetscBool use_cholesky;
843: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
844: PetscBool oldpin;
845: #endif
847: /* get sizes */
848: n_I = 0;
849: if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I));
850: economic = PETSC_FALSE;
851: PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum));
852: if (cum != n_I) economic = PETSC_TRUE;
853: PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL));
854: size_active_schur = local_size;
856: /* import scaling vector (wrong formulation if we have 3D edges) */
857: if (scaling && compute_Stilda) {
858: const PetscScalar *array;
859: PetscScalar *array2;
860: const PetscInt *idxs;
861: PetscInt i;
863: PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
864: PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall));
865: PetscCall(VecGetArrayRead(scaling, &array));
866: PetscCall(VecGetArray(Dall, &array2));
867: for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
868: PetscCall(VecRestoreArray(Dall, &array2));
869: PetscCall(VecRestoreArrayRead(scaling, &array));
870: PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
871: deluxe = PETSC_FALSE;
872: }
874: /* size active schurs does not count any dirichlet or vertex dof on the interface */
875: factor_workaround = PETSC_FALSE;
876: schur_has_vertices = PETSC_FALSE;
877: cum = n_I + size_active_schur;
878: if (sub_schurs->is_dir) {
879: const PetscInt *idxs;
880: PetscInt n_dir;
882: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
883: PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
884: PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir));
885: PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
886: cum += n_dir;
887: if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
888: }
889: /* include the primal vertices in the Schur complement */
890: if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
891: PetscInt n_v;
893: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
894: if (n_v) {
895: const PetscInt *idxs;
897: PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
898: PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v));
899: PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
900: cum += n_v;
901: if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
902: schur_has_vertices = PETSC_TRUE;
903: }
904: }
905: size_schur = cum - n_I;
906: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all));
907: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
908: oldpin = sub_schurs->A->boundtocpu;
909: PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE));
910: #endif
911: if (cum == n) {
912: PetscCall(ISSetPermutation(is_A_all));
913: PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A));
914: } else {
915: PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A));
916: }
917: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
918: PetscCall(MatBindToCPU(sub_schurs->A, oldpin));
919: #endif
920: PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix));
921: PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_"));
923: /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory;
924: this is a workaround */
925: if (benign_n) {
926: Vec v, benign_AIIm1_ones;
927: ISLocalToGlobalMapping N_to_reor;
928: IS is_p0, is_p0_p;
929: PetscScalar *cs_AIB, *AIIm1_data;
930: PetscInt sizeA;
932: PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor));
933: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
934: PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p));
935: PetscCall(ISDestroy(&is_p0));
936: PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
937: PetscCall(VecGetSize(v, &sizeA));
938: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat));
939: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat));
940: PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB));
941: PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
942: PetscCall(PetscMalloc1(benign_n, &is_p_r));
943: /* compute colsum of A_IB restricted to pressures */
944: for (i = 0; i < benign_n; i++) {
945: const PetscScalar *array;
946: const PetscInt *idxs;
947: PetscInt j, nz;
949: PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]));
950: PetscCall(ISGetLocalSize(is_p_r[i], &nz));
951: PetscCall(ISGetIndices(is_p_r[i], &idxs));
952: for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
953: PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
954: PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
955: PetscCall(MatMult(A, benign_AIIm1_ones, v));
956: PetscCall(VecResetArray(benign_AIIm1_ones));
957: PetscCall(VecGetArrayRead(v, &array));
958: for (j = 0; j < size_schur; j++) {
959: #if defined(PETSC_USE_COMPLEX)
960: cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
961: #else
962: cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
963: #endif
964: }
965: PetscCall(VecRestoreArrayRead(v, &array));
966: }
967: PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB));
968: PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
969: PetscCall(VecDestroy(&v));
970: PetscCall(VecDestroy(&benign_AIIm1_ones));
971: PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
972: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
973: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
974: PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL));
975: PetscCall(ISDestroy(&is_p0_p));
976: PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
977: }
978: PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric));
979: PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian));
980: PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef));
982: /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
983: use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
985: /* when using the benign subspace trick, the local Schur complements are SPD */
986: /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
987: Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
988: if (benign_trick) {
989: sub_schurs->is_posdef = PETSC_TRUE;
990: PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg));
991: if (flg) use_cholesky = PETSC_FALSE;
992: }
994: if (n_I) {
995: IS is_schur;
996: char stype[64];
997: PetscBool gpu = PETSC_FALSE;
999: if (use_cholesky) {
1000: PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F));
1001: } else {
1002: PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F));
1003: }
1004: PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name);
1005: PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE));
1006: #if defined(PETSC_HAVE_MKL_PARDISO)
1007: if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1008: #endif
1009: /* subsets ordered last */
1010: PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur));
1011: PetscCall(MatFactorSetSchurIS(F, is_schur));
1012: PetscCall(ISDestroy(&is_schur));
1014: /* factorization step */
1015: if (use_cholesky) {
1016: PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
1017: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1018: PetscCall(MatMumpsSetIcntl(F, 19, 2));
1019: #endif
1020: PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
1021: S_lower_triangular = PETSC_TRUE;
1022: } else {
1023: PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
1024: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1025: PetscCall(MatMumpsSetIcntl(F, 19, 3));
1026: #endif
1027: PetscCall(MatLUFactorNumeric(F, A, NULL));
1028: }
1029: PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view"));
1031: if (matl_dbg_viewer) {
1032: Mat S;
1033: IS is;
1035: PetscCall(PetscObjectSetName((PetscObject)A, "A"));
1036: PetscCall(MatView(A, matl_dbg_viewer));
1037: PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1038: PetscCall(PetscObjectSetName((PetscObject)S, "S"));
1039: PetscCall(MatView(S, matl_dbg_viewer));
1040: PetscCall(MatDestroy(&S));
1041: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is));
1042: PetscCall(PetscObjectSetName((PetscObject)is, "I"));
1043: PetscCall(ISView(is, matl_dbg_viewer));
1044: PetscCall(ISDestroy(&is));
1045: PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is));
1046: PetscCall(PetscObjectSetName((PetscObject)is, "B"));
1047: PetscCall(ISView(is, matl_dbg_viewer));
1048: PetscCall(ISDestroy(&is));
1049: PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA"));
1050: PetscCall(ISView(is_A_all, matl_dbg_viewer));
1051: for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
1052: IS is;
1053: char name[16];
1055: PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i));
1056: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1057: PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is));
1058: PetscCall(PetscObjectSetName((PetscObject)is, name));
1059: PetscCall(ISView(is, matl_dbg_viewer));
1060: PetscCall(ISDestroy(&is));
1061: if (sub_schurs->change) {
1062: Mat T;
1064: PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i));
1065: PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL));
1066: PetscCall(PetscObjectSetName((PetscObject)T, name));
1067: PetscCall(MatView(T, matl_dbg_viewer));
1068: PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i));
1069: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name));
1070: PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer));
1071: }
1072: cum += subset_size;
1073: }
1074: PetscCall(PetscViewerFlush(matl_dbg_viewer));
1075: }
1077: /* get explicit Schur Complement computed during numeric factorization */
1078: PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1079: PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype)));
1080: #if defined(PETSC_HAVE_CUDA)
1081: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, ""));
1082: #endif
1083: if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype)));
1084: PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL));
1085: PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all));
1086: PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1087: PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1088: PetscCall(MatGetType(S_all, &Stype));
1090: /* we can reuse the solvers if we are not using the economic version */
1091: reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1092: if (!sub_schurs->gdsw) {
1093: factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1094: if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
1095: }
1096: solver_S = PETSC_TRUE;
1098: /* update the Schur complement with the change of basis on the pressures */
1099: if (benign_n) {
1100: const PetscScalar *cs_AIB;
1101: PetscScalar *S_data, *AIIm1_data;
1102: Mat S2 = NULL, S3 = NULL; /* dbg */
1103: PetscScalar *S2_data, *S3_data; /* dbg */
1104: Vec v, benign_AIIm1_ones;
1105: PetscInt sizeA;
1107: PetscCall(MatDenseGetArray(S_all, &S_data));
1108: PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
1109: PetscCall(VecGetSize(v, &sizeA));
1110: #if defined(PETSC_HAVE_MUMPS)
1111: PetscCall(MatMumpsSetIcntl(F, 26, 0));
1112: #endif
1113: #if defined(PETSC_HAVE_MKL_PARDISO)
1114: PetscCall(MatMkl_PardisoSetCntl(F, 70, 1));
1115: #endif
1116: PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB));
1117: PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
1118: if (matl_dbg_viewer) {
1119: PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2));
1120: PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3));
1121: PetscCall(MatDenseGetArray(S2, &S2_data));
1122: PetscCall(MatDenseGetArray(S3, &S3_data));
1123: }
1124: for (i = 0; i < benign_n; i++) {
1125: PetscScalar *array, sum = 0., one = 1., *sums;
1126: const PetscInt *idxs;
1127: PetscInt k, j, nz;
1128: PetscBLASInt B_k, B_n;
1130: PetscCall(PetscCalloc1(benign_n, &sums));
1131: PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
1132: PetscCall(VecCopy(benign_AIIm1_ones, v));
1133: PetscCall(MatSolve(F, v, benign_AIIm1_ones));
1134: PetscCall(MatMult(A, benign_AIIm1_ones, v));
1135: PetscCall(VecResetArray(benign_AIIm1_ones));
1136: /* p0 dofs (eliminated) are excluded from the sums */
1137: for (k = 0; k < benign_n; k++) {
1138: PetscCall(ISGetLocalSize(is_p_r[k], &nz));
1139: PetscCall(ISGetIndices(is_p_r[k], &idxs));
1140: for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
1141: PetscCall(ISRestoreIndices(is_p_r[k], &idxs));
1142: }
1143: PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
1144: if (matl_dbg_viewer) {
1145: Vec vv;
1146: char name[16];
1148: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv));
1149: PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i));
1150: PetscCall(PetscObjectSetName((PetscObject)vv, name));
1151: PetscCall(VecView(vv, matl_dbg_viewer));
1152: }
1153: /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1154: /* cs_AIB already scaled by 1./nz */
1155: B_k = 1;
1156: PetscCall(PetscBLASIntCast(size_schur, &B_n));
1157: for (k = 0; k < benign_n; k++) {
1158: sum = sums[k];
1160: if (PetscAbsScalar(sum) == 0.0) continue;
1161: if (k == i) {
1162: if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1163: if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1164: } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1165: sum /= 2.0;
1166: if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1167: if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1168: }
1169: }
1170: sum = 1.;
1171: if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1172: if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n));
1173: PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
1174: /* set p0 entry of AIIm1_ones to zero */
1175: PetscCall(ISGetLocalSize(is_p_r[i], &nz));
1176: PetscCall(ISGetIndices(is_p_r[i], &idxs));
1177: for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
1178: PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
1179: PetscCall(PetscFree(sums));
1180: }
1181: PetscCall(VecDestroy(&benign_AIIm1_ones));
1182: if (matl_dbg_viewer) {
1183: PetscCall(MatDenseRestoreArray(S2, &S2_data));
1184: PetscCall(MatDenseRestoreArray(S3, &S3_data));
1185: }
1186: if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1187: PetscInt k, j;
1188: for (k = 0; k < size_schur; k++) {
1189: for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]);
1190: }
1191: }
1193: /* restore defaults */
1194: #if defined(PETSC_HAVE_MUMPS)
1195: PetscCall(MatMumpsSetIcntl(F, 26, -1));
1196: #endif
1197: #if defined(PETSC_HAVE_MKL_PARDISO)
1198: PetscCall(MatMkl_PardisoSetCntl(F, 70, 0));
1199: #endif
1200: PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB));
1201: PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1202: PetscCall(VecDestroy(&v));
1203: PetscCall(MatDenseRestoreArray(S_all, &S_data));
1204: if (matl_dbg_viewer) {
1205: Mat S;
1207: PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1208: PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1209: PetscCall(PetscObjectSetName((PetscObject)S, "Sb"));
1210: PetscCall(MatView(S, matl_dbg_viewer));
1211: PetscCall(MatDestroy(&S));
1212: PetscCall(PetscObjectSetName((PetscObject)S2, "S2P"));
1213: PetscCall(MatView(S2, matl_dbg_viewer));
1214: PetscCall(PetscObjectSetName((PetscObject)S3, "S3P"));
1215: PetscCall(MatView(S3, matl_dbg_viewer));
1216: PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs"));
1217: PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer));
1218: PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1219: }
1220: PetscCall(MatDestroy(&S2));
1221: PetscCall(MatDestroy(&S3));
1222: }
1223: if (!reuse_solvers) {
1224: for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i]));
1225: PetscCall(PetscFree(is_p_r));
1226: PetscCall(MatDestroy(&cs_AIB_mat));
1227: PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1228: }
1229: } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1230: PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all));
1231: PetscCall(MatGetType(S_all, &Stype));
1232: reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1233: factor_workaround = PETSC_FALSE;
1234: solver_S = PETSC_FALSE;
1235: }
1237: if (reuse_solvers) {
1238: Mat A_II, pA_II, Afake;
1239: Vec vec1_B;
1240: PCBDDCReuseSolvers msolv_ctx;
1241: PetscInt n_R;
1243: if (sub_schurs->reuse_solver) {
1244: PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1245: } else {
1246: PetscCall(PetscNew(&sub_schurs->reuse_solver));
1247: }
1248: msolv_ctx = sub_schurs->reuse_solver;
1249: PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL));
1250: PetscCall(PetscObjectReference((PetscObject)F));
1251: msolv_ctx->F = F;
1252: PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL));
1253: /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1254: {
1255: PetscScalar *array;
1256: PetscInt n;
1258: PetscCall(VecGetLocalSize(msolv_ctx->sol, &n));
1259: PetscCall(VecGetArray(msolv_ctx->sol, &array));
1260: PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs));
1261: PetscCall(VecRestoreArray(msolv_ctx->sol, &array));
1262: }
1263: msolv_ctx->has_vertices = schur_has_vertices;
1265: /* interior solver */
1266: PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver));
1267: PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II));
1268: PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL));
1269: PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)"));
1270: PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx));
1271: PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1272: PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior));
1273: PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose));
1274: if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy));
1276: /* correction solver */
1277: if (!sub_schurs->gdsw) {
1278: PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver));
1279: PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL));
1280: PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)"));
1281: PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx));
1282: PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1283: PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction));
1284: PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose));
1286: /* scatter and vecs for Schur complement solver */
1287: PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B));
1288: PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL));
1289: if (!schur_has_vertices) {
1290: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B));
1291: PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B));
1292: PetscCall(PetscObjectReference((PetscObject)is_A_all));
1293: msolv_ctx->is_R = is_A_all;
1294: } else {
1295: IS is_B_all;
1296: const PetscInt *idxs;
1297: PetscInt dual, n_v, n;
1299: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
1300: dual = size_schur - n_v;
1301: PetscCall(ISGetLocalSize(is_A_all, &n));
1302: PetscCall(ISGetIndices(is_A_all, &idxs));
1303: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all));
1304: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B));
1305: PetscCall(ISDestroy(&is_B_all));
1306: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all));
1307: PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B));
1308: PetscCall(ISDestroy(&is_B_all));
1309: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R));
1310: PetscCall(ISRestoreIndices(is_A_all, &idxs));
1311: }
1312: PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R));
1313: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake));
1314: PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY));
1315: PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY));
1316: PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake));
1317: PetscCall(MatDestroy(&Afake));
1318: PetscCall(VecDestroy(&vec1_B));
1319: }
1320: /* communicate benign info to solver context */
1321: if (benign_n) {
1322: PetscScalar *array;
1324: msolv_ctx->benign_n = benign_n;
1325: msolv_ctx->benign_zerodiag_subs = is_p_r;
1326: PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals));
1327: msolv_ctx->benign_csAIB = cs_AIB_mat;
1328: PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL));
1329: PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array));
1330: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec));
1331: PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array));
1332: msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1333: }
1334: } else {
1335: if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1336: PetscCall(PetscFree(sub_schurs->reuse_solver));
1337: }
1338: PetscCall(MatDestroy(&A));
1339: PetscCall(ISDestroy(&is_A_all));
1341: /* Work arrays */
1342: PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work));
1344: /* S_Ej_all */
1345: cum = cum2 = 0;
1346: PetscCall(MatDenseGetArrayRead(S_all, &rS_data));
1347: PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr));
1348: if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1349: if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA));
1350: for (i = 0; i < sub_schurs->n_subs; i++) {
1351: PetscInt j;
1353: /* get S_E (or K^i_EE for GDSW) */
1354: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1355: if (sub_schurs->gdsw) {
1356: Mat T;
1358: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T));
1359: PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T));
1360: PetscCall(MatDestroy(&T));
1361: } else {
1362: if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1363: PetscInt k;
1364: for (k = 0; k < subset_size; k++) {
1365: for (j = k; j < subset_size; j++) {
1366: work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1367: work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1368: }
1369: }
1370: } else { /* just copy to workspace */
1371: PetscInt k;
1372: for (k = 0; k < subset_size; k++) {
1373: for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1374: }
1375: }
1376: }
1377: /* insert S_E values */
1378: if (sub_schurs->change) {
1379: Mat change_sub, SEj, T;
1381: /* change basis */
1382: PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1383: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1384: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1385: Mat T2;
1386: PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1387: PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1388: PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1389: PetscCall(MatDestroy(&T2));
1390: } else {
1391: PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1392: }
1393: PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1394: PetscCall(MatDestroy(&T));
1395: PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL));
1396: PetscCall(MatDestroy(&SEj));
1397: }
1398: PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1399: if (compute_Stilda) {
1400: if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1401: Mat M;
1402: const PetscScalar *vals;
1403: PetscBool isdense, isdensecuda;
1405: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M));
1406: PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1407: PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1408: if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype));
1409: PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense));
1410: PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda));
1411: if (use_cholesky) {
1412: PetscCall(MatCholeskyFactor(M, NULL, NULL));
1413: } else {
1414: PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1415: }
1416: if (isdense) {
1417: PetscCall(MatSeqDenseInvertFactors_Private(M));
1418: #if defined(PETSC_HAVE_CUDA)
1419: } else if (isdensecuda) {
1420: PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M));
1421: #endif
1422: } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
1423: PetscCall(MatDenseGetArrayRead(M, &vals));
1424: PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size));
1425: PetscCall(MatDenseRestoreArrayRead(M, &vals));
1426: PetscCall(MatDestroy(&M));
1427: } else if (scaling) { /* not using deluxe */
1428: Mat SEj;
1429: Vec D;
1430: PetscScalar *array;
1432: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1433: PetscCall(VecGetArray(Dall, &array));
1434: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D));
1435: PetscCall(VecRestoreArray(Dall, &array));
1436: PetscCall(VecShift(D, -1.));
1437: PetscCall(MatDiagonalScale(SEj, D, D));
1438: PetscCall(MatDestroy(&SEj));
1439: PetscCall(VecDestroy(&D));
1440: PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1441: }
1442: }
1443: cum += subset_size;
1444: cum2 += subset_size * (size_schur + 1);
1445: SEj_arr += subset_size * subset_size;
1446: if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1447: }
1448: if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA));
1449: PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data));
1450: PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr));
1451: if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1452: if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1454: /* may prevent unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory;
1455: however, preliminary tests indicate using GPUs is still faster in the solve phase */
1456: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1457: if (reuse_solvers) {
1458: Mat St;
1459: MatFactorSchurStatus st;
1461: flg = PETSC_FALSE;
1462: PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL));
1463: PetscCall(MatFactorGetSchurComplement(F, &St, &st));
1464: PetscCall(MatBindToCPU(St, flg));
1465: PetscCall(MatFactorRestoreSchurComplement(F, &St, st));
1466: }
1467: #endif
1469: schur_factor = NULL;
1470: if (compute_Stilda && size_active_schur) {
1471: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1472: PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1473: PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur));
1474: PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1475: } else {
1476: Mat S_all_inv = NULL;
1478: if (solver_S && !sub_schurs->gdsw) {
1479: /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1480: The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1481: if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1482: PetscScalar *data;
1483: PetscInt nd = 0;
1485: PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1486: PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1487: PetscCall(MatDenseGetArray(S_all_inv, &data));
1488: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1489: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1490: }
1492: /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1493: if (schur_has_vertices) {
1494: Mat M;
1495: PetscScalar *tdata;
1496: PetscInt nv = 0, news;
1498: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
1499: news = size_active_schur + nv;
1500: PetscCall(PetscCalloc1(news * news, &tdata));
1501: for (i = 0; i < size_active_schur; i++) {
1502: PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i));
1503: PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv));
1504: }
1505: for (i = 0; i < nv; i++) {
1506: PetscInt k = i + size_active_schur;
1507: PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i));
1508: }
1510: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M));
1511: PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1512: PetscCall(MatCholeskyFactor(M, NULL, NULL));
1513: /* save the factors */
1514: cum = 0;
1515: PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor));
1516: for (i = 0; i < size_active_schur; i++) {
1517: PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i));
1518: cum += size_active_schur - i;
1519: }
1520: for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
1521: PetscCall(MatSeqDenseInvertFactors_Private(M));
1522: /* move back just the active dofs to the Schur complement */
1523: for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur));
1524: PetscCall(PetscFree(tdata));
1525: PetscCall(MatDestroy(&M));
1526: } else { /* we can factorize and invert just the activedofs */
1527: Mat M;
1528: PetscScalar *aux;
1530: PetscCall(PetscMalloc1(nd, &aux));
1531: for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
1532: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M));
1533: PetscCall(MatDenseSetLDA(M, size_schur));
1534: PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1535: PetscCall(MatCholeskyFactor(M, NULL, NULL));
1536: PetscCall(MatSeqDenseInvertFactors_Private(M));
1537: PetscCall(MatDestroy(&M));
1538: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M));
1539: PetscCall(MatZeroEntries(M));
1540: PetscCall(MatDestroy(&M));
1541: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M));
1542: PetscCall(MatDenseSetLDA(M, size_schur));
1543: PetscCall(MatZeroEntries(M));
1544: PetscCall(MatDestroy(&M));
1545: for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
1546: PetscCall(PetscFree(aux));
1547: }
1548: PetscCall(MatDenseRestoreArray(S_all_inv, &data));
1549: } else { /* use MatFactor calls to invert S */
1550: PetscCall(MatFactorInvertSchurComplement(F));
1551: PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1552: }
1553: } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1554: PetscCall(PetscObjectReference((PetscObject)S_all));
1555: S_all_inv = S_all;
1556: PetscCall(MatDenseGetArray(S_all_inv, &S_data));
1557: PetscCall(PetscBLASIntCast(size_schur, &B_N));
1558: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1559: if (use_potr) {
1560: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1561: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1562: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1563: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1564: } else if (use_sytr) {
1565: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1566: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1567: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1568: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1569: } else {
1570: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1571: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1572: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1573: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1574: }
1575: PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur));
1576: PetscCall(PetscFPTrapPop());
1577: PetscCall(MatDenseRestoreArray(S_all_inv, &S_data));
1578: } else if (sub_schurs->gdsw) {
1579: Mat tS, tX, SEj, S_II, S_IE, S_EE;
1580: KSP pS_II;
1581: PC pS_II_pc;
1582: IS EE, II;
1583: PetscInt nS;
1585: PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL));
1586: PetscCall(MatGetSize(tS, &nS, NULL));
1587: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1588: for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
1589: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1590: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj));
1592: PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE));
1593: PetscCall(ISComplement(EE, 0, nS, &II));
1594: PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II));
1595: PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE));
1596: PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE));
1597: PetscCall(ISDestroy(&II));
1598: PetscCall(ISDestroy(&EE));
1600: PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II));
1601: PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */
1602: PetscCall(KSPSetType(pS_II, KSPPREONLY));
1603: PetscCall(KSPGetPC(pS_II, &pS_II_pc));
1604: PetscCall(PCSetType(pS_II_pc, PCSVD));
1605: PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix));
1606: PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_"));
1607: PetscCall(KSPSetOperators(pS_II, S_II, S_II));
1608: PetscCall(MatDestroy(&S_II));
1609: PetscCall(KSPSetFromOptions(pS_II));
1610: PetscCall(KSPSetUp(pS_II));
1611: PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX));
1612: PetscCall(KSPMatSolve(pS_II, S_IE, tX));
1613: PetscCall(KSPDestroy(&pS_II));
1615: PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DEFAULT, &SEj));
1616: PetscCall(MatDestroy(&S_IE));
1617: PetscCall(MatDestroy(&tX));
1618: PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN));
1619: PetscCall(MatDestroy(&S_EE));
1621: PetscCall(MatDestroy(&SEj));
1622: cum += subset_size;
1623: SEjinv_arr += subset_size * subset_size;
1624: }
1625: PetscCall(MatDestroy(&tS));
1626: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1627: }
1628: /* S_Ej_tilda_all */
1629: cum = cum2 = 0;
1630: rS_data = NULL;
1631: if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data));
1632: PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1633: for (i = 0; i < sub_schurs->n_subs; i++) {
1634: PetscInt j;
1636: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1637: /* get (St^-1)_E */
1638: /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1639: will be properly accessed later during adaptive selection */
1640: if (rS_data) {
1641: if (S_lower_triangular) {
1642: PetscInt k;
1643: if (sub_schurs->change) {
1644: for (k = 0; k < subset_size; k++) {
1645: for (j = k; j < subset_size; j++) {
1646: work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1647: work[j * subset_size + k] = work[k * subset_size + j];
1648: }
1649: }
1650: } else {
1651: for (k = 0; k < subset_size; k++) {
1652: for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1653: }
1654: }
1655: } else {
1656: PetscInt k;
1657: for (k = 0; k < subset_size; k++) {
1658: for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1659: }
1660: }
1661: }
1662: if (sub_schurs->change) {
1663: Mat change_sub, SEj, T;
1664: PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;
1666: /* change basis */
1667: PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1668: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj));
1669: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1670: Mat T2;
1671: PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1672: PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1673: PetscCall(MatDestroy(&T2));
1674: PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1675: } else {
1676: PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1677: }
1678: PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1679: PetscCall(MatDestroy(&T));
1680: PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL));
1681: PetscCall(MatDestroy(&SEj));
1682: }
1683: if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size));
1684: cum += subset_size;
1685: cum2 += subset_size * (size_schur + 1);
1686: SEjinv_arr += subset_size * subset_size;
1687: }
1688: PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1689: if (S_all_inv) {
1690: PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data));
1691: if (solver_S) {
1692: if (schur_has_vertices) {
1693: PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED));
1694: } else {
1695: PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED));
1696: }
1697: }
1698: }
1699: PetscCall(MatDestroy(&S_all_inv));
1700: }
1702: /* move back factors if needed */
1703: if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1704: Mat S_tmp;
1705: PetscInt nd = 0;
1706: PetscScalar *data;
1708: PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1709: PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
1710: PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL));
1711: PetscCall(MatDenseGetArray(S_tmp, &data));
1712: PetscCall(PetscArrayzero(data, size_schur * size_schur));
1714: if (S_lower_triangular) {
1715: cum = 0;
1716: for (i = 0; i < size_active_schur; i++) {
1717: PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i));
1718: cum += size_active_schur - i;
1719: }
1720: } else {
1721: PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur));
1722: }
1723: if (sub_schurs->is_dir) {
1724: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1725: for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i];
1726: }
1727: /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1728: set the diagonal entry of the Schur factor to a very large value */
1729: for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty;
1730: PetscCall(MatDenseRestoreArray(S_tmp, &data));
1731: PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED));
1732: }
1733: } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1734: PetscScalar *data;
1735: PetscInt nd = 0;
1737: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1738: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1739: }
1740: PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1741: PetscCall(MatDenseGetArray(S_all, &data));
1742: for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1743: for (i = size_active_schur + nd; i < size_schur; i++) {
1744: PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1745: data[i * (size_schur + 1)] = infty;
1746: }
1747: PetscCall(MatDenseRestoreArray(S_all, &data));
1748: PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1749: }
1750: PetscCall(PetscFree(work));
1751: PetscCall(PetscFree(schur_factor));
1752: PetscCall(VecDestroy(&Dall));
1753: }
1754: PetscCall(ISDestroy(&is_I_layer));
1755: PetscCall(MatDestroy(&S_all));
1756: PetscCall(MatDestroy(&A_BB));
1757: PetscCall(MatDestroy(&A_IB));
1758: PetscCall(MatDestroy(&A_BI));
1759: PetscCall(MatDestroy(&F));
1761: PetscCall(PetscMalloc1(sub_schurs->n_subs, &nnz));
1762: for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i]));
1763: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer));
1764: PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz));
1765: PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1766: PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1767: if (compute_Stilda) {
1768: PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz));
1769: PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1770: PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1771: if (deluxe) {
1772: PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz));
1773: PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1774: PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1775: }
1776: }
1777: PetscCall(ISDestroy(&is_I_layer));
1779: /* Get local part of (\sum_j S_Ej) */
1780: if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all));
1781: PetscCall(VecSet(gstash, 0.0));
1782: PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray));
1783: PetscCall(VecPlaceArray(lstash, stasharray));
1784: PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1785: PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1786: PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray));
1787: PetscCall(VecResetArray(lstash));
1788: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray));
1789: PetscCall(VecPlaceArray(lstash, stasharray));
1790: PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1791: PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1792: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray));
1793: PetscCall(VecResetArray(lstash));
1795: /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1796: if (compute_Stilda) {
1797: PetscCall(VecSet(gstash, 0.0));
1798: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1799: PetscCall(VecPlaceArray(lstash, stasharray));
1800: PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1801: PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1802: PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1803: PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1804: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1805: PetscCall(VecResetArray(lstash));
1806: if (deluxe) {
1807: PetscCall(VecSet(gstash, 0.0));
1808: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1809: PetscCall(VecPlaceArray(lstash, stasharray));
1810: PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1811: PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1812: PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1813: PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1814: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1815: PetscCall(VecResetArray(lstash));
1816: } else if (!sub_schurs->gdsw) {
1817: PetscScalar *array;
1818: PetscInt cum;
1820: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1821: cum = 0;
1822: for (i = 0; i < sub_schurs->n_subs; i++) {
1823: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1824: PetscCall(PetscBLASIntCast(subset_size, &B_N));
1825: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1826: if (use_potr) {
1827: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
1828: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1829: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
1830: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1831: } else if (use_sytr) {
1832: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1833: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1834: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
1835: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1836: } else {
1837: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
1838: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1839: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1840: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1841: }
1842: PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size));
1843: PetscCall(PetscFPTrapPop());
1844: cum += subset_size * subset_size;
1845: }
1846: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1847: PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1848: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1849: sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1850: }
1851: }
1852: PetscCall(VecDestroy(&lstash));
1853: PetscCall(VecDestroy(&gstash));
1854: PetscCall(VecScatterDestroy(&sstash));
1856: if (matl_dbg_viewer) {
1857: if (sub_schurs->S_Ej_all) {
1858: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE"));
1859: PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer));
1860: }
1861: if (sub_schurs->sum_S_Ej_all) {
1862: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE"));
1863: PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer));
1864: }
1865: if (sub_schurs->sum_S_Ej_inv_all) {
1866: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm"));
1867: PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer));
1868: }
1869: if (sub_schurs->sum_S_Ej_tilda_all) {
1870: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt"));
1871: PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer));
1872: }
1873: }
1875: /* free workspace */
1876: if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1877: if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1878: PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1879: PetscCall(PetscFree2(Bwork, pivots));
1880: PetscCall(PetscCommDestroy(&comm_n));
1881: PetscFunctionReturn(PETSC_SUCCESS);
1882: }
1884: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
1885: {
1886: IS *faces, *edges, *all_cc, vertices;
1887: PetscInt s, i, n_faces, n_edges, n_all_cc;
1888: PetscBool is_sorted, ispardiso, ismumps;
1890: PetscFunctionBegin;
1891: PetscCall(ISSorted(is_I, &is_sorted));
1892: PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted");
1893: PetscCall(ISSorted(is_B, &is_sorted));
1894: PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted");
1896: /* reset any previous data */
1897: PetscCall(PCBDDCSubSchursReset(sub_schurs));
1899: sub_schurs->gdsw = gdsw;
1901: /* get index sets for faces and edges (already sorted by global ordering) */
1902: PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1903: n_all_cc = n_faces + n_edges;
1904: PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge));
1905: PetscCall(PetscMalloc1(n_all_cc, &all_cc));
1906: n_all_cc = 0;
1907: for (i = 0; i < n_faces; i++) {
1908: PetscCall(ISGetSize(faces[i], &s));
1909: if (!s) continue;
1910: if (copycc) {
1911: PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc]));
1912: } else {
1913: PetscCall(PetscObjectReference((PetscObject)faces[i]));
1914: all_cc[n_all_cc] = faces[i];
1915: }
1916: n_all_cc++;
1917: }
1918: for (i = 0; i < n_edges; i++) {
1919: PetscCall(ISGetSize(edges[i], &s));
1920: if (!s) continue;
1921: if (copycc) {
1922: PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc]));
1923: } else {
1924: PetscCall(PetscObjectReference((PetscObject)edges[i]));
1925: all_cc[n_all_cc] = edges[i];
1926: }
1927: PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc));
1928: n_all_cc++;
1929: }
1930: PetscCall(PetscObjectReference((PetscObject)vertices));
1931: sub_schurs->is_vertices = vertices;
1932: PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1933: sub_schurs->is_dir = NULL;
1934: PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir));
1936: /* Determine if MatFactor can be used */
1937: PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix));
1938: #if defined(PETSC_HAVE_MUMPS)
1939: PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type)));
1940: #elif defined(PETSC_HAVE_MKL_PARDISO)
1941: PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type)));
1942: #else
1943: PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type)));
1944: #endif
1945: #if defined(PETSC_USE_COMPLEX)
1946: sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1947: #else
1948: sub_schurs->is_hermitian = PETSC_TRUE;
1949: #endif
1950: sub_schurs->is_posdef = PETSC_TRUE;
1951: sub_schurs->is_symmetric = PETSC_TRUE;
1952: sub_schurs->debug = PETSC_FALSE;
1953: sub_schurs->restrict_comm = PETSC_FALSE;
1954: PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
1955: PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL));
1956: PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL));
1957: PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL));
1958: PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL));
1959: PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL));
1960: PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL));
1961: PetscOptionsEnd();
1962: PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps));
1963: PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso));
1964: sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1966: /* for reals, symmetric and Hermitian are synonyms */
1967: #if !defined(PETSC_USE_COMPLEX)
1968: sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1969: sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1970: #endif
1972: PetscCall(PetscObjectReference((PetscObject)is_I));
1973: sub_schurs->is_I = is_I;
1974: PetscCall(PetscObjectReference((PetscObject)is_B));
1975: sub_schurs->is_B = is_B;
1976: PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
1977: sub_schurs->l2gmap = graph->l2gmap;
1978: PetscCall(PetscObjectReference((PetscObject)BtoNmap));
1979: sub_schurs->BtoNmap = BtoNmap;
1980: sub_schurs->n_subs = n_all_cc;
1981: sub_schurs->is_subs = all_cc;
1982: sub_schurs->S_Ej_all = NULL;
1983: sub_schurs->sum_S_Ej_all = NULL;
1984: sub_schurs->sum_S_Ej_inv_all = NULL;
1985: sub_schurs->sum_S_Ej_tilda_all = NULL;
1986: sub_schurs->is_Ej_all = NULL;
1987: PetscFunctionReturn(PETSC_SUCCESS);
1988: }
1990: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1991: {
1992: PCBDDCSubSchurs schurs_ctx;
1994: PetscFunctionBegin;
1995: PetscCall(PetscNew(&schurs_ctx));
1996: schurs_ctx->n_subs = 0;
1997: *sub_schurs = schurs_ctx;
1998: PetscFunctionReturn(PETSC_SUCCESS);
1999: }
/* Release all data held by the sub-Schur context, leaving it in the freshly-created state.
   Safe to call on a NULL context (no-op). Note the ordering: the per-subset arrays
   (is_subs, change, change_primal_sub) are iterated with n_subs BEFORE it is zeroed. */
PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
{
  PetscInt i;

  PetscFunctionBegin;
  if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(sub_schurs->prefix));
  PetscCall(MatDestroy(&sub_schurs->A));
  PetscCall(MatDestroy(&sub_schurs->S));
  PetscCall(ISDestroy(&sub_schurs->is_I));
  PetscCall(ISDestroy(&sub_schurs->is_B));
  PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
  PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
  PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
  PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
  PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
  PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
  PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
  PetscCall(ISDestroy(&sub_schurs->is_vertices));
  PetscCall(ISDestroy(&sub_schurs->is_dir));
  PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
  /* one IS per connected component; the array itself is freed only if it was allocated */
  for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
  if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
  if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
  PetscCall(PetscFree(sub_schurs->reuse_solver));
  /* change-of-basis KSPs and primal index sets exist only if a change of basis was requested */
  if (sub_schurs->change) {
    for (i = 0; i < sub_schurs->n_subs; i++) {
      PetscCall(KSPDestroy(&sub_schurs->change[i]));
      PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
    }
  }
  PetscCall(PetscFree(sub_schurs->change));
  PetscCall(PetscFree(sub_schurs->change_primal_sub));
  sub_schurs->n_subs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2038: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
2039: {
2040: PetscFunctionBegin;
2041: PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2042: PetscCall(PetscFree(*sub_schurs));
2043: PetscFunctionReturn(PETSC_SUCCESS);
2044: }
2046: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added)
2047: {
2048: PetscInt i, j, n;
2050: PetscFunctionBegin;
2051: n = 0;
2052: for (i = -n_prev; i < 0; i++) {
2053: PetscInt start_dof = queue_tip[i];
2054: for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
2055: PetscInt dof = adjncy[j];
2056: if (!PetscBTLookup(touched, dof)) {
2057: PetscCall(PetscBTSet(touched, dof));
2058: queue_tip[n] = dof;
2059: n++;
2060: }
2061: }
2062: }
2063: *n_added = n;
2064: PetscFunctionReturn(PETSC_SUCCESS);
2065: }