Actual source code: superlu_dist.c
1: /*
2: Provides an interface to the SuperLU_DIST sparse solver
3: */
5: #include <../src/mat/impls/aij/seq/aij.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <petscpkg_version.h>
9: EXTERN_C_BEGIN
10: #if defined(PETSC_USE_COMPLEX)
11: #define CASTDOUBLECOMPLEX (doublecomplex *)
12: #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
13: #include <superlu_zdefs.h>
14: #define LUstructInit zLUstructInit
15: #define ScalePermstructInit zScalePermstructInit
16: #define ScalePermstructFree zScalePermstructFree
17: #define LUstructFree zLUstructFree
18: #define Destroy_LU zDestroy_LU
19: #define ScalePermstruct_t zScalePermstruct_t
20: #define LUstruct_t zLUstruct_t
21: #define SOLVEstruct_t zSOLVEstruct_t
22: #define SolveFinalize zSolveFinalize
23: #define pGetDiagU pzGetDiagU
24: #define pgssvx pzgssvx
25: #define allocateA_dist zallocateA_dist
26: #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
27: #define SLU SLU_Z
28: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
29: #define DeAllocLlu_3d zDeAllocLlu_3d
30: #define DeAllocGlu_3d zDeAllocGlu_3d
31: #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
32: #define pgssvx3d pzgssvx3d
33: #endif
34: #elif defined(PETSC_USE_REAL_SINGLE)
35: #define CASTDOUBLECOMPLEX
36: #define CASTDOUBLECOMPLEXSTAR
37: #include <superlu_sdefs.h>
38: #define LUstructInit sLUstructInit
39: #define ScalePermstructInit sScalePermstructInit
40: #define ScalePermstructFree sScalePermstructFree
41: #define LUstructFree sLUstructFree
42: #define Destroy_LU sDestroy_LU
43: #define ScalePermstruct_t sScalePermstruct_t
44: #define LUstruct_t sLUstruct_t
45: #define SOLVEstruct_t sSOLVEstruct_t
46: #define SolveFinalize sSolveFinalize
47: #define pGetDiagU psGetDiagU
48: #define pgssvx psgssvx
49: #define allocateA_dist sallocateA_dist
50: #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
51: #define SLU SLU_S
52: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
53: #define DeAllocLlu_3d sDeAllocLlu_3d
54: #define DeAllocGlu_3d sDeAllocGlu_3d
55: #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
56: #define pgssvx3d psgssvx3d
57: #endif
58: #else
59: #define CASTDOUBLECOMPLEX
60: #define CASTDOUBLECOMPLEXSTAR
61: #include <superlu_ddefs.h>
62: #define LUstructInit dLUstructInit
63: #define ScalePermstructInit dScalePermstructInit
64: #define ScalePermstructFree dScalePermstructFree
65: #define LUstructFree dLUstructFree
66: #define Destroy_LU dDestroy_LU
67: #define ScalePermstruct_t dScalePermstruct_t
68: #define LUstruct_t dLUstruct_t
69: #define SOLVEstruct_t dSOLVEstruct_t
70: #define SolveFinalize dSolveFinalize
71: #define pGetDiagU pdGetDiagU
72: #define pgssvx pdgssvx
73: #define allocateA_dist dallocateA_dist
74: #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
75: #define SLU SLU_D
76: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
77: #define DeAllocLlu_3d dDeAllocLlu_3d
78: #define DeAllocGlu_3d dDeAllocGlu_3d
79: #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
80: #define pgssvx3d pdgssvx3d
81: #endif
82: #endif
83: EXTERN_C_END
85: typedef struct {
86: int_t nprow, npcol, *row, *col;
87: gridinfo_t grid;
88: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
89: PetscBool use3d;
90: int_t npdep; /* replication factor, must be power of two */
91: gridinfo3d_t grid3d;
92: #endif
93: superlu_dist_options_t options;
94: SuperMatrix A_sup;
95: ScalePermstruct_t ScalePermstruct;
96: LUstruct_t LUstruct;
97: int StatPrint;
98: SOLVEstruct_t SOLVEstruct;
99: fact_t FactPattern;
100: MPI_Comm comm_superlu;
101: PetscScalar *val;
102: PetscBool matsolve_iscalled, matmatsolve_iscalled;
103: PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
104: } Mat_SuperLU_DIST;
106: PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
107: {
108: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
110: PetscFunctionBegin;
111: PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
112: PetscFunctionReturn(PETSC_SUCCESS);
113: }
115: PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
116: {
117: PetscFunctionBegin;
119: PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
120: PetscFunctionReturn(PETSC_SUCCESS);
121: }
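/*
   A minimal usage sketch for MatSuperluDistGetDiagU() (illustrative, not part of this file):
   the factor is retrieved from a PC, and diagU is a caller-allocated array whose length N is
   the global dimension of F; ksp and N are assumed to exist in the caller.

     PC           pc;
     Mat          F;
     PetscScalar *diagU;

     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCFactorGetMatrix(pc, &F));        // F holds the SuperLU_DIST factorization
     PetscCall(PetscMalloc1(N, &diagU));          // N = global dimension of F
     PetscCall(MatSuperluDistGetDiagU(F, diagU)); // silently does nothing for other factor types
     PetscCall(PetscFree(diagU));
*/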
123: /* This allows reusing the SuperLU_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
124: typedef struct {
125: MPI_Comm comm;
126: PetscBool busy;
127: gridinfo_t grid;
128: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
129: PetscBool use3d;
130: gridinfo3d_t grid3d;
131: #endif
132: } PetscSuperLU_DIST;
133: static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
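/*
   The caching follows the standard MPI attribute pattern: a process-wide keyval is created
   once, and each communicator then carries at most one PetscSuperLU_DIST context as an
   attribute. A minimal sketch of the lookup-or-attach step used later in this file:

     PetscSuperLU_DIST *context;
     PetscMPIInt        flg;

     PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
     if (!flg) {                        // no cached context: create one and attach it
       PetscCall(PetscNew(&context));
       PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
       PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
     }                                  // the delete callback below frees it when comm is freed
*/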
135: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
136: {
137: PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;
139: PetscFunctionBegin;
140: if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
141: PetscCall(PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n"));
142: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
143: if (context->use3d) {
144: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
145: } else
146: #endif
147: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
148: PetscCallMPI(MPI_Comm_free(&context->comm));
149: PetscCall(PetscFree(context));
150: PetscFunctionReturn(MPI_SUCCESS);
151: }
153: /*
154: Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
155: users who do not destroy all PETSc objects before PetscFinalize().
157: The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
158: can still check that the keyval associated with the MPI communicator is correct when the MPI
159: communicator is destroyed.
161: This is called in PetscFinalize()
162: */
163: static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
164: {
165: PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
167: PetscFunctionBegin;
168: PetscCall(PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n"));
169: PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
170: PetscFunctionReturn(PETSC_SUCCESS);
171: }
173: static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
174: {
175: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
177: PetscFunctionBegin;
178: if (lu->CleanUpSuperLU_Dist) {
179: /* Deallocate SuperLU_DIST storage */
180: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
181: if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
182: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
183: if (lu->use3d) {
184: if (lu->grid3d.zscp.Iam == 0) {
185: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
186: } else {
187: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
188: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
189: }
190: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
191: } else
192: #endif
193: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
194: PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
195: PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
197: /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is, it is not the one cached in the communicator attribute */
198: if (lu->comm_superlu) {
199: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
200: if (lu->use3d) {
201: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
202: } else
203: #endif
204: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
205: }
206: }
207: /*
208:  * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
209:  * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
210:  * cases where we only create a factor matrix but never factor or solve with it. In those cases,
211:  * lu->CleanUpSuperLU_Dist is off, and the old code neither released the communicator nor marked
212:  * it as "not busy". Here we try to release the communicator regardless.
213:  */
214: if (lu->comm_superlu) {
215: PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
216: } else {
217: PetscSuperLU_DIST *context;
218: MPI_Comm comm;
219: PetscMPIInt flg;
221: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
222: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
223: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute");
224: context->busy = PETSC_FALSE;
225: }
227: PetscCall(PetscFree(A->data));
228: /* clear composed functions */
229: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
230: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));
232: PetscFunctionReturn(PETSC_SUCCESS);
233: }
235: static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
236: {
237: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
238: PetscInt m = A->rmap->n;
239: SuperLUStat_t stat;
240: PetscReal berr[1];
241: PetscScalar *bptr = NULL;
242: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
243: static PetscBool cite = PETSC_FALSE;
245: PetscFunctionBegin;
246: PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
247: PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM "
248: "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",
249: &cite));
251: if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
252: /* see comments in MatMatSolve() */
253: PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
254: lu->options.SolveInitialized = NO;
255: }
256: PetscCall(VecCopy(b_mpi, x));
257: PetscCall(VecGetArray(x, &bptr));
259: PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
260: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
261: if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
262: else
263: #endif
264: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
265: PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "p*gssvx() fails, info: %d", info);
267: if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
268: PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
270: PetscCall(VecRestoreArray(x, &bptr));
271: lu->matsolve_iscalled = PETSC_TRUE;
272: lu->matmatsolve_iscalled = PETSC_FALSE;
273: PetscFunctionReturn(PETSC_SUCCESS);
274: }
276: static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
277: {
278: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
279: PetscInt m = A->rmap->n, nrhs;
280: SuperLUStat_t stat;
281: PetscReal berr[1];
282: PetscScalar *bptr;
283: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
284: PetscBool flg;
286: PetscFunctionBegin;
287: PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
288: PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
289: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be a MATDENSE matrix");
290: if (X != B_mpi) {
291: PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
292: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be a MATDENSE matrix");
293: }
295: if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
296: /* the communication pattern in SOLVEstruct was probably not created for a matmatsolve,
297:    so destroy it and create a new SOLVEstruct.
298:    Otherwise the solve may result in memory corruption or an incorrect solution.
299:    See src/mat/tests/ex125.c */
300: PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
301: lu->options.SolveInitialized = NO;
302: }
303: if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));
305: PetscCall(MatGetSize(B_mpi, NULL, &nrhs));
307: PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
308: PetscCall(MatDenseGetArray(X, &bptr));
310: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
311: if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
312: else
313: #endif
314: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
316: PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "p*gssvx() fails, info: %d", info);
317: PetscCall(MatDenseRestoreArray(X, &bptr));
319: if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
320: PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
321: lu->matsolve_iscalled = PETSC_FALSE;
322: lu->matmatsolve_iscalled = PETSC_TRUE;
323: PetscFunctionReturn(PETSC_SUCCESS);
324: }
326: /*
327: input:
328: F: numeric Cholesky factor
329: output:
330: nneg: total number of negative pivots
331: nzero: total number of zero pivots
332: npos: (global dimension of F) - nneg - nzero
333: */
334: static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
335: {
336: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
337: PetscScalar *diagU = NULL;
338: PetscInt M, i, neg = 0, zero = 0, pos = 0;
339: PetscReal r;
341: PetscFunctionBegin;
342: PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
343: PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
344: PetscCall(MatGetSize(F, &M, NULL));
345: PetscCall(PetscMalloc1(M, &diagU));
346: PetscCall(MatSuperluDistGetDiagU(F, diagU));
347: for (i = 0; i < M; i++) {
348: #if defined(PETSC_USE_COMPLEX)
349: r = PetscImaginaryPart(diagU[i]) / 10.0;
350: PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
351: r = PetscRealPart(diagU[i]);
352: #else
353: r = diagU[i];
354: #endif
355: if (r > 0) {
356: pos++;
357: } else if (r < 0) {
358: neg++;
359: } else zero++;
360: }
362: PetscCall(PetscFree(diagU));
363: if (nneg) *nneg = neg;
364: if (nzero) *nzero = zero;
365: if (npos) *npos = pos;
366: PetscFunctionReturn(PETSC_SUCCESS);
367: }
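/*
   A hedged usage sketch: the inertia is reached through the public MatGetInertia() entry point
   once a Cholesky factorization has been computed with row permutation disabled, e.g. by running
   with -mat_superlu_dist_rowperm NOROWPERM. A is an assumed symmetric AIJ matrix; passing NULL
   for the permutation and factor-info arguments is assumed acceptable here since both are ignored.

     Mat      F;
     PetscInt nneg, nzero, npos;

     PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); // enables F->ops->getinertia below
     PetscCall(MatGetFactor(A, MATSOLVERSUPERLU_DIST, MAT_FACTOR_CHOLESKY, &F));
     PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
     PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
     PetscCall(MatDestroy(&F));
*/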
369: static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
370: {
371: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
372: Mat Aloc;
373: const PetscScalar *av;
374: const PetscInt *ai = NULL, *aj = NULL;
375: PetscInt nz, dummy;
376: int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
377: SuperLUStat_t stat;
378: PetscReal *berr = 0;
379: PetscBool ismpiaij, isseqaij, flg;
381: PetscFunctionBegin;
382: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
383: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
384: if (ismpiaij) {
385: PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
386: } else if (isseqaij) {
387: PetscCall(PetscObjectReference((PetscObject)A));
388: Aloc = A;
389: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
391: PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
392: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
393: PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
394: nz = ai[Aloc->rmap->n];
396: /* Allocations for A_sup */
397: if (lu->options.Fact == DOFACT) { /* first numeric factorization */
398: PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
399: } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
400: if (lu->FactPattern == SamePattern_SameRowPerm) {
401: lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
402: } else if (lu->FactPattern == SamePattern) {
403: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
404: if (lu->use3d) {
405: if (lu->grid3d.zscp.Iam == 0) {
406: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
407: PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
408: } else {
409: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
410: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
411: }
412: } else
413: #endif
414: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
415: lu->options.Fact = SamePattern;
416: } else if (lu->FactPattern == DOFACT) {
417: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
418: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
419: lu->options.Fact = DOFACT;
420: PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
421: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
422: }
424: /* Copy AIJ matrix to superlu_dist matrix */
425: PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
426: PetscCall(PetscArraycpy(lu->col, aj, nz));
427: PetscCall(PetscArraycpy(lu->val, av, nz));
428: PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
429: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
430: PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
431: PetscCall(MatDestroy(&Aloc));
433: /* Create and setup A_sup */
434: if (lu->options.Fact == DOFACT) {
435: PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
436: }
438: /* Factor the matrix. */
439: PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
440: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
441: if (lu->use3d) {
442: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
443: } else
444: #endif
445: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
446: if (sinfo > 0) {
447: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
448: if (sinfo <= lu->A_sup.ncol) {
449: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
450: PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
451: } else if (sinfo > lu->A_sup.ncol) {
452: /*
453: sinfo is the number of bytes allocated when the memory
454: allocation failure occurred, plus A->ncol.
455: */
456: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
457: PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
458: }
459: } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);
461: if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
462: PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
463: F->assembled = PETSC_TRUE;
464: F->preallocated = PETSC_TRUE;
465: lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
466: PetscFunctionReturn(PETSC_SUCCESS);
467: }
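/*
   A sketch of the calling pattern the FactPattern logic above supports (illustrative names;
   assumes the numerical values of A change between steps while its nonzero pattern stays fixed):

     for (step = 0; step < nsteps; step++) {
       // ... update the numerical entries of A ...
       PetscCall(KSPSetOperators(ksp, A, A)); // re-runs this numeric factorization
       PetscCall(KSPSolve(ksp, b, x));        // symbolic data is reused per -mat_superlu_dist_fact
     }
*/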
469: /* Note: the PETSc r and c permutations are ignored */
470: static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
471: {
472: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
473: PetscInt M = A->rmap->N, N = A->cmap->N, indx;
474: PetscMPIInt size, mpiflg;
475: PetscBool flg, set;
476: const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
477: const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
478: const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
479: MPI_Comm comm;
480: PetscSuperLU_DIST *context = NULL;
482: PetscFunctionBegin;
483: /* Set options to F */
484: PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
485: PetscCallMPI(MPI_Comm_size(comm, &size));
487: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
488: PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
489: if (set && !flg) lu->options.Equil = NO;
491: PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
492: if (flg) {
493: switch (indx) {
494: case 0:
495: lu->options.RowPerm = NOROWPERM;
496: break;
497: case 1:
498: lu->options.RowPerm = LargeDiag_MC64;
499: break;
500: case 2:
501: lu->options.RowPerm = LargeDiag_AWPM;
502: break;
503: case 3:
504: lu->options.RowPerm = MY_PERMR;
505: break;
506: default:
507: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
508: }
509: }
511: PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
512: if (flg) {
513: switch (indx) {
514: case 0:
515: lu->options.ColPerm = NATURAL;
516: break;
517: case 1:
518: lu->options.ColPerm = MMD_AT_PLUS_A;
519: break;
520: case 2:
521: lu->options.ColPerm = MMD_ATA;
522: break;
523: case 3:
524: lu->options.ColPerm = METIS_AT_PLUS_A;
525: break;
526: case 4:
527: lu->options.ColPerm = PARMETIS; /* only works for np>1 */
528: break;
529: default:
530: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
531: }
532: }
534: lu->options.ReplaceTinyPivot = NO;
535: PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
536: if (set && flg) lu->options.ReplaceTinyPivot = YES;
538: lu->options.ParSymbFact = NO;
539: PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
540: if (set && flg && size > 1) {
541: #if defined(PETSC_HAVE_PARMETIS)
542: lu->options.ParSymbFact = YES;
543: lu->options.ColPerm = PARMETIS; /* since v2.2, PARMETIS is forced when ParSymbFact is used, regardless of the user's ordering choice */
544: #else
545: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
546: #endif
547: }
549: lu->FactPattern = SamePattern;
550: PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
551: if (flg) {
552: switch (indx) {
553: case 0:
554: lu->FactPattern = SamePattern;
555: break;
556: case 1:
557: lu->FactPattern = SamePattern_SameRowPerm;
558: break;
559: case 2:
560: lu->FactPattern = DOFACT;
561: break;
562: }
563: }
565: lu->options.IterRefine = NOREFINE;
566: PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
567: if (set) {
568: if (flg) lu->options.IterRefine = SLU_DOUBLE;
569: else lu->options.IterRefine = NOREFINE;
570: }
572: if (PetscLogPrintInfo) lu->options.PrintStat = YES;
573: else lu->options.PrintStat = NO;
574: PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
575: PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));
577: /* Additional options for special cases */
578: if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
579: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0));
580: PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
581: }
582: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg));
583: if (!mpiflg || context->busy) { /* additional options */
584: if (!mpiflg) {
585: PetscCall(PetscNew(&context));
586: context->busy = PETSC_TRUE;
587: PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
588: PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
589: } else {
590: PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
591: }
593: /* Default number of process columns and rows */
594: lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
595: if (!lu->nprow) lu->nprow = 1;
596: while (lu->nprow > 0) {
597: lu->npcol = (int_t)(size / lu->nprow);
598: if (size == lu->nprow * lu->npcol) break;
599: lu->nprow--;
600: }
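/* e.g. size = 6 gives nprow = 2 (rounded square root) and npcol = 3; a prime size falls through to a 1 x size grid */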
601: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
602: lu->use3d = PETSC_FALSE;
603: lu->npdep = 1;
604: #endif
606: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
607: PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
608: PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation");
609: if (lu->use3d) {
610: PetscInt t;
611: PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
612: t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
613: PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
614: if (lu->npdep > 1) {
615: lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
616: if (!lu->nprow) lu->nprow = 1;
617: while (lu->nprow > 0) {
618: lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
619: if (size == lu->nprow * lu->npcol * lu->npdep) break;
620: lu->nprow--;
621: }
622: }
623: }
624: #endif
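/* e.g. size = 8 with -mat_superlu_dist_d 2: the 2d grid is sized over size/npdep = 4 ranks, giving a 2 x 2 x 2 process grid */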
625: PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number of rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
626: PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number of columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
627: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
628: PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
629: #else
630: PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
631: #endif
632: /* end of adding additional options */
634: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
635: if (lu->use3d) {
636: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
637: if (context) {
638: context->grid3d = lu->grid3d;
639: context->use3d = lu->use3d;
640: }
641: } else {
642: #endif
643: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
644: if (context) context->grid = lu->grid;
645: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
646: }
647: #endif
648: PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
649: if (mpiflg) {
650: PetscCall(PetscInfo(NULL, "Communicator attribute already in use, so not saving the communicator and SuperLU_DIST grid in the communicator attribute\n"));
651: } else {
652: PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
653: }
654: } else { /* (mpiflg && !context->busy) */
655: PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute\n"));
656: context->busy = PETSC_TRUE;
657: lu->grid = context->grid;
658: }
659: PetscOptionsEnd();
661: /* Initialize ScalePermstruct and LUstruct. */
662: PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
663: PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
664: F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
665: F->ops->solve = MatSolve_SuperLU_DIST;
666: F->ops->matsolve = MatMatSolve_SuperLU_DIST;
667: F->ops->getinertia = NULL;
669: if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
670: lu->CleanUpSuperLU_Dist = PETSC_TRUE;
671: PetscFunctionReturn(PETSC_SUCCESS);
672: }
674: static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
675: {
676: PetscFunctionBegin;
677: PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
678: F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
679: PetscFunctionReturn(PETSC_SUCCESS);
680: }
682: static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
683: {
684: PetscFunctionBegin;
685: *type = MATSOLVERSUPERLU_DIST;
686: PetscFunctionReturn(PETSC_SUCCESS);
687: }
689: static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
690: {
691: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
692: superlu_dist_options_t options;
694: PetscFunctionBegin;
695: /* check if matrix is superlu_dist type */
696: if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);
698: options = lu->options;
699: PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
700: /* we would love to use the SuperLU 'IFMT' macro here, but it looks like it is applied
701: * inconsistently; the format spec for int64_t is set to %d for whatever reason */
702: PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
703: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
704: if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
705: #endif
707: PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
708: PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
709: PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
710: PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));
712: switch (options.RowPerm) {
713: case NOROWPERM:
714: PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n"));
715: break;
716: case LargeDiag_MC64:
717: PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n"));
718: break;
719: case LargeDiag_AWPM:
720: PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n"));
721: break;
722: case MY_PERMR:
723: PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n"));
724: break;
725: default:
726: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
727: }
729: switch (options.ColPerm) {
730: case NATURAL:
731: PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n"));
732: break;
733: case MMD_AT_PLUS_A:
734: PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n"));
735: break;
736: case MMD_ATA:
737: PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n"));
738: break;
739: /* Even though this option is named METIS, SuperLU_DIST selects it by default when PARMETIS is defined, not METIS */
740: case METIS_AT_PLUS_A:
741: PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n"));
742: break;
743: case PARMETIS:
744: PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n"));
745: break;
746: default:
747: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
748: }
750: PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));
752: if (lu->FactPattern == SamePattern) {
753: PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n"));
754: } else if (lu->FactPattern == SamePattern_SameRowPerm) {
755: PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n"));
756: } else if (lu->FactPattern == DOFACT) {
757: PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n"));
758: } else {
759: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
760: }
761: PetscFunctionReturn(PETSC_SUCCESS);
762: }
764: static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
765: {
766: PetscBool iascii;
767: PetscViewerFormat format;
769: PetscFunctionBegin;
770: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
771: if (iascii) {
772: PetscCall(PetscViewerGetFormat(viewer, &format));
773: if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
774: }
775: PetscFunctionReturn(PETSC_SUCCESS);
776: }
778: static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
779: {
780: Mat B;
781: Mat_SuperLU_DIST *lu;
782: PetscInt M = A->rmap->N, N = A->cmap->N;
783: PetscMPIInt size;
784: superlu_dist_options_t options;
786: PetscFunctionBegin;
787: /* Create the factorization matrix */
788: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
789: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
790: PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
791: PetscCall(MatSetUp(B));
792: B->ops->getinfo = MatGetInfo_External;
793: B->ops->view = MatView_SuperLU_DIST;
794: B->ops->destroy = MatDestroy_SuperLU_DIST;
796: /* Set the default input options:
797: options.Fact = DOFACT;
798: options.Equil = YES;
799: options.ParSymbFact = NO;
800: options.ColPerm = METIS_AT_PLUS_A;
801: options.RowPerm = LargeDiag_MC64;
802: options.ReplaceTinyPivot = YES;
803: options.IterRefine = DOUBLE;
804: options.Trans = NOTRANS;
805: options.SolveInitialized = NO; - holds the communication pattern used by MatSolve() and MatMatSolve()
806: options.RefineInitialized = NO;
807: options.PrintStat = YES;
808: options.SymPattern = NO;
809: */
810: set_default_options_dist(&options);
812: B->trivialsymbolic = PETSC_TRUE;
813: if (ftype == MAT_FACTOR_LU) {
814: B->factortype = MAT_FACTOR_LU;
815: B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
816: } else {
817: B->factortype = MAT_FACTOR_CHOLESKY;
818: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
819: options.SymPattern = YES;
820: }
822: /* set solvertype */
823: PetscCall(PetscFree(B->solvertype));
824: PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));
826: PetscCall(PetscNew(&lu));
827: B->data = lu;
828: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
830: lu->options = options;
831: lu->options.Fact = DOFACT;
832: lu->matsolve_iscalled = PETSC_FALSE;
833: lu->matmatsolve_iscalled = PETSC_FALSE;
835: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
836: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));
838: *F = B;
839: PetscFunctionReturn(PETSC_SUCCESS);
840: }
842: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
843: {
844: PetscFunctionBegin;
845: PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
846: PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
847: PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
848: PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
849: PetscFunctionReturn(PETSC_SUCCESS);
850: }
852: /*MC
853: MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
855: Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST
857: Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver
859: Works with `MATAIJ` matrices
861: Options Database Keys:
862: + -mat_superlu_dist_r <n> - number of rows in processor partition
863: . -mat_superlu_dist_c <n> - number of columns in processor partition
864: . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
865: . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d` is provided)
866: . -mat_superlu_dist_equil - equilibrate the matrix
867: . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
868: . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
869: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
870: . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
871: . -mat_superlu_dist_iterrefine - use iterative refinement
872: - -mat_superlu_dist_printstat - print factorization information
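
   Example Usage:
   An illustrative command line (ex2 here stands in for any PETSc program that solves a linear system):
.vb
   mpiexec -n 4 ./ex2 -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type superlu_dist
.ve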
874: Level: beginner
876: Note:
877: If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs.
879: .seealso: [](chapter_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
880: M*/