Actual source code: superlu.c

  1: /*
  2:      This file implements a subclass of the SeqAIJ matrix class that uses
  3:      the SuperLU sparse solver.
  4: */

  6: /*
  7:      Defines the data structure for the base matrix type (SeqAIJ)
  8: */
  9: #include <../src/mat/impls/aij/seq/aij.h>

 11: /*
 12:      SuperLU include files
 13: */
 14: EXTERN_C_BEGIN
 15: #if defined(PETSC_USE_COMPLEX)
 16:   #if defined(PETSC_USE_REAL_SINGLE)
 17:     #include <slu_cdefs.h>
 18:   #else
 19:     #include <slu_zdefs.h>
 20:   #endif
 21: #else
 22:   #if defined(PETSC_USE_REAL_SINGLE)
 23:     #include <slu_sdefs.h>
 24:   #else
 25:     #include <slu_ddefs.h>
 26:   #endif
 27: #endif
 28: #include <slu_util.h>
 29: EXTERN_C_END

 31: /*
 32:      This is the data that defines the SuperLU factored matrix type
 33: */
 34: typedef struct {
 35:   SuperMatrix       A, L, U, B, X;
 36:   superlu_options_t options;
 37:   PetscInt         *perm_c; /* column permutation vector */
 38:   PetscInt         *perm_r; /* row permutations from partial pivoting */
 39:   PetscInt         *etree;
 40:   PetscReal        *R, *C;
 41:   char              equed[1];
 42:   PetscInt          lwork;
 43:   void             *work;
 44:   PetscReal         rpg, rcond;
 45:   mem_usage_t       mem_usage;
 46:   MatStructure      flg;
 47:   SuperLUStat_t     stat;
 48:   Mat               A_dup;
 49:   PetscScalar      *rhs_dup;
 50:   GlobalLU_t        Glu;
 51:   PetscBool         needconversion;

 53:   /* Flag to clean up (non-global) SuperLU objects during Destroy */
 54:   PetscBool CleanUpSuperLU;
 55: } Mat_SuperLU;

 57: /*
 58:     Utility function
 59: */
 60: static PetscErrorCode MatView_Info_SuperLU(Mat A, PetscViewer viewer)
 61: {
 62:   Mat_SuperLU      *lu = (Mat_SuperLU *)A->data;
 63:   superlu_options_t options;

 65:   PetscFunctionBegin;
 66:   options = lu->options;

 68:   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU run parameters:\n"));
 69:   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equil: %s\n", (options.Equil != NO) ? "YES" : "NO"));
 70:   PetscCall(PetscViewerASCIIPrintf(viewer, "  ColPerm: %" PetscInt_FMT "\n", options.ColPerm));
 71:   PetscCall(PetscViewerASCIIPrintf(viewer, "  IterRefine: %" PetscInt_FMT "\n", options.IterRefine));
 72:   PetscCall(PetscViewerASCIIPrintf(viewer, "  SymmetricMode: %s\n", (options.SymmetricMode != NO) ? "YES" : "NO"));
 73:   PetscCall(PetscViewerASCIIPrintf(viewer, "  DiagPivotThresh: %g\n", options.DiagPivotThresh));
 74:   PetscCall(PetscViewerASCIIPrintf(viewer, "  PivotGrowth: %s\n", (options.PivotGrowth != NO) ? "YES" : "NO"));
 75:   PetscCall(PetscViewerASCIIPrintf(viewer, "  ConditionNumber: %s\n", (options.ConditionNumber != NO) ? "YES" : "NO"));
 76:   PetscCall(PetscViewerASCIIPrintf(viewer, "  RowPerm: %" PetscInt_FMT "\n", options.RowPerm));
 77:   PetscCall(PetscViewerASCIIPrintf(viewer, "  ReplaceTinyPivot: %s\n", (options.ReplaceTinyPivot != NO) ? "YES" : "NO"));
 78:   PetscCall(PetscViewerASCIIPrintf(viewer, "  PrintStat: %s\n", (options.PrintStat != NO) ? "YES" : "NO"));
 79:   PetscCall(PetscViewerASCIIPrintf(viewer, "  lwork: %" PetscInt_FMT "\n", lu->lwork));
 80:   if (A->factortype == MAT_FACTOR_ILU) {
 81:     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropTol: %g\n", options.ILU_DropTol));
 82:     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillTol: %g\n", options.ILU_FillTol));
 83:     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillFactor: %g\n", options.ILU_FillFactor));
 84:     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropRule: %" PetscInt_FMT "\n", options.ILU_DropRule));
 85:     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_Norm: %" PetscInt_FMT "\n", options.ILU_Norm));
 86:     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_MILU: %" PetscInt_FMT "\n", options.ILU_MILU));
 87:   }
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }

 91: static PetscErrorCode MatSolve_SuperLU_Private(Mat A, Vec b, Vec x)
 92: {
 93:   Mat_SuperLU       *lu = (Mat_SuperLU *)A->data;
 94:   const PetscScalar *barray;
 95:   PetscScalar       *xarray;
 96:   PetscInt           info, i, n;
 97:   PetscReal          ferr, berr;
 98:   static PetscBool   cite = PETSC_FALSE;

100:   PetscFunctionBegin;
101:   if (lu->lwork == -1) PetscFunctionReturn(PETSC_SUCCESS);
102:   PetscCall(PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial "
103:                                    "pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",
104:                                    &cite));

106:   PetscCall(VecGetLocalSize(x, &n));
107:   lu->B.ncol = 1; /* Set the number of right-hand side */
108:   if (lu->options.Equil && !lu->rhs_dup) {
109:     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
110:     PetscCall(PetscMalloc1(n, &lu->rhs_dup));
111:   }
112:   if (lu->options.Equil) {
113:     /* Copy b into rsh_dup */
114:     PetscCall(VecGetArrayRead(b, &barray));
115:     PetscCall(PetscArraycpy(lu->rhs_dup, barray, n));
116:     PetscCall(VecRestoreArrayRead(b, &barray));
117:     barray = lu->rhs_dup;
118:   } else {
119:     PetscCall(VecGetArrayRead(b, &barray));
120:   }
121:   PetscCall(VecGetArray(x, &xarray));

123: #if defined(PETSC_USE_COMPLEX)
124:   #if defined(PETSC_USE_REAL_SINGLE)
125:   ((DNformat *)lu->B.Store)->nzval = (singlecomplex *)barray;
126:   ((DNformat *)lu->X.Store)->nzval = (singlecomplex *)xarray;
127:   #else
128:   ((DNformat *)lu->B.Store)->nzval = (doublecomplex *)barray;
129:   ((DNformat *)lu->X.Store)->nzval = (doublecomplex *)xarray;
130:   #endif
131: #else
132:   ((DNformat *)lu->B.Store)->nzval = (void *)barray;
133:   ((DNformat *)lu->X.Store)->nzval = xarray;
134: #endif

136:   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
137:   if (A->factortype == MAT_FACTOR_LU) {
138: #if defined(PETSC_USE_COMPLEX)
139:   #if defined(PETSC_USE_REAL_SINGLE)
140:     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
141:   #else
142:     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
143:   #endif
144: #else
145:   #if defined(PETSC_USE_REAL_SINGLE)
146:     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
147:   #else
148:     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
149:   #endif
150: #endif
151:   } else if (A->factortype == MAT_FACTOR_ILU) {
152: #if defined(PETSC_USE_COMPLEX)
153:   #if defined(PETSC_USE_REAL_SINGLE)
154:     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
155:   #else
156:     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
157:   #endif
158: #else
159:   #if defined(PETSC_USE_REAL_SINGLE)
160:     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
161:   #else
162:     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
163:   #endif
164: #endif
165:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
166:   if (!lu->options.Equil) PetscCall(VecRestoreArrayRead(b, &barray));
167:   PetscCall(VecRestoreArray(x, &xarray));

169:   if (!info || info == lu->A.ncol + 1) {
170:     if (lu->options.IterRefine) {
171:       PetscCall(PetscPrintf(PETSC_COMM_SELF, "Iterative Refinement:\n"));
172:       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"));
173:       for (i = 0; i < 1; ++i) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8d%8d%16e%16e\n", i + 1, lu->stat.RefineSteps, ferr, berr));
174:     }
175:   } else if (info > 0) {
176:     if (lu->lwork == -1) {
177:       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol));
178:     } else {
179:       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Warning: gssvx() returns info %" PetscInt_FMT "\n", info));
180:     }
181:   } else PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", info, -info);

183:   if (lu->options.PrintStat) {
184:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatSolve__SuperLU():\n"));
185:     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
186:   }
187:   PetscFunctionReturn(PETSC_SUCCESS);
188: }

190: static PetscErrorCode MatSolve_SuperLU(Mat A, Vec b, Vec x)
191: {
192:   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
193:   trans_t      oldOption;

195:   PetscFunctionBegin;
196:   if (A->factorerrortype) {
197:     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
198:     PetscCall(VecSetInf(x));
199:     PetscFunctionReturn(PETSC_SUCCESS);
200:   }

202:   oldOption         = lu->options.Trans;
203:   lu->options.Trans = TRANS;
204:   PetscCall(MatSolve_SuperLU_Private(A, b, x));
205:   lu->options.Trans = oldOption;
206:   PetscFunctionReturn(PETSC_SUCCESS);
207: }

209: static PetscErrorCode MatSolveTranspose_SuperLU(Mat A, Vec b, Vec x)
210: {
211:   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
212:   trans_t      oldOption;

214:   PetscFunctionBegin;
215:   if (A->factorerrortype) {
216:     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
217:     PetscCall(VecSetInf(x));
218:     PetscFunctionReturn(PETSC_SUCCESS);
219:   }

221:   oldOption         = lu->options.Trans;
222:   lu->options.Trans = NOTRANS;
223:   PetscCall(MatSolve_SuperLU_Private(A, b, x));
224:   lu->options.Trans = oldOption;
225:   PetscFunctionReturn(PETSC_SUCCESS);
226: }

228: static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F, Mat A, const MatFactorInfo *info)
229: {
230:   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
231:   Mat_SeqAIJ  *aa;
232:   PetscInt     sinfo;
233:   PetscReal    ferr, berr;
234:   NCformat    *Ustore;
235:   SCformat    *Lstore;

237:   PetscFunctionBegin;
238:   if (lu->flg == SAME_NONZERO_PATTERN) { /* successive numerical factorization */
239:     lu->options.Fact = SamePattern;
240:     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
241:     Destroy_SuperMatrix_Store(&lu->A);
242:     if (lu->A_dup) PetscCall(MatCopy_SeqAIJ(A, lu->A_dup, SAME_NONZERO_PATTERN));

244:     if (lu->lwork >= 0) {
245:       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
246:       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
247:       lu->options.Fact = SamePattern;
248:     }
249:   }

251:   /* Create the SuperMatrix for lu->A=A^T:
252:        Since SuperLU likes column-oriented matrices,we pass it the transpose,
253:        and then solve A^T X = B in MatSolve(). */
254:   if (lu->A_dup) {
255:     aa = (Mat_SeqAIJ *)lu->A_dup->data;
256:   } else {
257:     aa = (Mat_SeqAIJ *)(A)->data;
258:   }
259: #if defined(PETSC_USE_COMPLEX)
260:   #if defined(PETSC_USE_REAL_SINGLE)
261:   PetscStackCallExternalVoid("SuperLU:cCreate_CompCol_Matrix", cCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (singlecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_C, SLU_GE));
262:   #else
263:   PetscStackCallExternalVoid("SuperLU:zCreate_CompCol_Matrix", zCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (doublecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_Z, SLU_GE));
264:   #endif
265: #else
266:   #if defined(PETSC_USE_REAL_SINGLE)
267:   PetscStackCallExternalVoid("SuperLU:sCreate_CompCol_Matrix", sCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_S, SLU_GE));
268:   #else
269:   PetscStackCallExternalVoid("SuperLU:dCreate_CompCol_Matrix", dCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_D, SLU_GE));
270:   #endif
271: #endif

273:   /* Numerical factorization */
274:   lu->B.ncol = 0; /* Indicate not to solve the system */
275:   if (F->factortype == MAT_FACTOR_LU) {
276: #if defined(PETSC_USE_COMPLEX)
277:   #if defined(PETSC_USE_REAL_SINGLE)
278:     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
279:   #else
280:     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
281:   #endif
282: #else
283:   #if defined(PETSC_USE_REAL_SINGLE)
284:     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
285:   #else
286:     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
287:   #endif
288: #endif
289:   } else if (F->factortype == MAT_FACTOR_ILU) {
290:     /* Compute the incomplete factorization, condition number and pivot growth */
291: #if defined(PETSC_USE_COMPLEX)
292:   #if defined(PETSC_USE_REAL_SINGLE)
293:     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
294:   #else
295:     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
296:   #endif
297: #else
298:   #if defined(PETSC_USE_REAL_SINGLE)
299:     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
300:   #else
301:     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
302:   #endif
303: #endif
304:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
305:   if (!sinfo || sinfo == lu->A.ncol + 1) {
306:     if (lu->options.PivotGrowth) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. pivot growth = %e\n", lu->rpg));
307:     if (lu->options.ConditionNumber) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. condition number = %e\n", lu->rcond));
308:   } else if (sinfo > 0) {
309:     if (A->erroriffailure) {
310:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %" PetscInt_FMT, sinfo);
311:     } else {
312:       if (sinfo <= lu->A.ncol) {
313:         if (lu->options.ILU_FillTol == 0.0) F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
314:         PetscCall(PetscInfo(F, "Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n", sinfo, lu->options.ILU_FillTol));
315:       } else if (sinfo == lu->A.ncol + 1) {
316:         /*
317:          U is nonsingular, but RCOND is less than machine
318:                       precision, meaning that the matrix is singular to
319:                       working precision. Nevertheless, the solution and
320:                       error bounds are computed because there are a number
321:                       of situations where the computed solution can be more
322:                       accurate than the value of RCOND would suggest.
323:          */
324:         PetscCall(PetscInfo(F, "Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT "\n", sinfo));
325:       } else { /* sinfo > lu->A.ncol + 1 */
326:         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
327:         PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n", sinfo));
328:       }
329:     }
330:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo, -sinfo);

332:   if (lu->options.PrintStat) {
333:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatLUFactorNumeric_SuperLU():\n"));
334:     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
335:     Lstore = (SCformat *)lu->L.Store;
336:     Ustore = (NCformat *)lu->U.Store;
337:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz));
338:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz));
339:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol));
340:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu / 1e6, lu->mem_usage.total_needed / 1e6));
341:   }

343:   lu->flg                = SAME_NONZERO_PATTERN;
344:   F->ops->solve          = MatSolve_SuperLU;
345:   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
346:   F->ops->matsolve       = NULL;
347:   PetscFunctionReturn(PETSC_SUCCESS);
348: }

350: static PetscErrorCode MatDestroy_SuperLU(Mat A)
351: {
352:   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;

354:   PetscFunctionBegin;
355:   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
356:     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->A));
357:     if (lu->lwork >= 0) {
358:       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
359:       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
360:     }
361:   }
362:   PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->B));
363:   PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->X));
364:   PetscStackCallExternalVoid("SuperLU:StatFree", StatFree(&lu->stat));
365:   PetscCall(PetscFree(lu->etree));
366:   PetscCall(PetscFree(lu->perm_r));
367:   PetscCall(PetscFree(lu->perm_c));
368:   PetscCall(PetscFree(lu->R));
369:   PetscCall(PetscFree(lu->C));
370:   PetscCall(PetscFree(lu->rhs_dup));
371:   PetscCall(MatDestroy(&lu->A_dup));
372:   PetscCall(PetscFree(A->data));

374:   /* clear composed functions */
375:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
376:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluSetILUDropTol_C", NULL));
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }

380: static PetscErrorCode MatView_SuperLU(Mat A, PetscViewer viewer)
381: {
382:   PetscBool         iascii;
383:   PetscViewerFormat format;

385:   PetscFunctionBegin;
386:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
387:   if (iascii) {
388:     PetscCall(PetscViewerGetFormat(viewer, &format));
389:     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU(A, viewer));
390:   }
391:   PetscFunctionReturn(PETSC_SUCCESS);
392: }

394: static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
395: {
396:   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
397:   PetscInt     indx;
398:   PetscBool    flg, set;
399:   PetscReal    real_input;
400:   const char  *colperm[]    = {"NATURAL", "MMD_ATA", "MMD_AT_PLUS_A", "COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
401:   const char  *iterrefine[] = {"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
402:   const char  *rowperm[]    = {"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */

404:   PetscFunctionBegin;
405:   /* Set options to F */
406:   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU Options", "Mat");
407:   PetscCall(PetscOptionsBool("-mat_superlu_equil", "Equil", "None", (PetscBool)lu->options.Equil, (PetscBool *)&lu->options.Equil, NULL));
408:   PetscCall(PetscOptionsEList("-mat_superlu_colperm", "ColPerm", "None", colperm, 4, colperm[3], &indx, &flg));
409:   if (flg) lu->options.ColPerm = (colperm_t)indx;
410:   PetscCall(PetscOptionsEList("-mat_superlu_iterrefine", "IterRefine", "None", iterrefine, 4, iterrefine[0], &indx, &flg));
411:   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
412:   PetscCall(PetscOptionsBool("-mat_superlu_symmetricmode", "SymmetricMode", "None", (PetscBool)lu->options.SymmetricMode, &flg, &set));
413:   if (set && flg) lu->options.SymmetricMode = YES;
414:   PetscCall(PetscOptionsReal("-mat_superlu_diagpivotthresh", "DiagPivotThresh", "None", lu->options.DiagPivotThresh, &real_input, &flg));
415:   if (flg) lu->options.DiagPivotThresh = (double)real_input;
416:   PetscCall(PetscOptionsBool("-mat_superlu_pivotgrowth", "PivotGrowth", "None", (PetscBool)lu->options.PivotGrowth, &flg, &set));
417:   if (set && flg) lu->options.PivotGrowth = YES;
418:   PetscCall(PetscOptionsBool("-mat_superlu_conditionnumber", "ConditionNumber", "None", (PetscBool)lu->options.ConditionNumber, &flg, &set));
419:   if (set && flg) lu->options.ConditionNumber = YES;
420:   PetscCall(PetscOptionsEList("-mat_superlu_rowperm", "rowperm", "None", rowperm, 2, rowperm[lu->options.RowPerm], &indx, &flg));
421:   if (flg) lu->options.RowPerm = (rowperm_t)indx;
422:   PetscCall(PetscOptionsBool("-mat_superlu_replacetinypivot", "ReplaceTinyPivot", "None", (PetscBool)lu->options.ReplaceTinyPivot, &flg, &set));
423:   if (set && flg) lu->options.ReplaceTinyPivot = YES;
424:   PetscCall(PetscOptionsBool("-mat_superlu_printstat", "PrintStat", "None", (PetscBool)lu->options.PrintStat, &flg, &set));
425:   if (set && flg) lu->options.PrintStat = YES;
426:   PetscCall(PetscOptionsInt("-mat_superlu_lwork", "size of work array in bytes used by factorization", "None", lu->lwork, &lu->lwork, NULL));
427:   if (lu->lwork > 0) {
428:     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
429:     PetscCall(PetscMalloc(lu->lwork, &lu->work));
430:   } else if (lu->lwork != 0 && lu->lwork != -1) {
431:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "   Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n", lu->lwork));
432:     lu->lwork = 0;
433:   }
434:   /* ilu options */
435:   PetscCall(PetscOptionsReal("-mat_superlu_ilu_droptol", "ILU_DropTol", "None", lu->options.ILU_DropTol, &real_input, &flg));
436:   if (flg) lu->options.ILU_DropTol = (double)real_input;
437:   PetscCall(PetscOptionsReal("-mat_superlu_ilu_filltol", "ILU_FillTol", "None", lu->options.ILU_FillTol, &real_input, &flg));
438:   if (flg) lu->options.ILU_FillTol = (double)real_input;
439:   PetscCall(PetscOptionsReal("-mat_superlu_ilu_fillfactor", "ILU_FillFactor", "None", lu->options.ILU_FillFactor, &real_input, &flg));
440:   if (flg) lu->options.ILU_FillFactor = (double)real_input;
441:   PetscCall(PetscOptionsInt("-mat_superlu_ilu_droprull", "ILU_DropRule", "None", lu->options.ILU_DropRule, &lu->options.ILU_DropRule, NULL));
442:   PetscCall(PetscOptionsInt("-mat_superlu_ilu_norm", "ILU_Norm", "None", lu->options.ILU_Norm, &indx, &flg));
443:   if (flg) lu->options.ILU_Norm = (norm_t)indx;
444:   PetscCall(PetscOptionsInt("-mat_superlu_ilu_milu", "ILU_MILU", "None", lu->options.ILU_MILU, &indx, &flg));
445:   if (flg) lu->options.ILU_MILU = (milu_t)indx;
446:   PetscOptionsEnd();

448:   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
449:   lu->CleanUpSuperLU      = PETSC_TRUE;
450:   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;

452:   /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */
453:   PetscCall(MatDestroy(&lu->A_dup));
454:   if (lu->needconversion) PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &lu->A_dup));
455:   if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
456:     PetscCall(MatDuplicate_SeqAIJ(A, MAT_COPY_VALUES, &lu->A_dup));
457:   }
458:   PetscFunctionReturn(PETSC_SUCCESS);
459: }

461: static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F, PetscReal dtol)
462: {
463:   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;

465:   PetscFunctionBegin;
466:   lu->options.ILU_DropTol = dtol;
467:   PetscFunctionReturn(PETSC_SUCCESS);
468: }

470: /*@
471:   MatSuperluSetILUDropTol - Set SuperLU <https://portal.nersc.gov/project/sparse/superlu/superlu_ug.pdf> ILU drop tolerance

473:   Logically Collective

475:   Input Parameters:
476: + F    - the factored matrix obtained by calling `MatGetFactor()`
477: - dtol - drop tolerance

479:   Options Database Key:
480: . -mat_superlu_ilu_droptol <dtol> - the drop tolerance

482:   Level: beginner

484: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MATSOLVERSUPERLU`
485: @*/
486: PetscErrorCode MatSuperluSetILUDropTol(Mat F, PetscReal dtol)
487: {
488:   PetscFunctionBegin;
491:   PetscTryMethod(F, "MatSuperluSetILUDropTol_C", (Mat, PetscReal), (F, dtol));
492:   PetscFunctionReturn(PETSC_SUCCESS);
493: }

495: static PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A, MatSolverType *type)
496: {
497:   PetscFunctionBegin;
498:   *type = MATSOLVERSUPERLU;
499:   PetscFunctionReturn(PETSC_SUCCESS);
500: }

502: /*MC
503:   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
504:   via the external package SuperLU <https://portal.nersc.gov/project/sparse/superlu/superlu_ug.pdf>

506:   Use `./configure --download-superlu` to have PETSc installed with SuperLU

508:   Use `-pc_type lu` `-pc_factor_mat_solver_type superlu` to use this direct solver

510:   Options Database Keys:
511: + -mat_superlu_equil <FALSE>            - Equil (None)
512: . -mat_superlu_colperm <COLAMD>         - (choose one of) `NATURAL`, `MMD_ATA MMD_AT_PLUS_A`, `COLAMD`
513: . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) `NOREFINE`, `SINGLE`, `DOUBLE`, `EXTRA`
514: . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
515: . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
516: . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
517: . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
518: . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) `NOROWPERM`, `LargeDiag`
519: . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
520: . -mat_superlu_printstat <FALSE>        - PrintStat (None)
521: . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
522: . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
523: . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
524: . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
525: . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
526: . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
527: - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)

529:    Level: beginner

531:    Notes:
532:    Do not confuse this with `MATSOLVERSUPERLU_DIST` which is for parallel sparse solves

534:    Cannot use ordering provided by PETSc, provides its own.

536: .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCILU`, `MATSOLVERSUPERLU_DIST`, `MATSOLVERMUMPS`, `PCFactorSetMatSolverType()`, `MatSolverType`
537: M*/

539: static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A, MatFactorType ftype, Mat *F)
540: {
541:   Mat          B;
542:   Mat_SuperLU *lu;
543:   PetscInt     m = A->rmap->n, n = A->cmap->n;

545:   PetscFunctionBegin;
546:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
547:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DETERMINE, PETSC_DETERMINE));
548:   PetscCall(PetscStrallocpy("superlu", &((PetscObject)B)->type_name));
549:   PetscCall(MatSetUp(B));
550:   B->trivialsymbolic = PETSC_TRUE;
551:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
552:     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
553:     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
554:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");

556:   PetscCall(PetscFree(B->solvertype));
557:   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU, &B->solvertype));

559:   B->ops->getinfo = MatGetInfo_External;
560:   B->ops->destroy = MatDestroy_SuperLU;
561:   B->ops->view    = MatView_SuperLU;
562:   B->factortype   = ftype;
563:   B->assembled    = PETSC_TRUE; /* required by -ksp_view */
564:   B->preallocated = PETSC_TRUE;

566:   PetscCall(PetscNew(&lu));

568:   if (ftype == MAT_FACTOR_LU) {
569:     set_default_options(&lu->options);
570:     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
571:       "Whether or not the system will be equilibrated depends on the
572:        scaling of the matrix A, but if equilibration is used, A is
573:        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
574:        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
575:      We set 'options.Equil = NO' as default because additional space is needed for it.
576:     */
577:     lu->options.Equil = NO;
578:   } else if (ftype == MAT_FACTOR_ILU) {
579:     /* Set the default input options of ilu: */
580:     PetscStackCallExternalVoid("SuperLU:ilu_set_default_options", ilu_set_default_options(&lu->options));
581:   }
582:   lu->options.PrintStat = NO;

584:   /* Initialize the statistics variables. */
585:   PetscStackCallExternalVoid("SuperLU:StatInit", StatInit(&lu->stat));
586:   lu->lwork = 0; /* allocate space internally by system malloc */

588:   /* Allocate spaces (notice sizes are for the transpose) */
589:   PetscCall(PetscMalloc1(m, &lu->etree));
590:   PetscCall(PetscMalloc1(n, &lu->perm_r));
591:   PetscCall(PetscMalloc1(m, &lu->perm_c));
592:   PetscCall(PetscMalloc1(n, &lu->R));
593:   PetscCall(PetscMalloc1(m, &lu->C));

595:   /* create rhs and solution x without allocate space for .Store */
596: #if defined(PETSC_USE_COMPLEX)
597:   #if defined(PETSC_USE_REAL_SINGLE)
598:   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
599:   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
600:   #else
601:   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
602:   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
603:   #endif
604: #else
605:   #if defined(PETSC_USE_REAL_SINGLE)
606:   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
607:   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
608:   #else
609:   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
610:   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
611:   #endif
612: #endif

614:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_seqaij_superlu));
615:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluSetILUDropTol_C", MatSuperluSetILUDropTol_SuperLU));
616:   B->data = lu;

618:   *F = B;
619:   PetscFunctionReturn(PETSC_SUCCESS);
620: }

622: static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A, MatFactorType ftype, Mat *F)
623: {
624:   Mat_SuperLU *lu;

626:   PetscFunctionBegin;
627:   PetscCall(MatGetFactor_seqaij_superlu(A, ftype, F));
628:   lu                 = (Mat_SuperLU *)((*F)->data);
629:   lu->needconversion = PETSC_TRUE;
630:   PetscFunctionReturn(PETSC_SUCCESS);
631: }

633: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void)
634: {
635:   PetscFunctionBegin;
636:   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_seqaij_superlu));
637:   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_ILU, MatGetFactor_seqaij_superlu));
638:   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_seqsell_superlu));
639:   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_ILU, MatGetFactor_seqsell_superlu));
640:   PetscFunctionReturn(PETSC_SUCCESS);
641: }