Actual source code: superlu_dist.c

#define PETSCMAT_DLL

/*
        Provides an interface to the SuperLU_DIST_2.2 sparse solver
*/

#include "../src/mat/impls/aij/seq/aij.h"
#include "../src/mat/impls/aij/mpi/mpiaij.h"
#if defined(PETSC_HAVE_STDLIB_H) /* this gets around a weird problem with SuperLU on Cray */
#include "stdlib.h"
#endif

#if defined(PETSC_USE_64BIT_INDICES)
/* ugly SuperLU_DIST macro telling it to use long long int */
#define _LONGINT
#endif

#if defined(PETSC_USE_COMPLEX)
#include "superlu_zdefs.h"
#else
#include "superlu_ddefs.h"
#endif
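
/*
   Input modes (a summary of the code paths below): GLOBAL gathers the entire
   matrix and right-hand side onto every process before handing them to
   SuperLU_DIST, while DISTRIBUTED passes each process's local rows directly.
*/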

typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};

typedef struct {
  int_t                   nprow,npcol,*row,*col;
  gridinfo_t              grid;
  superlu_options_t       options;
  SuperMatrix             A_sup;
  ScalePermstruct_t       ScalePermstruct;
  LUstruct_t              LUstruct;
  int                     StatPrint;
  SuperLU_MatInputMode    MatInputMode;
  SOLVEstruct_t           SOLVEstruct;
  fact_t                  FactPattern;
  MPI_Comm                comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;
#else
  double                  *val;
#endif

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
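
/*
   Note: val, col, and row hold the matrix in SuperLU_DIST's own storage format;
   they are allocated by [z,d]CompRow_to_CompCol_dist() (GLOBAL input) or by
   [z,d]allocateA_dist() (DISTRIBUTED input) during numeric factorization.
*/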


PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscTruth       flg;

  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    MPI_Comm_free(&(lu->comm_superlu));
  }

  PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
  if (flg) {
    MatDestroy_SeqAIJ(A);
  } else {
    MatDestroy_MPIAIJ(A);
  }
  return(0);
}

PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->N, N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info; /* SuperLU_DIST info code is ALWAYS an int, even with long long indices */

  MPI_Comm_size(((PetscObject)A)->comm,&size);
  if (size > 1) {
    if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
      VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
      ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
      VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
      ISDestroy(iden);

      VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
      VecGetArray(x_seq,&bptr);
    } else { /* distributed mat input */
      VecCopy(b_mpi,x);
      VecGetArray(x,&bptr);
    }
  } else { /* size == 1 */
    VecCopy(b_mpi,x);
    VecGetArray(x,&bptr);
  }
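
  /* Added note: in GLOBAL mode the scatter above leaves a full-length copy of the
     right-hand side on every process, so this path does not scale in memory. */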

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PStatInit(&stat);        /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
                   &lu->grid, &lu->LUstruct, berr, &stat, &info);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
                   &lu->grid, &lu->LUstruct, berr, &stat, &info);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
#endif
  }
  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1) {
    if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
      VecRestoreArray(x_seq,&bptr);
      VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
      VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
      VecScatterDestroy(scat);
      VecDestroy(x_seq);
    } else {
      VecRestoreArray(x,&bptr);
    }
  } else {
    VecRestoreArray(x,&bptr);
  }
  return(0);
}

PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo; /* SuperLU_DIST info flag is always an int, even with long long indices */
  PetscMPIInt      size,rank;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  MPI_Comm_size(((PetscObject)A)->comm,&size);
  MPI_Comm_rank(((PetscObject)A)->comm,&rank);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    MPI_Barrier(((PetscObject)A)->comm);
    PetscGetTime(&time0);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
      MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
      ISDestroy(isrow);

      A_seq = *tseq;
      PetscFree(tseq);
      aa = (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscTruth flg;
      PetscTypeCompare((PetscObject)A,MATMPIAIJ,&flg);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa = (Mat_SeqAIJ*)A->data;
    }

    /* Convert the PETSc compressed-row (NR) matrix to SuperLU_DIST compressed-column (NC) format.
       Note: the memory for lu->val, lu->col, and lu->row is allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) { /* successive numeric factorization, sparsity pattern is reused */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); crashes if called here; omitting it does not lead to a memory leak */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col,&lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val,&lu->col,&lu->row);
#endif

    /* Create the compressed-column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa=(Mat_SeqAIJ*)(mat->A)->data;
    bb=(Mat_SeqAIJ*)(mat->B)->data;
    ai=aa->i; aj=aa->j;
    bi=bb->i; bj=bb->j;
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); crashes if called here; omitting it does not lead to a memory leak */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* deallocate storage associated with the L and U matrices */
        lu->options.Fact = SamePattern;
      }
    }
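
    /* Added note: each local row is emitted in ascending global column order by
       interleaving the diagonal block A (global columns rstart+ajj[]) with the
       off-diagonal block B (global columns garray[bjj[]]): first the B entries
       below the A block, then the A entries, then the remaining B entries.
       Example: A columns {5,7} (rstart=5) and B columns {2,9} merge to 2,5,7,9. */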
    nz = 0; irow = rstart;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj = aj + ai[i];  /* ptr to the beginning of this row */
      bjj = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB = 0;
      for (j=0; j<countB; j++){
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz] = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++){
        lu->col[nz] = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++){
        lu->col[nz] = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    PetscGetTime(&time);
    time0 = time - time0;
  }

  /* Factor the matrix. */
  PStatInit(&stat);   /* Initialize the statistics variables. */
  CHKMEMQ;
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1){
    MatDestroy(A_seq);
  }

  if (lu->options.PrintStat) {
    if (size > 1){
      MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
      MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
      MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
      time = time/size; /* average time */
      if (!rank) {
        PetscPrintf(PETSC_COMM_SELF, "        Mat conversion (PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);
      }
    } else {
      PetscPrintf(PETSC_COMM_SELF, "        Mat conversion (PETSc->SuperLU_DIST) time: \n    %g\n",time0);
    }
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1){
    F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* the factored form of A is supplied; local option used only by this function */
  return(0);
}

/* Note: the PETSc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscInt          M=A->rmap->N,N=A->cmap->N;

  /* Initialize the SuperLU process grid. */
  superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

  /* Initialize ScalePermstruct and LUstruct. */
  ScalePermstructInit(M, N, &lu->ScalePermstruct);
  LUstructInit(M, N, &lu->LUstruct);
  (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
  (F)->ops->solve            = MatSolve_SuperLU_DIST;
  return(0);
}

PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
{
  *type = MAT_SOLVER_SUPERLU_DIST;
  return(0);
}

PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  Mat               B;
  Mat_SuperLU_DIST  *lu;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
  PetscMPIInt       size;
  superlu_options_t options;
  PetscTruth        flg;
  const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
  const char        *prtype[] = {"LargeDiag","NATURAL"};
  const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

  /* Create the factorization matrix */
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
  MatSetType(B,((PetscObject)A)->type_name);
  MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
  MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

  B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  B->ops->view             = MatView_SuperLU_DIST;
  B->ops->destroy          = MatDestroy_SuperLU_DIST;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);
  B->factor                = MAT_FACTOR_LU;

  PetscNewLog(B,Mat_SuperLU_DIST,&lu);
  B->spptr = lu;

  /* Set the default input options:
     options.Fact              = DOFACT;
     options.Equil             = YES;
     options.ParSymbFact       = NO;
     options.ColPerm           = MMD_AT_PLUS_A;
     options.RowPerm           = LargeDiag;
     options.ReplaceTinyPivot  = YES;
     options.IterRefine        = DOUBLE;
     options.Trans             = NOTRANS;
     options.SolveInitialized  = NO;
     options.RefineInitialized = NO;
     options.PrintStat         = YES;
  */
  set_default_options_dist(&options);

  MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
  MPI_Comm_size(((PetscObject)A)->comm,&size);
  /* Default number of process columns and rows: start npcol near sqrt(size) and
     decrease it until the grid dimensions exactly divide size */
  lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
  if (!lu->npcol) lu->npcol = 1;
  while (lu->npcol > 0) {
    lu->nprow = PetscMPIIntCast(size/lu->npcol);
    if (size == lu->nprow * lu->npcol) break;
    lu->npcol--;
  }
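  /* Example (traced from the loop above): size=6 gives npcol=2 and nprow=3, i.e. a
     3 x 2 process grid; a prime size such as 7 falls through to a 7 x 1 grid. */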

  PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
    PetscOptionsInt("-mat_superlu_dist_r","Number of rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
    PetscOptionsInt("-mat_superlu_dist_c","Number of columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
    if (size != lu->nprow * lu->npcol)
      SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);

    lu->MatInputMode = DISTRIBUTED;
    PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);
    if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

    PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
    if (!flg) {
      options.Equil = NO;
    }

    PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
    if (flg) {
      switch (indx) {
      case 0:
        options.RowPerm = LargeDiag;
        break;
      case 1:
        options.RowPerm = NOROWPERM;
        break;
      }
    }

    PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);
    if (flg) {
      switch (indx) {
      case 0:
        options.ColPerm = MMD_AT_PLUS_A;
        break;
      case 1:
        options.ColPerm = NATURAL;
        break;
      case 2:
        options.ColPerm = MMD_ATA;
        break;
      case 3:
        options.ColPerm = PARMETIS;
        break;
      }
    }

    PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
    if (!flg) {
      options.ReplaceTinyPivot = NO;
    }

    options.ParSymbFact = NO;
    PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
    if (flg){
#if defined(PETSC_HAVE_PARMETIS)
      options.ParSymbFact = YES;
      options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of the user ordering setting */
#else
      PetscPrintf(((PetscObject)A)->comm,"parsymbfact needs ParMETIS; ignoring this option\n");
#endif
    }

    lu->FactPattern = SamePattern;
    PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
    if (flg) {
      switch (indx) {
      case 0:
        lu->FactPattern = SamePattern;
        break;
      case 1:
        lu->FactPattern = SamePattern_SameRowPerm;
        break;
      }
    }

    options.IterRefine = NOREFINE;
    PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
    if (flg) {
      options.IterRefine = DOUBLE;
    }

    if (PetscLogPrintInfo) {
      options.PrintStat = YES;
    } else {
      options.PrintStat = NO;
    }
    PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
                      (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
  PetscOptionsEnd();

  lu->options             = options;
  lu->options.Fact        = DOFACT;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  *F = B;
  return(0);
}

PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{

  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{

  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
  superlu_options_t options;
  PetscErrorCode    ierr;

  /* check whether the matrix is of SuperLU_DIST type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

  options = lu->options;
  PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
  PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
  PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
  PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
  PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
  PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL" : "LargeDiag");
  if (options.ColPerm == NATURAL) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
  } else if (options.ColPerm == MMD_AT_PLUS_A) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
  } else if (options.ColPerm == MMD_ATA) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
  } else if (options.ColPerm == PARMETIS) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);

  if (lu->FactPattern == SamePattern){
    PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
  } else {
    PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
  }
  return(0);
}

PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;

  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      MatFactorInfo_SuperLU_DIST(A,viewer);
    }
  }
  return(0);
}


/*MC
  MAT_SOLVER_SUPERLU_DIST - Parallel direct solver package for LU factorization

   Works with AIJ matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_matinput <GLOBAL,DISTRIBUTED> - matrix input mode
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
. -mat_superlu_dist_colperm <MMD_AT_PLUS_A,NATURAL,MMD_ATA,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_parsymbfact - use parallel symbolic factorization (requires ParMETIS)
. -mat_superlu_dist_fact <SamePattern,SamePattern_SameRowPerm> - sparsity pattern to reuse for repeated numeric factorizations
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information

   Level: beginner

.seealso: PCLU, PCFactorSetMatSolverPackage(), MatSolverPackage
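
   Example usage (a minimal sketch; assumes ksp is a KSP created for an assembled AIJ matrix):
.vb
      PC pc;
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCLU);
      PCFactorSetMatSolverPackage(pc,MAT_SOLVER_SUPERLU_DIST);
.ve
   or equivalently at run time:
.vb
      -pc_type lu -pc_factor_mat_solver_package superlu_dist
.ve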

M*/