Actual source code: mkl_cpardiso.c

petsc-3.14.2 2020-12-03
Report Typos and Errors

  2: #include <petscsys.h>
  3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  4: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

  6: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
  7: #define MKL_ILP64
  8: #endif
  9: #include <mkl.h>
 10: #include <mkl_cluster_sparse_solver.h>

 12: /*
 13:  *  Possible mkl_cpardiso phases that controls the execution of the solver.
 14:  *  For more information check mkl_cpardiso manual.
 15:  */
 16: #define JOB_ANALYSIS 11
 17: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
 18: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
 19: #define JOB_NUMERICAL_FACTORIZATION 22
 20: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
 21: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
 22: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
 23: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
 24: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
 25: #define JOB_RELEASE_OF_LU_MEMORY 0
 26: #define JOB_RELEASE_OF_ALL_MEMORY -1

 28: #define IPARM_SIZE 64
 29: #define INT_TYPE MKL_INT

 31: static const char *Err_MSG_CPardiso(int errNo) {
 32:   switch (errNo) {
 33:     case -1:
 34:       return "input inconsistent"; break;
 35:     case -2:
 36:       return "not enough memory"; break;
 37:     case -3:
 38:       return "reordering problem"; break;
 39:     case -4:
 40:       return "zero pivot, numerical factorization or iterative refinement problem"; break;
 41:     case -5:
 42:       return "unclassified (internal) error"; break;
 43:     case -6:
 44:       return "preordering failed (matrix types 11, 13 only)"; break;
 45:     case -7:
 46:       return "diagonal matrix problem"; break;
 47:     case -8:
 48:       return "32-bit integer overflow problem"; break;
 49:     case -9:
 50:       return "not enough memory for OOC"; break;
 51:     case -10:
 52:       return "problems with opening OOC temporary files"; break;
 53:     case -11:
 54:       return "read/write problems with the OOC data file"; break;
 55:     default :
 56:       return "unknown error";
 57:   }
 58: }

 60: /*
 61:  *  Internal data structure.
 62:  *  For more information check mkl_cpardiso manual.
 63:  */

 65: typedef struct {

 67:   /* Configuration vector */
 68:   INT_TYPE     iparm[IPARM_SIZE];

 70:   /*
 71:    * Internal mkl_cpardiso memory location.
 72:    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
 73:    */
 74:   void         *pt[IPARM_SIZE];

 76:   MPI_Fint     comm_mkl_cpardiso;

 78:   /* Basic mkl_cpardiso info*/
 79:   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;

 81:   /* Matrix structure */
 82:   PetscScalar  *a;

 84:   INT_TYPE     *ia, *ja;

 86:   /* Number of non-zero elements */
 87:   INT_TYPE     nz;

 89:   /* Row permutaton vector*/
 90:   INT_TYPE     *perm;

 92:   /* Define is matrix preserve sparce structure. */
 93:   MatStructure matstruc;

 95:   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);

 97:   /* True if mkl_cpardiso function have been used. */
 98:   PetscBool CleanUp;
 99: } Mat_MKL_CPARDISO;

101: /*
102:  * Copy the elements of matrix A.
103:  * Input:
104:  *   - Mat A: MATSEQAIJ matrix
105:  *   - int shift: matrix index.
106:  *     - 0 for c representation
107:  *     - 1 for fortran representation
108:  *   - MatReuse reuse:
109:  *     - MAT_INITIAL_MATRIX: Create a new aij representation
110:  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
111:  * Output:
112:  *   - int *nnz: Number of nonzero-elements.
113:  *   - int **r pointer to i index
114:  *   - int **c pointer to j elements
115:  *   - MATRIXTYPE **v: Non-zero elements
116:  */
117: PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
118: {
119:   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;

122:   *v=aa->a;
123:   if (reuse == MAT_INITIAL_MATRIX) {
124:     *r   = (INT_TYPE*)aa->i;
125:     *c   = (INT_TYPE*)aa->j;
126:     *nnz = aa->nz;
127:   }
128:   return(0);
129: }

131: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
132: {
133:   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
134:   PetscErrorCode    ierr;
135:   PetscInt          rstart,nz,i,j,countA,countB;
136:   PetscInt          *row,*col;
137:   const PetscScalar *av, *bv;
138:   PetscScalar       *val;
139:   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
140:   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
141:   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
142:   PetscInt          colA_start,jB,jcol;

145:   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart;
146:   av=aa->a; bv=bb->a;

148:   garray = mat->garray;

150:   if (reuse == MAT_INITIAL_MATRIX) {
151:     nz   = aa->nz + bb->nz;
152:     *nnz = nz;
153:     PetscMalloc3(m+1,&row,nz,&col,nz,&val);
154:     *r = row; *c = col; *v = val;
155:   } else {
156:     row = *r; col = *c; val = *v;
157:   }

159:   nz = 0;
160:   for (i=0; i<m; i++) {
161:     row[i] = nz;
162:     countA     = ai[i+1] - ai[i];
163:     countB     = bi[i+1] - bi[i];
164:     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
165:     bjj        = bj + bi[i];

167:     /* B part, smaller col index */
168:     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
169:     jB         = 0;
170:     for (j=0; j<countB; j++) {
171:       jcol = garray[bjj[j]];
172:       if (jcol > colA_start) break;
173:       col[nz]   = jcol;
174:       val[nz++] = *bv++;
175:     }
176:     jB = j;

178:     /* A part */
179:     for (j=0; j<countA; j++) {
180:       col[nz]   = rstart + ajj[j];
181:       val[nz++] = *av++;
182:     }

184:     /* B part, larger col index */
185:     for (j=jB; j<countB; j++) {
186:       col[nz]   = garray[bjj[j]];
187:       val[nz++] = *bv++;
188:     }
189:   }
190:   row[m] = nz;

192:   return(0);
193: }

195: PetscErrorCode MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
196: {
197:   const PetscInt    *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
198:   PetscErrorCode    ierr;
199:   PetscInt          rstart,nz,i,j,countA,countB;
200:   PetscInt          *row,*col;
201:   const PetscScalar *av, *bv;
202:   PetscScalar       *val;
203:   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
204:   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
205:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
206:   PetscInt          colA_start,jB,jcol;

209:   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
210:   av=aa->a; bv=bb->a;

212:   garray = mat->garray;

214:   if (reuse == MAT_INITIAL_MATRIX) {
215:     nz   = aa->nz + bb->nz;
216:     *nnz = nz;
217:     PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
218:     *r = row; *c = col; *v = val;
219:   } else {
220:     row = *r; col = *c; val = *v;
221:   }

223:   nz = 0;
224:   for (i=0; i<m; i++) {
225:     row[i]     = nz+1;
226:     countA     = ai[i+1] - ai[i];
227:     countB     = bi[i+1] - bi[i];
228:     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
229:     bjj        = bj + bi[i];

231:     /* B part, smaller col index */
232:     colA_start = rstart + (countA > 0 ? ajj[0] : 0); /* the smallest global col index of A */
233:     jB         = 0;
234:     for (j=0; j<countB; j++) {
235:       jcol = garray[bjj[j]];
236:       if (jcol > colA_start) break;
237:       col[nz++] = jcol + 1;
238:     }
239:     jB = j;
240:     PetscArraycpy(val,bv,jB*bs2);
241:     val += jB*bs2;
242:     bv  += jB*bs2;

244:     /* A part */
245:     for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
246:     PetscArraycpy(val,av,countA*bs2);
247:     val += countA*bs2;
248:     av  += countA*bs2;

250:     /* B part, larger col index */
251:     for (j=jB; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
252:     PetscArraycpy(val,bv,(countB-jB)*bs2);
253:     val += (countB-jB)*bs2;
254:     bv  += (countB-jB)*bs2;
255:   }
256:   row[m] = nz+1;

258:   return(0);
259: }

261: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
262: {
263:   const PetscInt    *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
264:   PetscErrorCode    ierr;
265:   PetscInt          rstart,nz,i,j,countA,countB;
266:   PetscInt          *row,*col;
267:   const PetscScalar *av, *bv;
268:   PetscScalar       *val;
269:   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
270:   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
271:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;

274:   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
275:   av=aa->a; bv=bb->a;

277:   garray = mat->garray;

279:   if (reuse == MAT_INITIAL_MATRIX) {
280:     nz   = aa->nz + bb->nz;
281:     *nnz = nz;
282:     PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
283:     *r = row; *c = col; *v = val;
284:   } else {
285:     row = *r; col = *c; val = *v;
286:   }

288:   nz = 0;
289:   for (i=0; i<m; i++) {
290:     row[i]     = nz+1;
291:     countA     = ai[i+1] - ai[i];
292:     countB     = bi[i+1] - bi[i];
293:     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
294:     bjj        = bj + bi[i];

296:     /* A part */
297:     for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
298:     PetscArraycpy(val,av,countA*bs2);
299:     val += countA*bs2;
300:     av  += countA*bs2;

302:     /* B part, larger col index */
303:     for (j=0; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
304:     PetscArraycpy(val,bv,countB*bs2);
305:     val += countB*bs2;
306:     bv  += countB*bs2;
307:   }
308:   row[m] = nz+1;

310:   return(0);
311: }

313: /*
314:  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
315:  */
316: PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
317: {
318:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
319:   MPI_Comm         comm;
320:   PetscErrorCode   ierr;

323:   /* Terminate instance, deallocate memories */
324:   if (mat_mkl_cpardiso->CleanUp) {
325:     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;

327:     cluster_sparse_solver (
328:       mat_mkl_cpardiso->pt,
329:       &mat_mkl_cpardiso->maxfct,
330:       &mat_mkl_cpardiso->mnum,
331:       &mat_mkl_cpardiso->mtype,
332:       &mat_mkl_cpardiso->phase,
333:       &mat_mkl_cpardiso->n,
334:       NULL,
335:       NULL,
336:       NULL,
337:       mat_mkl_cpardiso->perm,
338:       &mat_mkl_cpardiso->nrhs,
339:       mat_mkl_cpardiso->iparm,
340:       &mat_mkl_cpardiso->msglvl,
341:       NULL,
342:       NULL,
343:       &mat_mkl_cpardiso->comm_mkl_cpardiso,
344:       (PetscInt*)&mat_mkl_cpardiso->err);
345:   }

347:   if (mat_mkl_cpardiso->ConvertToTriples != MatCopy_seqaij_seqaij_MKL_CPARDISO) {
348:     PetscFree3(mat_mkl_cpardiso->ia,mat_mkl_cpardiso->ja,mat_mkl_cpardiso->a);
349:   }
350:   comm = MPI_Comm_f2c(mat_mkl_cpardiso->comm_mkl_cpardiso);
351:   MPI_Comm_free(&comm);
352:   PetscFree(A->data);

354:   /* clear composed functions */
355:   PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
356:   PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);
357:   return(0);
358: }

360: /*
361:  * Computes Ax = b
362:  */
363: PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
364: {
365:   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
366:   PetscErrorCode    ierr;
367:   PetscScalar       *xarray;
368:   const PetscScalar *barray;

371:   mat_mkl_cpardiso->nrhs = 1;
372:   VecGetArray(x,&xarray);
373:   VecGetArrayRead(b,&barray);

375:   /* solve phase */
376:   /*-------------*/
377:   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
378:   cluster_sparse_solver (
379:     mat_mkl_cpardiso->pt,
380:     &mat_mkl_cpardiso->maxfct,
381:     &mat_mkl_cpardiso->mnum,
382:     &mat_mkl_cpardiso->mtype,
383:     &mat_mkl_cpardiso->phase,
384:     &mat_mkl_cpardiso->n,
385:     mat_mkl_cpardiso->a,
386:     mat_mkl_cpardiso->ia,
387:     mat_mkl_cpardiso->ja,
388:     mat_mkl_cpardiso->perm,
389:     &mat_mkl_cpardiso->nrhs,
390:     mat_mkl_cpardiso->iparm,
391:     &mat_mkl_cpardiso->msglvl,
392:     (void*)barray,
393:     (void*)xarray,
394:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
395:     (PetscInt*)&mat_mkl_cpardiso->err);

397:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

399:   VecRestoreArray(x,&xarray);
400:   VecRestoreArrayRead(b,&barray);
401:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
402:   return(0);
403: }

405: PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
406: {
407:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
408:   PetscErrorCode   ierr;

411: #if defined(PETSC_USE_COMPLEX)
412:   mat_mkl_cpardiso->iparm[12 - 1] = 1;
413: #else
414:   mat_mkl_cpardiso->iparm[12 - 1] = 2;
415: #endif
416:   MatSolve_MKL_CPARDISO(A,b,x);
417:   mat_mkl_cpardiso->iparm[12 - 1] = 0;
418:   return(0);
419: }

421: PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
422: {
423:   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
424:   PetscErrorCode    ierr;
425:   PetscScalar       *xarray;
426:   const PetscScalar *barray;

429:   MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);

431:   if (mat_mkl_cpardiso->nrhs > 0) {
432:     MatDenseGetArrayRead(B,&barray);
433:     MatDenseGetArray(X,&xarray);

435:     if (barray == xarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"B and X cannot share the same memory location");

437:     /* solve phase */
438:     /*-------------*/
439:     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
440:     cluster_sparse_solver (
441:       mat_mkl_cpardiso->pt,
442:       &mat_mkl_cpardiso->maxfct,
443:       &mat_mkl_cpardiso->mnum,
444:       &mat_mkl_cpardiso->mtype,
445:       &mat_mkl_cpardiso->phase,
446:       &mat_mkl_cpardiso->n,
447:       mat_mkl_cpardiso->a,
448:       mat_mkl_cpardiso->ia,
449:       mat_mkl_cpardiso->ja,
450:       mat_mkl_cpardiso->perm,
451:       &mat_mkl_cpardiso->nrhs,
452:       mat_mkl_cpardiso->iparm,
453:       &mat_mkl_cpardiso->msglvl,
454:       (void*)barray,
455:       (void*)xarray,
456:       &mat_mkl_cpardiso->comm_mkl_cpardiso,
457:       (PetscInt*)&mat_mkl_cpardiso->err);
458:     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
459:     MatDenseRestoreArrayRead(B,&barray);
460:     MatDenseRestoreArray(X,&xarray);

462:   }
463:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
464:   return(0);
465: }

467: /*
468:  * LU Decomposition
469:  */
470: PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
471: {
472:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
473:   PetscErrorCode   ierr;

476:   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
477:   (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);

479:   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
480:   cluster_sparse_solver (
481:     mat_mkl_cpardiso->pt,
482:     &mat_mkl_cpardiso->maxfct,
483:     &mat_mkl_cpardiso->mnum,
484:     &mat_mkl_cpardiso->mtype,
485:     &mat_mkl_cpardiso->phase,
486:     &mat_mkl_cpardiso->n,
487:     mat_mkl_cpardiso->a,
488:     mat_mkl_cpardiso->ia,
489:     mat_mkl_cpardiso->ja,
490:     mat_mkl_cpardiso->perm,
491:     &mat_mkl_cpardiso->nrhs,
492:     mat_mkl_cpardiso->iparm,
493:     &mat_mkl_cpardiso->msglvl,
494:     NULL,
495:     NULL,
496:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
497:     &mat_mkl_cpardiso->err);
498:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

500:   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
501:   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
502:   return(0);
503: }

505: /* Sets mkl_cpardiso options from the options database */
506: PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
507: {
508:   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
509:   PetscErrorCode      ierr;
510:   PetscInt            icntl,threads;
511:   PetscBool           flg;

514:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");
515:   PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);
516:   if (flg) mkl_set_num_threads((int)threads);

518:   PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);
519:   if (flg) mat_mkl_cpardiso->maxfct = icntl;

521:   PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);
522:   if (flg) mat_mkl_cpardiso->mnum = icntl;

524:   PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);
525:   if (flg) mat_mkl_cpardiso->msglvl = icntl;

527:   PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);
528:   if (flg) mat_mkl_cpardiso->mtype = icntl;
529:   PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);

531:   if (flg && icntl != 0) {
532:     PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);
533:     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;

535:     PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);
536:     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;

538:     PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);
539:     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;

541:     PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);
542:     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;

544:     PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);
545:     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;

547:     PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);
548:     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;

550:     PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);
551:     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;

553:     PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);
554:     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;

556:     PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
557:       &flg);
558:     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;

560:     PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
561:       &flg);
562:     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;

564:     PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);
565:     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;

567:     PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);
568:     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;

570:     PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);
571:     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;

573:     PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);
574:     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;

576:     PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);
577:     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;

579:     PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);
580:     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;

582:     PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);
583:     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;

585:     PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);
586:     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
587:   }

589:   PetscOptionsEnd();
590:   return(0);
591: }

593: PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
594: {
595:   PetscErrorCode  ierr;
596:   PetscInt        bs;
597:   PetscBool       match;
598:   PetscMPIInt     size;
599:   MPI_Comm        comm;


603:   MPI_Comm_dup(PetscObjectComm((PetscObject)A),&comm);
604:   MPI_Comm_size(comm, &size);
605:   mat_mkl_cpardiso->comm_mkl_cpardiso = MPI_Comm_c2f(comm);

607:   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
608:   mat_mkl_cpardiso->maxfct = 1;
609:   mat_mkl_cpardiso->mnum = 1;
610:   mat_mkl_cpardiso->n = A->rmap->N;
611:   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
612:   mat_mkl_cpardiso->msglvl = 0;
613:   mat_mkl_cpardiso->nrhs = 1;
614:   mat_mkl_cpardiso->err = 0;
615:   mat_mkl_cpardiso->phase = -1;
616: #if defined(PETSC_USE_COMPLEX)
617:   mat_mkl_cpardiso->mtype = 13;
618: #else
619:   mat_mkl_cpardiso->mtype = 11;
620: #endif

622: #if defined(PETSC_USE_REAL_SINGLE)
623:   mat_mkl_cpardiso->iparm[27] = 1;
624: #else
625:   mat_mkl_cpardiso->iparm[27] = 0;
626: #endif

628:   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
629:   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
630:   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
631:   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
632:   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
633:   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
634:   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
635:   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
636:   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
637:   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */

639:   mat_mkl_cpardiso->iparm[39] = 0;
640:   if (size > 1) {
641:     mat_mkl_cpardiso->iparm[39] = 2;
642:     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
643:     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
644:   }
645:   PetscObjectTypeCompareAny((PetscObject)A,&match,MATMPIBAIJ,MATMPISBAIJ,"");
646:   if (match) {
647:     MatGetBlockSize(A,&bs);
648:     mat_mkl_cpardiso->iparm[36] = bs;
649:     mat_mkl_cpardiso->iparm[40] /= bs;
650:     mat_mkl_cpardiso->iparm[41] /= bs;
651:     mat_mkl_cpardiso->iparm[40]++;
652:     mat_mkl_cpardiso->iparm[41]++;
653:     mat_mkl_cpardiso->iparm[34] = 0;  /* Fortran style */
654:   } else {
655:     mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
656:   }

658:   mat_mkl_cpardiso->perm = 0;
659:   return(0);
660: }

662: /*
663:  * Symbolic decomposition. Mkl_Pardiso analysis phase.
664:  */
665: PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
666: {
667:   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
668:   PetscErrorCode  ierr;

671:   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;

673:   /* Set MKL_CPARDISO options from the options database */
674:   PetscSetMKL_CPARDISOFromOptions(F,A);
675:   (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);

677:   mat_mkl_cpardiso->n = A->rmap->N;
678:   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];

680:   /* analysis phase */
681:   /*----------------*/
682:   mat_mkl_cpardiso->phase = JOB_ANALYSIS;

684:   cluster_sparse_solver (
685:     mat_mkl_cpardiso->pt,
686:     &mat_mkl_cpardiso->maxfct,
687:     &mat_mkl_cpardiso->mnum,
688:     &mat_mkl_cpardiso->mtype,
689:     &mat_mkl_cpardiso->phase,
690:     &mat_mkl_cpardiso->n,
691:     mat_mkl_cpardiso->a,
692:     mat_mkl_cpardiso->ia,
693:     mat_mkl_cpardiso->ja,
694:     mat_mkl_cpardiso->perm,
695:     &mat_mkl_cpardiso->nrhs,
696:     mat_mkl_cpardiso->iparm,
697:     &mat_mkl_cpardiso->msglvl,
698:     NULL,
699:     NULL,
700:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
701:     (PetscInt*)&mat_mkl_cpardiso->err);

703:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

705:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
706:   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
707:   F->ops->solve           = MatSolve_MKL_CPARDISO;
708:   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
709:   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
710:   return(0);
711: }

713: PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS perm,const MatFactorInfo *info)
714: {
715:   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
716:   PetscErrorCode  ierr;

719:   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;


722:   /* Set MKL_CPARDISO options from the options database */
723:   PetscSetMKL_CPARDISOFromOptions(F,A);
724:   (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);

726:   mat_mkl_cpardiso->n = A->rmap->N;
727:   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
728: #if defined(PETSC_USE_COMPLEX)
729:   SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name);
730: #endif
731:   if (A->spd_set && A->spd) mat_mkl_cpardiso->mtype = 2;
732:   else                      mat_mkl_cpardiso->mtype = -2;

734:   /* analysis phase */
735:   /*----------------*/
736:   mat_mkl_cpardiso->phase = JOB_ANALYSIS;

738:   cluster_sparse_solver (
739:     mat_mkl_cpardiso->pt,
740:     &mat_mkl_cpardiso->maxfct,
741:     &mat_mkl_cpardiso->mnum,
742:     &mat_mkl_cpardiso->mtype,
743:     &mat_mkl_cpardiso->phase,
744:     &mat_mkl_cpardiso->n,
745:     mat_mkl_cpardiso->a,
746:     mat_mkl_cpardiso->ia,
747:     mat_mkl_cpardiso->ja,
748:     mat_mkl_cpardiso->perm,
749:     &mat_mkl_cpardiso->nrhs,
750:     mat_mkl_cpardiso->iparm,
751:     &mat_mkl_cpardiso->msglvl,
752:     NULL,
753:     NULL,
754:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
755:     (PetscInt*)&mat_mkl_cpardiso->err);

757:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

759:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
760:   F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_CPARDISO;
761:   F->ops->solve                 = MatSolve_MKL_CPARDISO;
762:   F->ops->solvetranspose        = MatSolveTranspose_MKL_CPARDISO;
763:   F->ops->matsolve              = MatMatSolve_MKL_CPARDISO;
764:   return(0);
765: }

767: PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
768: {
769:   PetscErrorCode    ierr;
770:   PetscBool         iascii;
771:   PetscViewerFormat format;
772:   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
773:   PetscInt          i;

776:   /* check if matrix is mkl_cpardiso type */
777:   if (A->ops->solve != MatSolve_MKL_CPARDISO) return(0);

779:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
780:   if (iascii) {
781:     PetscViewerGetFormat(viewer,&format);
782:     if (format == PETSC_VIEWER_ASCII_INFO) {
783:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");
784:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);
785:       for (i = 1; i <= 64; i++) {
786:         PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);
787:       }
788:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);
789:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);
790:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);
791:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);
792:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);
793:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);
794:     }
795:   }
796:   return(0);
797: }

799: PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
800: {
801:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;

804:   info->block_size        = 1.0;
805:   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
806:   info->nz_unneeded       = 0.0;
807:   info->assemblies        = 0.0;
808:   info->mallocs           = 0.0;
809:   info->memory            = 0.0;
810:   info->fill_ratio_given  = 0;
811:   info->fill_ratio_needed = 0;
812:   info->factor_mallocs    = 0;
813:   return(0);
814: }

816: PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
817: {
818:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)F->data;

821:   if (icntl <= 64) {
822:     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
823:   } else {
824:     if (icntl == 65) mkl_set_num_threads((int)ival);
825:     else if (icntl == 66) mat_mkl_cpardiso->maxfct = ival;
826:     else if (icntl == 67) mat_mkl_cpardiso->mnum = ival;
827:     else if (icntl == 68) mat_mkl_cpardiso->msglvl = ival;
828:     else if (icntl == 69) mat_mkl_cpardiso->mtype = ival;
829:   }
830:   return(0);
831: }

833: /*@
834:   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters

836:    Logically Collective on Mat

838:    Input Parameters:
839: +  F - the factored matrix obtained by calling MatGetFactor()
840: .  icntl - index of Mkl_Pardiso parameter
841: -  ival - value of Mkl_Pardiso parameter

843:   Options Database:
844: .   -mat_mkl_cpardiso_<icntl> <ival>

846:    Level: Intermediate

848:    Notes:
849:     This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
850:           database approach when working with TS, SNES, or KSP.

852:    References:
853: .      Mkl_Pardiso Users' Guide

855: .seealso: MatGetFactor()
856: @*/
857: PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
858: {

862:   PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
863:   return(0);
864: }

866: static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
867: {
869:   *type = MATSOLVERMKL_CPARDISO;
870:   return(0);
871: }

873: /* MatGetFactor for MPI AIJ matrices */
874: static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
875: {
876:   Mat              B;
877:   PetscErrorCode   ierr;
878:   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
879:   PetscBool        isSeqAIJ,isMPIBAIJ,isMPISBAIJ;

882:   /* Create the factorization matrix */

884:   PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
885:   PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&isMPIBAIJ);
886:   PetscObjectTypeCompare((PetscObject)A,MATMPISBAIJ,&isMPISBAIJ);
887:   MatCreate(PetscObjectComm((PetscObject)A),&B);
888:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
889:   PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);
890:   MatSetUp(B);

892:   PetscNewLog(B,&mat_mkl_cpardiso);

894:   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
895:   else if (isMPIBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO;
896:   else if (isMPISBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO;
897:   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;

899:   if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
900:   else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_CPARDISO;
901:   B->ops->destroy = MatDestroy_MKL_CPARDISO;

903:   B->ops->view    = MatView_MKL_CPARDISO;
904:   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;

906:   B->factortype   = ftype;
907:   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */

909:   B->data = mat_mkl_cpardiso;

911:   /* set solvertype */
912:   PetscFree(B->solvertype);
913:   PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);

915:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);
916:   PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);
917:   PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);

919:   *F = B;
920:   return(0);
921: }

923: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
924: {

928:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
929:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
930:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
931:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_mpiaij_mkl_cpardiso);
932:   return(0);
933: }