Actual source code: mumps.c
/*
    Provides an interface to the MUMPS sparse solver
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

#if defined(PETSC_USE_COMPLEX)
#include <zmumps_c.h>
#else
#include <dmumps_c.h>
#endif

#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2
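
/*
   A MUMPS instance is driven by setting id.job and calling the library once per
   phase. Illustrative sketch (not part of this file) of the life cycle that the
   JOB_* values above encode, using the real-arithmetic interface; comm is an
   assumed MPI communicator:

     DMUMPS_STRUC_C id;
     id.comm_fortran = MPI_Comm_c2f(comm);
     id.par = 1; id.sym = 0;
     id.job = JOB_INIT;         dmumps_c(&id);  // initialize the instance
     // ... fill id.n, id.nz, id.irn, id.jcn, id.a with the matrix in triples ...
     id.job = JOB_FACTSYMBOLIC; dmumps_c(&id);  // analysis (symbolic) phase
     id.job = JOB_FACTNUMERIC;  dmumps_c(&id);  // numerical factorization
     id.job = JOB_SOLVE;        dmumps_c(&id);  // solve; rhs/solution in id.rhs
     id.job = JOB_END;          dmumps_c(&id);  // release MUMPS internal data
*/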

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
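
/*
   Example: the MUMPS Users' Guide documents its control arrays with Fortran
   (1-based) indexing, so ICNTL(7) in the manual is icntl[6] in the C struct.
   With the macros above,

     lu->id.ICNTL(7) = 2;    expands to    lu->id.icntl[7-1] = 2;

   which keeps this code aligned with the documentation.
*/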

typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
  MatStructure   matstruc;
  PetscMPIInt    myid,size;
  PetscInt       *irn,*jcn,nz,sym,nSolve;
  PetscScalar    *val;
  MPI_Comm       comm_mumps;
  VecScatter     scat_rhs,scat_sol;
  PetscBool      isAIJ,CleanUpMUMPS;
  Vec            b_seq,x_seq;
  PetscErrorCode (*Destroy)(Mat);
  PetscErrorCode (*ConvertToTriples)(Mat,int,MatReuse,int*,int**,int**,PetscScalar**);
} Mat_MUMPS;

/* MatConvertToTriples_A_B */
/* convert a PETSc matrix to triples: row[nz], col[nz], val[nz] */
/*
  input:
    A     - matrix in aij, baij, or sbaij (bs=1) format
    shift - 0: C-style output triples; 1: Fortran-style output triples
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)
*/
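
/*
   Worked example (illustrative): with shift=1, the 2x2 sequential AIJ matrix

       [2 0]
       [3 4]

   is returned as the Fortran-style triples

       nnz = 3,  r = {1,2,2},  c = {1,1,2},  v = {2,3,4}
*/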

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz; ai = aa->i; aj = aa->j;
    *nnz = nz;
    PetscMalloc(2*nz*sizeof(PetscInt),&row);
    col = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  return(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs=A->rmap->bs,bs2=aa->bs2,M=A->rmap->N/bs;
  PetscInt       nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i; aj = aa->j;
    nz = bs2*aa->nz;
    *nnz = nz;
    PetscMalloc(2*nz*sizeof(PetscInt),&row);
    col = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  return(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz; ai = aa->i; aj = aa->j; *v = aa->a;
    *nnz = nz;
    PetscMalloc(2*nz*sizeof(PetscInt),&row);
    col = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  return(0);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data; /* the input matrix is seqaij */

  ai = aa->i; aj = aa->j; av = aa->a;
  adiag = aa->diag;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = M + (aa->nz-M)/2;
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)),&row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - adiag[i];
      ajj = aj + adiag[i];
      v1  = av + adiag[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - adiag[i];
      ajj = aj + adiag[i];
      v1  = av + adiag[i];
      for (j=0; j<rnz; j++) {
        val[nz++] = v1[j];
      }
    }
  }
  return(0);
}
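
/*
   The count nz = M + (aa->nz-M)/2 used above assumes a structurally symmetric
   AIJ matrix with a full diagonal: the aa->nz stored entries split into M
   diagonal entries plus (aa->nz-M)/2 matched off-diagonal pairs, of which only
   the upper halves are kept. Illustrative check: a 3x3 matrix with nz=7
   (3 diagonal entries + 2 symmetric off-diagonal pairs) yields
   3 + (7-3)/2 = 5 upper-triangular triples.
*/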

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;

  ai = aa->i; aj = aa->j; bi = bb->i; bj = bb->j; rstart = A->rmap->rstart;
  garray = mat->garray;
  av = aa->a; bv = bb->a;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)),&row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  return(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  ai = aa->i; aj = aa->j; bi = bb->i; bj = bb->j; rstart = A->rmap->rstart;
  garray = mat->garray;
  av = aa->a; bv = bb->a;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)),&row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  return(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai=aa->i,*bi=bb->i,*aj=aa->j,*bj=bb->j,*ajj,*bjj;
  const PetscInt    *garray=mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs=A->rmap->bs,bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a,*bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)),&row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  return(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt    *ai,*aj,*adiag,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  ai = aa->i; aj = aa->j; adiag = aa->diag;
  bi = bb->i; bj = bb->j; garray = mat->garray;
  av = aa->a; bv = bb->a;
  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonal entries */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of the upper triangular part of mat */
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)),&row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1  = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];
    v2  = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (garray[bjj[j]] > rstart) { /* keep only the upper triangular part */
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  return(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;

  if (lu && lu->CleanUpMUMPS) {
    /* terminate the MUMPS instance and deallocate its memory */
    PetscFree2(lu->id.sol_loc,lu->id.isol_loc);
    VecScatterDestroy(&lu->scat_rhs);
    VecDestroy(&lu->b_seq);
    VecScatterDestroy(&lu->scat_sol);
    VecDestroy(&lu->x_seq);
    PetscFree(lu->id.perm_in);
    PetscFree(lu->irn);
    lu->id.job = JOB_END;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    MPI_Comm_free(&(lu->comm_mumps));
  }
  if (lu && lu->Destroy) {
    (lu->Destroy)(A);
  }
  PetscFree(A->spptr);

  /* clear composed functions */
  PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",PETSC_NULL);
  PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMumpsSetIcntl_C","",PETSC_NULL);
  return(0);
}

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS   *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar *array;
  Vec         b_seq;
  IS          is_iden,is_petsc;
  PetscInt    i;

  lu->id.nrhs = 1;
  b_seq = lu->b_seq;
  if (lu->size > 1) {
    /* MUMPS only supports a centralized rhs; scatter b into a sequential rhs vector */
    VecScatterBegin(lu->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(lu->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);
    if (!lu->myid) {VecGetArray(b_seq,&array);}
  } else { /* size == 1 */
    VecCopy(b,x);
    VecGetArray(x,&array);
  }
  if (!lu->myid) { /* define rhs on the host */
    lu->id.nrhs = 1;
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = JOB_SOLVE;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  if (lu->size > 1) { /* convert the MUMPS distributed solution to a PETSc mpi x */
    if (!lu->nSolve) { /* create scatter scat_sol */
      ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden); /* from */
      for (i=0; i<lu->id.lsol_loc; i++) {
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,PETSC_COPY_VALUES,&is_petsc); /* to */
      VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);
      ISDestroy(&is_iden);
      ISDestroy(&is_petsc);
    }
    VecScatterBegin(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
  }
  lu->nSolve++;
  return(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;

  lu->id.ICNTL(9) = 0; /* solve with A^T (ICNTL(9) != 1) */
  MatSolve_MUMPS(A,b,x);
  lu->id.ICNTL(9) = 1; /* restore the default: solve with A */
  return(0);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscBool flg;

  PetscTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
  if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be a MATDENSE matrix");
  PetscTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
  if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be a MATDENSE matrix");
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_MUMPS() is not implemented yet");
  return(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
    F - numeric factor
  output:
    nneg  - total number of negative pivots
    nzero - 0
    npos  - (global dimension of F) - nneg
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS   *lu=(Mat_MUMPS*)F->spptr;
  PetscMPIInt size;

  MPI_Comm_size(((PetscObject)F)->comm,&size);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix; set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && lu->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d; -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia\n",lu->id.ICNTL(13));
  if (nneg) {
    if (!lu->myid) {
      *nneg = lu->id.INFOG(12);
    }
    MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);
  }
  if (nzero) *nzero = 0;
  if (npos)  *npos = F->rmap->N - (*nneg);
  return(0);
}
#endif /* !defined(PETSC_USE_COMPLEX) */
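
/*
   Usage sketch for the inertia computation (illustrative; assumes a symmetric
   matrix A, an ordering is, and a MatFactorInfo info already set up by the
   caller; error checking omitted):

     Mat      F;
     PetscInt nneg,nzero,npos;
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
     MatCholeskyFactorSymbolic(F,A,is,&info);
     MatCholeskyFactorNumeric(F,A,&info);
     MatGetInertia(F,&nneg,&nzero,&npos);

   In parallel, -mat_mumps_icntl_13 1 must also be given (see the check above).
*/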

PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)(F)->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Mat            F_diag;
  PetscBool      isMPIAIJ;

  reuse = MAT_REUSE_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* numerical factorization phase */
  /*-------------------------------*/
  lu->id.job = JOB_FACTNUMERIC;
  if (!lu->id.ICNTL(18)) { /* centralized assembled matrix input */
    if (!lu->myid) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a = (mumps_double_complex*)lu->val;
#else
      lu->id.a = lu->val;
#endif
    }
  } else {
#if defined(PETSC_USE_COMPLEX)
    lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
    lu->id.a_loc = lu->val;
#endif
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    if (lu->id.INFO(1) == -13) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
    else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
  }
  if (!lu->myid && lu->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.ICNTL(16));

  if (lu->size > 1) {
    PetscTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);
    if (isMPIAIJ) {
      F_diag = ((Mat_MPIAIJ*)(F)->data)->A;
    } else {
      F_diag = ((Mat_MPISBAIJ*)(F)->data)->A;
    }
    F_diag->assembled = PETSC_TRUE;
    if (lu->nSolve) {
      VecScatterDestroy(&lu->scat_sol);
      PetscFree2(lu->id.sol_loc,lu->id.isol_loc);
      VecDestroy(&lu->x_seq);
    }
  }
  (F)->assembled   = PETSC_TRUE;
  lu->matstruc     = SAME_NONZERO_PATTERN;
  lu->CleanUpMUMPS = PETSC_TRUE;
  lu->nSolve       = 0;

  if (lu->size > 1) {
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve) {
      /* Create x_seq=sol_loc for repeated use */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      PetscMalloc2(lsol_loc,PetscScalar,&sol_loc,lsol_loc,PetscInt,&lu->id.isol_loc);
      lu->id.lsol_loc = lsol_loc;
#if defined(PETSC_USE_COMPLEX)
      lu->id.sol_loc = (mumps_double_complex*)sol_loc;
#else
      lu->id.sol_loc = sol_loc;
#endif
      VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);
    }
  }
  return(0);
}

PetscErrorCode PetscSetMUMPSOptions(Mat F, Mat A)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       icntl;
  PetscBool      flg;

  PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"MUMPS Options","Mat");
  if (lu->size == 1) {
    lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */
  } else {
    lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */
  }

  icntl = -1;
  lu->id.ICNTL(4) = 0; /* level of printing; overwrite mumps default ICNTL(4)=2 */
  PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);
  if ((flg && icntl > 0) || PetscLogPrintInfo) {
    lu->id.ICNTL(4) = icntl; /* and use mumps default icntl(i), i=1,2,3 */
  } else { /* no output */
    lu->id.ICNTL(1) = 0; /* error messages, default=6 */
    lu->id.ICNTL(2) = 0; /* output stream for diagnostic printing, statistics, and warnings, default=0 */
    lu->id.ICNTL(3) = 0; /* output stream for global information, default=6 */
  }
  PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): column permutation and/or scaling to get a zero-free diagonal (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);
  icntl = -1;
  PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): sequential matrix ordering (0 to 7), 3 = Scotch, 5 = Metis","None",lu->id.ICNTL(7),&icntl,&flg);
  if (flg) {
    if (icntl == 1 && lu->size > 1) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order set by the user in PERM_IN is not supported by the PETSc/MUMPS interface\n");
    } else {
      lu->id.ICNTL(7) = icntl;
    }
  }

  PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 7 or 77)","None",lu->id.ICNTL(8),&lu->id.ICNTL(8),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to the linear system solved (via -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control: defines the ordering strategy with scaling constraints (0 to 3)","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control: with or without ScaLAPACK","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): Schur complement","None",lu->id.ICNTL(19),&lu->id.ICNTL(19),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core facility (0 or 1)","None",lu->id.ICNTL(22),&lu->id.ICNTL(22),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",lu->id.ICNTL(23),&lu->id.ICNTL(23),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",lu->id.ICNTL(24),&lu->id.ICNTL(24),PETSC_NULL);
  if (lu->id.ICNTL(24)) {
    lu->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }
  PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computation of a null space basis","None",lu->id.ICNTL(25),&lu->id.ICNTL(25),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): Schur options for right-hand side or solution vector","None",lu->id.ICNTL(26),&lu->id.ICNTL(26),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): experimental parameter","None",lu->id.ICNTL(27),&lu->id.ICNTL(27),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",lu->id.ICNTL(28),&lu->id.ICNTL(28),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",lu->id.ICNTL(29),&lu->id.ICNTL(29),PETSC_NULL);

  PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",lu->id.CNTL(5),&lu->id.CNTL(5),PETSC_NULL);
  PetscOptionsEnd();
  return(0);
}

PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;

  MPI_Comm_rank(((PetscObject)A)->comm,&mumps->myid);
  MPI_Comm_size(((PetscObject)A)->comm,&mumps->size);
  MPI_Comm_dup(((PetscObject)A)->comm,&(mumps->comm_mumps));
  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->comm_mumps);

  mumps->id.job = JOB_INIT;
  mumps->id.par = 1; /* host participates in factorization and solve */
  mumps->id.sym = mumps->sym;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&mumps->id);
#else
  dmumps_c(&mumps->id);
#endif

  mumps->CleanUpMUMPS = PETSC_FALSE;
  mumps->scat_rhs     = PETSC_NULL;
  mumps->scat_sol     = PETSC_NULL;
  mumps->nSolve       = 0;
  return(0);
}

/* Note: the PETSc r and c permutations are ignored unless the user sets ICNTL(7)=1 */
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options */
  PetscSetMUMPSOptions(F,A);

  reuse = MAT_INITIAL_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* analysis phase */
  /*----------------*/
  lu->id.job = JOB_FACTSYMBOLIC;
  lu->id.n   = M;
  switch (lu->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!lu->myid) {
      lu->id.nz = lu->nz; lu->id.irn = lu->irn; lu->id.jcn = lu->jcn;
      if (lu->id.ICNTL(6) > 1) {
#if defined(PETSC_USE_COMPLEX)
        lu->id.a = (mumps_double_complex*)lu->val;
#else
        lu->id.a = lu->val;
#endif
      }
      if (lu->id.ICNTL(7) == 1) { /* use the user-provided matrix ordering */
        if (!lu->myid) {
          const PetscInt *idx;
          PetscInt       i,*perm_in;
          PetscMalloc(M*sizeof(PetscInt),&perm_in);
          ISGetIndices(r,&idx);
          lu->id.perm_in = perm_in;
          for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[] starts from 1, not 0! */
          ISRestoreIndices(r,&idx);
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    lu->id.nz_loc  = lu->nz;
    lu->id.irn_loc = lu->irn; lu->id.jcn_loc = lu->jcn;
    if (lu->id.ICNTL(6) > 1) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
      lu->id.a_loc = lu->val;
#endif
    }
    /* MUMPS only supports a centralized rhs; create scatter scat_rhs for repeated use in MatSolve() */
    if (!lu->myid) {
      VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);
    } else {
      VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
    }
    VecCreate(((PetscObject)A)->comm,&b);
    VecSetSizes(b,A->rmap->n,PETSC_DECIDE);
    VecSetFromOptions(b);

    VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
    ISDestroy(&is_iden);
    VecDestroy(&b);
    break;
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  F->ops->matsolve        = MatMatSolve_MUMPS;
  return(0);
}

/* Note: the PETSc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options */
  PetscSetMUMPSOptions(F,A);

  reuse = MAT_INITIAL_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* analysis phase */
  /*----------------*/
  lu->id.job = JOB_FACTSYMBOLIC;
  lu->id.n   = M;
  switch (lu->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!lu->myid) {
      lu->id.nz = lu->nz; lu->id.irn = lu->irn; lu->id.jcn = lu->jcn;
      if (lu->id.ICNTL(6) > 1) {
#if defined(PETSC_USE_COMPLEX)
        lu->id.a = (mumps_double_complex*)lu->val;
#else
        lu->id.a = lu->val;
#endif
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    lu->id.nz_loc  = lu->nz;
    lu->id.irn_loc = lu->irn; lu->id.jcn_loc = lu->jcn;
    if (lu->id.ICNTL(6) > 1) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
      lu->id.a_loc = lu->val;
#endif
    }
    /* MUMPS only supports a centralized rhs; create scatter scat_rhs for repeated use in MatSolve() */
    if (!lu->myid) {
      VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);
    } else {
      VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
    }
    VecCreate(((PetscObject)A)->comm,&b);
    VecSetSizes(b,A->rmap->n,PETSC_DECIDE);
    VecSetFromOptions(b);

    VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
    ISDestroy(&is_iden);
    VecDestroy(&b);
    break;
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  return(0);
}

/* Note: the PETSc r permutation and factor info are ignored */
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options */
  PetscSetMUMPSOptions(F,A);

  reuse = MAT_INITIAL_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* analysis phase */
  /*----------------*/
  lu->id.job = JOB_FACTSYMBOLIC;
  lu->id.n   = M;
  switch (lu->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!lu->myid) {
      lu->id.nz = lu->nz; lu->id.irn = lu->irn; lu->id.jcn = lu->jcn;
      if (lu->id.ICNTL(6) > 1) {
#if defined(PETSC_USE_COMPLEX)
        lu->id.a = (mumps_double_complex*)lu->val;
#else
        lu->id.a = lu->val;
#endif
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    lu->id.nz_loc  = lu->nz;
    lu->id.irn_loc = lu->irn; lu->id.jcn_loc = lu->jcn;
    if (lu->id.ICNTL(6) > 1) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
      lu->id.a_loc = lu->val;
#endif
    }
    /* MUMPS only supports a centralized rhs; create scatter scat_rhs for repeated use in MatSolve() */
    if (!lu->myid) {
      VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);
    } else {
      VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
    }
    VecCreate(((PetscObject)A)->comm,&b);
    VecSetSizes(b,A->rmap->n,PETSC_DECIDE);
    VecSetFromOptions(b);

    VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
    ISDestroy(&is_iden);
    VecDestroy(&b);
    break;
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* the matrix is symmetric, so A^T x = b is the same solve */
#if !defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#else
  F->ops->getinertia = PETSC_NULL;
#endif
  return(0);
}

PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *lu=(Mat_MUMPS*)A->spptr;

  /* check whether the matrix is of mumps type */
  if (A->ops->solve != MatSolve_MUMPS) return(0);

  PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
      PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);
      PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);
      PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",lu->id.ICNTL(1));
      PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",lu->id.ICNTL(2));
      PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",lu->id.ICNTL(3));
      PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));
      PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));
      PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));
      PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering): %d \n",lu->id.ICNTL(7));
      PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",lu->id.ICNTL(8));
      PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));
      PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));
      if (lu->id.ICNTL(11) > 0) {
        PetscViewerASCIIPrintf(viewer,"  RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));
        PetscViewerASCIIPrintf(viewer,"  RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));
        PetscViewerASCIIPrintf(viewer,"  RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));
        PetscViewerASCIIPrintf(viewer,"  RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));
        PetscViewerASCIIPrintf(viewer,"  RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));
        PetscViewerASCIIPrintf(viewer,"  RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));
      }
      PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));
      PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));
      PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));
      /* ICNTL(15-17) not used */
      PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));
      PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",lu->id.ICNTL(19));
      PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",lu->id.ICNTL(20));
      PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",lu->id.ICNTL(21));
      PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",lu->id.ICNTL(22));
      PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally): %d \n",lu->id.ICNTL(23));

      PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",lu->id.ICNTL(24));
      PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",lu->id.ICNTL(25));
      PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",lu->id.ICNTL(26));
      PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",lu->id.ICNTL(27));
      PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",lu->id.ICNTL(28));
      PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",lu->id.ICNTL(29));

      PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));
      PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));
      PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));
      PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",lu->id.CNTL(4));
      PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",lu->id.CNTL(5));

      /* information local to each processor */
      PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");
      PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
      PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",lu->myid,lu->id.RINFO(1));
      PetscViewerFlush(viewer);
      PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",lu->myid,lu->id.RINFO(2));
      PetscViewerFlush(viewer);
      PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",lu->myid,lu->id.RINFO(3));
      PetscViewerFlush(viewer);

      PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running the numerical factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",lu->myid,lu->id.INFO(15));
      PetscViewerFlush(viewer);

      PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during the numerical factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",lu->myid,lu->id.INFO(16));
      PetscViewerFlush(viewer);

      PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",lu->myid,lu->id.INFO(23));
      PetscViewerFlush(viewer);
      PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);

      if (!lu->myid) { /* information from the host */
        PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));
        PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));
        PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));

        PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));
        PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));
        PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));
        PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));
        PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d \n",lu->id.INFOG(7));
        PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));
        PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));
        PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",lu->id.INFOG(10));
        PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));
        PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));
        PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));
        PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compresses after factorization): %d \n",lu->id.INFOG(14));
        PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));
        PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));
        PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));
        PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));
        PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));
        PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));
        PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));
        PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));
        PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));
        PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));
        PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));
      }
    }
  }
  return(0);
}

PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return(0);
}

/* -------------------------------------------------------------------------------------------*/
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  Mat_MUMPS *lu=(Mat_MUMPS*)F->spptr;

  lu->id.ICNTL(icntl) = ival;
  return(0);
}

/*@
  MatMumpsSetIcntl - Set a MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
.  icntl - index of the MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References: MUMPS Users' Guide

.seealso: MatGetFactor()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
  return(0);
}
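
/*
   Usage sketch (illustrative; F is assumed to come from MatGetFactor() with
   MATSOLVERMUMPS, and error checking is omitted):

     Mat F;
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatMumpsSetIcntl(F,7,2);  // set ICNTL(7)=2: choice of sequential ordering

   or, equivalently, from the options database: -mat_mumps_icntl_7 2
*/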

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  Works with MATAIJ and MATSBAIJ matrices

  Options Database Keys:
+ -mat_mumps_icntl_4 <0,...,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - solve A x=b or A^T x=b: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_cntl_1 <delta> - relative pivoting threshold
. -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
- -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

  Level: beginner

.seealso: PCFactorSetMatSolverPackage(), MatSolverPackage

M*/
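
/*
   Typical use of this solver package from the command line (sketch; ./myapp is
   an illustrative executable name):

     ./myapp -ksp_type preonly -pc_type lu \
             -pc_factor_mat_solver_package mumps \
             -mat_mumps_icntl_7 3 -mat_mumps_icntl_4 1

   or, in code, via PCFactorSetMatSolverPackage(pc,MATSOLVERMUMPS) before the
   preconditioner is set up.
*/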

PetscErrorCode MatFactorGetSolverPackage_mumps(Mat A,const MatSolverPackage *type)
{
  *type = MATSOLVERMUMPS;
  return(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat       B;
  Mat_MUMPS *mumps;
  PetscBool isSeqAIJ;

  /* Create the factorization matrix */
  PetscTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
  MatSetType(B,((PetscObject)A)->type_name);
  if (isSeqAIJ) {
    MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
  } else {
    MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
  }

  PetscNewLog(B,Mat_MUMPS,&mumps);
  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mumps",MatFactorGetSolverPackage_mumps);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMumpsSetIcntl_C","MatMumpsSetIcntl_MUMPS",MatMumpsSetIcntl_MUMPS);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else          mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else          mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
  }

  mumps->isAIJ    = PETSC_TRUE;
  mumps->Destroy  = B->ops->destroy;
  B->ops->destroy = MatDestroy_MUMPS;
  B->spptr        = (void*)mumps;
  PetscInitializeMUMPS(A,mumps);

  *F = B;
  return(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat       B;
  Mat_MUMPS *mumps;
  PetscBool isSeqSBAIJ;

  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  if (A->rmap->bs > 1) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead");
  PetscTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
  /* Create the factorization matrix */
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
  MatSetType(B,((PetscObject)A)->type_name);
  PetscNewLog(B,Mat_MUMPS,&mumps);
  if (isSeqSBAIJ) {
    MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mumps",MatFactorGetSolverPackage_mumps);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMumpsSetIcntl_C","MatMumpsSetIcntl_MUMPS",MatMumpsSetIcntl_MUMPS); /* compose the _MUMPS implementation, as in the other factories */
  B->factortype = MAT_FACTOR_CHOLESKY;
  if (A->spd_set && A->spd) mumps->sym = 1;
  else                      mumps->sym = 2;

  mumps->isAIJ    = PETSC_FALSE;
  mumps->Destroy  = B->ops->destroy;
  B->ops->destroy = MatDestroy_MUMPS;
  B->spptr        = (void*)mumps;
  PetscInitializeMUMPS(A,mumps);

  *F = B;
  return(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices */
PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat       B;
  Mat_MUMPS *mumps;
  PetscBool isSeqBAIJ;

  /* Create the factorization matrix */
  PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
  MatSetType(B,((PetscObject)A)->type_name);
  if (isSeqBAIJ) {
    MatSeqBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL);
  } else {
    MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
  }

  PetscNewLog(B,Mat_MUMPS,&mumps);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else           mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
  }

  B->ops->view = MatView_MUMPS;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mumps",MatFactorGetSolverPackage_mumps);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMumpsSetIcntl_C","MatMumpsSetIcntl_MUMPS",MatMumpsSetIcntl_MUMPS);

  mumps->isAIJ    = PETSC_TRUE;
  mumps->Destroy  = B->ops->destroy;
  B->ops->destroy = MatDestroy_MUMPS;
  B->spptr        = (void*)mumps;
  PetscInitializeMUMPS(A,mumps);

  *F = B;
  return(0);
}