Actual source code: mpispooles.c
1: /*
2: Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
3: */
5: #include src/mat/impls/aij/seq/aij.h
6: #include src/mat/impls/sbaij/seq/sbaij.h
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/mat/impls/aij/mpi/mpiaij.h
9: #include src/mat/impls/sbaij/mpi/mpisbaij.h
10: #include src/mat/impls/aij/seq/spooles/spooles.h
12: EXTERN int SetSpoolesOptions(Mat, Spooles_options *);
16: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
17: {
18: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
20:
22: if (lu->CleanUpSpooles) {
23: FrontMtx_free(lu->frontmtx);
24: IV_free(lu->newToOldIV);
25: IV_free(lu->oldToNewIV);
26: IV_free(lu->vtxmapIV);
27: InpMtx_free(lu->mtxA);
28: ETree_free(lu->frontETree);
29: IVL_free(lu->symbfacIVL);
30: SubMtxManager_free(lu->mtxmanager);
31: DenseMtx_free(lu->mtxX);
32: DenseMtx_free(lu->mtxY);
33: MPI_Comm_free(&(lu->comm_spooles));
34: if ( lu->scat ){
35: VecDestroy(lu->vec_spooles);
36: ISDestroy(lu->iden);
37: ISDestroy(lu->is_petsc);
38: VecScatterDestroy(lu->scat);
39: }
40: }
41: MatConvert_Spooles_Base(A,lu->basetype,&A);
42: (*A->ops->destroy)(A);
44: return(0);
45: }
49: PetscErrorCode MatSolve_MPIAIJSpooles(Mat A,Vec b,Vec x)
50: {
51: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
53: int size,rank,m=A->m,irow,*rowindY;
54: PetscScalar *array;
55: DenseMtx *newY ;
56: SubMtxManager *solvemanager ;
57: #if defined(PETSC_USE_COMPLEX)
58: double x_real,x_imag;
59: #endif
62: MPI_Comm_size(A->comm,&size);
63: MPI_Comm_rank(A->comm,&rank);
64:
65: /* copy b into spooles' rhs mtxY */
66: DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
67: VecGetArray(b,&array);
69: DenseMtx_rowIndices(lu->mtxY, &m, &rowindY); /* get m, rowind */
70: for ( irow = 0 ; irow < m ; irow++ ) {
71: rowindY[irow] = irow + lu->rstart; /* global rowind */
72: #if !defined(PETSC_USE_COMPLEX)
73: DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
74: #else
75: DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
76: array++;
77: #endif
78: }
79: VecRestoreArray(b,&array);
80:
81: if ( lu->options.msglvl > 2 ) {
82: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
83: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
84: fflush(lu->options.msgFile);
85: }
86:
87: /* permute and redistribute Y if necessary */
88: DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
89: if ( lu->options.msglvl > 2 ) {
90: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
91: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
92: fflush(lu->options.msgFile);
93: }
95: MPI_Barrier(A->comm); /* for initializing firsttag, because the num. of tags used
96: by FrontMtx_MPI_split() is unknown */
97: lu->firsttag = 0;
98: newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
99: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
100: DenseMtx_free(lu->mtxY);
101: lu->mtxY = newY ;
102: lu->firsttag += size ;
103: if ( lu->options.msglvl > 2 ) {
104: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
105: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
106: fflush(lu->options.msgFile);
107: }
109: if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
110: /* pivoting has taken place, redistribute the right hand side
111: to match the final rows and columns in the fronts */
112: IV *rowmapIV ;
113: rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
114: lu->options.msgFile, lu->comm_spooles);
115: newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
116: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
117: DenseMtx_free(lu->mtxY);
118: lu->mtxY = newY ;
119: IV_free(rowmapIV);
120: lu->firsttag += size;
121: }
122: if ( lu->options.msglvl > 2 ) {
123: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
124: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
125: fflush(lu->options.msgFile);
126: }
128: if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
129:
130: /* solve the linear system */
131: solvemanager = SubMtxManager_new();
132: SubMtxManager_init(solvemanager, NO_LOCK, 0);
133: FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
134: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
135: SubMtxManager_free(solvemanager);
136: if ( lu->options.msglvl > 2 ) {
137: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
138: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
139: }
141: /* permute the solution into the original ordering */
142: DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
143: if ( lu->options.msglvl > 2 ) {
144: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
145: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
146: fflush(lu->options.msgFile);
147: }
148:
149: /* scatter local solution mtxX into mpi vector x */
150: if( !lu->scat ){ /* create followings once for each numfactorization */
151: /* vec_spooles <- mtxX */
152: #if !defined(PETSC_USE_COMPLEX)
153: VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
154: #else
155: VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
156: VecGetArray(lu->vec_spooles,&array);
157: for (irow = 0; irow < lu->nmycol; irow++){
158: DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
159: array[irow] = x_real+x_imag*PETSC_i;
160: }
161: VecRestoreArray(lu->vec_spooles,&array);
162: #endif
163: ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
164: ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
165: VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
166: }
168: VecScatterBegin(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
169: VecScatterEnd(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
170:
171: return(0);
172: }
176: PetscErrorCode MatFactorNumeric_MPIAIJSpooles(Mat A,Mat *F)
177: {
178: Mat_Spooles *lu = (Mat_Spooles*)(*F)->spptr;
179: PetscErrorCode ierr;
180: int rank,size,lookahead=0,sierr;
181: ChvManager *chvmanager ;
182: Chv *rootchv ;
183: Graph *graph ;
184: IVL *adjIVL;
185: DV *cumopsDV ;
186: double droptol=0.0,*opcounts,minops,cutoff;
187: #if !defined(PETSC_USE_COMPLEX)
188: double *val;
189: #endif
190: InpMtx *newA ;
191: PetscScalar *av, *bv;
192: int *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
193: i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
194: int M=A->M,m=A->m,root,nedges,tagbound,lasttag;
195:
197: MPI_Comm_size(A->comm,&size);
198: MPI_Comm_rank(A->comm,&rank);
200: if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
201: /* get input parameters */
202: SetSpoolesOptions(A, &lu->options);
204: (*F)->ops->solve = MatSolve_MPIAIJSpooles;
205: (*F)->ops->destroy = MatDestroy_MPIAIJSpooles;
206: (*F)->assembled = PETSC_TRUE;
208: /* to be used by MatSolve() */
209: lu->mtxY = DenseMtx_new();
210: lu->mtxX = DenseMtx_new();
211: lu->scat = PETSC_NULL;
213: IVzero(20, lu->stats);
214: DVzero(20, lu->cpus);
216: lu->mtxA = InpMtx_new();
217: }
218:
219: /* copy A to Spooles' InpMtx object */
220: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
221: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
222: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
223: Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
224: ai=aa->i; aj=aa->j; av=aa->a;
225: bi=bb->i; bj=bb->j; bv=bb->a;
226: lu->rstart = mat->rstart;
227: nz = aa->nz + bb->nz;
228: garray = mat->garray;
229: } else { /* SPOOLES_SYMMETRIC */
230: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
231: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
232: Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data;
233: ai=aa->i; aj=aa->j; av=aa->a;
234: bi=bb->i; bj=bb->j; bv=bb->a;
235: lu->rstart = mat->rstart;
236: nz = aa->nz + bb->nz;
237: garray = mat->garray;
238: }
239:
240: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
241: row = InpMtx_ivec1(lu->mtxA);
242: col = InpMtx_ivec2(lu->mtxA);
243: #if !defined(PETSC_USE_COMPLEX)
244: val = InpMtx_dvec(lu->mtxA);
245: #endif
247: jj = 0; irow = lu->rstart;
248: for ( i=0; i<m; i++ ) {
249: ajj = aj + ai[i]; /* ptr to the beginning of this row */
250: countA = ai[i+1] - ai[i];
251: countB = bi[i+1] - bi[i];
252: bjj = bj + bi[i];
253: jB = 0;
254:
255: if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
256: /* B part, smaller col index */
257: colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
258: for (j=0; j<countB; j++){
259: jcol = garray[bjj[j]];
260: if (jcol > colA_start) {
261: jB = j;
262: break;
263: }
264: row[jj] = irow; col[jj] = jcol;
265: #if !defined(PETSC_USE_COMPLEX)
266: val[jj++] = *bv++;
267: #else
268: InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
269: bv++; jj++;
270: #endif
271: if (j==countB-1) jB = countB;
272: }
273: }
274: /* A part */
275: for (j=0; j<countA; j++){
276: row[jj] = irow; col[jj] = lu->rstart + ajj[j];
277: #if !defined(PETSC_USE_COMPLEX)
278: val[jj++] = *av++;
279: #else
280: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
281: av++; jj++;
282: #endif
283: }
284: /* B part, larger col index */
285: for (j=jB; j<countB; j++){
286: row[jj] = irow; col[jj] = garray[bjj[j]];
287: #if !defined(PETSC_USE_COMPLEX)
288: val[jj++] = *bv++;
289: #else
290: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
291: bv++; jj++;
292: #endif
293: }
294: irow++;
295: }
296: #if !defined(PETSC_USE_COMPLEX)
297: InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
298: #endif
299: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
300: if ( lu->options.msglvl > 0 ) {
301: printf("[%d] input matrix\n",rank);
302: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
303: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
304: fflush(lu->options.msgFile);
305: }
307: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
308: /*
309: find a low-fill ordering
310: (1) create the Graph object
311: (2) order the graph using multiple minimum degree
312: (3) find out who has the best ordering w.r.t. op count,
313: and broadcast that front tree object
314: */
315: graph = Graph_new();
316: adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
317: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
318: nedges = IVL_tsize(adjIVL);
319: Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
320: if ( lu->options.msglvl > 2 ) {
321: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
322: Graph_writeForHumanEye(graph, lu->options.msgFile);
323: fflush(lu->options.msgFile);
324: }
326: switch (lu->options.ordering) {
327: case 0:
328: lu->frontETree = orderViaBestOfNDandMS(graph,
329: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
330: lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
331: case 1:
332: lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
333: case 2:
334: lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
335: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
336: case 3:
337: lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
338: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
339: default:
340: SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
341: }
343: Graph_free(graph);
344: if ( lu->options.msglvl > 2 ) {
345: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
346: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
347: fflush(lu->options.msgFile);
348: }
350: opcounts = DVinit(size, 0.0);
351: opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
352: MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
353: (void*) opcounts, 1, MPI_DOUBLE, A->comm);
354: minops = DVmin(size, opcounts, &root);
355: DVfree(opcounts);
356:
357: lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
358: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
359: if ( lu->options.msglvl > 2 ) {
360: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
361: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
362: fflush(lu->options.msgFile);
363: }
364:
365: /* get the permutations, permute the front tree, permute the matrix */
366: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
367: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);
369: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
371: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
372:
373: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
375: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
376: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
378: /* generate the owners map IV object and the map from vertices to owners */
379: cutoff = 1./(2*size);
380: cumopsDV = DV_new();
381: DV_init(cumopsDV, size, NULL);
382: lu->ownersIV = ETree_ddMap(lu->frontETree,
383: lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
384: DV_free(cumopsDV);
385: lu->vtxmapIV = IV_new();
386: IV_init(lu->vtxmapIV, M, NULL);
387: IVgather(M, IV_entries(lu->vtxmapIV),
388: IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
389: if ( lu->options.msglvl > 2 ) {
390: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
391: IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
392: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
393: IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
394: fflush(lu->options.msgFile);
395: }
397: /* redistribute the matrix */
398: lu->firsttag = 0 ;
399: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
400: lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
401: lu->firsttag += size ;
403: InpMtx_free(lu->mtxA);
404: lu->mtxA = newA ;
405: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
406: if ( lu->options.msglvl > 2 ) {
407: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
408: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
409: fflush(lu->options.msgFile);
410: }
411:
412: /* compute the symbolic factorization */
413: lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
414: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
415: lu->firsttag += lu->frontETree->nfront ;
416: if ( lu->options.msglvl > 2 ) {
417: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
418: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
419: fflush(lu->options.msgFile);
420: }
422: lu->mtxmanager = SubMtxManager_new();
423: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
424: lu->frontmtx = FrontMtx_new();
426: } else { /* new num factorization using previously computed symbolic factor */
427: if (lu->options.pivotingflag) { /* different FrontMtx is required */
428: FrontMtx_free(lu->frontmtx);
429: lu->frontmtx = FrontMtx_new();
430: }
432: SubMtxManager_free(lu->mtxmanager);
433: lu->mtxmanager = SubMtxManager_new();
434: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
436: /* permute mtxA */
437: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
438: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
439:
440: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
441: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
443: /* redistribute the matrix */
444: MPI_Barrier(A->comm);
445: lu->firsttag = 0;
446: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
447: lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
448: lu->firsttag += size ;
450: InpMtx_free(lu->mtxA);
451: lu->mtxA = newA ;
452: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
453: if ( lu->options.msglvl > 2 ) {
454: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
455: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
456: fflush(lu->options.msgFile);
457: }
458: } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */
460: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
461: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
462: lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
464: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
465: if ( lu->options.patchAndGoFlag == 1 ) {
466: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
467: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
468: lu->options.storeids, lu->options.storevalues);
469: } else if ( lu->options.patchAndGoFlag == 2 ) {
470: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
471: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
472: lu->options.storeids, lu->options.storevalues);
473: }
474: }
476: /* numerical factorization */
477: chvmanager = ChvManager_new();
478: ChvManager_init(chvmanager, NO_LOCK, 0);
480: tagbound = maxTagMPI(lu->comm_spooles);
481: lasttag = lu->firsttag + 3*lu->frontETree->nfront + 2;
482: /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
483: if ( lasttag > tagbound ) {
484: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
485: lu->firsttag, lasttag, tagbound);
486: }
487: rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
488: chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
489: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
490: ChvManager_free(chvmanager);
491: lu->firsttag = lasttag;
492: if ( lu->options.msglvl > 2 ) {
493: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
494: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
495: fflush(lu->options.msgFile);
496: }
498: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
499: if ( lu->options.patchAndGoFlag == 1 ) {
500: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
501: if (lu->options.msglvl > 0 ){
502: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
503: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
504: }
505: }
506: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
507: } else if ( lu->options.patchAndGoFlag == 2 ) {
508: if (lu->options.msglvl > 0 ){
509: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
510: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
511: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
512: }
513: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
514: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
515: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
516: }
517: }
518: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
519: }
520: }
521: if ( sierr >= 0 ) SETERRQ2(PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
522:
523: /* post-process the factorization and split
524: the factor matrices into submatrices */
525: lasttag = lu->firsttag + 5*size;
526: if ( lasttag > tagbound ) {
527: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
528: lu->firsttag, lasttag, tagbound);
529: }
530: FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
531: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
532: lu->firsttag += 5*size ;
533: if ( lu->options.msglvl > 2 ) {
534: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
535: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
536: fflush(lu->options.msgFile);
537: }
538:
539: /* create the solve map object */
540: lu->solvemap = SolveMap_new();
541: SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
542: FrontMtx_upperBlockIVL(lu->frontmtx),
543: FrontMtx_lowerBlockIVL(lu->frontmtx),
544: size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
545: lu->options.seed, lu->options.msglvl, lu->options.msgFile);
546: if ( lu->options.msglvl > 2 ) {
547: SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
548: fflush(lu->options.msgFile);
549: }
551: /* redistribute the submatrices of the factors */
552: FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
553: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
554: if ( lu->options.msglvl > 2 ) {
555: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
556: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
557: fflush(lu->options.msgFile);
558: }
560: /* create a solution DenseMtx object */
561: lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
562: lu->options.msglvl, lu->options.msgFile);
563: lu->nmycol = IV_size(lu->ownedColumnsIV);
564: if ( lu->nmycol > 0) {
565: DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
566: /* get pointers rowindX and entX */
567: DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
568: lu->entX = DenseMtx_entries(lu->mtxX);
569: } else { /* lu->nmycol == 0 */
570: lu->entX = 0;
571: lu->rowindX = 0;
572: }
574: if ( lu->scat ){
575: VecDestroy(lu->vec_spooles);
576: ISDestroy(lu->iden);
577: ISDestroy(lu->is_petsc);
578: VecScatterDestroy(lu->scat);
579: }
580: lu->scat = PETSC_NULL;
581: lu->flg = SAME_NONZERO_PATTERN;
583: lu->CleanUpSpooles = PETSC_TRUE;
584: return(0);
585: }
590: PetscErrorCode MatConvert_MPIAIJ_MPIAIJSpooles(Mat A,const MatType type,Mat *newmat)
591: {
592: /* This routine is only called to convert a MATMPIAIJ matrix */
593: /* to a MATMPIAIJSPOOLES matrix, so we will ignore 'MatType type'. */
595: Mat B=*newmat;
596: Mat_Spooles *lu;
599: if (B != A) {
600: /* This routine is inherited, so we know the type is correct. */
601: MatDuplicate(A,MAT_COPY_VALUES,&B);
602: }
604: PetscNew(Mat_Spooles,&lu);
605: B->spptr = (void*)lu;
607: lu->basetype = MATMPIAIJ;
608: lu->CleanUpSpooles = PETSC_FALSE;
609: lu->MatDuplicate = A->ops->duplicate;
610: lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic;
611: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
612: lu->MatView = A->ops->view;
613: lu->MatAssemblyEnd = A->ops->assemblyend;
614: lu->MatDestroy = A->ops->destroy;
616: B->ops->duplicate = MatDuplicate_Spooles;
617: B->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJSpooles;
618: B->ops->view = MatView_SeqAIJSpooles;
619: B->ops->assemblyend = MatAssemblyEnd_MPIAIJSpooles;
620: B->ops->destroy = MatDestroy_MPIAIJSpooles;
622: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaijspooles_mpiaij_C",
623: "MatConvert_Spooles_Base",MatConvert_Spooles_Base);
624: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijspooles_C",
625: "MatConvert_MPIAIJ_MPIAIJSpooles",MatConvert_MPIAIJ_MPIAIJSpooles);
626: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJSPOOLES);
627: *newmat = B;
628: return(0);
629: }
632: /*MC
633: MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices
634: via the external package Spooles.
636: If MPIAIJSPOOLES is installed (see the manual for
637: instructions on how to declare the existence of external packages),
638: a matrix type can be constructed which invokes SPOOLES solvers.
639: After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES).
640: This matrix type is only supported for double precision real.
642: This matrix inherits from MATMPIAIJ. As a result, MatMPIAIJSetPreallocation is
643: supported for this matrix type. One can also call MatConvert for an inplace conversion to or from
644: the MATMPIAIJ type without data copy.
646: Consult Spooles documentation for more information about the options database keys below.
648: Options Database Keys:
649: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
650: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
651: . -mat_spooles_seed <seed> - random number seed used for ordering
652: . -mat_spooles_msglvl <msglvl> - message output level
653: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
654: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
655: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
656: . -mat_spooles_maxsize <n> - maximum size of a supernode
657: . -mat_spooles_FrontMtxInfo <true,fase> - print Spooles information about the computed factorization
658: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
659: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
660: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
661: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
662: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
663: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object
665: Level: beginner
667: .seealso: PCLU
668: M*/
673: PetscErrorCode MatCreate_MPIAIJSpooles(Mat A)
674: {
676: Mat A_diag;
679: /* Change type name before calling MatSetType to force proper construction of MPIAIJ and MPIAIJSpooles types */
680: PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJSPOOLES);
681: MatSetType(A,MATMPIAIJ);
682: A_diag = ((Mat_MPIAIJ *)A->data)->A;
683: MatConvert_SeqAIJ_SeqAIJSpooles(A_diag,MATSEQAIJSPOOLES,&A_diag);
684: MatConvert_MPIAIJ_MPIAIJSpooles(A,MATMPIAIJSPOOLES,&A);
685: return(0);
686: }