Actual source code: mpidense.c
2: /*
3:    Basic functions for parallel dense matrices.
4: */
6:
7: #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
8: #if defined(PETSC_HAVE_PLAPACK)
9: static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
10: static MPI_Comm Plapack_comm_2d;
12: #include <PLA.h>
15: typedef struct {
16: PLA_Obj A,pivots;
17: PLA_Template templ;
18: MPI_Datatype datatype;
19: PetscInt nb,rstart;
20: VecScatter ctx;
21: IS is_pla,is_petsc;
22: PetscBool pla_solved;
23: MatStructure mstruct;
24: } Mat_Plapack;
25: #endif
29: /*@
31:     MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that stores
32:     the locally owned portion of the operator; for a MATSEQDENSE matrix, returns the matrix itself.
34: Input Parameter:
35: . A - the Seq or MPI dense matrix
37: Output Parameter:
38: . B - the inner matrix
40: Level: intermediate
42: @*/
43: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
44: {
45: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
47: PetscBool flg;
50: PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
51: if (flg) {
52: *B = mat->A;
53: } else {
54: *B = A;
55: }
56: return(0);
57: }
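/*
   Illustrative usage sketch (not part of the mpidense.c source): obtain the per-process
   MATSEQDENSE block of a dense matrix and access its column-major storage. The function
   name ExampleAccessLocalBlock() and the assembled input matrix A are assumptions made
   only for this example.
*/
PetscErrorCode ExampleAccessLocalBlock(Mat A)
{
  Mat         Aloc;
  PetscScalar *array;
  PetscInt    m,n;

  MatDenseGetLocalMatrix(A,&Aloc);  /* Aloc is A itself for MATSEQDENSE, the inner mat->A for MATMPIDENSE */
  MatGetLocalSize(Aloc,&m,&n);      /* local rows by global columns in the MPI case */
  MatGetArray(Aloc,&array);         /* entries stored by columns (Fortran ordering) */
  /* ... read or modify the m-by-n block through array here ... */
  MatRestoreArray(Aloc,&array);
  return(0);
}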
61: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
62: {
63: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
65: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
68: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
69: lrow = row - rstart;
70: MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
71: return(0);
72: }
76: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
77: {
81: if (idx) {PetscFree(*idx);}
82: if (v) {PetscFree(*v);}
83: return(0);
84: }
89: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
90: {
91: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
93: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
94: PetscScalar *array;
95: MPI_Comm comm;
96: Mat B;
99: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
101: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
102: if (!B) {
103: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
104: MatCreate(comm,&B);
105: MatSetSizes(B,m,m,m,m);
106: MatSetType(B,((PetscObject)mdn->A)->type_name);
107: MatGetArray(mdn->A,&array);
108: MatSeqDenseSetPreallocation(B,array+m*rstart);
109: MatRestoreArray(mdn->A,&array);
110: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
111: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
112: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
113: *a = B;
114: MatDestroy(&B);
115: } else {
116: *a = B;
117: }
118: return(0);
119: }
124: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
125: {
126: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
128: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
129: PetscBool roworiented = A->roworiented;
133: for (i=0; i<m; i++) {
134: if (idxm[i] < 0) continue;
135: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
136: if (idxm[i] >= rstart && idxm[i] < rend) {
137: row = idxm[i] - rstart;
138: if (roworiented) {
139: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
140: } else {
141: for (j=0; j<n; j++) {
142: if (idxn[j] < 0) continue;
143: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
144: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
145: }
146: }
147: } else {
148: if (!A->donotstash) {
149: if (roworiented) {
150: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
151: } else {
152: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
153: }
154: }
155: }
156: }
157: return(0);
158: }
162: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
163: {
164: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
166: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
169: for (i=0; i<m; i++) {
170: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
171: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
172: if (idxm[i] >= rstart && idxm[i] < rend) {
173: row = idxm[i] - rstart;
174: for (j=0; j<n; j++) {
175: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
176: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
177: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
178: }
179: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
180: }
181: return(0);
182: }
186: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
187: {
188: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
192: MatGetArray(a->A,array);
193: return(0);
194: }
198: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
199: {
200: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
201: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
203: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
204: const PetscInt *irow,*icol;
205: PetscScalar *av,*bv,*v = lmat->v;
206: Mat newmat;
207: IS iscol_local;
210: ISAllGather(iscol,&iscol_local);
211: ISGetIndices(isrow,&irow);
212: ISGetIndices(iscol_local,&icol);
213: ISGetLocalSize(isrow,&nrows);
214: ISGetLocalSize(iscol,&ncols);
215: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
217:   /* No parallel redistribution currently supported! Should really check each index set
218:      to confirm that it is OK. ... Currently supports only a submatrix with the same row
219:      partitioning as the original matrix! */
221: MatGetLocalSize(A,&nlrows,&nlcols);
222: MatGetOwnershipRange(A,&rstart,&rend);
223:
224: /* Check submatrix call */
225: if (scall == MAT_REUSE_MATRIX) {
226: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
227: /* Really need to test rows and column sizes! */
228: newmat = *B;
229: } else {
230: /* Create and fill new matrix */
231: MatCreate(((PetscObject)A)->comm,&newmat);
232: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
233: MatSetType(newmat,((PetscObject)A)->type_name);
234: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
235: }
237: /* Now extract the data pointers and do the copy, column at a time */
238: newmatd = (Mat_MPIDense*)newmat->data;
239: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
240:
241: for (i=0; i<Ncols; i++) {
242: av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
243: for (j=0; j<nrows; j++) {
244: *bv++ = av[irow[j] - rstart];
245: }
246: }
248: /* Assemble the matrices so that the correct flags are set */
249: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
250: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
252: /* Free work space */
253: ISRestoreIndices(isrow,&irow);
254: ISRestoreIndices(iscol_local,&icol);
255: *B = newmat;
256: return(0);
257: }
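/*
   Illustrative usage sketch (not part of the mpidense.c source): since the routine above
   performs no parallel redistribution, the index sets should preserve the original row
   partitioning. This sketch copies a matrix by selecting each process's owned rows and
   columns; the function name ExampleCopyViaSubMatrix() is an assumption for the example.
*/
PetscErrorCode ExampleCopyViaSubMatrix(Mat A,Mat *B)
{
  IS       isrow,iscol;
  PetscInt rstart,rend,cstart,cend;

  MatGetOwnershipRange(A,&rstart,&rend);        /* locally owned rows */
  MatGetOwnershipRangeColumn(A,&cstart,&cend);  /* locally owned columns */
  ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);
  ISCreateStride(((PetscObject)A)->comm,cend-cstart,cstart,1,&iscol);
  MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,B);  /* B keeps the layout and entries of A */
  ISDestroy(&isrow);
  ISDestroy(&iscol);
  return(0);
}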
261: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
262: {
264: return(0);
265: }
269: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
270: {
271: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
272: MPI_Comm comm = ((PetscObject)mat)->comm;
274: PetscInt nstash,reallocs;
275: InsertMode addv;
278: /* make sure all processors are either in INSERTMODE or ADDMODE */
279: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
280: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
281: mat->insertmode = addv; /* in case this processor had no cache */
283: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
284: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
285: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
286: return(0);
287: }
291: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
292: {
293: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
294: PetscErrorCode ierr;
295: PetscInt i,*row,*col,flg,j,rstart,ncols;
296: PetscMPIInt n;
297: PetscScalar *val;
298: InsertMode addv=mat->insertmode;
301: /* wait on receives */
302: while (1) {
303: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
304: if (!flg) break;
305:
306: for (i=0; i<n;) {
307: /* Now identify the consecutive vals belonging to the same row */
308: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
309: if (j < n) ncols = j-i;
310: else ncols = n-i;
311: /* Now assemble all these values with a single function call */
312: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
313: i = j;
314: }
315: }
316: MatStashScatterEnd_Private(&mat->stash);
317:
318: MatAssemblyBegin(mdn->A,mode);
319: MatAssemblyEnd(mdn->A,mode);
321: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
322: MatSetUpMultiply_MPIDense(mat);
323: }
324: return(0);
325: }
329: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
330: {
332: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
335: MatZeroEntries(l->A);
336: return(0);
337: }
339: /* the code does not do the diagonal entries correctly unless the
340:     matrix is square and the column and row ownerships are identical.
341: This is a BUG. The only way to fix it seems to be to access
342: mdn->A and mdn->B directly and not through the MatZeroRows()
343: routine.
344: */
347: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
348: {
349: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
350: PetscErrorCode ierr;
351: PetscInt i,*owners = A->rmap->range;
352: PetscInt *nprocs,j,idx,nsends;
353: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
354: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
355: PetscInt *lens,*lrows,*values;
356: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
357: MPI_Comm comm = ((PetscObject)A)->comm;
358: MPI_Request *send_waits,*recv_waits;
359: MPI_Status recv_status,*send_status;
360: PetscBool found;
361: const PetscScalar *xx;
362: PetscScalar *bb;
365: /* first count number of contributors to each processor */
366: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
367: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
368: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
369: for (i=0; i<N; i++) {
370: idx = rows[i];
371: found = PETSC_FALSE;
372: for (j=0; j<size; j++) {
373: if (idx >= owners[j] && idx < owners[j+1]) {
374: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
375: }
376: }
377: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
378: }
379: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
381: /* inform other processors of number of messages and max length*/
382: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
384: /* post receives: */
385: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
386: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
387: for (i=0; i<nrecvs; i++) {
388: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
389: }
391: /* do sends:
392: 1) starts[i] gives the starting index in svalues for stuff going to
393: the ith processor
394: */
395: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
396: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
397: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
398: starts[0] = 0;
399: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
400: for (i=0; i<N; i++) {
401: svalues[starts[owner[i]]++] = rows[i];
402: }
404: starts[0] = 0;
405: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
406: count = 0;
407: for (i=0; i<size; i++) {
408: if (nprocs[2*i+1]) {
409: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
410: }
411: }
412: PetscFree(starts);
414: base = owners[rank];
416: /* wait on receives */
417: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
418: count = nrecvs;
419: slen = 0;
420: while (count) {
421: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
422: /* unpack receives into our local space */
423: MPI_Get_count(&recv_status,MPIU_INT,&n);
424: source[imdex] = recv_status.MPI_SOURCE;
425: lens[imdex] = n;
426: slen += n;
427: count--;
428: }
429: PetscFree(recv_waits);
430:
431: /* move the data into the send scatter */
432: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
433: count = 0;
434: for (i=0; i<nrecvs; i++) {
435: values = rvalues + i*nmax;
436: for (j=0; j<lens[i]; j++) {
437: lrows[count++] = values[j] - base;
438: }
439: }
440: PetscFree(rvalues);
441: PetscFree2(lens,source);
442: PetscFree(owner);
443: PetscFree(nprocs);
444:
445: /* fix right hand side if needed */
446: if (x && b) {
447: VecGetArrayRead(x,&xx);
448: VecGetArray(b,&bb);
449: for (i=0; i<slen; i++) {
450: bb[lrows[i]] = diag*xx[lrows[i]];
451: }
452: VecRestoreArrayRead(x,&xx);
453: VecRestoreArray(b,&bb);
454: }
456: /* actually zap the local rows */
457: MatZeroRows(l->A,slen,lrows,diag,0,0);
458: PetscFree(lrows);
460: /* wait on sends */
461: if (nsends) {
462: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
463: MPI_Waitall(nsends,send_waits,send_status);
464: PetscFree(send_status);
465: }
466: PetscFree(send_waits);
467: PetscFree(svalues);
469: return(0);
470: }
474: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
475: {
476: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
480: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
481: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
482: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
483: return(0);
484: }
488: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
489: {
490: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
494: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
495: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
496: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
497: return(0);
498: }
502: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
503: {
504: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
506: PetscScalar zero = 0.0;
509: VecSet(yy,zero);
510: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
511: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
512: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
513: return(0);
514: }
518: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
519: {
520: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
524: VecCopy(yy,zz);
525: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
526: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
527: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
528: return(0);
529: }
533: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
534: {
535: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
536: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
538: PetscInt len,i,n,m = A->rmap->n,radd;
539: PetscScalar *x,zero = 0.0;
540:
542: VecSet(v,zero);
543: VecGetArray(v,&x);
544: VecGetSize(v,&n);
545: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
546: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
547: radd = A->rmap->rstart*m;
548: for (i=0; i<len; i++) {
549: x[i] = aloc->v[radd + i*m + i];
550: }
551: VecRestoreArray(v,&x);
552: return(0);
553: }
557: PetscErrorCode MatDestroy_MPIDense(Mat mat)
558: {
559: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
561: #if defined(PETSC_HAVE_PLAPACK)
562: Mat_Plapack *lu=(Mat_Plapack*)mat->spptr;
563: #endif
567: #if defined(PETSC_USE_LOG)
568: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
569: #endif
570: MatStashDestroy_Private(&mat->stash);
571: MatDestroy(&mdn->A);
572: VecDestroy(&mdn->lvec);
573: VecScatterDestroy(&mdn->Mvctx);
574: #if defined(PETSC_HAVE_PLAPACK)
575: if (lu) {
576: PLA_Obj_free(&lu->A);
577: PLA_Obj_free (&lu->pivots);
578: PLA_Temp_free(&lu->templ);
579: ISDestroy(&lu->is_pla);
580: ISDestroy(&lu->is_petsc);
581: VecScatterDestroy(&lu->ctx);
582: }
583: PetscFree(mat->spptr);
584: #endif
586: PetscFree(mat->data);
587: PetscObjectChangeTypeName((PetscObject)mat,0);
588: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
589: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
590: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
591: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
592: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
593: return(0);
594: }
598: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
599: {
600: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
601: PetscErrorCode ierr;
602: PetscViewerFormat format;
603: int fd;
604: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
605: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
606: PetscScalar *work,*v,*vv;
607: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
610: if (mdn->size == 1) {
611: MatView(mdn->A,viewer);
612: } else {
613: PetscViewerBinaryGetDescriptor(viewer,&fd);
614: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
615: MPI_Comm_size(((PetscObject)mat)->comm,&size);
617: PetscViewerGetFormat(viewer,&format);
618: if (format == PETSC_VIEWER_NATIVE) {
620: if (!rank) {
621: /* store the matrix as a dense matrix */
622: header[0] = MAT_FILE_CLASSID;
623: header[1] = mat->rmap->N;
624: header[2] = N;
625: header[3] = MATRIX_BINARY_FORMAT_DENSE;
626: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
628: /* get largest work array needed for transposing array */
629: mmax = mat->rmap->n;
630: for (i=1; i<size; i++) {
631: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
632: }
633: PetscMalloc(mmax*N*sizeof(PetscScalar),&work);
635: /* write out local array, by rows */
636: m = mat->rmap->n;
637: v = a->v;
638: for (j=0; j<N; j++) {
639: for (i=0; i<m; i++) {
640: work[j + i*N] = *v++;
641: }
642: }
643: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
644: /* get largest work array to receive messages from other processes, excludes process zero */
645: mmax = 0;
646: for (i=1; i<size; i++) {
647: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
648: }
649: PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);
650: for(k = 1; k < size; k++) {
651: v = vv;
652: m = mat->rmap->range[k+1] - mat->rmap->range[k];
653: MPILong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);
655: for(j = 0; j < N; j++) {
656: for(i = 0; i < m; i++) {
657: work[j + i*N] = *v++;
658: }
659: }
660: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
661: }
662: PetscFree(work);
663: PetscFree(vv);
664: } else {
665: MPILong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
666: }
667: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
668: }
669: return(0);
670: }
674: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675: {
676: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
677: PetscErrorCode ierr;
678: PetscMPIInt size = mdn->size,rank = mdn->rank;
679: const PetscViewerType vtype;
680: PetscBool iascii,isdraw;
681: PetscViewer sviewer;
682: PetscViewerFormat format;
683: #if defined(PETSC_HAVE_PLAPACK)
684: Mat_Plapack *lu;
685: #endif
688: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
689: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
690: if (iascii) {
691: PetscViewerGetType(viewer,&vtype);
692: PetscViewerGetFormat(viewer,&format);
693: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
694: MatInfo info;
695: MatGetInfo(mat,MAT_LOCAL,&info);
696: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
697: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
698: PetscViewerFlush(viewer);
699: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
700: #if defined(PETSC_HAVE_PLAPACK)
701: PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");
702: PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);
703: PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);
704: PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);
705: if (mat->factortype){
706: lu=(Mat_Plapack*)(mat->spptr);
707: PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);
708: }
709: #else
710: VecScatterView(mdn->Mvctx,viewer);
711: #endif
712: return(0);
713: } else if (format == PETSC_VIEWER_ASCII_INFO) {
714: return(0);
715: }
716: } else if (isdraw) {
717: PetscDraw draw;
718: PetscBool isnull;
720: PetscViewerDrawGetDraw(viewer,0,&draw);
721: PetscDrawIsNull(draw,&isnull);
722: if (isnull) return(0);
723: }
725: if (size == 1) {
726: MatView(mdn->A,viewer);
727: } else {
728: /* assemble the entire matrix onto first processor. */
729: Mat A;
730: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
731: PetscInt *cols;
732: PetscScalar *vals;
734: MatCreate(((PetscObject)mat)->comm,&A);
735: if (!rank) {
736: MatSetSizes(A,M,N,M,N);
737: } else {
738: MatSetSizes(A,0,0,M,N);
739: }
740: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
741: MatSetType(A,MATMPIDENSE);
742: MatMPIDenseSetPreallocation(A,PETSC_NULL);
743: PetscLogObjectParent(mat,A);
745: /* Copy the matrix ... This isn't the most efficient means,
746: but it's quick for now */
747: A->insertmode = INSERT_VALUES;
748: row = mat->rmap->rstart; m = mdn->A->rmap->n;
749: for (i=0; i<m; i++) {
750: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
751: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
752: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
753: row++;
754: }
756: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
757: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
758: PetscViewerGetSingleton(viewer,&sviewer);
759: if (!rank) {
760: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
761: /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
762: PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);
763: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
764: }
765: PetscViewerRestoreSingleton(viewer,&sviewer);
766: PetscViewerFlush(viewer);
767: MatDestroy(&A);
768: }
769: return(0);
770: }
774: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
775: {
777: PetscBool iascii,isbinary,isdraw,issocket;
778:
780:
781: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
782: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
783: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
784: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
786: if (iascii || issocket || isdraw) {
787: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
788: } else if (isbinary) {
789: MatView_MPIDense_Binary(mat,viewer);
790: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
791: return(0);
792: }
796: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
797: {
798: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
799: Mat mdn = mat->A;
801: PetscReal isend[5],irecv[5];
804: info->block_size = 1.0;
805: MatGetInfo(mdn,MAT_LOCAL,info);
806: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
807: isend[3] = info->memory; isend[4] = info->mallocs;
808: if (flag == MAT_LOCAL) {
809: info->nz_used = isend[0];
810: info->nz_allocated = isend[1];
811: info->nz_unneeded = isend[2];
812: info->memory = isend[3];
813: info->mallocs = isend[4];
814: } else if (flag == MAT_GLOBAL_MAX) {
815: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);
816: info->nz_used = irecv[0];
817: info->nz_allocated = irecv[1];
818: info->nz_unneeded = irecv[2];
819: info->memory = irecv[3];
820: info->mallocs = irecv[4];
821: } else if (flag == MAT_GLOBAL_SUM) {
822: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);
823: info->nz_used = irecv[0];
824: info->nz_allocated = irecv[1];
825: info->nz_unneeded = irecv[2];
826: info->memory = irecv[3];
827: info->mallocs = irecv[4];
828: }
829: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
830: info->fill_ratio_needed = 0;
831: info->factor_mallocs = 0;
832: return(0);
833: }
837: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
838: {
839: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
843: switch (op) {
844: case MAT_NEW_NONZERO_LOCATIONS:
845: case MAT_NEW_NONZERO_LOCATION_ERR:
846: case MAT_NEW_NONZERO_ALLOCATION_ERR:
847: MatSetOption(a->A,op,flg);
848: break;
849: case MAT_ROW_ORIENTED:
850: a->roworiented = flg;
851: MatSetOption(a->A,op,flg);
852: break;
853: case MAT_NEW_DIAGONALS:
854: case MAT_USE_HASH_TABLE:
855: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
856: break;
857: case MAT_IGNORE_OFF_PROC_ENTRIES:
858: a->donotstash = flg;
859: break;
860: case MAT_SYMMETRIC:
861: case MAT_STRUCTURALLY_SYMMETRIC:
862: case MAT_HERMITIAN:
863: case MAT_SYMMETRY_ETERNAL:
864: case MAT_IGNORE_LOWER_TRIANGULAR:
865: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
866: break;
867: default:
868: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
869: }
870: return(0);
871: }
876: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
877: {
878: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
879: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
880: PetscScalar *l,*r,x,*v;
882: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
885: MatGetLocalSize(A,&s2,&s3);
886: if (ll) {
887: VecGetLocalSize(ll,&s2a);
888: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
889: VecGetArray(ll,&l);
890: for (i=0; i<m; i++) {
891: x = l[i];
892: v = mat->v + i;
893: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
894: }
895: VecRestoreArray(ll,&l);
896: PetscLogFlops(n*m);
897: }
898: if (rr) {
899: VecGetLocalSize(rr,&s3a);
900: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
901: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
902: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
903: VecGetArray(mdn->lvec,&r);
904: for (i=0; i<n; i++) {
905: x = r[i];
906: v = mat->v + i*m;
907: for (j=0; j<m; j++) { (*v++) *= x;}
908: }
909: VecRestoreArray(mdn->lvec,&r);
910: PetscLogFlops(n*m);
911: }
912: return(0);
913: }
917: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
918: {
919: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
920: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
922: PetscInt i,j;
923: PetscReal sum = 0.0;
924: PetscScalar *v = mat->v;
927: if (mdn->size == 1) {
928: MatNorm(mdn->A,type,nrm);
929: } else {
930: if (type == NORM_FROBENIUS) {
931: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
932: #if defined(PETSC_USE_COMPLEX)
933: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
934: #else
935: sum += (*v)*(*v); v++;
936: #endif
937: }
938: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);
939: *nrm = PetscSqrtReal(*nrm);
940: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
941: } else if (type == NORM_1) {
942: PetscReal *tmp,*tmp2;
943: PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);
944: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
945: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
946: *nrm = 0.0;
947: v = mat->v;
948: for (j=0; j<mdn->A->cmap->n; j++) {
949: for (i=0; i<mdn->A->rmap->n; i++) {
950: tmp[j] += PetscAbsScalar(*v); v++;
951: }
952: }
953: MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);
954: for (j=0; j<A->cmap->N; j++) {
955: if (tmp2[j] > *nrm) *nrm = tmp2[j];
956: }
957:       PetscFree2(tmp,tmp2);
958: PetscLogFlops(A->cmap->n*A->rmap->n);
959: } else if (type == NORM_INFINITY) { /* max row norm */
960: PetscReal ntemp;
961: MatNorm(mdn->A,type,&ntemp);
962: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);
963: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
964: }
965: return(0);
966: }
970: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
971: {
972: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
973: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
974: Mat B;
975: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
977: PetscInt j,i;
978: PetscScalar *v;
981: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
982: if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
983: MatCreate(((PetscObject)A)->comm,&B);
984: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
985: MatSetType(B,((PetscObject)A)->type_name);
986: MatMPIDenseSetPreallocation(B,PETSC_NULL);
987: } else {
988: B = *matout;
989: }
991: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
992: PetscMalloc(m*sizeof(PetscInt),&rwork);
993: for (i=0; i<m; i++) rwork[i] = rstart + i;
994: for (j=0; j<n; j++) {
995: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
996: v += m;
997: }
998: PetscFree(rwork);
999: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1000: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1001: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1002: *matout = B;
1003: } else {
1004: MatHeaderMerge(A,B);
1005: }
1006: return(0);
1007: }
1010: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1015: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1016: {
1020: MatMPIDenseSetPreallocation(A,0);
1021: return(0);
1022: }
1024: #if defined(PETSC_HAVE_PLAPACK)
1028: PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1029: {
1030: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1032: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1033: PetscScalar *array;
1034: PetscReal one = 1.0;
1037: /* Copy A into F->lu->A */
1038: PLA_Obj_set_to_zero(lu->A);
1039: PLA_API_begin();
1040: PLA_Obj_API_open(lu->A);
1041: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1042: MatGetArray(A,&array);
1043: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1044: MatRestoreArray(A,&array);
1045: PLA_Obj_API_close(lu->A);
1046: PLA_API_end();
1047: lu->rstart = rstart;
1048: return(0);
1049: }
1053: PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1054: {
1055: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1057: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1058: PetscScalar *array;
1059: PetscReal one = 1.0;
1062: /* Copy F into A->lu->A */
1063: MatZeroEntries(A);
1064: PLA_API_begin();
1065: PLA_Obj_API_open(lu->A);
1066: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1067: MatGetArray(A,&array);
1068: PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);
1069: MatRestoreArray(A,&array);
1070: PLA_Obj_API_close(lu->A);
1071: PLA_API_end();
1072: lu->rstart = rstart;
1073: return(0);
1074: }
1078: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1079: {
1081: Mat_Plapack *luA = (Mat_Plapack*)A->spptr;
1082: Mat_Plapack *luB = (Mat_Plapack*)B->spptr;
1083: Mat_Plapack *luC = (Mat_Plapack*)C->spptr;
1084: PLA_Obj alpha = NULL,beta = NULL;
1087: MatMPIDenseCopyToPlapack(A,A);
1088: MatMPIDenseCopyToPlapack(B,B);
1090: /*
1091: PLA_Global_show("A = ",luA->A,"%g ","");
1092: PLA_Global_show("B = ",luB->A,"%g ","");
1093: */
1095: /* do the multiply in PLA */
1096: PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);
1097: PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);
1098: CHKMEMQ;
1100: PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* */
1101: CHKMEMQ;
1102: PLA_Obj_free(&alpha);
1103: PLA_Obj_free(&beta);
1105: /*
1106: PLA_Global_show("C = ",luC->A,"%g ","");
1107: */
1108: MatMPIDenseCopyFromPlapack(C,C);
1109: return(0);
1110: }
1114: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1115: {
1117: PetscInt m=A->rmap->n,n=B->cmap->n;
1118: Mat Cmat;
1121: if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1122:   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1123: MatCreate(((PetscObject)B)->comm,&Cmat);
1124: MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);
1125: MatSetType(Cmat,MATMPIDENSE);
1126: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1127: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
1129: *C = Cmat;
1130: return(0);
1131: }
1135: PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1136: {
1140: if (scall == MAT_INITIAL_MATRIX){
1141: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1142: }
1143: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1144: return(0);
1145: }
1149: PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1150: {
1151: MPI_Comm comm = ((PetscObject)A)->comm;
1152: Mat_Plapack *lu = (Mat_Plapack*)A->spptr;
1154: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1155: PetscScalar *array;
1156: PetscReal one = 1.0;
1157:   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1158: PLA_Obj v_pla = NULL;
1159: PetscScalar *loc_buf;
1160: Vec loc_x;
1161:
1163: MPI_Comm_size(comm,&size);
1164: MPI_Comm_rank(comm,&rank);
1166: /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1167: PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1168: PLA_Obj_set_to_zero(v_pla);
1170: /* Copy b into rhs_pla */
1171: PLA_API_begin();
1172: PLA_Obj_API_open(v_pla);
1173: VecGetArray(b,&array);
1174: PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1175: VecRestoreArray(b,&array);
1176: PLA_Obj_API_close(v_pla);
1177: PLA_API_end();
1179: if (A->factortype == MAT_FACTOR_LU){
1180: /* Apply the permutations to the right hand sides */
1181: PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1183: /* Solve L y = b, overwriting b with y */
1184: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1186: /* Solve U x = y (=b), overwriting b with x */
1187: PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1188: } else { /* MAT_FACTOR_CHOLESKY */
1189: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1190: PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1191: PLA_NONUNIT_DIAG,lu->A,v_pla);
1192: }
1194: /* Copy PLAPACK x into Petsc vector x */
1195: PLA_Obj_local_length(v_pla, &loc_m);
1196: PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1197: PLA_Obj_local_stride(v_pla, &loc_stride);
1198: /*
1199: PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1200: */
1201: VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);
1202: if (!lu->pla_solved){
1203:
1204: PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1205: PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1207: /* Create IS and cts for VecScatterring */
1208: PLA_Obj_local_length(v_pla, &loc_m);
1209: PLA_Obj_local_stride(v_pla, &loc_stride);
1210: PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);
1212: rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1213: for (i=0; i<loc_m; i+=lu->nb){
1214: j = 0;
1215: while (j < lu->nb && i+j < loc_m){
1216: idx_petsc[i+j] = rstart + j; j++;
1217: }
1218: rstart += size*lu->nb;
1219: }
1221: for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1223: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);
1224: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);
1225: PetscFree2(idx_pla,idx_petsc);
1226: VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);
1227: }
1228: VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1229: VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1230:
1231: /* Free data */
1232: VecDestroy(&loc_x);
1233: PLA_Obj_free(&v_pla);
1235: lu->pla_solved = PETSC_TRUE;
1236: return(0);
1237: }
1241: PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1242: {
1243: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1245: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1246: PetscInt info_pla=0;
1247: PetscScalar *array,one = 1.0;
1250: if (lu->mstruct == SAME_NONZERO_PATTERN){
1251: PLA_Obj_free(&lu->A);
1252: PLA_Obj_free (&lu->pivots);
1253: }
1254: /* Create PLAPACK matrix object */
1255: lu->A = NULL; lu->pivots = NULL;
1256: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1257: PLA_Obj_set_to_zero(lu->A);
1258: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1260: /* Copy A into lu->A */
1261: PLA_API_begin();
1262: PLA_Obj_API_open(lu->A);
1263: MatGetOwnershipRange(A,&rstart,&rend);
1264: MatGetArray(A,&array);
1265: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1266: MatRestoreArray(A,&array);
1267: PLA_Obj_API_close(lu->A);
1268: PLA_API_end();
1270: /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1271: info_pla = PLA_LU(lu->A,lu->pivots);
1272: if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1274: lu->rstart = rstart;
1275: lu->mstruct = SAME_NONZERO_PATTERN;
1276: F->ops->solve = MatSolve_MPIDense;
1277: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1278: return(0);
1279: }
1283: PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1284: {
1285: Mat_Plapack *lu = (Mat_Plapack*)F->spptr;
1287: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1288: PetscInt info_pla=0;
1289: PetscScalar *array,one = 1.0;
1292: if (lu->mstruct == SAME_NONZERO_PATTERN){
1293: PLA_Obj_free(&lu->A);
1294: }
1295: /* Create PLAPACK matrix object */
1296: lu->A = NULL;
1297: lu->pivots = NULL;
1298: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1300: /* Copy A into lu->A */
1301: PLA_API_begin();
1302: PLA_Obj_API_open(lu->A);
1303: MatGetOwnershipRange(A,&rstart,&rend);
1304: MatGetArray(A,&array);
1305: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1306: MatRestoreArray(A,&array);
1307: PLA_Obj_API_close(lu->A);
1308: PLA_API_end();
1310: /* Factor P A -> Chol */
1311: info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1312: if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1314: lu->rstart = rstart;
1315: lu->mstruct = SAME_NONZERO_PATTERN;
1316: F->ops->solve = MatSolve_MPIDense;
1317: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1318: return(0);
1319: }
1321: /* Note the Petsc perm permutation is ignored */
1324: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1325: {
1327: PetscBool issymmetric,set;
1330: MatIsSymmetricKnown(A,&set,&issymmetric);
1331: if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1332: F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
1333: return(0);
1334: }
1336: /* Note the Petsc r and c permutations are ignored */
1339: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1340: {
1342: PetscInt M = A->rmap->N;
1343: Mat_Plapack *lu;
1346: lu = (Mat_Plapack*)F->spptr;
1347: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1348: F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
1349: return(0);
1350: }
1355: PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1356: {
1358: *type = MATSOLVERPLAPACK;
1359: return(0);
1360: }
1366: PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1367: {
1369: Mat_Plapack *lu;
1370: PetscMPIInt size;
1371: PetscInt M=A->rmap->N;
1374: /* Create the factorization matrix */
1375: MatCreate(((PetscObject)A)->comm,F);
1376: MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1377: MatSetType(*F,((PetscObject)A)->type_name);
1378: PetscNewLog(*F,Mat_Plapack,&lu);
1379: (*F)->spptr = (void*)lu;
1381: /* Set default Plapack parameters */
1382: MPI_Comm_size(((PetscObject)A)->comm,&size);
1383: lu->nb = M/size;
1384: if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1385:
1386: /* Set runtime options */
1387: PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");
1388: PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);
1389: PetscOptionsEnd();
1391: /* Create object distribution template */
1392: lu->templ = NULL;
1393: PLA_Temp_create(lu->nb, 0, &lu->templ);
1395: /* Set the datatype */
1396: #if defined(PETSC_USE_COMPLEX)
1397: lu->datatype = MPI_DOUBLE_COMPLEX;
1398: #else
1399: lu->datatype = MPI_DOUBLE;
1400: #endif
1402: PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1405:   lu->pla_solved  = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1406: lu->mstruct = DIFFERENT_NONZERO_PATTERN;
1408: if (ftype == MAT_FACTOR_LU) {
1409: (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1410: } else if (ftype == MAT_FACTOR_CHOLESKY) {
1411: (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1412: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1413: (*F)->factortype = ftype;
1414: PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);
1415: return(0);
1416: }
1418: #endif
1422: PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1423: {
1424: #if defined(PETSC_HAVE_PLAPACK)
1426: #endif
1429: #if defined(PETSC_HAVE_PLAPACK)
1430: MatGetFactor_mpidense_plapack(A,ftype,F);
1431: #else
1432: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1433: #endif
1434: return(0);
1435: }
1439: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1440: {
1442: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1445: MatAXPY(A->A,alpha,B->A,str);
1446: return(0);
1447: }
1451: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1452: {
1453: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1457: MatConjugate(a->A);
1458: return(0);
1459: }
1463: PetscErrorCode MatRealPart_MPIDense(Mat A)
1464: {
1465: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1469: MatRealPart(a->A);
1470: return(0);
1471: }
1475: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1476: {
1477: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1481: MatImaginaryPart(a->A);
1482: return(0);
1483: }
1488: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1489: {
1491: PetscInt i,n;
1492: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1493: PetscReal *work;
1496: MatGetSize(A,PETSC_NULL,&n);
1497: PetscMalloc(n*sizeof(PetscReal),&work);
1498: MatGetColumnNorms_SeqDense(a->A,type,work);
1499: if (type == NORM_2) {
1500: for (i=0; i<n; i++) work[i] *= work[i];
1501: }
1502: if (type == NORM_INFINITY) {
1503: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1504: } else {
1505: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1506: }
1507: PetscFree(work);
1508: if (type == NORM_2) {
1509: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1510: }
1511: return(0);
1512: }
1514: /* -------------------------------------------------------------------*/
1515: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1516: MatGetRow_MPIDense,
1517: MatRestoreRow_MPIDense,
1518: MatMult_MPIDense,
1519: /* 4*/ MatMultAdd_MPIDense,
1520: MatMultTranspose_MPIDense,
1521: MatMultTransposeAdd_MPIDense,
1522: 0,
1523: 0,
1524: 0,
1525: /*10*/ 0,
1526: 0,
1527: 0,
1528: 0,
1529: MatTranspose_MPIDense,
1530: /*15*/ MatGetInfo_MPIDense,
1531: MatEqual_MPIDense,
1532: MatGetDiagonal_MPIDense,
1533: MatDiagonalScale_MPIDense,
1534: MatNorm_MPIDense,
1535: /*20*/ MatAssemblyBegin_MPIDense,
1536: MatAssemblyEnd_MPIDense,
1537: MatSetOption_MPIDense,
1538: MatZeroEntries_MPIDense,
1539: /*24*/ MatZeroRows_MPIDense,
1540: 0,
1541: 0,
1542: 0,
1543: 0,
1544: /*29*/ MatSetUpPreallocation_MPIDense,
1545: 0,
1546: 0,
1547: MatGetArray_MPIDense,
1548: MatRestoreArray_MPIDense,
1549: /*34*/ MatDuplicate_MPIDense,
1550: 0,
1551: 0,
1552: 0,
1553: 0,
1554: /*39*/ MatAXPY_MPIDense,
1555: MatGetSubMatrices_MPIDense,
1556: 0,
1557: MatGetValues_MPIDense,
1558: 0,
1559: /*44*/ 0,
1560: MatScale_MPIDense,
1561: 0,
1562: 0,
1563: 0,
1564: /*49*/ 0,
1565: 0,
1566: 0,
1567: 0,
1568: 0,
1569: /*54*/ 0,
1570: 0,
1571: 0,
1572: 0,
1573: 0,
1574: /*59*/ MatGetSubMatrix_MPIDense,
1575: MatDestroy_MPIDense,
1576: MatView_MPIDense,
1577: 0,
1578: 0,
1579: /*64*/ 0,
1580: 0,
1581: 0,
1582: 0,
1583: 0,
1584: /*69*/ 0,
1585: 0,
1586: 0,
1587: 0,
1588: 0,
1589: /*74*/ 0,
1590: 0,
1591: 0,
1592: 0,
1593: 0,
1594: /*79*/ 0,
1595: 0,
1596: 0,
1597: 0,
1598: /*83*/ MatLoad_MPIDense,
1599: 0,
1600: 0,
1601: 0,
1602: 0,
1603: 0,
1604: /*89*/
1605: #if defined(PETSC_HAVE_PLAPACK)
1606: MatMatMult_MPIDense_MPIDense,
1607: MatMatMultSymbolic_MPIDense_MPIDense,
1608: MatMatMultNumeric_MPIDense_MPIDense,
1609: #else
1610: 0,
1611: 0,
1612: 0,
1613: #endif
1614: 0,
1615: 0,
1616: /*94*/ 0,
1617: 0,
1618: 0,
1619: 0,
1620: 0,
1621: /*99*/ 0,
1622: 0,
1623: 0,
1624: MatConjugate_MPIDense,
1625: 0,
1626: /*104*/0,
1627: MatRealPart_MPIDense,
1628: MatImaginaryPart_MPIDense,
1629: 0,
1630: 0,
1631: /*109*/0,
1632: 0,
1633: 0,
1634: 0,
1635: 0,
1636: /*114*/0,
1637: 0,
1638: 0,
1639: 0,
1640: 0,
1641: /*119*/0,
1642: 0,
1643: 0,
1644: 0,
1645: 0,
1646: /*124*/0,
1647: MatGetColumnNorms_MPIDense
1648: };
1653: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1654: {
1655: Mat_MPIDense *a;
1659: mat->preallocated = PETSC_TRUE;
1660:   /* Note: for now, when data is provided, this assumes the user has correctly
1661:      allocated the local dense storage space; we should add error checking. */
1663: a = (Mat_MPIDense*)mat->data;
1664: PetscLayoutSetBlockSize(mat->rmap,1);
1665: PetscLayoutSetBlockSize(mat->cmap,1);
1666: PetscLayoutSetUp(mat->rmap);
1667: PetscLayoutSetUp(mat->cmap);
1668: a->nvec = mat->cmap->n;
1670: MatCreate(PETSC_COMM_SELF,&a->A);
1671: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1672: MatSetType(a->A,MATSEQDENSE);
1673: MatSeqDenseSetPreallocation(a->A,data);
1674: PetscLogObjectParent(mat,a->A);
1675: return(0);
1676: }
1679: /*MC
1680:   MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

1682:   Run ./configure with the option --download-plapack to make this solver package available
1685: Options Database Keys:
1686: . -mat_plapack_nprows <n> - number of rows in processor partition
1687: . -mat_plapack_npcols <n> - number of columns in processor partition
1688: . -mat_plapack_nb <n> - block size of template vector
1689: . -mat_plapack_nb_alg <n> - algorithmic block size
1690: - -mat_plapack_ckerror <n> - error checking flag
1692: Level: intermediate
1694: .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1696: M*/
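/*
   Illustrative usage sketch (not part of the mpidense.c source): a direct LU solve of a
   MATMPIDENSE system through the PLAPACK interface described above. Assumes PETSc was
   configured with --download-plapack; the function name ExamplePlapackLUSolve() and the
   pre-built A, b, x are assumptions for the example. The permutation arguments are passed
   as PETSC_NULL because the symbolic routines in this file ignore them.
*/
PetscErrorCode ExamplePlapackLUSolve(Mat A,Vec b,Vec x)
{
  Mat           F;
  MatFactorInfo info;

  MatFactorInfoInitialize(&info);
  MatGetFactor(A,MATSOLVERPLAPACK,MAT_FACTOR_LU,&F);   /* dispatches to MatGetFactor_mpidense_plapack() */
  MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);
  MatLUFactorNumeric(F,A,&info);
  MatSolve(F,b,x);
  MatDestroy(&F);
  return(0);
}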
1701: PetscErrorCode MatCreate_MPIDense(Mat mat)
1702: {
1703: Mat_MPIDense *a;
1707: PetscNewLog(mat,Mat_MPIDense,&a);
1708: mat->data = (void*)a;
1709: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1711: mat->insertmode = NOT_SET_VALUES;
1712: MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);
1713: MPI_Comm_size(((PetscObject)mat)->comm,&a->size);
1715: /* build cache for off array entries formed */
1716: a->donotstash = PETSC_FALSE;
1717: MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);
1719: /* stuff used for matrix vector multiply */
1720: a->lvec = 0;
1721: a->Mvctx = 0;
1722: a->roworiented = PETSC_TRUE;
1724: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1725: "MatGetDiagonalBlock_MPIDense",
1726: MatGetDiagonalBlock_MPIDense);
1727: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1728: "MatMPIDenseSetPreallocation_MPIDense",
1729: MatMPIDenseSetPreallocation_MPIDense);
1730: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1731: "MatMatMult_MPIAIJ_MPIDense",
1732: MatMatMult_MPIAIJ_MPIDense);
1733: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1734: "MatMatMultSymbolic_MPIAIJ_MPIDense",
1735: MatMatMultSymbolic_MPIAIJ_MPIDense);
1736: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1737: "MatMatMultNumeric_MPIAIJ_MPIDense",
1738: MatMatMultNumeric_MPIAIJ_MPIDense);
1739: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1740: "MatGetFactor_mpidense_petsc",
1741: MatGetFactor_mpidense_petsc);
1742: #if defined(PETSC_HAVE_PLAPACK)
1743: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1744: "MatGetFactor_mpidense_plapack",
1745: MatGetFactor_mpidense_plapack);
1746: PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);
1747: #endif
1748: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1750: return(0);
1751: }
1754: /*MC
1755: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1757: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1758: and MATMPIDENSE otherwise.
1760: Options Database Keys:
1761: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1763: Level: beginner
1766: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1767: M*/
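/*
   Illustrative usage sketch (not part of the mpidense.c source): creating a matrix whose
   type resolves to MATSEQDENSE or MATMPIDENSE according to the communicator size, with
   -mat_type dense honored through MatSetFromOptions(). The function name and sizes are
   assumptions for the example.
*/
PetscErrorCode ExampleCreateDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);
  MatSetType(*A,MATDENSE);                     /* MATSEQDENSE on one process, MATMPIDENSE otherwise */
  MatSetFromOptions(*A);                       /* allows -mat_type dense and related options */
  MatMPIDenseSetPreallocation(*A,PETSC_NULL);  /* no-op for MATSEQDENSE; use MatSeqDenseSetPreallocation() there */
  return(0);
}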
1771: /*@C
1772: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1774: Not collective
1776: Input Parameters:
1777: . A - the matrix
1778: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1779: to control all matrix memory allocation.
1781: Notes:
1782: The dense format is fully compatible with standard Fortran 77
1783: storage by columns.
1785: The data input variable is intended primarily for Fortran programmers
1786: who wish to allocate their own matrix memory space. Most users should
1787: set data=PETSC_NULL.
1789: Level: intermediate
1791: .keywords: matrix,dense, parallel
1793: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1794: @*/
1795: PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1796: {
1800: PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));
1801: return(0);
1802: }
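/*
   Illustrative usage sketch (not part of the mpidense.c source): backing a MATMPIDENSE
   matrix with a user-allocated, column-major array of size (local rows) x (global columns).
   The function name and the PetscMalloc'd buffer are assumptions for the example; the
   caller must keep the buffer alive for the life of the matrix.
*/
PetscErrorCode ExamplePreallocateWithUserArray(MPI_Comm comm,PetscInt m,PetscInt N,Mat *A)
{
  PetscScalar *data;

  PetscMalloc(m*N*sizeof(PetscScalar),&data);
  PetscMemzero(data,m*N*sizeof(PetscScalar));
  MatCreate(comm,A);
  MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DECIDE,N);
  MatSetType(*A,MATMPIDENSE);
  MatMPIDenseSetPreallocation(*A,data);  /* entries live in data, stored by columns as in Fortran */
  return(0);
}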
1806: /*@C
1807:    MatCreateMPIDense - Creates a parallel matrix in dense format.
1809: Collective on MPI_Comm
1811: Input Parameters:
1812: + comm - MPI communicator
1813: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1814: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1815: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1816: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1817: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1818: to control all matrix memory allocation.
1820: Output Parameter:
1821: . A - the matrix
1823: Notes:
1824: The dense format is fully compatible with standard Fortran 77
1825: storage by columns.
1827: The data input variable is intended primarily for Fortran programmers
1828: who wish to allocate their own matrix memory space. Most users should
1829: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1831: The user MUST specify either the local or global matrix dimensions
1832: (possibly both).
1834: Level: intermediate
1836: .keywords: matrix,dense, parallel
1838: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1839: @*/
1840: PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1841: {
1843: PetscMPIInt size;
1846: MatCreate(comm,A);
1847: MatSetSizes(*A,m,n,M,N);
1848: MPI_Comm_size(comm,&size);
1849: if (size > 1) {
1850: MatSetType(*A,MATMPIDENSE);
1851: MatMPIDenseSetPreallocation(*A,data);
1852: } else {
1853: MatSetType(*A,MATSEQDENSE);
1854: MatSeqDenseSetPreallocation(*A,data);
1855: }
1856: return(0);
1857: }
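/*
   Illustrative usage sketch (not part of the mpidense.c source): creating a parallel dense
   matrix with MatCreateMPIDense() and filling its locally owned rows. The function name
   ExampleFillMPIDense() and the chosen entry values are assumptions for the example.
*/
PetscErrorCode ExampleFillMPIDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscInt    i,j,rstart,rend;
  PetscScalar v;

  MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,A);
  MatGetOwnershipRange(*A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v = (PetscScalar)(i + j);                /* arbitrary example value */
      MatSetValues(*A,1,&i,1,&j,&v,INSERT_VALUES);
    }
  }
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}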
1861: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1862: {
1863: Mat mat;
1864: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1868: *newmat = 0;
1869: MatCreate(((PetscObject)A)->comm,&mat);
1870: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1871: MatSetType(mat,((PetscObject)A)->type_name);
1872: a = (Mat_MPIDense*)mat->data;
1873: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1875: mat->factortype = A->factortype;
1876: mat->assembled = PETSC_TRUE;
1877: mat->preallocated = PETSC_TRUE;
1879: a->size = oldmat->size;
1880: a->rank = oldmat->rank;
1881: mat->insertmode = NOT_SET_VALUES;
1882: a->nvec = oldmat->nvec;
1883: a->donotstash = oldmat->donotstash;
1885: PetscLayoutReference(A->rmap,&mat->rmap);
1886: PetscLayoutReference(A->cmap,&mat->cmap);
1888: MatSetUpMultiply_MPIDense(mat);
1889: MatDuplicate(oldmat->A,cpvalues,&a->A);
1890: PetscLogObjectParent(mat,a->A);
1892: *newmat = mat;
1893: return(0);
1894: }
1898: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1899: {
1901: PetscMPIInt rank,size;
1902: PetscInt *rowners,i,m,nz,j;
1903: PetscScalar *array,*vals,*vals_ptr;
1906: MPI_Comm_rank(comm,&rank);
1907: MPI_Comm_size(comm,&size);
1909: /* determine ownership of all rows */
1910: if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1911: else m = newmat->rmap->n;
1912: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1913: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1914: rowners[0] = 0;
1915: for (i=2; i<=size; i++) {
1916: rowners[i] += rowners[i-1];
1917: }
1919: if (!sizesset) {
1920: MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1921: }
1922: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
1923: MatGetArray(newmat,&array);
1925: if (!rank) {
1926: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1928: /* read in my part of the matrix numerical values */
1929: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1930:
1931:     /* insert into matrix by row (this is why we cannot read directly into array) */
1932: vals_ptr = vals;
1933: for (i=0; i<m; i++) {
1934: for (j=0; j<N; j++) {
1935: array[i + j*m] = *vals_ptr++;
1936: }
1937: }
1939: /* read in other processors and ship out */
1940: for (i=1; i<size; i++) {
1941: nz = (rowners[i+1] - rowners[i])*N;
1942: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1943: MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1944: }
1945: } else {
1946: /* receive numeric values */
1947: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1949: /* receive message of values*/
1950: MPILong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1952:     /* insert into matrix by row (this is why we cannot read directly into array) */
1953: vals_ptr = vals;
1954: for (i=0; i<m; i++) {
1955: for (j=0; j<N; j++) {
1956: array[i + j*m] = *vals_ptr++;
1957: }
1958: }
1959: }
1960: PetscFree(rowners);
1961: PetscFree(vals);
1962: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1963: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1964: return(0);
1965: }
1969: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1970: {
1971: PetscScalar *vals,*svals;
1972: MPI_Comm comm = ((PetscObject)viewer)->comm;
1973: MPI_Status status;
1974: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1975: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1976: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1977: PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1978: int fd;
1982: MPI_Comm_size(comm,&size);
1983: MPI_Comm_rank(comm,&rank);
1984: if (!rank) {
1985: PetscViewerBinaryGetDescriptor(viewer,&fd);
1986: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1987: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1988: }
1989: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1991: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1992: M = header[1]; N = header[2]; nz = header[3];
1994: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1995: if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1996: if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1997:
1998: /* If global sizes are set, check if they are consistent with that given in the file */
1999: if (sizesset) {
2000: MatGetSize(newmat,&grows,&gcols);
2001: }
2002:   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%d) and input matrix has (%d)",M,grows);
2003:   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%d) and input matrix has (%d)",N,gcols);
2005: /*
2006: Handle case where matrix is stored on disk as a dense matrix
2007: */
2008: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
2009: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);
2010: return(0);
2011: }
2013: /* determine ownership of all rows */
2014: if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
2015: else m = PetscMPIIntCast(newmat->rmap->n);
2016: PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);
2017: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
2018: rowners[0] = 0;
2019: for (i=2; i<=size; i++) {
2020: rowners[i] += rowners[i-1];
2021: }
2022: rstart = rowners[rank];
2023: rend = rowners[rank+1];
2025: /* distribute row lengths to all processors */
2026: PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);
2027: if (!rank) {
2028: PetscMalloc(M*sizeof(PetscInt),&rowlengths);
2029: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2030: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
2031: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2032: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
2033: PetscFree(sndcounts);
2034: } else {
2035: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
2036: }
2038: if (!rank) {
2039: /* calculate the number of nonzeros on each processor */
2040: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2041: PetscMemzero(procsnz,size*sizeof(PetscInt));
2042: for (i=0; i<size; i++) {
2043: for (j=rowners[i]; j< rowners[i+1]; j++) {
2044: procsnz[i] += rowlengths[j];
2045: }
2046: }
2047: PetscFree(rowlengths);
2049: /* determine max buffer needed and allocate it */
2050: maxnz = 0;
2051: for (i=0; i<size; i++) {
2052: maxnz = PetscMax(maxnz,procsnz[i]);
2053: }
2054: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2056: /* read in my part of the matrix column indices */
2057: nz = procsnz[0];
2058: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2059: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2061:     /* read in everyone else's part and ship it off */
2062: for (i=1; i<size; i++) {
2063: nz = procsnz[i];
2064: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2065: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2066: }
2067: PetscFree(cols);
2068: } else {
2069: /* determine buffer space needed for message */
2070: nz = 0;
2071: for (i=0; i<m; i++) {
2072: nz += ourlens[i];
2073: }
2074: PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);
2076:     /* receive message of column indices */
2077: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2078: MPI_Get_count(&status,MPIU_INT,&maxnz);
2079:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2080: }
2082:   /* loop over local rows, determining number of off-diagonal entries */
2083: PetscMemzero(offlens,m*sizeof(PetscInt));
2084: jj = 0;
2085: for (i=0; i<m; i++) {
2086: for (j=0; j<ourlens[i]; j++) {
2087: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2088: jj++;
2089: }
2090: }
2092: /* create our matrix */
2093: for (i=0; i<m; i++) {
2094: ourlens[i] -= offlens[i];
2095: }
2097: if (!sizesset) {
2098: MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
2099: }
2100: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
2101: for (i=0; i<m; i++) {
2102: ourlens[i] += offlens[i];
2103: }
2105: if (!rank) {
2106: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
2108:     /* read in my part of the matrix's numerical values */
2109: nz = procsnz[0];
2110: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2111:
2112: /* insert into matrix */
2113: jj = rstart;
2114: smycols = mycols;
2115: svals = vals;
2116: for (i=0; i<m; i++) {
2117: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2118: smycols += ourlens[i];
2119: svals += ourlens[i];
2120: jj++;
2121: }
2123:     /* read in the other processes' parts and ship them out */
2124: for (i=1; i<size; i++) {
2125: nz = procsnz[i];
2126: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2127: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
2128: }
2129: PetscFree(procsnz);
2130: } else {
2131: /* receive numeric values */
2132: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2134:     /* receive message of values */
2135: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
2136: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2137:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2139: /* insert into matrix */
2140: jj = rstart;
2141: smycols = mycols;
2142: svals = vals;
2143: for (i=0; i<m; i++) {
2144: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2145: smycols += ourlens[i];
2146: svals += ourlens[i];
2147: jj++;
2148: }
2149: }
2150: PetscFree2(ourlens,offlens);
2151: PetscFree(vals);
2152: PetscFree(mycols);
2153: PetscFree(rowners);
2155: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
2156: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
2157: return(0);
2158: }
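/*
   Illustrative sketch (not part of the library source): loading a parallel
   dense matrix from a PETSc binary file, which ends up in MatLoad_MPIDense()
   above.  The file name "matrix.dat" is a placeholder; error checking is
   elided.

       Mat         A;
       PetscViewer viewer;
       PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetType(A,MATMPIDENSE);
       MatLoad(A,viewer);
       PetscViewerDestroy(&viewer);
       MatDestroy(&A);
*/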
2162: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2163: {
2164: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2165: Mat a,b;
2166: PetscBool flg;
2170: a = matA->A;
2171: b = matB->A;
2172: MatEqual(a,b,&flg);
2173: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2174: return(0);
2175: }
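/*
   Illustrative sketch (not part of the library source): comparing two parallel
   dense matrices A and B; the reduction in MatEqual_MPIDense() above makes the
   result identical on every process of the matrices' communicator.

       PetscBool eq;
       MatEqual(A,B,&eq);
       if (!eq) PetscPrintf(PETSC_COMM_WORLD,"matrices differ\n");
*/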
2177: #if defined(PETSC_HAVE_PLAPACK)
2181: /*@C
2182: PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2183: Level: developer
2185: .keywords: Petsc, destroy, package, PLAPACK
2186: .seealso: PetscFinalize()
2187: @*/
2188: PetscErrorCode PetscPLAPACKFinalizePackage(void)
2189: {
2193: PLA_Finalize();
2194: return(0);
2195: }
2199: /*@C
2200:   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2201:   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2203: Input Parameter:
2204: . comm - the communicator the matrix lives on
2206: Level: developer
2208:   Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live on the same
2209:   communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2210:   the matrices with which PLAPACK is initialized (that is, the first matrices created) live on subcommunicators of
2211:   MPI_COMM_WORLD, those subcommunicators cannot overlap.
2213: .keywords: Petsc, initialize, package, PLAPACK
2214: .seealso: PetscSysInitializePackage(), PetscInitialize()
2215: @*/
2216: PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm)
2217: {
2218: PetscMPIInt size;
2222: if (!PLA_Initialized(PETSC_NULL)) {
2224: MPI_Comm_size(comm,&size);
2225: Plapack_nprows = 1;
2226: Plapack_npcols = size;
2227:
2228: PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");
2229: PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);
2230: PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);
2231: #if defined(PETSC_USE_DEBUG)
2232: Plapack_ierror = 3;
2233: #else
2234: Plapack_ierror = 0;
2235: #endif
2236: PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);
2237: if (Plapack_ierror){
2238: PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
2239: } else {
2240: PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
2241: }
2242:
2243: Plapack_nb_alg = 0;
2244: PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);
2245: if (Plapack_nb_alg) {
2246: pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);
2247: }
2248: PetscOptionsEnd();
2250: PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);
2251: PLA_Init(Plapack_comm_2d);
2252: PetscRegisterFinalize(PetscPLAPACKFinalizePackage);
2253: }
2254: return(0);
2255: }
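/*
   Illustrative usage (not part of the library source): the options registered
   above select the PLAPACK 2D process mesh and algorithmic block size at run
   time; e.g., for a hypothetical executable ./app run on 4 processes:

       mpiexec -n 4 ./app -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_nb_alg 64
*/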
2257: #endif