Actual source code: mpidense.c
1: #define PETSCMAT_DLL
3: /*
4: Basic functions for parallel dense matrices.
5: */
7:
8: #include "../src/mat/impls/dense/mpi/mpidense.h"
9: #if defined(PETSC_HAVE_PLAPACK)
10: static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11: static MPI_Comm Plapack_comm_2d;
12: #endif
16: /*@
18: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
19: matrix that represents the operator. For sequential matrices it returns itself.
21: Input Parameter:
22: . A - the Seq or MPI dense matrix
24: Output Parameter:
25: . B - the inner matrix
27: Level: intermediate
29: @*/
30: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31: {
32: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
34: PetscTruth flg;
37: PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
38: if (flg) {
39: *B = mat->A;
40: } else {
41: *B = A;
42: }
43: return(0);
44: }
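/*
   Usage sketch: MatDenseGetLocalMatrix() gives access to the on-process MATSEQDENSE block
   without copying, so its entries can be inspected or modified locally. Assumes A is an
   assembled MATMPIDENSE (or MATSEQDENSE) matrix.

      Mat            A,Aloc;
      PetscScalar    *array;
      PetscErrorCode ierr;

      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);   // Aloc is the local block, no copy
      ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);          // entries stored by columns (Fortran order)
      // ... read or modify the local entries here ...
      ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/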
48: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49: {
50: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
52: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
55: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
56: lrow = row - rstart;
57: MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
58: return(0);
59: }
63: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64: {
68: if (idx) {PetscFree(*idx);}
69: if (v) {PetscFree(*v);}
70: return(0);
71: }
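/*
   Usage sketch: MatGetRow() on a MATMPIDENSE matrix supports only locally owned rows (see the
   check above), so a caller loops over the ownership range. Assumes A is an assembled parallel
   dense matrix.

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      PetscErrorCode    ierr;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        // ... use ncols, cols[], vals[] ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/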
76: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77: {
78: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
80: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
81: PetscScalar *array;
82: MPI_Comm comm;
85: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
87: /* The reuse aspect is not implemented efficiently */
88: if (reuse) { MatDestroy(*B);}
90: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
91: MatGetArray(mdn->A,&array);
92: MatCreate(comm,B);
93: MatSetSizes(*B,m,m,m,m);
94: MatSetType(*B,((PetscObject)mdn->A)->type_name);
95: MatSeqDenseSetPreallocation(*B,array+m*rstart);
96: MatRestoreArray(mdn->A,&array);
97: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
98: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
99:
100: *iscopy = PETSC_TRUE;
101: return(0);
102: }
107: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108: {
109: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
111: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112: PetscTruth roworiented = A->roworiented;
115: for (i=0; i<m; i++) {
116: if (idxm[i] < 0) continue;
117: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118: if (idxm[i] >= rstart && idxm[i] < rend) {
119: row = idxm[i] - rstart;
120: if (roworiented) {
121: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
122: } else {
123: for (j=0; j<n; j++) {
124: if (idxn[j] < 0) continue;
125: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
127: }
128: }
129: } else {
130: if (!A->donotstash) {
131: if (roworiented) {
132: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
133: } else {
134: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
135: }
136: }
137: }
138: }
139: return(0);
140: }
144: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145: {
146: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
148: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
151: for (i=0; i<m; i++) {
152: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154: if (idxm[i] >= rstart && idxm[i] < rend) {
155: row = idxm[i] - rstart;
156: for (j=0; j<n; j++) {
157: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158: if (idxn[j] >= mat->cmap->N) {
159: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160: }
161: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
162: }
163: } else {
164: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165: }
166: }
167: return(0);
168: }
172: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173: {
174: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
178: MatGetArray(a->A,array);
179: return(0);
180: }
184: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185: {
186: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
187: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
189: PetscInt i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190: const PetscInt *irow,*icol;
191: PetscScalar *av,*bv,*v = lmat->v;
192: Mat newmat;
195: ISGetIndices(isrow,&irow);
196: ISGetIndices(iscol,&icol);
197: ISGetLocalSize(isrow,&nrows);
198: ISGetLocalSize(iscol,&ncols);
200: /* No parallel redistribution currently supported! Should really check each index set
201: to confirm that it is OK. ... Currently supports only a submatrix with the same
202: partitioning as the original matrix! */
204: MatGetLocalSize(A,&nlrows,&nlcols);
205: MatGetOwnershipRange(A,&rstart,&rend);
206:
207: /* Check submatrix call */
208: if (scall == MAT_REUSE_MATRIX) {
209: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210: /* Really need to test rows and column sizes! */
211: newmat = *B;
212: } else {
213: /* Create and fill new matrix */
214: MatCreate(((PetscObject)A)->comm,&newmat);
215: MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);
216: MatSetType(newmat,((PetscObject)A)->type_name);
217: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
218: }
220: /* Now extract the data pointers and do the copy, column at a time */
221: newmatd = (Mat_MPIDense*)newmat->data;
222: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
223:
224: for (i=0; i<ncols; i++) {
225: av = v + ((Mat_SeqDense *)newmatd->A->data)->lda*icol[i];
226: for (j=0; j<nrows; j++) {
227: *bv++ = av[irow[j] - rstart];
228: }
229: }
231: /* Assemble the matrices so that the correct flags are set */
232: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
233: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
235: /* Free work space */
236: ISRestoreIndices(isrow,&irow);
237: ISRestoreIndices(iscol,&icol);
238: *B = newmat;
239: return(0);
240: }
244: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245: {
247: return(0);
248: }
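/*
   Usage sketch: MatGetArray() on a MATMPIDENSE matrix returns the local dense block (the array
   of mdn->A), stored by columns in Fortran order. With the default layout created by
   MatMPIDenseSetPreallocation(), the local block has A->rmap->n rows and A->cmap->N columns,
   so local entry (i,j) sits at array[i + j*m] with m the number of local rows.

      PetscInt       m;
      PetscScalar    *array;
      PetscErrorCode ierr;

      ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetArray(A,&array);CHKERRQ(ierr);
      // ... array[i + j*m] is local entry (i,j) ...
      ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/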
252: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253: {
254: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
255: MPI_Comm comm = ((PetscObject)mat)->comm;
257: PetscInt nstash,reallocs;
258: InsertMode addv;
261: /* make sure all processors are either in INSERTMODE or ADDMODE */
262: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
263: if (addv == (ADD_VALUES|INSERT_VALUES)) {
264: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265: }
266: mat->insertmode = addv; /* in case this processor had no cache */
268: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
269: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
270: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
271: return(0);
272: }
276: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277: {
278: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
279: PetscErrorCode ierr;
280: PetscInt i,*row,*col,flg,j,rstart,ncols;
281: PetscMPIInt n;
282: PetscScalar *val;
283: InsertMode addv=mat->insertmode;
286: /* wait on receives */
287: while (1) {
288: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
289: if (!flg) break;
290:
291: for (i=0; i<n;) {
292: /* Now identify the consecutive vals belonging to the same row */
293: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294: if (j < n) ncols = j-i;
295: else ncols = n-i;
296: /* Now assemble all these values with a single function call */
297: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
298: i = j;
299: }
300: }
301: MatStashScatterEnd_Private(&mat->stash);
302:
303: MatAssemblyBegin(mdn->A,mode);
304: MatAssemblyEnd(mdn->A,mode);
306: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307: MatSetUpMultiply_MPIDense(mat);
308: }
309: return(0);
310: }
314: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315: {
317: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
320: MatZeroEntries(l->A);
321: return(0);
322: }
324: /* the code does not do the diagonal entries correctly unless the
325: matrix is square and the column and row ownerships are identical.
326: This is a BUG. The only way to fix it seems to be to access
327: mdn->A and mdn->B directly and not through the MatZeroRows()
328: routine.
329: */
332: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333: {
334: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
336: PetscInt i,*owners = A->rmap->range;
337: PetscInt *nprocs,j,idx,nsends;
338: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
339: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340: PetscInt *lens,*lrows,*values;
341: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
342: MPI_Comm comm = ((PetscObject)A)->comm;
343: MPI_Request *send_waits,*recv_waits;
344: MPI_Status recv_status,*send_status;
345: PetscTruth found;
348: /* first count number of contributors to each processor */
349: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
350: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
351: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
352: for (i=0; i<N; i++) {
353: idx = rows[i];
354: found = PETSC_FALSE;
355: for (j=0; j<size; j++) {
356: if (idx >= owners[j] && idx < owners[j+1]) {
357: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358: }
359: }
360: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361: }
362: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
364: /* inform other processors of number of messages and max length*/
365: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
367: /* post receives: */
368: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
369: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
370: for (i=0; i<nrecvs; i++) {
371: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
372: }
374: /* do sends:
375: 1) starts[i] gives the starting index in svalues for stuff going to
376: the ith processor
377: */
378: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
379: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
380: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
381: starts[0] = 0;
382: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383: for (i=0; i<N; i++) {
384: svalues[starts[owner[i]]++] = rows[i];
385: }
387: starts[0] = 0;
388: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389: count = 0;
390: for (i=0; i<size; i++) {
391: if (nprocs[2*i+1]) {
392: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
393: }
394: }
395: PetscFree(starts);
397: base = owners[rank];
399: /* wait on receives */
400: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
401: source = lens + nrecvs;
402: count = nrecvs; slen = 0;
403: while (count) {
404: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
405: /* unpack receives into our local space */
406: MPI_Get_count(&recv_status,MPIU_INT,&n);
407: source[imdex] = recv_status.MPI_SOURCE;
408: lens[imdex] = n;
409: slen += n;
410: count--;
411: }
412: PetscFree(recv_waits);
413:
414: /* move the data into the send scatter */
415: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
416: count = 0;
417: for (i=0; i<nrecvs; i++) {
418: values = rvalues + i*nmax;
419: for (j=0; j<lens[i]; j++) {
420: lrows[count++] = values[j] - base;
421: }
422: }
423: PetscFree(rvalues);
424: PetscFree(lens);
425: PetscFree(owner);
426: PetscFree(nprocs);
427:
428: /* actually zap the local rows */
429: MatZeroRows(l->A,slen,lrows,diag);
430: PetscFree(lrows);
432: /* wait on sends */
433: if (nsends) {
434: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
435: MPI_Waitall(nsends,send_waits,send_status);
436: PetscFree(send_status);
437: }
438: PetscFree(send_waits);
439: PetscFree(svalues);
441: return(0);
442: }
446: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447: {
448: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
452: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
453: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
454: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
455: return(0);
456: }
460: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461: {
462: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
466: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
467: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
468: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
469: return(0);
470: }
474: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475: {
476: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
478: PetscScalar zero = 0.0;
481: VecSet(yy,zero);
482: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
483: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
484: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
485: return(0);
486: }
490: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491: {
492: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
496: VecCopy(yy,zz);
497: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
498: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
499: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
500: return(0);
501: }
505: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506: {
507: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
508: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
510: PetscInt len,i,n,m = A->rmap->n,radd;
511: PetscScalar *x,zero = 0.0;
512:
514: VecSet(v,zero);
515: VecGetArray(v,&x);
516: VecGetSize(v,&n);
517: if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
519: radd = A->rmap->rstart*m;
520: for (i=0; i<len; i++) {
521: x[i] = aloc->v[radd + i*m + i];
522: }
523: VecRestoreArray(v,&x);
524: return(0);
525: }
529: PetscErrorCode MatDestroy_MPIDense(Mat mat)
530: {
531: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
533: #if defined(PETSC_HAVE_PLAPACK)
534: Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr);
535: #endif
539: #if defined(PETSC_USE_LOG)
540: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541: #endif
542: MatStashDestroy_Private(&mat->stash);
543: MatDestroy(mdn->A);
544: if (mdn->lvec) {VecDestroy(mdn->lvec);}
545: if (mdn->Mvctx) {VecScatterDestroy(mdn->Mvctx);}
546: #if defined(PETSC_HAVE_PLAPACK)
547: if (lu) {
548: PLA_Obj_free(&lu->A);
549: PLA_Obj_free (&lu->pivots);
550: PLA_Temp_free(&lu->templ);
552: if (lu->is_pla) {
553: ISDestroy(lu->is_pla);
554: ISDestroy(lu->is_petsc);
555: VecScatterDestroy(lu->ctx);
556: }
557: }
558: #endif
560: PetscFree(mdn);
561: PetscObjectChangeTypeName((PetscObject)mat,0);
562: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
563: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
564: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
565: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
566: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
567: return(0);
568: }
572: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573: {
574: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
575: PetscErrorCode ierr;
576: PetscViewerFormat format;
577: int fd;
578: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
579: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
580: PetscScalar *work,*v,*vv;
581: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
582: MPI_Status status;
585: if (mdn->size == 1) {
586: MatView(mdn->A,viewer);
587: } else {
588: PetscViewerBinaryGetDescriptor(viewer,&fd);
589: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
590: MPI_Comm_size(((PetscObject)mat)->comm,&size);
592: PetscViewerGetFormat(viewer,&format);
593: if (format == PETSC_VIEWER_NATIVE) {
595: if (!rank) {
596: /* store the matrix as a dense matrix */
597: header[0] = MAT_FILE_COOKIE;
598: header[1] = mat->rmap->N;
599: header[2] = N;
600: header[3] = MATRIX_BINARY_FORMAT_DENSE;
601: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
603: /* get largest work array needed for transposing array */
604: mmax = mat->rmap->n;
605: for (i=1; i<size; i++) {
606: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607: }
608: PetscMalloc(mmax*N*sizeof(PetscScalar),&work);
610: /* write out local array, by rows */
611: m = mat->rmap->n;
612: v = a->v;
613: for (j=0; j<N; j++) {
614: for (i=0; i<m; i++) {
615: work[j + i*N] = *v++;
616: }
617: }
618: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
619: /* get largest work array to receive messages from other processes, excludes process zero */
620: mmax = 0;
621: for (i=1; i<size; i++) {
622: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623: }
624: PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);
625: for(k = 1; k < size; k++) {
626: v = vv;
627: m = mat->rmap->range[k+1] - mat->rmap->range[k];
628: MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);
630: for(j = 0; j < N; j++) {
631: for(i = 0; i < m; i++) {
632: work[j + i*N] = *v++;
633: }
634: }
635: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
636: }
637: PetscFree(work);
638: PetscFree(vv);
639: } else {
640: MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
641: }
642: } else {
643: SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
644: }
645: }
646: return(0);
647: }
651: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652: {
653: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
654: PetscErrorCode ierr;
655: PetscMPIInt size = mdn->size,rank = mdn->rank;
656: const PetscViewerType vtype;
657: PetscTruth iascii,isdraw;
658: PetscViewer sviewer;
659: PetscViewerFormat format;
660: #if defined(PETSC_HAVE_PLAPACK)
661: Mat_Plapack *lu;
662: #endif
665: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
666: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
667: if (iascii) {
668: PetscViewerGetType(viewer,&vtype);
669: PetscViewerGetFormat(viewer,&format);
670: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671: MatInfo info;
672: MatGetInfo(mat,MAT_LOCAL,&info);
673: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
675: PetscViewerFlush(viewer);
676: #if defined(PETSC_HAVE_PLAPACK)
677: PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");
678: PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);
679: PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);
680: PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);
681: if (mat->factor){
682: lu=(Mat_Plapack*)(mat->spptr);
683: PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);
684: }
685: #else
686: VecScatterView(mdn->Mvctx,viewer);
687: #endif
688: return(0);
689: } else if (format == PETSC_VIEWER_ASCII_INFO) {
690: return(0);
691: }
692: } else if (isdraw) {
693: PetscDraw draw;
694: PetscTruth isnull;
696: PetscViewerDrawGetDraw(viewer,0,&draw);
697: PetscDrawIsNull(draw,&isnull);
698: if (isnull) return(0);
699: }
701: if (size == 1) {
702: MatView(mdn->A,viewer);
703: } else {
704: /* assemble the entire matrix onto first processor. */
705: Mat A;
706: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
707: PetscInt *cols;
708: PetscScalar *vals;
710: MatCreate(((PetscObject)mat)->comm,&A);
711: if (!rank) {
712: MatSetSizes(A,M,N,M,N);
713: } else {
714: MatSetSizes(A,0,0,M,N);
715: }
716: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
717: MatSetType(A,MATMPIDENSE);
718: MatMPIDenseSetPreallocation(A,PETSC_NULL);
719: PetscLogObjectParent(mat,A);
721: /* Copy the matrix ... This isn't the most efficient means,
722: but it's quick for now */
723: A->insertmode = INSERT_VALUES;
724: row = mat->rmap->rstart; m = mdn->A->rmap->n;
725: for (i=0; i<m; i++) {
726: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
727: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
728: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
729: row++;
730: }
732: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
733: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
734: PetscViewerGetSingleton(viewer,&sviewer);
735: if (!rank) {
736: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
737: }
738: PetscViewerRestoreSingleton(viewer,&sviewer);
739: PetscViewerFlush(viewer);
740: MatDestroy(A);
741: }
742: return(0);
743: }
747: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
748: {
750: PetscTruth iascii,isbinary,isdraw,issocket;
751:
753:
754: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
755: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
756: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
757: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
759: if (iascii || issocket || isdraw) {
760: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
761: } else if (isbinary) {
762: MatView_MPIDense_Binary(mat,viewer);
763: } else {
764: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
765: }
766: return(0);
767: }
771: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772: {
773: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
774: Mat mdn = mat->A;
776: PetscReal isend[5],irecv[5];
779: info->block_size = 1.0;
780: MatGetInfo(mdn,MAT_LOCAL,info);
781: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
782: isend[3] = info->memory; isend[4] = info->mallocs;
783: if (flag == MAT_LOCAL) {
784: info->nz_used = isend[0];
785: info->nz_allocated = isend[1];
786: info->nz_unneeded = isend[2];
787: info->memory = isend[3];
788: info->mallocs = isend[4];
789: } else if (flag == MAT_GLOBAL_MAX) {
790: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
791: info->nz_used = irecv[0];
792: info->nz_allocated = irecv[1];
793: info->nz_unneeded = irecv[2];
794: info->memory = irecv[3];
795: info->mallocs = irecv[4];
796: } else if (flag == MAT_GLOBAL_SUM) {
797: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
798: info->nz_used = irecv[0];
799: info->nz_allocated = irecv[1];
800: info->nz_unneeded = irecv[2];
801: info->memory = irecv[3];
802: info->mallocs = irecv[4];
803: }
804: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
805: info->fill_ratio_needed = 0;
806: info->factor_mallocs = 0;
807: return(0);
808: }
812: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
813: {
814: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
818: switch (op) {
819: case MAT_NEW_NONZERO_LOCATIONS:
820: case MAT_NEW_NONZERO_LOCATION_ERR:
821: case MAT_NEW_NONZERO_ALLOCATION_ERR:
822: MatSetOption(a->A,op,flg);
823: break;
824: case MAT_ROW_ORIENTED:
825: a->roworiented = flg;
826: MatSetOption(a->A,op,flg);
827: break;
828: case MAT_NEW_DIAGONALS:
829: case MAT_USE_HASH_TABLE:
830: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
831: break;
832: case MAT_IGNORE_OFF_PROC_ENTRIES:
833: a->donotstash = flg;
834: break;
835: case MAT_SYMMETRIC:
836: case MAT_STRUCTURALLY_SYMMETRIC:
837: case MAT_HERMITIAN:
838: case MAT_SYMMETRY_ETERNAL:
839: case MAT_IGNORE_LOWER_TRIANGULAR:
840: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
841: break;
842: default:
843: SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844: }
845: return(0);
846: }
851: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852: {
853: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
854: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
855: PetscScalar *l,*r,x,*v;
857: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
860: MatGetLocalSize(A,&s2,&s3);
861: if (ll) {
862: VecGetLocalSize(ll,&s2a);
863: if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864: VecGetArray(ll,&l);
865: for (i=0; i<m; i++) {
866: x = l[i];
867: v = mat->v + i;
868: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869: }
870: VecRestoreArray(ll,&l);
871: PetscLogFlops(n*m);
872: }
873: if (rr) {
874: VecGetLocalSize(rr,&s3a);
875: if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
877: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
878: VecGetArray(mdn->lvec,&r);
879: for (i=0; i<n; i++) {
880: x = r[i];
881: v = mat->v + i*m;
882: for (j=0; j<m; j++) { (*v++) *= x;}
883: }
884: VecRestoreArray(mdn->lvec,&r);
885: PetscLogFlops(n*m);
886: }
887: return(0);
888: }
892: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893: {
894: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
895: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
897: PetscInt i,j;
898: PetscReal sum = 0.0;
899: PetscScalar *v = mat->v;
902: if (mdn->size == 1) {
903: MatNorm(mdn->A,type,nrm);
904: } else {
905: if (type == NORM_FROBENIUS) {
906: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
907: #if defined(PETSC_USE_COMPLEX)
908: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909: #else
910: sum += (*v)*(*v); v++;
911: #endif
912: }
913: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
914: *nrm = sqrt(*nrm);
915: PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);
916: } else if (type == NORM_1) {
917: PetscReal *tmp,*tmp2;
918: PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);
919: tmp2 = tmp + A->cmap->N;
920: PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));
921: *nrm = 0.0;
922: v = mat->v;
923: for (j=0; j<mdn->A->cmap->n; j++) {
924: for (i=0; i<mdn->A->rmap->n; i++) {
925: tmp[j] += PetscAbsScalar(*v); v++;
926: }
927: }
928: MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
929: for (j=0; j<A->cmap->N; j++) {
930: if (tmp2[j] > *nrm) *nrm = tmp2[j];
931: }
932: PetscFree(tmp);
933: PetscLogFlops(A->cmap->n*A->rmap->n);
934: } else if (type == NORM_INFINITY) { /* max row norm */
935: PetscReal ntemp;
936: MatNorm(mdn->A,type,&ntemp);
937: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
938: } else {
939: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
940: }
941: }
942: return(0);
943: }
947: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948: {
949: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
950: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
951: Mat B;
952: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
954: PetscInt j,i;
955: PetscScalar *v;
958: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
959: if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
960: MatCreate(((PetscObject)A)->comm,&B);
961: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
962: MatSetType(B,((PetscObject)A)->type_name);
963: MatMPIDenseSetPreallocation(B,PETSC_NULL);
964: } else {
965: B = *matout;
966: }
968: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
969: PetscMalloc(m*sizeof(PetscInt),&rwork);
970: for (i=0; i<m; i++) rwork[i] = rstart + i;
971: for (j=0; j<n; j++) {
972: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
973: v += m;
974: }
975: PetscFree(rwork);
976: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
977: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
978: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
979: *matout = B;
980: } else {
981: MatHeaderCopy(A,B);
982: }
983: return(0);
984: }
986: #include "petscblaslapack.h"
989: PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990: {
991: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
992: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
993: PetscScalar oalpha = alpha;
995: PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
998: BLASscal_(&nz,&oalpha,a->v,&one);
999: PetscLogFlops(nz);
1000: return(0);
1001: }
1003: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1007: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008: {
1012: MatMPIDenseSetPreallocation(A,0);
1013: return(0);
1014: }
1016: #if defined(PETSC_HAVE_PLAPACK)
1020: PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1021: {
1022: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1024: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1025: PetscScalar *array;
1026: PetscReal one = 1.0;
1029: /* Copy A into F->lu->A */
1030: PLA_Obj_set_to_zero(lu->A);
1031: PLA_API_begin();
1032: PLA_Obj_API_open(lu->A);
1033: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1034: MatGetArray(A,&array);
1035: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1036: MatRestoreArray(A,&array);
1037: PLA_Obj_API_close(lu->A);
1038: PLA_API_end();
1039: lu->rstart = rstart;
1040: return(0);
1041: }
1045: PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1046: {
1047: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1049: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1050: PetscScalar *array;
1051: PetscReal one = 1.0;
1054: /* Copy F into A->lu->A */
1055: MatZeroEntries(A);
1056: PLA_API_begin();
1057: PLA_Obj_API_open(lu->A);
1058: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1059: MatGetArray(A,&array);
1060: PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);
1061: MatRestoreArray(A,&array);
1062: PLA_Obj_API_close(lu->A);
1063: PLA_API_end();
1064: lu->rstart = rstart;
1065: return(0);
1066: }
1070: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1071: {
1073: Mat_Plapack *luA = (Mat_Plapack*)A->spptr;
1074: Mat_Plapack *luB = (Mat_Plapack*)B->spptr;
1075: Mat_Plapack *luC = (Mat_Plapack*)C->spptr;
1076: PLA_Obj alpha = NULL,beta = NULL;
1079: MatMPIDenseCopyToPlapack(A,A);
1080: MatMPIDenseCopyToPlapack(B,B);
1082: /*
1083: PLA_Global_show("A = ",luA->A,"%g ","");
1084: PLA_Global_show("B = ",luB->A,"%g ","");
1085: */
1087: /* do the multiply in PLA */
1088: PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);
1089: PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);
1090: CHKMEMQ;
1092: PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* */
1093: CHKMEMQ;
1094: PLA_Obj_free(&alpha);
1095: PLA_Obj_free(&beta);
1097: /*
1098: PLA_Global_show("C = ",luC->A,"%g ","");
1099: */
1100: MatMPIDenseCopyFromPlapack(C,C);
1101: return(0);
1102: }
1106: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1107: {
1109: PetscInt m=A->rmap->n,n=B->cmap->n;
1110: Mat Cmat;
1113: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1114: SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1115: MatCreate(((PetscObject)B)->comm,&Cmat);
1116: MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);
1117: MatSetType(Cmat,MATMPIDENSE);
1118: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1119: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
1121: *C = Cmat;
1122: return(0);
1123: }
1127: PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1128: {
1132: if (scall == MAT_INITIAL_MATRIX){
1133: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1134: }
1135: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1136: return(0);
1137: }
1141: PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1142: {
1143: MPI_Comm comm = ((PetscObject)A)->comm;
1144: Mat_Plapack *lu = (Mat_Plapack*)A->spptr;
1146: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1147: PetscScalar *array;
1148: PetscReal one = 1.0;
1149: PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;
1150: PLA_Obj v_pla = NULL;
1151: PetscScalar *loc_buf;
1152: Vec loc_x;
1153:
1155: MPI_Comm_size(comm,&size);
1156: MPI_Comm_rank(comm,&rank);
1158: /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1159: PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1160: PLA_Obj_set_to_zero(v_pla);
1162: /* Copy b into rhs_pla */
1163: PLA_API_begin();
1164: PLA_Obj_API_open(v_pla);
1165: VecGetArray(b,&array);
1166: PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1167: VecRestoreArray(b,&array);
1168: PLA_Obj_API_close(v_pla);
1169: PLA_API_end();
1171: if (A->factor == MAT_FACTOR_LU){
1172: /* Apply the permutations to the right hand sides */
1173: PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1175: /* Solve L y = b, overwriting b with y */
1176: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1178: /* Solve U x = y (=b), overwriting b with x */
1179: PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1180: } else { /* MAT_FACTOR_CHOLESKY */
1181: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1182: PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1183: PLA_NONUNIT_DIAG,lu->A,v_pla);
1184: }
1186: /* Copy PLAPACK x into Petsc vector x */
1187: PLA_Obj_local_length(v_pla, &loc_m);
1188: PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1189: PLA_Obj_local_stride(v_pla, &loc_stride);
1190: /*
1191: PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1192: */
1193: VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);
1194: if (!lu->pla_solved){
1195:
1196: PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1197: PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1199: /* Create IS and cts for VecScatterring */
1200: PLA_Obj_local_length(v_pla, &loc_m);
1201: PLA_Obj_local_stride(v_pla, &loc_stride);
1202: PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);
1203: idx_petsc = idx_pla + loc_m;
1205: rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1206: for (i=0; i<loc_m; i+=lu->nb){
1207: j = 0;
1208: while (j < lu->nb && i+j < loc_m){
1209: idx_petsc[i+j] = rstart + j; j++;
1210: }
1211: rstart += size*lu->nb;
1212: }
1214: for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1216: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);
1217: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);
1218: PetscFree(idx_pla);
1219: VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);
1220: }
1221: VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1222: VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1223:
1224: /* Free data */
1225: VecDestroy(loc_x);
1226: PLA_Obj_free(&v_pla);
1228: lu->pla_solved = PETSC_TRUE;
1229: return(0);
1230: }
1234: PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1235: {
1236: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1238: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1239: PetscInt info_pla=0;
1240: PetscScalar *array,one = 1.0;
1243: if (lu->mstruct == SAME_NONZERO_PATTERN){
1244: PLA_Obj_free(&lu->A);
1245: PLA_Obj_free (&lu->pivots);
1246: }
1247: /* Create PLAPACK matrix object */
1248: lu->A = NULL; lu->pivots = NULL;
1249: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1250: PLA_Obj_set_to_zero(lu->A);
1251: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1253: /* Copy A into lu->A */
1254: PLA_API_begin();
1255: PLA_Obj_API_open(lu->A);
1256: MatGetOwnershipRange(A,&rstart,&rend);
1257: MatGetArray(A,&array);
1258: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1259: MatRestoreArray(A,&array);
1260: PLA_Obj_API_close(lu->A);
1261: PLA_API_end();
1263: /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1264: info_pla = PLA_LU(lu->A,lu->pivots);
1265: if (info_pla != 0)
1266: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1268: lu->rstart = rstart;
1269: lu->mstruct = SAME_NONZERO_PATTERN;
1270: F->ops->solve = MatSolve_MPIDense;
1271: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1272: return(0);
1273: }
1277: PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1278: {
1279: Mat_Plapack *lu = (Mat_Plapack*)F->spptr;
1281: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1282: PetscInt info_pla=0;
1283: PetscScalar *array,one = 1.0;
1286: if (lu->mstruct == SAME_NONZERO_PATTERN){
1287: PLA_Obj_free(&lu->A);
1288: }
1289: /* Create PLAPACK matrix object */
1290: lu->A = NULL;
1291: lu->pivots = NULL;
1292: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1294: /* Copy A into lu->A */
1295: PLA_API_begin();
1296: PLA_Obj_API_open(lu->A);
1297: MatGetOwnershipRange(A,&rstart,&rend);
1298: MatGetArray(A,&array);
1299: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1300: MatRestoreArray(A,&array);
1301: PLA_Obj_API_close(lu->A);
1302: PLA_API_end();
1304: /* Factor P A -> Chol */
1305: info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1306: if (info_pla != 0)
1307: SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1309: lu->rstart = rstart;
1310: lu->mstruct = SAME_NONZERO_PATTERN;
1311: F->ops->solve = MatSolve_MPIDense;
1312: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1313: return(0);
1314: }
1316: /* Note the Petsc perm permutation is ignored */
1319: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1320: {
1322: PetscTruth issymmetric,set;
1325: MatIsSymmetricKnown(A,&set,&issymmetric);
1326: if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1327: F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
1328: return(0);
1329: }
1331: /* Note the Petsc r and c permutations are ignored */
1334: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1335: {
1337: PetscInt M = A->rmap->N;
1338: Mat_Plapack *lu;
1341: lu = (Mat_Plapack*)F->spptr;
1342: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1343: F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
1344: return(0);
1345: }
1350: PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1351: {
1353: *type = MAT_SOLVER_PLAPACK;
1354: return(0);
1355: }
1360: PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1361: {
1363: Mat_Plapack *lu;
1364: PetscMPIInt size;
1365: PetscInt M=A->rmap->N;
1368: /* Create the factorization matrix */
1369: MatCreate(((PetscObject)A)->comm,F);
1370: MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1371: MatSetType(*F,((PetscObject)A)->type_name);
1372: PetscNewLog(*F,Mat_Plapack,&lu);
1373: (*F)->spptr = (void*)lu;
1375: /* Set default Plapack parameters */
1376: MPI_Comm_size(((PetscObject)A)->comm,&size);
1377: lu->nb = M/size;
1378: if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1379:
1380: /* Set runtime options */
1381: PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");
1382: PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);
1383: PetscOptionsEnd();
1385: /* Create object distribution template */
1386: lu->templ = NULL;
1387: PLA_Temp_create(lu->nb, 0, &lu->templ);
1389: /* Set the datatype */
1390: #if defined(PETSC_USE_COMPLEX)
1391: lu->datatype = MPI_DOUBLE_COMPLEX;
1392: #else
1393: lu->datatype = MPI_DOUBLE;
1394: #endif
1396: PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1399: lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1400: lu->mstruct = DIFFERENT_NONZERO_PATTERN;
1402: if (ftype == MAT_FACTOR_LU) {
1403: (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1404: } else if (ftype == MAT_FACTOR_CHOLESKY) {
1405: (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1406: } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1407: (*F)->factor = ftype;
1408: PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);
1409: return(0);
1410: }
1411: #endif
1415: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1416: {
1418: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1421: MatAXPY(A->A,alpha,B->A,str);
1422: return(0);
1423: }
1425: /* -------------------------------------------------------------------*/
1426: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1427: MatGetRow_MPIDense,
1428: MatRestoreRow_MPIDense,
1429: MatMult_MPIDense,
1430: /* 4*/ MatMultAdd_MPIDense,
1431: MatMultTranspose_MPIDense,
1432: MatMultTransposeAdd_MPIDense,
1433: 0,
1434: 0,
1435: 0,
1436: /*10*/ 0,
1437: 0,
1438: 0,
1439: 0,
1440: MatTranspose_MPIDense,
1441: /*15*/ MatGetInfo_MPIDense,
1442: MatEqual_MPIDense,
1443: MatGetDiagonal_MPIDense,
1444: MatDiagonalScale_MPIDense,
1445: MatNorm_MPIDense,
1446: /*20*/ MatAssemblyBegin_MPIDense,
1447: MatAssemblyEnd_MPIDense,
1448: 0,
1449: MatSetOption_MPIDense,
1450: MatZeroEntries_MPIDense,
1451: /*25*/ MatZeroRows_MPIDense,
1452: 0,
1453: 0,
1454: 0,
1455: 0,
1456: /*30*/ MatSetUpPreallocation_MPIDense,
1457: 0,
1458: 0,
1459: MatGetArray_MPIDense,
1460: MatRestoreArray_MPIDense,
1461: /*35*/ MatDuplicate_MPIDense,
1462: 0,
1463: 0,
1464: 0,
1465: 0,
1466: /*40*/ MatAXPY_MPIDense,
1467: MatGetSubMatrices_MPIDense,
1468: 0,
1469: MatGetValues_MPIDense,
1470: 0,
1471: /*45*/ 0,
1472: MatScale_MPIDense,
1473: 0,
1474: 0,
1475: 0,
1476: /*50*/ 0,
1477: 0,
1478: 0,
1479: 0,
1480: 0,
1481: /*55*/ 0,
1482: 0,
1483: 0,
1484: 0,
1485: 0,
1486: /*60*/ MatGetSubMatrix_MPIDense,
1487: MatDestroy_MPIDense,
1488: MatView_MPIDense,
1489: 0,
1490: 0,
1491: /*65*/ 0,
1492: 0,
1493: 0,
1494: 0,
1495: 0,
1496: /*70*/ 0,
1497: 0,
1498: 0,
1499: 0,
1500: 0,
1501: /*75*/ 0,
1502: 0,
1503: 0,
1504: 0,
1505: 0,
1506: /*80*/ 0,
1507: 0,
1508: 0,
1509: 0,
1510: /*84*/ MatLoad_MPIDense,
1511: 0,
1512: 0,
1513: 0,
1514: 0,
1515: 0,
1516: /*90*/
1517: #if defined(PETSC_HAVE_PLAPACK)
1518: MatMatMult_MPIDense_MPIDense,
1519: MatMatMultSymbolic_MPIDense_MPIDense,
1520: MatMatMultNumeric_MPIDense_MPIDense,
1521: #else
1522: 0,
1523: 0,
1524: 0,
1525: #endif
1526: 0,
1527: /*95*/ 0,
1528: 0,
1529: 0,
1530: 0};
1535: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1536: {
1537: Mat_MPIDense *a;
1541: mat->preallocated = PETSC_TRUE;
1542: /* Note: For now, when data is specified above, this assumes the user correctly
1543: allocates the local dense storage space. We should add error checking. */
1545: a = (Mat_MPIDense*)mat->data;
1546: MatCreate(PETSC_COMM_SELF,&a->A);
1547: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1548: MatSetType(a->A,MATSEQDENSE);
1549: MatSeqDenseSetPreallocation(a->A,data);
1550: PetscLogObjectParent(mat,a->A);
1551: return(0);
1552: }
1555: /*MC
1556: MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1558: To use this solver, run config/configure.py with the option --download-plapack
1561: Options Database Keys:
1562: + -mat_plapack_nprows <n> - number of rows in processor partition
1563: . -mat_plapack_npcols <n> - number of columns in processor partition
1564: . -mat_plapack_nb <n> - block size of template vector
1565: . -mat_plapack_nb_alg <n> - algorithmic block size
1566: - -mat_plapack_ckerror <n> - error checking flag
1568: .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1570: M*/
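/*
   Usage sketch (assumes PETSc was configured with --download-plapack): one way to reach the
   PLAPACK factorization registered in this file is through the generic factor interface, as
   with other solver packages. PETSC_NULL is shown for the orderings because this implementation
   ignores them; some PETSc versions may insist on explicit index sets there.

      Mat            A,F;
      Vec            b,x;
      MatFactorInfo  info;
      PetscErrorCode ierr;

      ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
      ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
      ierr = MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);CHKERRQ(ierr);
      ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
      ierr = MatSolve(F,b,x);CHKERRQ(ierr);
      ierr = MatDestroy(F);CHKERRQ(ierr);

   With KSP/PC the same path is typically selected at run time, e.g.
   -pc_type lu -pc_factor_mat_solver_package plapack
*/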
1575: PetscErrorCode MatCreate_MPIDense(Mat mat)
1576: {
1577: Mat_MPIDense *a;
1581: PetscNewLog(mat,Mat_MPIDense,&a);
1582: mat->data = (void*)a;
1583: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1584: mat->mapping = 0;
1586: mat->insertmode = NOT_SET_VALUES;
1587: MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);
1588: MPI_Comm_size(((PetscObject)mat)->comm,&a->size);
1590: PetscMapSetBlockSize(mat->rmap,1);
1591: PetscMapSetBlockSize(mat->cmap,1);
1592: PetscMapSetUp(mat->rmap);
1593: PetscMapSetUp(mat->cmap);
1594: a->nvec = mat->cmap->n;
1596: /* build cache for off array entries formed */
1597: a->donotstash = PETSC_FALSE;
1598: MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);
1600: /* stuff used for matrix vector multiply */
1601: a->lvec = 0;
1602: a->Mvctx = 0;
1603: a->roworiented = PETSC_TRUE;
1605: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1606: "MatGetDiagonalBlock_MPIDense",
1607: MatGetDiagonalBlock_MPIDense);
1608: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1609: "MatMPIDenseSetPreallocation_MPIDense",
1610: MatMPIDenseSetPreallocation_MPIDense);
1611: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1612: "MatMatMult_MPIAIJ_MPIDense",
1613: MatMatMult_MPIAIJ_MPIDense);
1614: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1615: "MatMatMultSymbolic_MPIAIJ_MPIDense",
1616: MatMatMultSymbolic_MPIAIJ_MPIDense);
1617: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1618: "MatMatMultNumeric_MPIAIJ_MPIDense",
1619: MatMatMultNumeric_MPIAIJ_MPIDense);
1620: #if defined(PETSC_HAVE_PLAPACK)
1621: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
1622: "MatGetFactor_mpidense_plapack",
1623: MatGetFactor_mpidense_plapack);
1624: PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);
1625: #endif
1626: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1628: return(0);
1629: }
1632: /*MC
1633: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1635: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1636: and MATMPIDENSE otherwise.
1638: Options Database Keys:
1639: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1641: Level: beginner
1644: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1645: M*/
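/*
   Usage sketch: selecting the "dense" type at run time with -mat_type dense; the constructor
   below then dispatches to MATSEQDENSE on one process and MATMPIDENSE otherwise. The sizes are
   example values.

      Mat            A;
      PetscInt       M = 100,N = 100;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);   // honors -mat_type dense
      // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/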
1650: PetscErrorCode MatCreate_Dense(Mat A)
1651: {
1653: PetscMPIInt size;
1656: MPI_Comm_size(((PetscObject)A)->comm,&size);
1657: if (size == 1) {
1658: MatSetType(A,MATSEQDENSE);
1659: } else {
1660: MatSetType(A,MATMPIDENSE);
1661: }
1662: return(0);
1663: }
1668: /*@C
1669: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1671: Not collective
1673: Input Parameters:
1674: + A - the matrix
1675: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1676: to control all matrix memory allocation.
1678: Notes:
1679: The dense format is fully compatible with standard Fortran 77
1680: storage by columns.
1682: The data input variable is intended primarily for Fortran programmers
1683: who wish to allocate their own matrix memory space. Most users should
1684: set data=PETSC_NULL.
1686: Level: intermediate
1688: .keywords: matrix,dense, parallel
1690: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1691: @*/
1692: PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1693: {
1694: PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1697: PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1698: if (f) {
1699: (*f)(mat,data);
1700: }
1701: return(0);
1702: }
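/*
   Usage sketch: the usual creation sequence for a MATMPIDENSE matrix, letting PETSc allocate
   the local column-major storage (data = PETSC_NULL). The local row count is an example value.

      Mat            A;
      PetscInt       m = 4,N = 20;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/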
1706: /*@C
1707: MatCreateMPIDense - Creates a parallel matrix in dense format.
1709: Collective on MPI_Comm
1711: Input Parameters:
1712: + comm - MPI communicator
1713: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1714: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1715: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1716: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1717: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1718: to control all matrix memory allocation.
1720: Output Parameter:
1721: . A - the matrix
1723: Notes:
1724: The dense format is fully compatible with standard Fortran 77
1725: storage by columns.
1727: The data input variable is intended primarily for Fortran programmers
1728: who wish to allocate their own matrix memory space. Most users should
1729: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1731: The user MUST specify either the local or global matrix dimensions
1732: (possibly both).
1734: Level: intermediate
1736: .keywords: matrix,dense, parallel
1738: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1739: @*/
1740: PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1741: {
1743: PetscMPIInt size;
1746: MatCreate(comm,A);
1747: MatSetSizes(*A,m,n,M,N);
1748: MPI_Comm_size(comm,&size);
1749: if (size > 1) {
1750: MatSetType(*A,MATMPIDENSE);
1751: MatMPIDenseSetPreallocation(*A,data);
1752: } else {
1753: MatSetType(*A,MATSEQDENSE);
1754: MatSeqDenseSetPreallocation(*A,data);
1755: }
1756: return(0);
1757: }
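/*
   Usage sketch: the convenience constructor defined above, which reduces to MATSEQDENSE on one
   process and MATMPIDENSE otherwise. The sizes are example values.

      Mat            A;
      PetscInt       m = 5,N = 50;
      PetscErrorCode ierr;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DECIDE,N,PETSC_NULL,&A);CHKERRQ(ierr);
      // fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd()
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/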
1761: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1762: {
1763: Mat mat;
1764: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1768: *newmat = 0;
1769: MatCreate(((PetscObject)A)->comm,&mat);
1770: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1771: MatSetType(mat,((PetscObject)A)->type_name);
1772: a = (Mat_MPIDense*)mat->data;
1773: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1774: mat->factor = A->factor;
1775: mat->assembled = PETSC_TRUE;
1776: mat->preallocated = PETSC_TRUE;
1778: mat->rmap->rstart = A->rmap->rstart;
1779: mat->rmap->rend = A->rmap->rend;
1780: a->size = oldmat->size;
1781: a->rank = oldmat->rank;
1782: mat->insertmode = NOT_SET_VALUES;
1783: a->nvec = oldmat->nvec;
1784: a->donotstash = oldmat->donotstash;
1785:
1786: PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));
1787: PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));
1788: MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);
1790: MatSetUpMultiply_MPIDense(mat);
1791: MatDuplicate(oldmat->A,cpvalues,&a->A);
1792: PetscLogObjectParent(mat,a->A);
1794: *newmat = mat;
1795: return(0);
1796: }
1798: #include "petscsys.h"
1802: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1803: {
1805: PetscMPIInt rank,size;
1806: PetscInt *rowners,i,m,nz,j;
1807: PetscScalar *array,*vals,*vals_ptr;
1808: MPI_Status status;
1811: MPI_Comm_rank(comm,&rank);
1812: MPI_Comm_size(comm,&size);
1814: /* determine ownership of all rows */
1815: m = M/size + ((M % size) > rank);
1816: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1817: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1818: rowners[0] = 0;
1819: for (i=2; i<=size; i++) {
1820: rowners[i] += rowners[i-1];
1821: }
1823: MatCreate(comm,newmat);
1824: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1825: MatSetType(*newmat,type);
1826: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1827: MatGetArray(*newmat,&array);
1829: if (!rank) {
1830: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1832: /* read in my part of the matrix numerical values */
1833: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1834:
1835: /* insert into matrix by row (this is why we cannot read directly into the array) */
1836: vals_ptr = vals;
1837: for (i=0; i<m; i++) {
1838: for (j=0; j<N; j++) {
1839: array[i + j*m] = *vals_ptr++;
1840: }
1841: }
1843: /* read in the other processors' parts and ship them out */
1844: for (i=1; i<size; i++) {
1845: nz = (rowners[i+1] - rowners[i])*N;
1846: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1847: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);
1848: }
1849: } else {
1850: /* receive numeric values */
1851: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1853: /* receive message of values*/
1854: MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);
1856: /* insert into matrix by row (this is why we cannot read directly into the array) */
1857: vals_ptr = vals;
1858: for (i=0; i<m; i++) {
1859: for (j=0; j<N; j++) {
1860: array[i + j*m] = *vals_ptr++;
1861: }
1862: }
1863: }
1864: PetscFree(rowners);
1865: PetscFree(vals);
1866: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1867: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1868: return(0);
1869: }
1873: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1874: {
1875: Mat A;
1876: PetscScalar *vals,*svals;
1877: MPI_Comm comm = ((PetscObject)viewer)->comm;
1878: MPI_Status status;
1879: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1880: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1881: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1882: PetscInt i,nz,j,rstart,rend;
1883: int fd;
1887: MPI_Comm_size(comm,&size);
1888: MPI_Comm_rank(comm,&rank);
1889: if (!rank) {
1890: PetscViewerBinaryGetDescriptor(viewer,&fd);
1891: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1892: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1893: }
1895: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1896: M = header[1]; N = header[2]; nz = header[3];
1898: /*
1899: Handle case where matrix is stored on disk as a dense matrix
1900: */
1901: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1902: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1903: return(0);
1904: }
1906: /* determine ownership of all rows */
1907: m = PetscMPIIntCast(M/size + ((M % size) > rank));
1908: PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);
1909: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1910: rowners[0] = 0;
1911: for (i=2; i<=size; i++) {
1912: rowners[i] += rowners[i-1];
1913: }
1914: rstart = rowners[rank];
1915: rend = rowners[rank+1];
1917: /* distribute row lengths to all processors */
1918: PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);
1919: offlens = ourlens + (rend-rstart);
1920: if (!rank) {
1921: PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1922: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1923: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1924: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1925: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1926: PetscFree(sndcounts);
1927: } else {
1928: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1929: }
1931: if (!rank) {
1932: /* calculate the number of nonzeros on each processor */
1933: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1934: PetscMemzero(procsnz,size*sizeof(PetscInt));
1935: for (i=0; i<size; i++) {
1936: for (j=rowners[i]; j< rowners[i+1]; j++) {
1937: procsnz[i] += rowlengths[j];
1938: }
1939: }
1940: PetscFree(rowlengths);
1942: /* determine max buffer needed and allocate it */
1943: maxnz = 0;
1944: for (i=0; i<size; i++) {
1945: maxnz = PetscMax(maxnz,procsnz[i]);
1946: }
1947: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1949: /* read in my part of the matrix column indices */
1950: nz = procsnz[0];
1951: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1952: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1954: /* read in everyone else's part and ship it off */
1955: for (i=1; i<size; i++) {
1956: nz = procsnz[i];
1957: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1958: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1959: }
1960: PetscFree(cols);
1961: } else {
1962: /* determine buffer space needed for message */
1963: nz = 0;
1964: for (i=0; i<m; i++) {
1965: nz += ourlens[i];
1966: }
1967: PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);
1969: /* receive message of column indices*/
1970: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1971: MPI_Get_count(&status,MPIU_INT,&maxnz);
1972: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1973: }
1975: /* loop over local rows, determining number of off diagonal entries */
1976: PetscMemzero(offlens,m*sizeof(PetscInt));
1977: jj = 0;
1978: for (i=0; i<m; i++) {
1979: for (j=0; j<ourlens[i]; j++) {
1980: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1981: jj++;
1982: }
1983: }
1985: /* create our matrix */
1986: for (i=0; i<m; i++) {
1987: ourlens[i] -= offlens[i];
1988: }
1989: MatCreate(comm,newmat);
1990: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1991: MatSetType(*newmat,type);
1992: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1993: A = *newmat;
1994: for (i=0; i<m; i++) {
1995: ourlens[i] += offlens[i];
1996: }
1998: if (!rank) {
1999: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
2001: /* read in my part of the matrix numerical values */
2002: nz = procsnz[0];
2003: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2004:
2005: /* insert into matrix */
2006: jj = rstart;
2007: smycols = mycols;
2008: svals = vals;
2009: for (i=0; i<m; i++) {
2010: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2011: smycols += ourlens[i];
2012: svals += ourlens[i];
2013: jj++;
2014: }
2016: /* read in the other processors' parts and ship them out */
2017: for (i=1; i<size; i++) {
2018: nz = procsnz[i];
2019: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2020: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
2021: }
2022: PetscFree(procsnz);
2023: } else {
2024: /* receive numeric values */
2025: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2027: /* receive message of values*/
2028: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
2029: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2030: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2032: /* insert into matrix */
2033: jj = rstart;
2034: smycols = mycols;
2035: svals = vals;
2036: for (i=0; i<m; i++) {
2037: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2038: smycols += ourlens[i];
2039: svals += ourlens[i];
2040: jj++;
2041: }
2042: }
2043: PetscFree(ourlens);
2044: PetscFree(vals);
2045: PetscFree(mycols);
2046: PetscFree(rowners);
2048: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2049: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2050: return(0);
2051: }
2055: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2056: {
2057: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2058: Mat a,b;
2059: PetscTruth flg;
2063: a = matA->A;
2064: b = matB->A;
2065: MatEqual(a,b,&flg);
2066: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2067: return(0);
2068: }
2070: #if defined(PETSC_HAVE_PLAPACK)
2074: /*@C
2075: PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2076: Level: developer
2078: .keywords: Petsc, destroy, package, PLAPACK
2079: .seealso: PetscFinalize()
2080: @*/
2081: PetscErrorCode PetscPLAPACKFinalizePackage(void)
2082: {
2086: PLA_Finalize();
2087: return(0);
2088: }
2092: /*@C
2093: PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2094: called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2096: Input Parameter:
2097: . comm - the communicator the matrix lives on
2099: Level: developer
2101: Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2102: same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2103: PLAPACK is initialized on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created on them), those
2104: subcommunicators cannot overlap.
2106: .keywords: Petsc, initialize, package, PLAPACK
2107: .seealso: PetscInitializePackage(), PetscInitialize()
2108: @*/
2109: PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm)
2110: {
2111: PetscMPIInt size;
2115: if (!PLA_Initialized(PETSC_NULL)) {
2117: MPI_Comm_size(comm,&size);
2118: Plapack_nprows = 1;
2119: Plapack_npcols = size;
2120:
2121: PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");
2122: PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);
2123: PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);
2124: #if defined(PETSC_USE_DEBUG)
2125: Plapack_ierror = 3;
2126: #else
2127: Plapack_ierror = 0;
2128: #endif
2129: PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);
2130: if (Plapack_ierror){
2131: PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
2132: } else {
2133: PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
2134: }
2135:
2136: Plapack_nb_alg = 0;
2137: PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);
2138: if (Plapack_nb_alg) {
2139: pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);
2140: }
2141: PetscOptionsEnd();
2143: PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);
2144: PLA_Init(Plapack_comm_2d);
2145: PetscRegisterFinalize(PetscPLAPACKFinalizePackage);
2146: }
2147: return(0);
2148: }
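/*
   Usage sketch: the PLAPACK-related options read above and in MatGetFactor_mpidense_plapack()
   are normally supplied on the command line; the values below are only examples.

      mpiexec -n 4 ./app -mat_plapack_nprows 2 -mat_plapack_npcols 2 \
                         -mat_plapack_nb 64 -mat_plapack_nb_alg 64 -mat_plapack_ckerror 0
*/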
2150: #endif