Actual source code: matscalapack.c
petsc-3.14.5 2021-03-03
1: #include <petsc/private/petscscalapack.h>
3: #define DEFAULT_BLOCKSIZE 64
5: /*
6: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7:   is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
8: */
9: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
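/*
   The grid is created and attached to the communicator in MatCreate_ScaLAPACK(), shared by all
   ScaLAPACK matrices on that communicator via the grid_refct reference count, and released in
   MatDestroy_ScaLAPACK() when the last such matrix is destroyed
*/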
11: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
12: {
16: PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
17: MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
18: return(0);
19: }
21: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
22: {
23: PetscErrorCode ierr;
24: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
25: PetscBool iascii;
26: PetscViewerFormat format;
27: Mat Adense;
30: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
31: if (iascii) {
32: PetscViewerGetFormat(viewer,&format);
33: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
34: PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
35: PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
36: PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
37: PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
38: return(0);
39: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
40: return(0);
41: }
42: }
43: /* convert to dense format and call MatView() */
44: MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
45: MatView(Adense,viewer);
46: MatDestroy(&Adense);
47: return(0);
48: }
50: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
51: {
53: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
54: PetscLogDouble isend[2],irecv[2];
57: info->block_size = 1.0;
59: isend[0] = a->lld*a->locc; /* locally allocated */
60: isend[1] = a->locr*a->locc; /* used submatrix */
61:   if (flag == MAT_LOCAL) {
62: info->nz_allocated = isend[0];
63: info->nz_used = isend[1];
64: } else if (flag == MAT_GLOBAL_MAX) {
65: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
66: info->nz_allocated = irecv[0];
67: info->nz_used = irecv[1];
68: } else if (flag == MAT_GLOBAL_SUM) {
69: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
70: info->nz_allocated = irecv[0];
71: info->nz_used = irecv[1];
72: }
74: info->nz_unneeded = 0;
75: info->assemblies = A->num_ass;
76: info->mallocs = 0;
77: info->memory = ((PetscObject)A)->mem;
78: info->fill_ratio_given = 0;
79: info->fill_ratio_needed = 0;
80: info->factor_mallocs = 0;
81: return(0);
82: }
84: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
85: {
87: switch (op) {
88: case MAT_NEW_NONZERO_LOCATIONS:
89: case MAT_NEW_NONZERO_LOCATION_ERR:
90: case MAT_NEW_NONZERO_ALLOCATION_ERR:
91: case MAT_SYMMETRIC:
92: case MAT_SORTED_FULL:
93: case MAT_HERMITIAN:
94: break;
95: default:
96: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
97: }
98: return(0);
99: }
101: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
102: {
103: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
105: PetscInt i,j;
106: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
109: for (i=0;i<nr;i++) {
110: if (rows[i] < 0) continue;
111: PetscBLASIntCast(rows[i]+1,&gridx);
112: for (j=0;j<nc;j++) {
113: if (cols[j] < 0) continue;
114: PetscBLASIntCast(cols[j]+1,&gcidx);
115: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
116: if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
117: switch (imode) {
118: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
119: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
120: default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
121: }
122: } else {
123: if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
124: A->assembled = PETSC_FALSE;
125: MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
126: }
127: }
128: }
129: return(0);
130: }
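/*
   Common kernel for MatMult and MatMultTranspose: the 1d block-distributed PETSc vectors are
   redistributed to the 2d block-cyclic layout with pxgemr2d, y = op(A)*x + beta*y is computed
   with the PBLAS routine pxgemv, and the result is redistributed back to the 1d layout
*/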
132: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
133: {
135: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
136: PetscScalar *x2d,*y2d,alpha=1.0;
137: const PetscInt *ranges;
138: PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
141: if (transpose) {
143: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
144: PetscLayoutGetRanges(A->rmap,&ranges);
145: PetscBLASIntCast(ranges[1],&mb); /* x block size */
146: xlld = PetscMax(1,A->rmap->n);
147: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
149: PetscLayoutGetRanges(A->cmap,&ranges);
150: PetscBLASIntCast(ranges[1],&nb); /* y block size */
151: ylld = 1;
152: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
155: /* allocate 2d vectors */
156: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
157: lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
158: PetscMalloc2(lszx,&x2d,lszy,&y2d);
159: xlld = PetscMax(1,lszx);
161: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
162: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
164: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
167: /* redistribute x as a column of a 2d matrix */
168: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
170: /* redistribute y as a row of a 2d matrix */
171: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
173: /* call PBLAS subroutine */
174: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
176: /* redistribute y from a row of a 2d matrix */
177: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
179: } else { /* non-transpose */
181: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
182: PetscLayoutGetRanges(A->cmap,&ranges);
183: PetscBLASIntCast(ranges[1],&nb); /* x block size */
184: xlld = 1;
185: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
187: PetscLayoutGetRanges(A->rmap,&ranges);
188: PetscBLASIntCast(ranges[1],&mb); /* y block size */
189: ylld = PetscMax(1,A->rmap->n);
190: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
193: /* allocate 2d vectors */
194: lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
195: lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
196: PetscMalloc2(lszx,&x2d,lszy,&y2d);
197: ylld = PetscMax(1,lszy);
199: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
200: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
202: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
205: /* redistribute x as a row of a 2d matrix */
206: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
208: /* redistribute y as a column of a 2d matrix */
209: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
211: /* call PBLAS subroutine */
212: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
214: /* redistribute y from a column of a 2d matrix */
215: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
217: }
218: PetscFree2(x2d,y2d);
219: return(0);
220: }
222: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
223: {
224: PetscErrorCode ierr;
225: const PetscScalar *xarray;
226: PetscScalar *yarray;
229: VecGetArrayRead(x,&xarray);
230: VecGetArray(y,&yarray);
231: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
232: VecRestoreArrayRead(x,&xarray);
233: VecRestoreArray(y,&yarray);
234: return(0);
235: }
237: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
238: {
239: PetscErrorCode ierr;
240: const PetscScalar *xarray;
241: PetscScalar *yarray;
244: VecGetArrayRead(x,&xarray);
245: VecGetArray(y,&yarray);
246: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
247: VecRestoreArrayRead(x,&xarray);
248: VecRestoreArray(y,&yarray);
249: return(0);
250: }
252: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
253: {
254: PetscErrorCode ierr;
255: const PetscScalar *xarray;
256: PetscScalar *zarray;
259: if (y != z) { VecCopy(y,z); }
260: VecGetArrayRead(x,&xarray);
261: VecGetArray(z,&zarray);
262: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
263: VecRestoreArrayRead(x,&xarray);
264: VecRestoreArray(z,&zarray);
265: return(0);
266: }
268: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269: {
270: PetscErrorCode ierr;
271: const PetscScalar *xarray;
272: PetscScalar *zarray;
275: if (y != z) { VecCopy(y,z); }
276: VecGetArrayRead(x,&xarray);
277: VecGetArray(z,&zarray);
278: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
279: VecRestoreArrayRead(x,&xarray);
280: VecRestoreArray(z,&zarray);
281: return(0);
282: }
284: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
285: {
286: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
287: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
288: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
289: PetscScalar sone=1.0,zero=0.0;
290: PetscBLASInt one=1;
293: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
294: C->assembled = PETSC_TRUE;
295: return(0);
296: }
298: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
299: {
303: MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
304: MatSetType(C,MATSCALAPACK);
305: MatSetUp(C);
306: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
307: return(0);
308: }
310: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
311: {
312: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
313: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
314: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
315: PetscScalar sone=1.0,zero=0.0;
316: PetscBLASInt one=1;
319: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
320: C->assembled = PETSC_TRUE;
321: return(0);
322: }
324: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
325: {
329: MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
330: MatSetType(C,MATSCALAPACK);
331: MatSetUp(C);
332: return(0);
333: }
335: /* --------------------------------------- */
336: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
337: {
339: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
340: C->ops->productsymbolic = MatProductSymbolic_AB;
341: return(0);
342: }
344: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
345: {
347: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
348: C->ops->productsymbolic = MatProductSymbolic_ABt;
349: return(0);
350: }
352: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353: {
355: Mat_Product *product = C->product;
358: switch (product->type) {
359: case MATPRODUCT_AB:
360: MatProductSetFromOptions_ScaLAPACK_AB(C);
361: break;
362: case MATPRODUCT_ABt:
363: MatProductSetFromOptions_ScaLAPACK_ABt(C);
364: break;
365: default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
366: }
367: return(0);
368: }
369: /* --------------------------------------- */
371: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
372: {
373: PetscErrorCode ierr;
374: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
375: PetscScalar *darray,*d2d,v;
376: const PetscInt *ranges;
377: PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
380: VecGetArray(D,&darray);
382: if (A->rmap->N<=A->cmap->N) { /* row version */
384: /* create ScaLAPACK descriptor for vector (1d block distribution) */
385: PetscLayoutGetRanges(A->rmap,&ranges);
386: PetscBLASIntCast(ranges[1],&mb); /* D block size */
387: dlld = PetscMax(1,A->rmap->n);
388: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
391: /* allocate 2d vector */
392: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
393: PetscCalloc1(lszd,&d2d);
394: dlld = PetscMax(1,lszd);
396: /* create ScaLAPACK descriptor for vector (2d block distribution) */
397: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
400: /* collect diagonal */
401: for (j=1;j<=a->M;j++) {
402: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
403: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
404: }
406: /* redistribute d from a column of a 2d matrix */
407: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
408: PetscFree(d2d);
410: } else { /* column version */
412: /* create ScaLAPACK descriptor for vector (1d block distribution) */
413: PetscLayoutGetRanges(A->cmap,&ranges);
414: PetscBLASIntCast(ranges[1],&nb); /* D block size */
415: dlld = 1;
416: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
419: /* allocate 2d vector */
420: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
421: PetscCalloc1(lszd,&d2d);
423: /* create ScaLAPACK descriptor for vector (2d block distribution) */
424: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
427: /* collect diagonal */
428: for (j=1;j<=a->N;j++) {
429: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
430: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
431: }
433: /* redistribute d from a row of a 2d matrix */
434: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
435: PetscFree(d2d);
436: }
438: VecRestoreArray(D,&darray);
439: VecAssemblyBegin(D);
440: VecAssemblyEnd(D);
441: return(0);
442: }
444: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
445: {
446: PetscErrorCode ierr;
447: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
448: const PetscScalar *d;
449: const PetscInt *ranges;
450: PetscScalar *d2d;
451: PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
454: if (R) {
455: VecGetArrayRead(R,(const PetscScalar **)&d);
456: /* create ScaLAPACK descriptor for vector (1d block distribution) */
457: PetscLayoutGetRanges(A->cmap,&ranges);
458: PetscBLASIntCast(ranges[1],&nb); /* D block size */
459: dlld = 1;
460: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
463: /* allocate 2d vector */
464: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
465: PetscCalloc1(lszd,&d2d);
467: /* create ScaLAPACK descriptor for vector (2d block distribution) */
468: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
471: /* redistribute d to a row of a 2d matrix */
472: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
474: /* broadcast along process columns */
475: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
476: else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
478: /* local scaling */
479: for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
481: PetscFree(d2d);
482: VecRestoreArrayRead(R,(const PetscScalar **)&d);
483: }
484: if (L) {
485: VecGetArrayRead(L,(const PetscScalar **)&d);
486: /* create ScaLAPACK descriptor for vector (1d block distribution) */
487: PetscLayoutGetRanges(A->rmap,&ranges);
488: PetscBLASIntCast(ranges[1],&mb); /* D block size */
489: dlld = PetscMax(1,A->rmap->n);
490: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
493: /* allocate 2d vector */
494: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
495: PetscCalloc1(lszd,&d2d);
496: dlld = PetscMax(1,lszd);
498: /* create ScaLAPACK descriptor for vector (2d block distribution) */
499: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
502: /* redistribute d to a column of a 2d matrix */
503: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
505: /* broadcast along process rows */
506: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
507: else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
509: /* local scaling */
510: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
512: PetscFree(d2d);
513: VecRestoreArrayRead(L,(const PetscScalar **)&d);
514: }
515: return(0);
516: }
518: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
519: {
521: *missing = PETSC_FALSE;
522: return(0);
523: }
525: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
526: {
527: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
528: PetscBLASInt n,one=1;
531: n = x->lld*x->locc;
532: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
533: return(0);
534: }
536: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
537: {
538: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539: PetscBLASInt i,n;
540: PetscScalar v;
543: n = PetscMin(x->M,x->N);
544: for (i=1;i<=n;i++) {
545: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
546: v += alpha;
547: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
548: }
549: return(0);
550: }
552: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
553: {
555: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
556: Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data;
557: PetscBLASInt one=1;
558: PetscScalar beta=1.0;
561: MatScaLAPACKCheckDistribution(Y,1,X,3);
562: PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
563: PetscObjectStateIncrease((PetscObject)Y);
564: return(0);
565: }
567: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
568: {
570: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
571: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
574: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
575: PetscObjectStateIncrease((PetscObject)B);
576: return(0);
577: }
579: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
580: {
581: Mat Bs;
582: MPI_Comm comm;
583: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
587: PetscObjectGetComm((PetscObject)A,&comm);
588: MatCreate(comm,&Bs);
589: MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
590: MatSetType(Bs,MATSCALAPACK);
591: b = (Mat_ScaLAPACK*)Bs->data;
592: b->M = a->M;
593: b->N = a->N;
594: b->mb = a->mb;
595: b->nb = a->nb;
596: b->rsrc = a->rsrc;
597: b->csrc = a->csrc;
598: MatSetUp(Bs);
599: *B = Bs;
600: if (op == MAT_COPY_VALUES) {
601: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
602: }
603: Bs->assembled = PETSC_TRUE;
604: return(0);
605: }
607: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
608: {
610: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
611: Mat Bs = *B;
612: PetscBLASInt one=1;
613: PetscScalar sone=1.0,zero=0.0;
614: #if defined(PETSC_USE_COMPLEX)
615: PetscInt i,j;
616: #endif
619: if (reuse == MAT_INITIAL_MATRIX) {
620: MatCreate(PetscObjectComm((PetscObject)A),&Bs);
621: MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
622: MatSetType(Bs,MATSCALAPACK);
623: MatSetUp(Bs);
624: *B = Bs;
625: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
626: b = (Mat_ScaLAPACK*)Bs->data;
627: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
628: #if defined(PETSC_USE_COMPLEX)
629: /* undo conjugation */
630: for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
631: #endif
632: Bs->assembled = PETSC_TRUE;
633: return(0);
634: }
636: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
637: {
638: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
639: PetscInt i,j;
642: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
643: return(0);
644: }
646: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
647: {
649: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
650: Mat Bs = *B;
651: PetscBLASInt one=1;
652: PetscScalar sone=1.0,zero=0.0;
655: if (reuse == MAT_INITIAL_MATRIX) {
656: MatCreate(PetscObjectComm((PetscObject)A),&Bs);
657: MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
658: MatSetType(Bs,MATSCALAPACK);
659: MatSetUp(Bs);
660: *B = Bs;
661: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
662: b = (Mat_ScaLAPACK*)Bs->data;
663: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
664: Bs->assembled = PETSC_TRUE;
665: return(0);
666: }
668: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
669: {
671: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
672: PetscScalar *x,*x2d;
673: const PetscInt *ranges;
674: PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
677: VecCopy(B,X);
678: VecGetArray(X,&x);
680: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
681: PetscLayoutGetRanges(A->rmap,&ranges);
682: PetscBLASIntCast(ranges[1],&mb); /* x block size */
683: xlld = PetscMax(1,A->rmap->n);
684: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
687: /* allocate 2d vector */
688: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
689: PetscMalloc1(lszx,&x2d);
690: xlld = PetscMax(1,lszx);
692: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
693: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
696: /* redistribute x as a column of a 2d matrix */
697: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
699: /* call ScaLAPACK subroutine */
700: switch (A->factortype) {
701: case MAT_FACTOR_LU:
702: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
704: break;
705: case MAT_FACTOR_CHOLESKY:
706: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
708: break;
709: default:
710: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
711: }
713: /* redistribute x from a column of a 2d matrix */
714: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
716: PetscFree(x2d);
717: VecRestoreArray(X,&x);
718: return(0);
719: }
721: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
722: {
726: MatSolve_ScaLAPACK(A,B,X);
727: VecAXPY(X,1,Y);
728: return(0);
729: }
731: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
732: {
734: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
735: PetscBool flg1,flg2;
736: PetscBLASInt one=1,info;
739: PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
740: PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
741: if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
742: MatScaLAPACKCheckDistribution(B,1,X,2);
743: b = (Mat_ScaLAPACK*)B->data;
744: x = (Mat_ScaLAPACK*)X->data;
745: PetscArraycpy(x->loc,b->loc,b->lld*b->locc);
747: switch (A->factortype) {
748: case MAT_FACTOR_LU:
749: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
751: break;
752: case MAT_FACTOR_CHOLESKY:
753: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
755: break;
756: default:
757: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
758: }
759: return(0);
760: }
762: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
763: {
765: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
766: PetscBLASInt one=1,info;
769: if (!a->pivots) {
770: PetscMalloc1(a->locr+a->mb,&a->pivots);
771:     PetscLogObjectMemory((PetscObject)A,(a->locr+a->mb)*sizeof(PetscBLASInt));
772: }
773: PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
775: A->factortype = MAT_FACTOR_LU;
776: A->assembled = PETSC_TRUE;
778: PetscFree(A->solvertype);
779: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
780: return(0);
781: }
783: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
784: {
788: MatCopy(A,F,SAME_NONZERO_PATTERN);
789: MatLUFactor_ScaLAPACK(F,0,0,info);
790: return(0);
791: }
793: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
794: {
796: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
797: return(0);
798: }
800: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
801: {
803: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
804: PetscBLASInt one=1,info;
807: PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
809: A->factortype = MAT_FACTOR_CHOLESKY;
810: A->assembled = PETSC_TRUE;
812: PetscFree(A->solvertype);
813: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
814: return(0);
815: }
817: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
818: {
822: MatCopy(A,F,SAME_NONZERO_PATTERN);
823: MatCholeskyFactor_ScaLAPACK(F,0,info);
824: return(0);
825: }
827: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
828: {
830: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
831: return(0);
832: }
834: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
835: {
837: *type = MATSOLVERSCALAPACK;
838: return(0);
839: }
841: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
842: {
843: Mat B;
847: /* Create the factorization matrix */
848: MatCreate(PetscObjectComm((PetscObject)A),&B);
849: MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
850: MatSetType(B,MATSCALAPACK);
851: MatSetUp(B);
852: B->factortype = ftype;
853: PetscFree(B->solvertype);
854: PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);
856: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
857: *F = B;
858: return(0);
859: }
861: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
862: {
866: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
867: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
868: return(0);
869: }
871: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
872: {
874: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
875: PetscBLASInt one=1,lwork=0;
876: const char *ntype;
877: PetscScalar *work=NULL,dummy;
880: switch (type){
881: case NORM_1:
882: ntype = "1";
883: lwork = PetscMax(a->locr,a->locc);
884: break;
885: case NORM_FROBENIUS:
886: ntype = "F";
887: work = &dummy;
888: break;
889: case NORM_INFINITY:
890: ntype = "I";
891: lwork = PetscMax(a->locr,a->locc);
892: break;
893: default:
894: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
895: }
896: if (lwork) { PetscMalloc1(lwork,&work); }
897: *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
898: if (lwork) { PetscFree(work); }
899: return(0);
900: }
902: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
903: {
904: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
908: PetscArrayzero(a->loc,a->lld*a->locc);
909: return(0);
910: }
912: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
913: {
914: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
916: PetscInt i,n,nb,isrc,nproc,iproc,*idx;
919: if (rows) {
920: n = a->locr;
921: nb = a->mb;
922: isrc = a->rsrc;
923: nproc = a->grid->nprow;
924: iproc = a->grid->myrow;
925: PetscMalloc1(n,&idx);
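    /* local-to-global index map of the block-cyclic distribution (the same formula is used for
       columns below): local entry i lies at offset i%nb of local block i/nb, whose global block
       number is nproc*(i/nb) + (nproc+iproc-isrc)%nproc, so the global index is that block number
       times nb plus the offset */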
926: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
927: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
928: }
929: if (cols) {
930: n = a->locc;
931: nb = a->nb;
932: isrc = a->csrc;
933: nproc = a->grid->npcol;
934: iproc = a->grid->mycol;
935: PetscMalloc1(n,&idx);
936: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
937: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
938: }
939: return(0);
940: }
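/*
   Conversion to MATDENSE: a ScaLAPACK descriptor is built for the 1d block-distributed dense matrix
   and the data is redistributed with pxgemr2d. When reusing a dense matrix whose local row sizes do
   not match the 1d layout, an auxiliary dense matrix is filled first and its rows are copied into
   the final matrix with MatSetValues()
*/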
942: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
943: {
945: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
946: Mat Bmpi;
947: MPI_Comm comm;
948: PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
949: const PetscInt *ranges,*branges,*cwork;
950: const PetscScalar *vwork;
951: PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info;
952: PetscScalar *barray;
953: PetscBool differ=PETSC_FALSE;
954: PetscMPIInt size;
957: PetscObjectGetComm((PetscObject)A,&comm);
958: PetscLayoutGetRanges(A->rmap,&ranges);
960: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
961: MPI_Comm_size(comm,&size);
962: PetscLayoutGetRanges((*B)->rmap,&branges);
963: for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
964: }
966: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
967: MatCreate(comm,&Bmpi);
968: m = PETSC_DECIDE;
969: PetscSplitOwnershipEqual(comm,&m,&M);
970: n = PETSC_DECIDE;
971: PetscSplitOwnershipEqual(comm,&n,&N);
972: MatSetSizes(Bmpi,m,n,M,N);
973: MatSetType(Bmpi,MATDENSE);
974: MatSetUp(Bmpi);
976: /* create ScaLAPACK descriptor for B (1d block distribution) */
977: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
978: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
979: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
982: /* redistribute matrix */
983: MatDenseGetArray(Bmpi,&barray);
984: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
985: MatDenseRestoreArray(Bmpi,&barray);
986: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
987: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
989: /* transfer rows of auxiliary matrix to the final matrix B */
990: MatGetOwnershipRange(Bmpi,&rstart,&rend);
991: for (i=rstart;i<rend;i++) {
992: MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
993: MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
994: MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
995: }
996: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
997: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
998: MatDestroy(&Bmpi);
1000: } else { /* normal cases */
1002: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1003: else {
1004: MatCreate(comm,&Bmpi);
1005: m = PETSC_DECIDE;
1006: PetscSplitOwnershipEqual(comm,&m,&M);
1007: n = PETSC_DECIDE;
1008: PetscSplitOwnershipEqual(comm,&n,&N);
1009: MatSetSizes(Bmpi,m,n,M,N);
1010: MatSetType(Bmpi,MATDENSE);
1011: MatSetUp(Bmpi);
1012: }
1014: /* create ScaLAPACK descriptor for B (1d block distribution) */
1015: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
1016: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
1017: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
1020: /* redistribute matrix */
1021: MatDenseGetArray(Bmpi,&barray);
1022: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1023: MatDenseRestoreArray(Bmpi,&barray);
1025: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1026: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1027: if (reuse == MAT_INPLACE_MATRIX) {
1028: MatHeaderReplace(A,&Bmpi);
1029: } else *B = Bmpi;
1030: }
1031: return(0);
1032: }
1034: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1035: {
1037: Mat_ScaLAPACK *b;
1038: Mat Bmpi;
1039: MPI_Comm comm;
1040: PetscInt M=A->rmap->N,N=A->cmap->N,m,n;
1041: const PetscInt *ranges;
1042: PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info;
1043: PetscScalar *aarray;
1044: PetscInt lda;
1047: PetscObjectGetComm((PetscObject)A,&comm);
1049: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1050: else {
1051: MatCreate(comm,&Bmpi);
1052: m = PETSC_DECIDE;
1053: PetscSplitOwnershipEqual(comm,&m,&M);
1054: n = PETSC_DECIDE;
1055: PetscSplitOwnershipEqual(comm,&n,&N);
1056: MatSetSizes(Bmpi,m,n,M,N);
1057: MatSetType(Bmpi,MATSCALAPACK);
1058: MatSetUp(Bmpi);
1059: }
1060: b = (Mat_ScaLAPACK*)Bmpi->data;
1062: /* create ScaLAPACK descriptor for A (1d block distribution) */
1063: PetscLayoutGetRanges(A->rmap,&ranges);
1064: PetscBLASIntCast(ranges[1],&amb); /* row block size */
1065: MatDenseGetLDA(A,&lda);
1066: lld = PetscMax(lda,1); /* local leading dimension */
1067: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1070: /* redistribute matrix */
1071: MatDenseGetArray(A,&aarray);
1072: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1073: MatDenseRestoreArray(A,&aarray);
1075: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1076: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1077: if (reuse == MAT_INPLACE_MATRIX) {
1078: MatHeaderReplace(A,&Bmpi);
1079: } else *B = Bmpi;
1080: return(0);
1081: }
1083: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1084: {
1085: Mat mat_scal;
1086: PetscErrorCode ierr;
1087: PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1088: const PetscInt *cols;
1089: const PetscScalar *vals;
1092: if (reuse == MAT_REUSE_MATRIX) {
1093: mat_scal = *newmat;
1094: MatZeroEntries(mat_scal);
1095: } else {
1096: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1097: m = PETSC_DECIDE;
1098: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1099: n = PETSC_DECIDE;
1100: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1101: MatSetSizes(mat_scal,m,n,M,N);
1102: MatSetType(mat_scal,MATSCALAPACK);
1103: MatSetUp(mat_scal);
1104: }
1105: for (row=rstart;row<rend;row++) {
1106: MatGetRow(A,row,&ncols,&cols,&vals);
1107: MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1108: MatRestoreRow(A,row,&ncols,&cols,&vals);
1109: }
1110: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1111: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1113: if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1114: else *newmat = mat_scal;
1115: return(0);
1116: }
1118: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1119: {
1120: Mat mat_scal;
1121: PetscErrorCode ierr;
1122: PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1123: const PetscInt *cols;
1124: const PetscScalar *vals;
1125: PetscScalar v;
1128: if (reuse == MAT_REUSE_MATRIX) {
1129: mat_scal = *newmat;
1130: MatZeroEntries(mat_scal);
1131: } else {
1132: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1133: m = PETSC_DECIDE;
1134: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1135: n = PETSC_DECIDE;
1136: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1137: MatSetSizes(mat_scal,m,n,M,N);
1138: MatSetType(mat_scal,MATSCALAPACK);
1139: MatSetUp(mat_scal);
1140: }
1141: MatGetRowUpperTriangular(A);
1142: for (row=rstart;row<rend;row++) {
1143: MatGetRow(A,row,&ncols,&cols,&vals);
1144: MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1145: for (j=0;j<ncols;j++) { /* lower triangular part */
1146: if (cols[j] == row) continue;
1147: v = A->hermitian ? PetscConj(vals[j]) : vals[j];
1148: MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1149: }
1150: MatRestoreRow(A,row,&ncols,&cols,&vals);
1151: }
1152: MatRestoreRowUpperTriangular(A);
1153: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1154: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1156: if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1157: else *newmat = mat_scal;
1158: return(0);
1159: }
1161: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1162: {
1163: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1165: PetscInt sz=0;
1168: PetscLayoutSetUp(A->rmap);
1169: PetscLayoutSetUp(A->cmap);
1170: if (!a->lld) a->lld = a->locr;
1172: PetscFree(a->loc);
1173: PetscIntMultError(a->lld,a->locc,&sz);
1174: PetscCalloc1(sz,&a->loc);
1175: PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));
1177: A->preallocated = PETSC_TRUE;
1178: return(0);
1179: }
1181: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1182: {
1183: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1184: PetscErrorCode ierr;
1185: Mat_ScaLAPACK_Grid *grid;
1186: PetscBool flg;
1187: MPI_Comm icomm;
1190: MatStashDestroy_Private(&A->stash);
1191: PetscFree(a->loc);
1192: PetscFree(a->pivots);
1193: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1194: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1195: if (--grid->grid_refct == 0) {
1196: Cblacs_gridexit(grid->ictxt);
1197: Cblacs_gridexit(grid->ictxrow);
1198: Cblacs_gridexit(grid->ictxcol);
1199: PetscFree(grid);
1200: MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1201: }
1202: PetscCommDestroy(&icomm);
1203: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1204: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1205: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1206: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1207: PetscFree(A->data);
1208: return(0);
1209: }
1211: PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1212: {
1214: const PetscInt *ranges;
1215: PetscMPIInt size;
1216: PetscInt i,n;
1219: MPI_Comm_size(map->comm,&size);
1220: if (size>2) {
1221: PetscLayoutGetRanges(map,&ranges);
1222: n = ranges[1]-ranges[0];
1223: for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1224: if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1225: }
1226: return(0);
1227: }
1229: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1230: {
1231: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1233: PetscBLASInt info=0;
1236: PetscLayoutSetUp(A->rmap);
1237: PetscLayoutSetUp(A->cmap);
1239: /* check that the layout is as enforced by MatCreateScaLAPACK */
1240: MatScaLAPACKCheckLayout(A->rmap);
1241: MatScaLAPACKCheckLayout(A->cmap);
1243: /* compute local sizes */
1244: PetscBLASIntCast(A->rmap->N,&a->M);
1245: PetscBLASIntCast(A->cmap->N,&a->N);
1246: a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1247: a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1248: a->lld = PetscMax(1,a->locr);
1250: /* allocate local array */
1251: MatScaLAPACKSetPreallocation(A);
1253: /* set up ScaLAPACK descriptor */
1254: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1256: return(0);
1257: }
1259: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1260: {
1262: PetscInt nstash,reallocs;
1265: if (A->nooffprocentries) return(0);
1266: MatStashScatterBegin_Private(A,&A->stash,NULL);
1267: MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1268: PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
1269: return(0);
1270: }
1272: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1273: {
1275: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1276: PetscMPIInt n;
1277: PetscInt i,flg,*row,*col;
1278: PetscScalar *val;
1279: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1282: if (A->nooffprocentries) return(0);
1283: while (1) {
1284: MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1285: if (!flg) break;
1286: for (i=0;i<n;i++) {
1287: PetscBLASIntCast(row[i]+1,&gridx);
1288: PetscBLASIntCast(col[i]+1,&gcidx);
1289: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1290: if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1291: switch (A->insertmode) {
1292: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1293: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1294: default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1295: }
1296: }
1297: }
1298: MatStashScatterEnd_Private(&A->stash);
1299: return(0);
1300: }
1302: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1303: {
1305: Mat Adense,As;
1306: MPI_Comm comm;
1309: PetscObjectGetComm((PetscObject)newMat,&comm);
1310: MatCreate(comm,&Adense);
1311: MatSetType(Adense,MATDENSE);
1312: MatLoad(Adense,viewer);
1313: MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1314: MatDestroy(&Adense);
1315: MatHeaderReplace(newMat,&As);
1316: return(0);
1317: }
1319: /* -------------------------------------------------------------------*/
1320: static struct _MatOps MatOps_Values = {
1321: MatSetValues_ScaLAPACK,
1322: 0,
1323: 0,
1324: MatMult_ScaLAPACK,
1325: /* 4*/ MatMultAdd_ScaLAPACK,
1326: MatMultTranspose_ScaLAPACK,
1327: MatMultTransposeAdd_ScaLAPACK,
1328: MatSolve_ScaLAPACK,
1329: MatSolveAdd_ScaLAPACK,
1330: 0,
1331: /*10*/ 0,
1332: MatLUFactor_ScaLAPACK,
1333: MatCholeskyFactor_ScaLAPACK,
1334: 0,
1335: MatTranspose_ScaLAPACK,
1336: /*15*/ MatGetInfo_ScaLAPACK,
1337: 0,
1338: MatGetDiagonal_ScaLAPACK,
1339: MatDiagonalScale_ScaLAPACK,
1340: MatNorm_ScaLAPACK,
1341: /*20*/ MatAssemblyBegin_ScaLAPACK,
1342: MatAssemblyEnd_ScaLAPACK,
1343: MatSetOption_ScaLAPACK,
1344: MatZeroEntries_ScaLAPACK,
1345: /*24*/ 0,
1346: MatLUFactorSymbolic_ScaLAPACK,
1347: MatLUFactorNumeric_ScaLAPACK,
1348: MatCholeskyFactorSymbolic_ScaLAPACK,
1349: MatCholeskyFactorNumeric_ScaLAPACK,
1350: /*29*/ MatSetUp_ScaLAPACK,
1351: 0,
1352: 0,
1353: 0,
1354: 0,
1355: /*34*/ MatDuplicate_ScaLAPACK,
1356: 0,
1357: 0,
1358: 0,
1359: 0,
1360: /*39*/ MatAXPY_ScaLAPACK,
1361: 0,
1362: 0,
1363: 0,
1364: MatCopy_ScaLAPACK,
1365: /*44*/ 0,
1366: MatScale_ScaLAPACK,
1367: MatShift_ScaLAPACK,
1368: 0,
1369: 0,
1370: /*49*/ 0,
1371: 0,
1372: 0,
1373: 0,
1374: 0,
1375: /*54*/ 0,
1376: 0,
1377: 0,
1378: 0,
1379: 0,
1380: /*59*/ 0,
1381: MatDestroy_ScaLAPACK,
1382: MatView_ScaLAPACK,
1383: 0,
1384: 0,
1385: /*64*/ 0,
1386: 0,
1387: 0,
1388: 0,
1389: 0,
1390: /*69*/ 0,
1391: 0,
1392: MatConvert_ScaLAPACK_Dense,
1393: 0,
1394: 0,
1395: /*74*/ 0,
1396: 0,
1397: 0,
1398: 0,
1399: 0,
1400: /*79*/ 0,
1401: 0,
1402: 0,
1403: 0,
1404: MatLoad_ScaLAPACK,
1405: /*84*/ 0,
1406: 0,
1407: 0,
1408: 0,
1409: 0,
1410: /*89*/ 0,
1411: 0,
1412: MatMatMultNumeric_ScaLAPACK,
1413: 0,
1414: 0,
1415: /*94*/ 0,
1416: 0,
1417: 0,
1418: MatMatTransposeMultNumeric_ScaLAPACK,
1419: 0,
1420: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1421: 0,
1422: 0,
1423: MatConjugate_ScaLAPACK,
1424: 0,
1425: /*104*/0,
1426: 0,
1427: 0,
1428: 0,
1429: 0,
1430: /*109*/MatMatSolve_ScaLAPACK,
1431: 0,
1432: 0,
1433: 0,
1434: MatMissingDiagonal_ScaLAPACK,
1435: /*114*/0,
1436: 0,
1437: 0,
1438: 0,
1439: 0,
1440: /*119*/0,
1441: MatHermitianTranspose_ScaLAPACK,
1442: 0,
1443: 0,
1444: 0,
1445: /*124*/0,
1446: 0,
1447: 0,
1448: 0,
1449: 0,
1450: /*129*/0,
1451: 0,
1452: 0,
1453: 0,
1454: 0,
1455: /*134*/0,
1456: 0,
1457: 0,
1458: 0,
1459: 0,
1460: 0,
1461: /*140*/0,
1462: 0,
1463: 0,
1464: 0,
1465: 0,
1466: /*145*/0,
1467: 0,
1468: 0
1469: };
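/*
   Customized stash scatter for ScaLAPACK matrices: instead of routing stashed entries by row
   ownership, each (row,col,value) triple is sent to the process that owns the entry in the 2d
   block-cyclic layout, determined with pinfog2l and Cblacs_pnum()
*/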
1471: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1472: {
1473: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1474: PetscInt size=stash->size,nsends;
1475: PetscErrorCode ierr;
1476: PetscInt count,*sindices,**rindices,i,j,l;
1477: PetscScalar **rvalues,*svalues;
1478: MPI_Comm comm = stash->comm;
1479: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1480: PetscMPIInt *sizes,*nlengths,nreceives;
1481: PetscInt *sp_idx,*sp_idy;
1482: PetscScalar *sp_val;
1483: PetscMatStashSpace space,space_next;
1484: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1485: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data;
1488: { /* make sure all processors are either in INSERTMODE or ADDMODE */
1489: InsertMode addv;
1490: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1491: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1492: mat->insertmode = addv; /* in case this processor had no cache */
1493: }
1495: bs2 = stash->bs*stash->bs;
1497: /* first count number of contributors to each processor */
1498: PetscCalloc1(size,&nlengths);
1499: PetscMalloc1(stash->n+1,&owner);
1501: i = j = 0;
1502: space = stash->space_head;
1503: while (space) {
1504: space_next = space->next;
1505: for (l=0; l<space->local_used; l++) {
1506: PetscBLASIntCast(space->idx[l]+1,&gridx);
1507: PetscBLASIntCast(space->idy[l]+1,&gcidx);
1508: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1509: j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1510: nlengths[j]++; owner[i] = j;
1511: i++;
1512: }
1513: space = space_next;
1514: }
1516: /* Now check what procs get messages - and compute nsends. */
1517: PetscCalloc1(size,&sizes);
1518: for (i=0, nsends=0; i<size; i++) {
1519: if (nlengths[i]) {
1520: sizes[i] = 1; nsends++;
1521: }
1522: }
1524: {PetscMPIInt *onodes,*olengths;
1525:   /* Determine the number of messages to expect, their lengths, and the from-ids */
1526: PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1527: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1528:   /* row and column indices are sent together (clubbed), so lengths are multiplied by 2 */
1529: for (i=0; i<nreceives; i++) olengths[i] *=2;
1530: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1531:   /* values are of size 'bs2' per entry (and remove the earlier factor of 2) */
1532: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1533: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1534: PetscFree(onodes);
1535: PetscFree(olengths);}
1537: /* do sends:
1538: 1) starts[i] gives the starting index in svalues for stuff going to
1539: the ith processor
1540: */
1541: PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1542: PetscMalloc1(2*nsends,&send_waits);
1543: PetscMalloc2(size,&startv,size,&starti);
1544:   /* use 2 sends: the first with all_a, the next with all_i and all_j */
1545: startv[0] = 0; starti[0] = 0;
1546: for (i=1; i<size; i++) {
1547: startv[i] = startv[i-1] + nlengths[i-1];
1548: starti[i] = starti[i-1] + 2*nlengths[i-1];
1549: }
1551: i = 0;
1552: space = stash->space_head;
1553: while (space) {
1554: space_next = space->next;
1555: sp_idx = space->idx;
1556: sp_idy = space->idy;
1557: sp_val = space->val;
1558: for (l=0; l<space->local_used; l++) {
1559: j = owner[i];
1560: if (bs2 == 1) {
1561: svalues[startv[j]] = sp_val[l];
1562: } else {
1563: PetscInt k;
1564: PetscScalar *buf1,*buf2;
1565: buf1 = svalues+bs2*startv[j];
1566: buf2 = space->val + bs2*l;
1567: for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1568: }
1569: sindices[starti[j]] = sp_idx[l];
1570: sindices[starti[j]+nlengths[j]] = sp_idy[l];
1571: startv[j]++;
1572: starti[j]++;
1573: i++;
1574: }
1575: space = space_next;
1576: }
1577: startv[0] = 0;
1578: for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1580: for (i=0,count=0; i<size; i++) {
1581: if (sizes[i]) {
1582: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1583: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1584: }
1585: }
1586: #if defined(PETSC_USE_INFO)
1587: PetscInfo1(NULL,"No of messages: %d \n",nsends);
1588: for (i=0; i<size; i++) {
1589: if (sizes[i]) {
1590: PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
1591: }
1592: }
1593: #endif
1594: PetscFree(nlengths);
1595: PetscFree(owner);
1596: PetscFree2(startv,starti);
1597: PetscFree(sizes);
1599: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1600: PetscMalloc1(2*nreceives,&recv_waits);
1602: for (i=0; i<nreceives; i++) {
1603: recv_waits[2*i] = recv_waits1[i];
1604: recv_waits[2*i+1] = recv_waits2[i];
1605: }
1606: stash->recv_waits = recv_waits;
1608: PetscFree(recv_waits1);
1609: PetscFree(recv_waits2);
1611: stash->svalues = svalues;
1612: stash->sindices = sindices;
1613: stash->rvalues = rvalues;
1614: stash->rindices = rindices;
1615: stash->send_waits = send_waits;
1616: stash->nsends = nsends;
1617: stash->nrecvs = nreceives;
1618: stash->reproduce_count = 0;
1619: return(0);
1620: }
1622: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1623: {
1625: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1628: if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1629: if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1630: if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1631: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1632: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1633: return(0);
1634: }
1636: /*@
1637:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1638: the ScaLAPACK matrix
1640: Logically Collective on A
1642:    Input Parameters:
1643: + A - a MATSCALAPACK matrix
1644: . mb - the row block size
1645: - nb - the column block size
1647: Level: intermediate
1649: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1650: @*/
1651: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1652: {
1659: PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1660: return(0);
1661: }
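/*
   Minimal usage sketch (illustrative only; the 1000x1000 size and 32x32 blocks are arbitrary):
   block sizes must be set before MatSetUp(), and local sizes must follow the equal-split layout
   enforced by this type

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       m = PETSC_DECIDE, n = PETSC_DECIDE, M = 1000, N = 1000;

     ierr = PetscSplitOwnershipEqual(PETSC_COMM_WORLD,&m,&M);CHKERRQ(ierr);
     ierr = PetscSplitOwnershipEqual(PETSC_COMM_WORLD,&n,&N);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATSCALAPACK);CHKERRQ(ierr);
     ierr = MatScaLAPACKSetBlockSizes(A,32,32);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/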
1663: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1664: {
1665: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1668: if (mb) *mb = a->mb;
1669: if (nb) *nb = a->nb;
1670: return(0);
1671: }
1673: /*@
1674:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1675: the ScaLAPACK matrix
1677: Not collective
1679: Input Parameter:
1680: . A - a MATSCALAPACK matrix
1682: Output Parameters:
1683: + mb - the row block size
1684: - nb - the column block size
1686: Level: intermediate
1688: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1689: @*/
1690: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1691: {
1696: PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1697: return(0);
1698: }
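/*
   Usage sketch (illustrative): query the block sizes actually in use

     PetscInt mb,nb;
     ierr = MatScaLAPACKGetBlockSizes(A,&mb,&nb);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"block sizes %D x %D\n",mb,nb);CHKERRQ(ierr);
*/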
1700: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1701: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1703: /*MC
1704: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1706: Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1708: Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1710: Options Database Keys:
1711: + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1712: . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1713: - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1715: Level: beginner
1717: .seealso: MATDENSE, MATELEMENTAL
1718: M*/
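/*
   Example command line (the executable name and numeric values are hypothetical; the options are
   the ones documented above):

     mpiexec -n 4 ./app -mat_type scalapack -mat_scalapack_block_sizes 16,32 \
                        -mat_scalapack_grid_height 2 -pc_type lu -pc_factor_mat_solver_type scalapack
*/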
1720: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1721: {
1722: Mat_ScaLAPACK *a;
1723: PetscErrorCode ierr;
1724: PetscBool flg,flg1;
1725: Mat_ScaLAPACK_Grid *grid;
1726: MPI_Comm icomm;
1727: PetscBLASInt nprow,npcol,myrow,mycol;
1728: PetscInt optv1,k=2,array[2]={0,0};
1729: PetscMPIInt size;
1732: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1733: A->insertmode = NOT_SET_VALUES;
1735: MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1736: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1737: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1738: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1739: A->stash.ScatterDestroy = NULL;
1741: PetscNewLog(A,&a);
1742: A->data = (void*)a;
1744: /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1745: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1746: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1747: PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1748: }
1749: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1750: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1751: if (!flg) {
1752: PetscNewLog(A,&grid);
1754: MPI_Comm_size(icomm,&size);
1755: grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1757: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1758: PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1759: if (flg1) {
1760: if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1761: grid->nprow = optv1;
1762: }
1763: PetscOptionsEnd();
1765: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1766: grid->npcol = size/grid->nprow;
1767: PetscBLASIntCast(grid->nprow,&nprow);
1768: PetscBLASIntCast(grid->npcol,&npcol);
1769: grid->ictxt = Csys2blacs_handle(icomm);
1770: Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1771: Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1772: grid->grid_refct = 1;
1773: grid->nprow = nprow;
1774: grid->npcol = npcol;
1775: grid->myrow = myrow;
1776: grid->mycol = mycol;
1777: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1778: grid->ictxrow = Csys2blacs_handle(icomm);
1779: Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1780: grid->ictxcol = Csys2blacs_handle(icomm);
1781: Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1782: MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);
1784: } else grid->grid_refct++;
1785: PetscCommDestroy(&icomm);
1786: a->grid = grid;
1787: a->mb = DEFAULT_BLOCKSIZE;
1788: a->nb = DEFAULT_BLOCKSIZE;
1790: PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1791: PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1792: if (flg) {
1793: a->mb = array[0];
1794: a->nb = (k>1)? array[1]: a->mb;
1795: }
1796: PetscOptionsEnd();
1798: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1799: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1800: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1801: PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1802: return(0);
1803: }
1805: /*@C
1806: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1807: (2D block cyclic distribution).
1809: Collective
1811: Input Parameters:
1812: + comm - MPI communicator
1813: . mb - row block size (or PETSC_DECIDE to have it set)
1814: . nb - column block size (or PETSC_DECIDE to have it set)
1815: . M - number of global rows
1816: . N - number of global columns
1817: . rsrc - coordinate of process that owns the first row of the distributed matrix
1818: - csrc - coordinate of process that owns the first column of the distributed matrix
1820: Output Parameter:
1821: . A - the matrix
1823: Options Database Keys:
1824: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1826: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1827: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1828: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1830: Notes:
1831: If PETSC_DECIDE is used for the block sizes, then an appropriate value
1832: is chosen.
1834: Storage Information:
1835:    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1836: configured with ScaLAPACK. In particular, PETSc's local sizes lose
1837: significance and are thus ignored. The block sizes refer to the values
1838: used for the distributed matrix, not the same meaning as in BAIJ.
1840: Level: intermediate
1842: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1843: @*/
1844: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1845: {
1847: Mat_ScaLAPACK *a;
1848: PetscInt m,n;
1851: MatCreate(comm,A);
1852: MatSetType(*A,MATSCALAPACK);
1853: if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1854: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1855: m = PETSC_DECIDE;
1856: PetscSplitOwnershipEqual(comm,&m,&M);
1857: n = PETSC_DECIDE;
1858: PetscSplitOwnershipEqual(comm,&n,&N);
1859: MatSetSizes(*A,m,n,M,N);
1860: a = (Mat_ScaLAPACK*)(*A)->data;
1861: PetscBLASIntCast(M,&a->M);
1862: PetscBLASIntCast(N,&a->N);
1863: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1864: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1865: PetscBLASIntCast(rsrc,&a->rsrc);
1866: PetscBLASIntCast(csrc,&a->csrc);
1867: MatSetUp(*A);
1868: return(0);
1869: }
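/*
   Minimal usage sketch (the 100x100 size and the inserted entry are illustrative): any process may
   set any global entry; off-process entries are communicated during assembly

     Mat A;
     ierr = MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,0,0,&A);CHKERRQ(ierr);
     ierr = MatSetValue(A,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/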