Actual source code: matscalapack.c

petsc-master 2020-10-29
Report Typos and Errors
  1: #include <petsc/private/petscscalapack.h>

  3: #define DEFAULT_BLOCKSIZE 64

  5: /*
  6:     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  7:   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
  8: */
  9: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

 11: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
 12: {
 13:   PetscErrorCode    ierr;
 14:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
 15:   PetscBool         iascii;
 16:   PetscViewerFormat format;
 17:   Mat               Adense;

 20:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 21:   if (iascii) {
 22:     PetscViewerGetFormat(viewer,&format);
 23:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 24:       PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
 25:       PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
 26:       PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
 27:       PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
 28:       return(0);
 29:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 30:       return(0);
 31:     }
 32:   }
 33:   /* convert to dense format and call MatView() */
 34:   MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
 35:   MatView(Adense,viewer);
 36:   MatDestroy(&Adense);
 37:   return(0);
 38: }

 40: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
 41: {
 43:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
 44:   PetscLogDouble isend[2],irecv[2];

 47:   info->block_size = 1.0;

 49:   isend[0] = a->lld*a->locc;     /* locally allocated */
 50:   isend[1] = a->locr*a->locc;    /* used submatrix */
 51:   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
 52:     info->nz_allocated   = isend[0];
 53:     info->nz_used        = isend[1];
 54:   } else if (flag == MAT_GLOBAL_MAX) {
 55:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
 56:     info->nz_allocated   = irecv[0];
 57:     info->nz_used        = irecv[1];
 58:   } else if (flag == MAT_GLOBAL_SUM) {
 59:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
 60:     info->nz_allocated   = irecv[0];
 61:     info->nz_used        = irecv[1];
 62:   }

 64:   info->nz_unneeded       = 0;
 65:   info->assemblies        = A->num_ass;
 66:   info->mallocs           = 0;
 67:   info->memory            = ((PetscObject)A)->mem;
 68:   info->fill_ratio_given  = 0;
 69:   info->fill_ratio_needed = 0;
 70:   info->factor_mallocs    = 0;
 71:   return(0);
 72: }

 74: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
 75: {
 77:   switch (op) {
 78:     case MAT_NEW_NONZERO_LOCATIONS:
 79:     case MAT_NEW_NONZERO_LOCATION_ERR:
 80:     case MAT_NEW_NONZERO_ALLOCATION_ERR:
 81:     case MAT_SYMMETRIC:
 82:     case MAT_SORTED_FULL:
 83:     case MAT_HERMITIAN:
 84:       break;
 85:     default:
 86:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
 87:   }
 88:   return(0);
 89: }

 91: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
 92: {
 93:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
 95:   PetscInt       i,j;
 96:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

 99:   for (i=0;i<nr;i++) {
100:     if (rows[i] < 0) continue;
101:     PetscBLASIntCast(rows[i]+1,&gridx);
102:     for (j=0;j<nc;j++) {
103:       if (cols[j] < 0) continue;
104:       PetscBLASIntCast(cols[j]+1,&gcidx);
105:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
106:       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
107:         switch (imode) {
108:           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
109:           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
110:           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
111:         }
112:       } else {
113:         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
114:         A->assembled = PETSC_FALSE;
115:         MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
116:       }
117:     }
118:   }
119:   return(0);
120: }

122: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
123: {
125:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
126:   PetscScalar    *x2d,*y2d,alpha=1.0;
127:   const PetscInt *ranges;
128:   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

131:   if (transpose) {

133:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
134:     PetscLayoutGetRanges(A->rmap,&ranges);
135:     PetscBLASIntCast(ranges[1],&mb);  /* x block size */
136:     xlld = PetscMax(1,A->rmap->n);
137:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
139:     PetscLayoutGetRanges(A->cmap,&ranges);
140:     PetscBLASIntCast(ranges[1],&nb);  /* y block size */
141:     ylld = 1;
142:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));

145:     /* allocate 2d vectors */
146:     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
147:     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
148:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
149:     xlld = PetscMax(1,lszx);

151:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
152:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
154:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));

157:     /* redistribute x as a column of a 2d matrix */
158:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

160:     /* redistribute y as a row of a 2d matrix */
161:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

163:     /* call PBLAS subroutine */
164:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

166:     /* redistribute y from a row of a 2d matrix */
167:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

169:   } else {   /* non-transpose */

171:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
172:     PetscLayoutGetRanges(A->cmap,&ranges);
173:     PetscBLASIntCast(ranges[1],&nb);  /* x block size */
174:     xlld = 1;
175:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
177:     PetscLayoutGetRanges(A->rmap,&ranges);
178:     PetscBLASIntCast(ranges[1],&mb);  /* y block size */
179:     ylld = PetscMax(1,A->rmap->n);
180:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));

183:     /* allocate 2d vectors */
184:     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
185:     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
186:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
187:     ylld = PetscMax(1,lszy);

189:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
190:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
192:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));

195:     /* redistribute x as a row of a 2d matrix */
196:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

198:     /* redistribute y as a column of a 2d matrix */
199:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

201:     /* call PBLAS subroutine */
202:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

204:     /* redistribute y from a column of a 2d matrix */
205:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

207:   }
208:   PetscFree2(x2d,y2d);
209:   return(0);
210: }

212: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213: {
214:   PetscErrorCode    ierr;
215:   const PetscScalar *xarray;
216:   PetscScalar       *yarray;

219:   VecGetArrayRead(x,&xarray);
220:   VecGetArray(y,&yarray);
221:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
222:   VecRestoreArrayRead(x,&xarray);
223:   VecRestoreArray(y,&yarray);
224:   return(0);
225: }

227: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228: {
229:   PetscErrorCode    ierr;
230:   const PetscScalar *xarray;
231:   PetscScalar       *yarray;

234:   VecGetArrayRead(x,&xarray);
235:   VecGetArray(y,&yarray);
236:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
237:   VecRestoreArrayRead(x,&xarray);
238:   VecRestoreArray(y,&yarray);
239:   return(0);
240: }

242: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243: {
244:   PetscErrorCode    ierr;
245:   const PetscScalar *xarray;
246:   PetscScalar       *zarray;

249:   if (y != z) { VecCopy(y,z); }
250:   VecGetArrayRead(x,&xarray);
251:   VecGetArray(z,&zarray);
252:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
253:   VecRestoreArrayRead(x,&xarray);
254:   VecRestoreArray(z,&zarray);
255:   return(0);
256: }

258: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259: {
260:   PetscErrorCode    ierr;
261:   const PetscScalar *xarray;
262:   PetscScalar       *zarray;

265:   if (y != z) { VecCopy(y,z); }
266:   VecGetArrayRead(x,&xarray);
267:   VecGetArray(z,&zarray);
268:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
269:   VecRestoreArrayRead(x,&xarray);
270:   VecRestoreArray(z,&zarray);
271:   return(0);
272: }

274: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
275: {
276:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
277:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
278:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
279:   PetscScalar   sone=1.0,zero=0.0;
280:   PetscBLASInt  one=1;

283:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284:   C->assembled = PETSC_TRUE;
285:   return(0);
286: }

288: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289: {

293:   MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
294:   MatSetType(C,MATSCALAPACK);
295:   MatSetUp(C);
296:   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
297:   return(0);
298: }

300: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
301: {
302:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
303:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
304:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
305:   PetscScalar   sone=1.0,zero=0.0;
306:   PetscBLASInt  one=1;

309:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
310:   C->assembled = PETSC_TRUE;
311:   return(0);
312: }

314: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315: {

319:   MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
320:   MatSetType(C,MATSCALAPACK);
321:   MatSetUp(C);
322:   return(0);
323: }

325: /* --------------------------------------- */
326: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
327: {
329:   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
330:   C->ops->productsymbolic = MatProductSymbolic_AB;
331:   return(0);
332: }

334: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
335: {
337:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
338:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
339:   return(0);
340: }

342: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343: {
345:   Mat_Product    *product = C->product;

348:   switch (product->type) {
349:     case MATPRODUCT_AB:
350:       MatProductSetFromOptions_ScaLAPACK_AB(C);
351:       break;
352:     case MATPRODUCT_ABt:
353:       MatProductSetFromOptions_ScaLAPACK_ABt(C);
354:       break;
355:     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356:   }
357:   return(0);
358: }
359: /* --------------------------------------- */

361: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
362: {
363:   PetscErrorCode    ierr;
364:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
365:   PetscScalar       *darray,*d2d,v;
366:   const PetscInt    *ranges;
367:   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

370:   VecGetArray(D,&darray);

372:   if (A->rmap->N<=A->cmap->N) {   /* row version */

374:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
375:     PetscLayoutGetRanges(A->rmap,&ranges);
376:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
377:     dlld = PetscMax(1,A->rmap->n);
378:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

381:     /* allocate 2d vector */
382:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
383:     PetscCalloc1(lszd,&d2d);
384:     dlld = PetscMax(1,lszd);

386:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
387:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

390:     /* collect diagonal */
391:     for (j=1;j<=a->M;j++) {
392:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
393:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
394:     }

396:     /* redistribute d from a column of a 2d matrix */
397:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
398:     PetscFree(d2d);

400:   } else {   /* column version */

402:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
403:     PetscLayoutGetRanges(A->cmap,&ranges);
404:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
405:     dlld = 1;
406:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

409:     /* allocate 2d vector */
410:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
411:     PetscCalloc1(lszd,&d2d);

413:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
414:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

417:     /* collect diagonal */
418:     for (j=1;j<=a->N;j++) {
419:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
420:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
421:     }

423:     /* redistribute d from a row of a 2d matrix */
424:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
425:     PetscFree(d2d);
426:   }

428:   VecRestoreArray(D,&darray);
429:   VecAssemblyBegin(D);
430:   VecAssemblyEnd(D);
431:   return(0);
432: }

434: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
435: {
436:   PetscErrorCode    ierr;
437:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
438:   const PetscScalar *d;
439:   const PetscInt    *ranges;
440:   PetscScalar       *d2d;
441:   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

444:   if (R) {
445:     VecGetArrayRead(R,(const PetscScalar **)&d);
446:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
447:     PetscLayoutGetRanges(A->cmap,&ranges);
448:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
449:     dlld = 1;
450:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

453:     /* allocate 2d vector */
454:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
455:     PetscCalloc1(lszd,&d2d);

457:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
458:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

461:     /* redistribute d to a row of a 2d matrix */
462:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

464:     /* broadcast along process columns */
465:     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
466:     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

468:     /* local scaling */
469:     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

471:     PetscFree(d2d);
472:     VecRestoreArrayRead(R,(const PetscScalar **)&d);
473:   }
474:   if (L) {
475:     VecGetArrayRead(L,(const PetscScalar **)&d);
476:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
477:     PetscLayoutGetRanges(A->rmap,&ranges);
478:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
479:     dlld = PetscMax(1,A->rmap->n);
480:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

483:     /* allocate 2d vector */
484:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
485:     PetscCalloc1(lszd,&d2d);
486:     dlld = PetscMax(1,lszd);

488:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
489:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

492:     /* redistribute d to a column of a 2d matrix */
493:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

495:     /* broadcast along process rows */
496:     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
497:     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

499:     /* local scaling */
500:     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

502:     PetscFree(d2d);
503:     VecRestoreArrayRead(L,(const PetscScalar **)&d);
504:   }
505:   return(0);
506: }

508: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
509: {
511:   *missing = PETSC_FALSE;
512:   return(0);
513: }

515: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516: {
517:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518:   PetscBLASInt  n,one=1;

521:   n = x->lld*x->locc;
522:   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523:   return(0);
524: }

526: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527: {
528:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529:   PetscBLASInt  i,n;
530:   PetscScalar   v;

533:   n = PetscMin(x->M,x->N);
534:   for (i=1;i<=n;i++) {
535:     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536:     v += alpha;
537:     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538:   }
539:   return(0);
540: }

542: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
543: {
545:   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
546:   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
547:   PetscBLASInt   one=1;
548:   PetscScalar    beta=1.0;

551:   MatScaLAPACKCheckDistribution(Y,1,X,3);
552:   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
553:   PetscObjectStateIncrease((PetscObject)Y);
554:   return(0);
555: }

557: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558: {
560:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
561:   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

564:   PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
565:   PetscObjectStateIncrease((PetscObject)B);
566:   return(0);
567: }

569: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570: {
571:   Mat            Bs;
572:   MPI_Comm       comm;
573:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

577:   PetscObjectGetComm((PetscObject)A,&comm);
578:   MatCreate(comm,&Bs);
579:   MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
580:   MatSetType(Bs,MATSCALAPACK);
581:   b = (Mat_ScaLAPACK*)Bs->data;
582:   b->M    = a->M;
583:   b->N    = a->N;
584:   b->mb   = a->mb;
585:   b->nb   = a->nb;
586:   b->rsrc = a->rsrc;
587:   b->csrc = a->csrc;
588:   MatSetUp(Bs);
589:   *B = Bs;
590:   if (op == MAT_COPY_VALUES) {
591:     PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
592:   }
593:   Bs->assembled = PETSC_TRUE;
594:   return(0);
595: }

597: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
598: {
600:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
601:   Mat            Bs = *B;
602:   PetscBLASInt   one=1;
603:   PetscScalar    sone=1.0,zero=0.0;
604: #if defined(PETSC_USE_COMPLEX)
605:   PetscInt       i,j;
606: #endif

609:   if (reuse == MAT_INITIAL_MATRIX) {
610:     MatCreate(PetscObjectComm((PetscObject)A),&Bs);
611:     MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
612:     MatSetType(Bs,MATSCALAPACK);
613:     MatSetUp(Bs);
614:     *B = Bs;
615:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
616:   b = (Mat_ScaLAPACK*)Bs->data;
617:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
618: #if defined(PETSC_USE_COMPLEX)
619:   /* undo conjugation */
620:   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
621: #endif
622:   Bs->assembled = PETSC_TRUE;
623:   return(0);
624: }

626: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627: {
628:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629:   PetscInt      i,j;

632:   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633:   return(0);
634: }

636: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
637: {
639:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
640:   Mat            Bs = *B;
641:   PetscBLASInt   one=1;
642:   PetscScalar    sone=1.0,zero=0.0;

645:   if (reuse == MAT_INITIAL_MATRIX) {
646:     MatCreate(PetscObjectComm((PetscObject)A),&Bs);
647:     MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
648:     MatSetType(Bs,MATSCALAPACK);
649:     MatSetUp(Bs);
650:     *B = Bs;
651:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
652:   b = (Mat_ScaLAPACK*)Bs->data;
653:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
654:   Bs->assembled = PETSC_TRUE;
655:   return(0);
656: }

658: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
659: {
661:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
662:   PetscScalar    *x,*x2d;
663:   const PetscInt *ranges;
664:   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

667:   VecCopy(B,X);
668:   VecGetArray(X,&x);

670:   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
671:   PetscLayoutGetRanges(A->rmap,&ranges);
672:   PetscBLASIntCast(ranges[1],&mb);  /* x block size */
673:   xlld = PetscMax(1,A->rmap->n);
674:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));

677:   /* allocate 2d vector */
678:   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
679:   PetscMalloc1(lszx,&x2d);
680:   xlld = PetscMax(1,lszx);

682:   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
683:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));

686:   /* redistribute x as a column of a 2d matrix */
687:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

689:   /* call ScaLAPACK subroutine */
690:   switch (A->factortype) {
691:     case MAT_FACTOR_LU:
692:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
694:       break;
695:     case MAT_FACTOR_CHOLESKY:
696:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
698:       break;
699:     default:
700:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
701:       break;
702:   }

704:   /* redistribute x from a column of a 2d matrix */
705:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

707:   PetscFree(x2d);
708:   VecRestoreArray(X,&x);
709:   return(0);
710: }

712: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713: {

717:   MatSolve_ScaLAPACK(A,B,X);
718:   VecAXPY(X,1,Y);
719:   return(0);
720: }

722: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
723: {
725:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
726:   PetscBool      flg1,flg2;
727:   PetscBLASInt   one=1,info;

730:   PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
731:   PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
732:   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
733:   MatScaLAPACKCheckDistribution(B,1,X,2);
734:   b = (Mat_ScaLAPACK*)B->data;
735:   x = (Mat_ScaLAPACK*)X->data;
736:   PetscArraycpy(x->loc,b->loc,b->lld*b->locc);

738:   switch (A->factortype) {
739:     case MAT_FACTOR_LU:
740:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
742:       break;
743:     case MAT_FACTOR_CHOLESKY:
744:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
746:       break;
747:     default:
748:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
749:       break;
750:   }
751:   return(0);
752: }

754: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755: {
757:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758:   PetscBLASInt   one=1,info;

761:   if (!a->pivots) {
762:     PetscMalloc1(a->locr+a->mb,&a->pivots);
763:     PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));
764:   }
765:   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
767:   A->factortype = MAT_FACTOR_LU;
768:   A->assembled  = PETSC_TRUE;

770:   PetscFree(A->solvertype);
771:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
772:   return(0);
773: }

775: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776: {

780:   MatCopy(A,F,SAME_NONZERO_PATTERN);
781:   MatLUFactor_ScaLAPACK(F,0,0,info);
782:   return(0);
783: }

785: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
786: {
788:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
789:   return(0);
790: }

792: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
793: {
795:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
796:   PetscBLASInt   one=1,info;

799:   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
801:   A->factortype = MAT_FACTOR_CHOLESKY;
802:   A->assembled  = PETSC_TRUE;

804:   PetscFree(A->solvertype);
805:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
806:   return(0);
807: }

809: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810: {

814:   MatCopy(A,F,SAME_NONZERO_PATTERN);
815:   MatCholeskyFactor_ScaLAPACK(F,0,info);
816:   return(0);
817: }

819: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
820: {
822:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
823:   return(0);
824: }

826: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
827: {
829:   *type = MATSOLVERSCALAPACK;
830:   return(0);
831: }

833: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
834: {
835:   Mat            B;

839:   /* Create the factorization matrix */
840:   MatCreate(PetscObjectComm((PetscObject)A),&B);
841:   MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
842:   MatSetType(B,MATSCALAPACK);
843:   MatSetUp(B);
844:   B->factortype = ftype;
845:   PetscFree(B->solvertype);
846:   PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);

848:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
849:   *F = B;
850:   return(0);
851: }

853: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854: {

858:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
859:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
860:   return(0);
861: }

863: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864: {
866:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
867:   PetscBLASInt   one=1,lwork=0;
868:   const char     *ntype;
869:   PetscScalar    *work=NULL,dummy;

872:   switch (type){
873:     case NORM_1:
874:       ntype = "1";
875:       lwork = PetscMax(a->locr,a->locc);
876:       break;
877:     case NORM_FROBENIUS:
878:       ntype = "F";
879:       work  = &dummy;
880:       break;
881:     case NORM_INFINITY:
882:       ntype = "I";
883:       lwork = PetscMax(a->locr,a->locc);
884:       break;
885:     default:
886:       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887:   }
888:   if (lwork) { PetscMalloc1(lwork,&work); }
889:   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890:   if (lwork) { PetscFree(work); }
891:   return(0);
892: }

894: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895: {
896:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

900:   PetscArrayzero(a->loc,a->lld*a->locc);
901:   return(0);
902: }

904: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905: {
906:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
908:   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;

911:   if (rows) {
912:     n     = a->locr;
913:     nb    = a->mb;
914:     isrc  = a->rsrc;
915:     nproc = a->grid->nprow;
916:     iproc = a->grid->myrow;
917:     PetscMalloc1(n,&idx);
918:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
920:   }
921:   if (cols) {
922:     n     = a->locc;
923:     nb    = a->nb;
924:     isrc  = a->csrc;
925:     nproc = a->grid->npcol;
926:     iproc = a->grid->mycol;
927:     PetscMalloc1(n,&idx);
928:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
930:   }
931:   return(0);
932: }

934: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
935: {
937:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
938:   Mat            Bmpi;
939:   MPI_Comm       comm;
940:   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
941:   const PetscInt *ranges,*branges,*cwork;
942:   const PetscScalar *vwork;
943:   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
944:   PetscScalar    *barray;
945:   PetscBool      differ=PETSC_FALSE;
946:   PetscMPIInt    size;

949:   PetscObjectGetComm((PetscObject)A,&comm);
950:   PetscLayoutGetRanges(A->rmap,&ranges);

952:   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
953:     MPI_Comm_size(comm,&size);
954:     PetscLayoutGetRanges((*B)->rmap,&branges);
955:     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
956:   }

958:   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
959:     MatCreate(comm,&Bmpi);
960:     m = PETSC_DECIDE;
961:     PetscSplitOwnershipEqual(comm,&m,&M);
962:     n = PETSC_DECIDE;
963:     PetscSplitOwnershipEqual(comm,&n,&N);
964:     MatSetSizes(Bmpi,m,n,M,N);
965:     MatSetType(Bmpi,MATDENSE);
966:     MatSetUp(Bmpi);

968:     /* create ScaLAPACK descriptor for B (1d block distribution) */
969:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
970:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
971:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

974:     /* redistribute matrix */
975:     MatDenseGetArray(Bmpi,&barray);
976:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
977:     MatDenseRestoreArray(Bmpi,&barray);
978:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
979:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);

981:     /* transfer rows of auxiliary matrix to the final matrix B */
982:     MatGetOwnershipRange(Bmpi,&rstart,&rend);
983:     for (i=rstart;i<rend;i++) {
984:       MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
985:       MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
986:       MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
987:     }
988:     MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
989:     MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
990:     MatDestroy(&Bmpi);

992:   } else {  /* normal cases */

994:     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
995:     else {
996:       MatCreate(comm,&Bmpi);
997:       m = PETSC_DECIDE;
998:       PetscSplitOwnershipEqual(comm,&m,&M);
999:       n = PETSC_DECIDE;
1000:       PetscSplitOwnershipEqual(comm,&n,&N);
1001:       MatSetSizes(Bmpi,m,n,M,N);
1002:       MatSetType(Bmpi,MATDENSE);
1003:       MatSetUp(Bmpi);
1004:     }

1006:     /* create ScaLAPACK descriptor for B (1d block distribution) */
1007:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
1008:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1009:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

1012:     /* redistribute matrix */
1013:     MatDenseGetArray(Bmpi,&barray);
1014:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1015:     MatDenseRestoreArray(Bmpi,&barray);

1017:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1018:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1019:     if (reuse == MAT_INPLACE_MATRIX) {
1020:       MatHeaderReplace(A,&Bmpi);
1021:     } else *B = Bmpi;
1022:   }
1023:   return(0);
1024: }

1026: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1027: {
1029:   Mat_ScaLAPACK  *b;
1030:   Mat            Bmpi;
1031:   MPI_Comm       comm;
1032:   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1033:   const PetscInt *ranges;
1034:   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1035:   PetscScalar    *aarray;
1036:   PetscInt       lda;

1039:   PetscObjectGetComm((PetscObject)A,&comm);

1041:   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1042:   else {
1043:     MatCreate(comm,&Bmpi);
1044:     m = PETSC_DECIDE;
1045:     PetscSplitOwnershipEqual(comm,&m,&M);
1046:     n = PETSC_DECIDE;
1047:     PetscSplitOwnershipEqual(comm,&n,&N);
1048:     MatSetSizes(Bmpi,m,n,M,N);
1049:     MatSetType(Bmpi,MATSCALAPACK);
1050:     MatSetUp(Bmpi);
1051:   }
1052:   b = (Mat_ScaLAPACK*)Bmpi->data;

1054:   /* create ScaLAPACK descriptor for A (1d block distribution) */
1055:   PetscLayoutGetRanges(A->rmap,&ranges);
1056:   PetscBLASIntCast(ranges[1],&amb);  /* row block size */
1057:   MatDenseGetLDA(A,&lda);
1058:   lld = PetscMax(lda,1);  /* local leading dimension */
1059:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));

1062:   /* redistribute matrix */
1063:   MatDenseGetArray(A,&aarray);
1064:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1065:   MatDenseRestoreArray(A,&aarray);

1067:   MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1068:   MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1069:   if (reuse == MAT_INPLACE_MATRIX) {
1070:     MatHeaderReplace(A,&Bmpi);
1071:   } else *B = Bmpi;
1072:   return(0);
1073: }

1075: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1076: {
1077:   Mat               mat_scal;
1078:   PetscErrorCode    ierr;
1079:   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1080:   const PetscInt    *cols;
1081:   const PetscScalar *vals;

1084:   if (reuse == MAT_REUSE_MATRIX) {
1085:     mat_scal = *newmat;
1086:     MatZeroEntries(mat_scal);
1087:   } else {
1088:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1089:     m = PETSC_DECIDE;
1090:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1091:     n = PETSC_DECIDE;
1092:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1093:     MatSetSizes(mat_scal,m,n,M,N);
1094:     MatSetType(mat_scal,MATSCALAPACK);
1095:     MatSetUp(mat_scal);
1096:   }
1097:   for (row=rstart;row<rend;row++) {
1098:     MatGetRow(A,row,&ncols,&cols,&vals);
1099:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1100:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1101:   }
1102:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1103:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1105:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1106:   else *newmat = mat_scal;
1107:   return(0);
1108: }

1110: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1111: {
1112:   Mat               mat_scal;
1113:   PetscErrorCode    ierr;
1114:   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1115:   const PetscInt    *cols;
1116:   const PetscScalar *vals;
1117:   PetscScalar       v;

1120:   if (reuse == MAT_REUSE_MATRIX) {
1121:     mat_scal = *newmat;
1122:     MatZeroEntries(mat_scal);
1123:   } else {
1124:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1125:     m = PETSC_DECIDE;
1126:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1127:     n = PETSC_DECIDE;
1128:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1129:     MatSetSizes(mat_scal,m,n,M,N);
1130:     MatSetType(mat_scal,MATSCALAPACK);
1131:     MatSetUp(mat_scal);
1132:   }
1133:   MatGetRowUpperTriangular(A);
1134:   for (row=rstart;row<rend;row++) {
1135:     MatGetRow(A,row,&ncols,&cols,&vals);
1136:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1137:     for (j=0;j<ncols;j++) { /* lower triangular part */
1138:       if (cols[j] == row) continue;
1139:       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1140:       MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1141:     }
1142:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1143:   }
1144:   MatRestoreRowUpperTriangular(A);
1145:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1146:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1148:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1149:   else *newmat = mat_scal;
1150:   return(0);
1151: }

1153: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1154: {
1155:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1157:   PetscInt       sz=0;

1160:   PetscLayoutSetUp(A->rmap);
1161:   PetscLayoutSetUp(A->cmap);
1162:   if (!a->lld) a->lld = a->locr;

1164:   PetscFree(a->loc);
1165:   PetscIntMultError(a->lld,a->locc,&sz);
1166:   PetscCalloc1(sz,&a->loc);
1167:   PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));

1169:   A->preallocated = PETSC_TRUE;
1170:   return(0);
1171: }

1173: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1174: {
1175:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1176:   PetscErrorCode     ierr;
1177:   Mat_ScaLAPACK_Grid *grid;
1178:   PetscBool          flg;
1179:   MPI_Comm           icomm;

1182:   MatStashDestroy_Private(&A->stash);
1183:   PetscFree(a->loc);
1184:   PetscFree(a->pivots);
1185:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1186:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1187:   if (--grid->grid_refct == 0) {
1188:     Cblacs_gridexit(grid->ictxt);
1189:     Cblacs_gridexit(grid->ictxrow);
1190:     Cblacs_gridexit(grid->ictxcol);
1191:     PetscFree(grid);
1192:     MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
1193:   }
1194:   PetscCommDestroy(&icomm);
1195:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1196:   PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1197:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1198:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1199:   PetscFree(A->data);
1200:   return(0);
1201: }

1203: PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1204: {
1206:   const PetscInt *ranges;
1207:   PetscMPIInt    size;
1208:   PetscInt       i,n;

1211:   MPI_Comm_size(map->comm,&size);
1212:   if (size>2) {
1213:     PetscLayoutGetRanges(map,&ranges);
1214:     n = ranges[1]-ranges[0];
1215:     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1216:     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1217:   }
1218:   return(0);
1219: }

1221: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1222: {
1223:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1225:   PetscBLASInt   info=0;

1228:   PetscLayoutSetUp(A->rmap);
1229:   PetscLayoutSetUp(A->cmap);

1231:   /* check that the layout is as enforced by MatCreateScaLAPACK */
1232:   MatScaLAPACKCheckLayout(A->rmap);
1233:   MatScaLAPACKCheckLayout(A->cmap);

1235:   /* compute local sizes */
1236:   PetscBLASIntCast(A->rmap->N,&a->M);
1237:   PetscBLASIntCast(A->cmap->N,&a->N);
1238:   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1239:   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1240:   a->lld  = PetscMax(1,a->locr);

1242:   /* allocate local array */
1243:   MatScaLAPACKSetPreallocation(A);

1245:   /* set up ScaLAPACK descriptor */
1246:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1248:   return(0);
1249: }

1251: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1252: {
1254:   PetscInt       nstash,reallocs;

1257:   if (A->nooffprocentries) return(0);
1258:   MatStashScatterBegin_Private(A,&A->stash,NULL);
1259:   MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1260:   PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
1261:   return(0);
1262: }

1264: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1265: {
1267:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1268:   PetscMPIInt    n;
1269:   PetscInt       i,flg,*row,*col;
1270:   PetscScalar    *val;
1271:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

1274:   if (A->nooffprocentries) return(0);
1275:   while (1) {
1276:     MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1277:     if (!flg) break;
1278:     for (i=0;i<n;i++) {
1279:       PetscBLASIntCast(row[i]+1,&gridx);
1280:       PetscBLASIntCast(col[i]+1,&gcidx);
1281:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1282:       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1283:       switch (A->insertmode) {
1284:         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1285:         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1286:         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1287:       }
1288:     }
1289:   }
1290:   MatStashScatterEnd_Private(&A->stash);
1291:   return(0);
1292: }

1294: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1295: {
1297:   Mat            Adense,As;
1298:   MPI_Comm       comm;

1301:   PetscObjectGetComm((PetscObject)newMat,&comm);
1302:   MatCreate(comm,&Adense);
1303:   MatSetType(Adense,MATDENSE);
1304:   MatLoad(Adense,viewer);
1305:   MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1306:   MatDestroy(&Adense);
1307:   MatHeaderReplace(newMat,&As);
1308:   return(0);
1309: }

1311: /* -------------------------------------------------------------------*/
1312: static struct _MatOps MatOps_Values = {
1313:        MatSetValues_ScaLAPACK,
1314:        0,
1315:        0,
1316:        MatMult_ScaLAPACK,
1317: /* 4*/ MatMultAdd_ScaLAPACK,
1318:        MatMultTranspose_ScaLAPACK,
1319:        MatMultTransposeAdd_ScaLAPACK,
1320:        MatSolve_ScaLAPACK,
1321:        MatSolveAdd_ScaLAPACK,
1322:        0,
1323: /*10*/ 0,
1324:        MatLUFactor_ScaLAPACK,
1325:        MatCholeskyFactor_ScaLAPACK,
1326:        0,
1327:        MatTranspose_ScaLAPACK,
1328: /*15*/ MatGetInfo_ScaLAPACK,
1329:        0,
1330:        MatGetDiagonal_ScaLAPACK,
1331:        MatDiagonalScale_ScaLAPACK,
1332:        MatNorm_ScaLAPACK,
1333: /*20*/ MatAssemblyBegin_ScaLAPACK,
1334:        MatAssemblyEnd_ScaLAPACK,
1335:        MatSetOption_ScaLAPACK,
1336:        MatZeroEntries_ScaLAPACK,
1337: /*24*/ 0,
1338:        MatLUFactorSymbolic_ScaLAPACK,
1339:        MatLUFactorNumeric_ScaLAPACK,
1340:        MatCholeskyFactorSymbolic_ScaLAPACK,
1341:        MatCholeskyFactorNumeric_ScaLAPACK,
1342: /*29*/ MatSetUp_ScaLAPACK,
1343:        0,
1344:        0,
1345:        0,
1346:        0,
1347: /*34*/ MatDuplicate_ScaLAPACK,
1348:        0,
1349:        0,
1350:        0,
1351:        0,
1352: /*39*/ MatAXPY_ScaLAPACK,
1353:        0,
1354:        0,
1355:        0,
1356:        MatCopy_ScaLAPACK,
1357: /*44*/ 0,
1358:        MatScale_ScaLAPACK,
1359:        MatShift_ScaLAPACK,
1360:        0,
1361:        0,
1362: /*49*/ 0,
1363:        0,
1364:        0,
1365:        0,
1366:        0,
1367: /*54*/ 0,
1368:        0,
1369:        0,
1370:        0,
1371:        0,
1372: /*59*/ 0,
1373:        MatDestroy_ScaLAPACK,
1374:        MatView_ScaLAPACK,
1375:        0,
1376:        0,
1377: /*64*/ 0,
1378:        0,
1379:        0,
1380:        0,
1381:        0,
1382: /*69*/ 0,
1383:        0,
1384:        MatConvert_ScaLAPACK_Dense,
1385:        0,
1386:        0,
1387: /*74*/ 0,
1388:        0,
1389:        0,
1390:        0,
1391:        0,
1392: /*79*/ 0,
1393:        0,
1394:        0,
1395:        0,
1396:        MatLoad_ScaLAPACK,
1397: /*84*/ 0,
1398:        0,
1399:        0,
1400:        0,
1401:        0,
1402: /*89*/ 0,
1403:        0,
1404:        MatMatMultNumeric_ScaLAPACK,
1405:        0,
1406:        0,
1407: /*94*/ 0,
1408:        0,
1409:        0,
1410:        MatMatTransposeMultNumeric_ScaLAPACK,
1411:        0,
1412: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1413:        0,
1414:        0,
1415:        MatConjugate_ScaLAPACK,
1416:        0,
1417: /*104*/0,
1418:        0,
1419:        0,
1420:        0,
1421:        0,
1422: /*109*/MatMatSolve_ScaLAPACK,
1423:        0,
1424:        0,
1425:        0,
1426:        MatMissingDiagonal_ScaLAPACK,
1427: /*114*/0,
1428:        0,
1429:        0,
1430:        0,
1431:        0,
1432: /*119*/0,
1433:        MatHermitianTranspose_ScaLAPACK,
1434:        0,
1435:        0,
1436:        0,
1437: /*124*/0,
1438:        0,
1439:        0,
1440:        0,
1441:        0,
1442: /*129*/0,
1443:        0,
1444:        0,
1445:        0,
1446:        0,
1447: /*134*/0,
1448:        0,
1449:        0,
1450:        0,
1451:        0,
1452:        0,
1453: /*140*/0,
1454:        0,
1455:        0,
1456:        0,
1457:        0,
1458: /*145*/0,
1459:        0,
1460:        0
1461: };

1463: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1464: {
1465:   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1466:   PetscInt           size=stash->size,nsends;
1467:   PetscErrorCode     ierr;
1468:   PetscInt           count,*sindices,**rindices,i,j,l;
1469:   PetscScalar        **rvalues,*svalues;
1470:   MPI_Comm           comm = stash->comm;
1471:   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1472:   PetscMPIInt        *sizes,*nlengths,nreceives;
1473:   PetscInt           *sp_idx,*sp_idy;
1474:   PetscScalar        *sp_val;
1475:   PetscMatStashSpace space,space_next;
1476:   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1477:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

1480:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1481:     InsertMode addv;
1482:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1483:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1484:     mat->insertmode = addv; /* in case this processor had no cache */
1485:   }

1487:   bs2 = stash->bs*stash->bs;

1489:   /*  first count number of contributors to each processor */
1490:   PetscCalloc1(size,&nlengths);
1491:   PetscMalloc1(stash->n+1,&owner);

1493:   i     = j    = 0;
1494:   space = stash->space_head;
1495:   while (space) {
1496:     space_next = space->next;
1497:     for (l=0; l<space->local_used; l++) {
1498:       PetscBLASIntCast(space->idx[l]+1,&gridx);
1499:       PetscBLASIntCast(space->idy[l]+1,&gcidx);
1500:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1501:       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1502:       nlengths[j]++; owner[i] = j;
1503:       i++;
1504:     }
1505:     space = space_next;
1506:   }

1508:   /* Now check what procs get messages - and compute nsends. */
1509:   PetscCalloc1(size,&sizes);
1510:   for (i=0, nsends=0; i<size; i++) {
1511:     if (nlengths[i]) {
1512:       sizes[i] = 1; nsends++;
1513:     }
1514:   }

1516:   {PetscMPIInt *onodes,*olengths;
1517:    /* Determine the number of messages to expect, their lengths, from from-ids */
1518:    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1519:    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1520:    /* since clubbing row,col - lengths are multiplied by 2 */
1521:    for (i=0; i<nreceives; i++) olengths[i] *=2;
1522:    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1523:    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1524:    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1525:    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1526:    PetscFree(onodes);
1527:    PetscFree(olengths);}

1529:   /* do sends:
1530:       1) starts[i] gives the starting index in svalues for stuff going to
1531:          the ith processor
1532:   */
1533:   PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1534:   PetscMalloc1(2*nsends,&send_waits);
1535:   PetscMalloc2(size,&startv,size,&starti);
1536:   /* use 2 sends the first with all_a, the next with all_i and all_j */
1537:   startv[0] = 0; starti[0] = 0;
1538:   for (i=1; i<size; i++) {
1539:     startv[i] = startv[i-1] + nlengths[i-1];
1540:     starti[i] = starti[i-1] + 2*nlengths[i-1];
1541:   }

1543:   i     = 0;
1544:   space = stash->space_head;
1545:   while (space) {
1546:     space_next = space->next;
1547:     sp_idx     = space->idx;
1548:     sp_idy     = space->idy;
1549:     sp_val     = space->val;
1550:     for (l=0; l<space->local_used; l++) {
1551:       j = owner[i];
1552:       if (bs2 == 1) {
1553:         svalues[startv[j]] = sp_val[l];
1554:       } else {
1555:         PetscInt    k;
1556:         PetscScalar *buf1,*buf2;
1557:         buf1 = svalues+bs2*startv[j];
1558:         buf2 = space->val + bs2*l;
1559:         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1560:       }
1561:       sindices[starti[j]]             = sp_idx[l];
1562:       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1563:       startv[j]++;
1564:       starti[j]++;
1565:       i++;
1566:     }
1567:     space = space_next;
1568:   }
1569:   startv[0] = 0;
1570:   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

1572:   for (i=0,count=0; i<size; i++) {
1573:     if (sizes[i]) {
1574:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1575:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1576:     }
1577:   }
1578: #if defined(PETSC_USE_INFO)
1579:   PetscInfo1(NULL,"No of messages: %d \n",nsends);
1580:   for (i=0; i<size; i++) {
1581:     if (sizes[i]) {
1582:       PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
1583:     }
1584:   }
1585: #endif
1586:   PetscFree(nlengths);
1587:   PetscFree(owner);
1588:   PetscFree2(startv,starti);
1589:   PetscFree(sizes);

1591:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1592:   PetscMalloc1(2*nreceives,&recv_waits);

1594:   for (i=0; i<nreceives; i++) {
1595:     recv_waits[2*i]   = recv_waits1[i];
1596:     recv_waits[2*i+1] = recv_waits2[i];
1597:   }
1598:   stash->recv_waits = recv_waits;

1600:   PetscFree(recv_waits1);
1601:   PetscFree(recv_waits2);

1603:   stash->svalues         = svalues;
1604:   stash->sindices        = sindices;
1605:   stash->rvalues         = rvalues;
1606:   stash->rindices        = rindices;
1607:   stash->send_waits      = send_waits;
1608:   stash->nsends          = nsends;
1609:   stash->nrecvs          = nreceives;
1610:   stash->reproduce_count = 0;
1611:   return(0);
1612: }

1614: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1615: {
1617:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

1620:   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1621:   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1622:   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1623:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1624:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1625:   return(0);
1626: }

1628: /*@
1629:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1630:    the ScaLAPACK matrix

1632:    Logically Collective on A

1634:    Input Parameter:
1635: +  A  - a MATSCALAPACK matrix
1636: .  mb - the row block size
1637: -  nb - the column block size

1639:    Level: intermediate

1641: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1642: @*/
1643: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1644: {

1651:   PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1652:   return(0);
1653: }

1655: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1656: {
1657:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

1660:   if (mb) *mb = a->mb;
1661:   if (nb) *nb = a->nb;
1662:   return(0);
1663: }

1665: /*@
1666:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1667:    the ScaLAPACK matrix

1669:    Not collective

1671:    Input Parameter:
1672: .  A  - a MATSCALAPACK matrix

1674:    Output Parameters:
1675: +  mb - the row block size
1676: -  nb - the column block size

1678:    Level: intermediate

1680: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1681: @*/
1682: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1683: {

1688:   PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1689:   return(0);
1690: }

1692: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1693: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

1695: /*MC
1696:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1698:    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

1700:    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

1702:    Options Database Keys:
1703: +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1704: .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1705: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1707:    Level: beginner

1709: .seealso: MATDENSE, MATELEMENTAL
1710: M*/

1712: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1713: {
1714:   Mat_ScaLAPACK      *a;
1715:   PetscErrorCode     ierr;
1716:   PetscBool          flg,flg1;
1717:   Mat_ScaLAPACK_Grid *grid;
1718:   MPI_Comm           icomm;
1719:   PetscBLASInt       nprow,npcol,myrow,mycol;
1720:   PetscInt           optv1,k=2,array[2]={0,0};
1721:   PetscMPIInt        size;

1724:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1725:   A->insertmode = NOT_SET_VALUES;

1727:   MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1728:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1729:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1730:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1731:   A->stash.ScatterDestroy = NULL;

1733:   PetscNewLog(A,&a);
1734:   A->data = (void*)a;

1736:   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1737:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1738:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1739:   }
1740:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1741:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1742:   if (!flg) {
1743:     PetscNewLog(A,&grid);

1745:     MPI_Comm_size(icomm,&size);
1746:     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

1748:     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1749:     PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1750:     if (flg1) {
1751:       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1752:       grid->nprow = optv1;
1753:     }
1754:     PetscOptionsEnd();

1756:     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1757:     grid->npcol = size/grid->nprow;
1758:     PetscBLASIntCast(grid->nprow,&nprow);
1759:     PetscBLASIntCast(grid->npcol,&npcol);
1760:     Cblacs_get(-1,0,&grid->ictxt);
1761:     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1762:     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1763:     grid->grid_refct = 1;
1764:     grid->nprow      = nprow;
1765:     grid->npcol      = npcol;
1766:     grid->myrow      = myrow;
1767:     grid->mycol      = mycol;
1768:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1769:     Cblacs_get(-1,0,&grid->ictxrow);
1770:     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1771:     Cblacs_get(-1,0,&grid->ictxcol);
1772:     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1773:     MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);

1775:   } else grid->grid_refct++;
1776:   PetscCommDestroy(&icomm);
1777:   a->grid = grid;
1778:   a->mb   = DEFAULT_BLOCKSIZE;
1779:   a->nb   = DEFAULT_BLOCKSIZE;

1781:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1782:   PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1783:   if (flg) {
1784:     a->mb = array[0];
1785:     a->nb = (k>1)? array[1]: a->mb;
1786:   }
1787:   PetscOptionsEnd();

1789:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1790:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1791:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1792:   PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1793:   return(0);
1794: }

1796: /*@C
1797:    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1798:    (2D block cyclic distribution).

1800:    Collective

1802:    Input Parameters:
1803: +  comm - MPI communicator
1804: .  mb   - row block size (or PETSC_DECIDE to have it set)
1805: .  nb   - column block size (or PETSC_DECIDE to have it set)
1806: .  M    - number of global rows
1807: .  N    - number of global columns
1808: .  rsrc - coordinate of process that owns the first row of the distributed matrix
1809: -  csrc - coordinate of process that owns the first column of the distributed matrix

1811:    Output Parameter:
1812: .  A - the matrix

1814:    Options Database Keys:
1815: .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1817:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1818:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1819:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

1821:    Notes:
1822:    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1823:    is chosen.

1825:    Storage Information:
1826:    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1827:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1828:    significance and are thus ignored. The block sizes refer to the values
1829:    used for the distributed matrix, not the same meaning as in BAIJ.

1831:    Level: intermediate

1833: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1834: @*/
1835: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1836: {
1838:   Mat_ScaLAPACK  *a;
1839:   PetscInt       m,n;

1842:   MatCreate(comm,A);
1843:   MatSetType(*A,MATSCALAPACK);
1844:   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1845:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1846:   m = PETSC_DECIDE;
1847:   PetscSplitOwnershipEqual(comm,&m,&M);
1848:   n = PETSC_DECIDE;
1849:   PetscSplitOwnershipEqual(comm,&n,&N);
1850:   MatSetSizes(*A,m,n,M,N);
1851:   a = (Mat_ScaLAPACK*)(*A)->data;
1852:   PetscBLASIntCast(M,&a->M);
1853:   PetscBLASIntCast(N,&a->N);
1854:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1855:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1856:   PetscBLASIntCast(rsrc,&a->rsrc);
1857:   PetscBLASIntCast(csrc,&a->csrc);
1858:   MatSetUp(*A);
1859:   return(0);
1860: }