Actual source code: matscalapack.c

petsc-3.14.2 2020-12-03
  1: #include <petsc/private/petscscalapack.h>

  3: #define DEFAULT_BLOCKSIZE 64

  5: /*
  6:     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  7:   is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
  8: */
  9: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

 11: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
 12: {

 16:   PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
 17:   MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
 18:   return(0);
 19: }

 21: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
 22: {
 23:   PetscErrorCode    ierr;
 24:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
 25:   PetscBool         iascii;
 26:   PetscViewerFormat format;
 27:   Mat               Adense;

 30:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 31:   if (iascii) {
 32:     PetscViewerGetFormat(viewer,&format);
 33:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 34:       PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
 35:       PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
 36:       PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
 37:       PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
 38:       return(0);
 39:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 40:       return(0);
 41:     }
 42:   }
 43:   /* convert to dense format and call MatView() */
 44:   MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
 45:   MatView(Adense,viewer);
 46:   MatDestroy(&Adense);
 47:   return(0);
 48: }

 50: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
 51: {
 53:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
 54:   PetscLogDouble isend[2],irecv[2];

 57:   info->block_size = 1.0;

 59:   isend[0] = a->lld*a->locc;     /* locally allocated */
 60:   isend[1] = a->locr*a->locc;    /* used submatrix */
 61:   if (flag == MAT_LOCAL) {
 62:     info->nz_allocated   = isend[0];
 63:     info->nz_used        = isend[1];
 64:   } else if (flag == MAT_GLOBAL_MAX) {
 65:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
 66:     info->nz_allocated   = irecv[0];
 67:     info->nz_used        = irecv[1];
 68:   } else if (flag == MAT_GLOBAL_SUM) {
 69:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
 70:     info->nz_allocated   = irecv[0];
 71:     info->nz_used        = irecv[1];
 72:   }

 74:   info->nz_unneeded       = 0;
 75:   info->assemblies        = A->num_ass;
 76:   info->mallocs           = 0;
 77:   info->memory            = ((PetscObject)A)->mem;
 78:   info->fill_ratio_given  = 0;
 79:   info->fill_ratio_needed = 0;
 80:   info->factor_mallocs    = 0;
 81:   return(0);
 82: }

 84: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
 85: {
 87:   switch (op) {
 88:     case MAT_NEW_NONZERO_LOCATIONS:
 89:     case MAT_NEW_NONZERO_LOCATION_ERR:
 90:     case MAT_NEW_NONZERO_ALLOCATION_ERR:
 91:     case MAT_SYMMETRIC:
 92:     case MAT_SORTED_FULL:
 93:     case MAT_HERMITIAN:
 94:       break;
 95:     default:
 96:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
 97:   }
 98:   return(0);
 99: }

101: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
102: {
103:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
105:   PetscInt       i,j;
106:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

109:   for (i=0;i<nr;i++) {
110:     if (rows[i] < 0) continue;
111:     PetscBLASIntCast(rows[i]+1,&gridx);
112:     for (j=0;j<nc;j++) {
113:       if (cols[j] < 0) continue;
114:       PetscBLASIntCast(cols[j]+1,&gcidx);
115:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
116:       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
117:         switch (imode) {
118:           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
119:           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
120:           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
121:         }
122:       } else {
123:         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
124:         A->assembled = PETSC_FALSE;
125:         MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
126:       }
127:     }
128:   }
129:   return(0);
130: }
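/* Values whose (row,col) pair maps to another process are not sent here; they
   are collected in the matrix stash and communicated later, during assembly
   (see MatStashScatterBegin_ScaLAPACK and MatAssemblyEnd_ScaLAPACK below). */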

132: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
133: {
135:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
136:   PetscScalar    *x2d,*y2d,alpha=1.0;
137:   const PetscInt *ranges;
138:   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

141:   if (transpose) {

143:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
144:     PetscLayoutGetRanges(A->rmap,&ranges);
145:     PetscBLASIntCast(ranges[1],&mb);  /* x block size */
146:     xlld = PetscMax(1,A->rmap->n);
147:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
149:     PetscLayoutGetRanges(A->cmap,&ranges);
150:     PetscBLASIntCast(ranges[1],&nb);  /* y block size */
151:     ylld = 1;
152:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));

155:     /* allocate 2d vectors */
156:     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
157:     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
158:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
159:     xlld = PetscMax(1,lszx);

161:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
162:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
164:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));

167:     /* redistribute x as a column of a 2d matrix */
168:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

170:     /* redistribute y as a row of a 2d matrix */
171:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

173:     /* call PBLAS subroutine */
174:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

176:     /* redistribute y from a row of a 2d matrix */
177:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

179:   } else {   /* non-transpose */

181:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
182:     PetscLayoutGetRanges(A->cmap,&ranges);
183:     PetscBLASIntCast(ranges[1],&nb);  /* x block size */
184:     xlld = 1;
185:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
187:     PetscLayoutGetRanges(A->rmap,&ranges);
188:     PetscBLASIntCast(ranges[1],&mb);  /* y block size */
189:     ylld = PetscMax(1,A->rmap->n);
190:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));

193:     /* allocate 2d vectors */
194:     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
195:     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
196:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
197:     ylld = PetscMax(1,lszy);

199:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
200:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
202:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));

205:     /* redistribute x as a row of a 2d matrix */
206:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

208:     /* redistribute y as a column of a 2d matrix */
209:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

211:     /* call PBLAS subroutine */
212:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

214:     /* redistribute y from a column of a 2d matrix */
215:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

217:   }
218:   PetscFree2(x2d,y2d);
219:   return(0);
220: }
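/* The pattern above is shared by both branches: the PETSc vectors x and y are
   1d block-distributed, so each is first described as a one-row or one-column
   ScaLAPACK matrix on the degenerate contexts ictxrow/ictxcol, redistributed
   with p?gemr2d onto the 2d block-cyclic grid of A, multiplied with p?gemv,
   and the result redistributed back to the 1d layout. */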

222: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
223: {
224:   PetscErrorCode    ierr;
225:   const PetscScalar *xarray;
226:   PetscScalar       *yarray;

229:   VecGetArrayRead(x,&xarray);
230:   VecGetArray(y,&yarray);
231:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
232:   VecRestoreArrayRead(x,&xarray);
233:   VecRestoreArray(y,&yarray);
234:   return(0);
235: }

237: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
238: {
239:   PetscErrorCode    ierr;
240:   const PetscScalar *xarray;
241:   PetscScalar       *yarray;

244:   VecGetArrayRead(x,&xarray);
245:   VecGetArray(y,&yarray);
246:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
247:   VecRestoreArrayRead(x,&xarray);
248:   VecRestoreArray(y,&yarray);
249:   return(0);
250: }

252: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
253: {
254:   PetscErrorCode    ierr;
255:   const PetscScalar *xarray;
256:   PetscScalar       *zarray;

259:   if (y != z) { VecCopy(y,z); }
260:   VecGetArrayRead(x,&xarray);
261:   VecGetArray(z,&zarray);
262:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
263:   VecRestoreArrayRead(x,&xarray);
264:   VecRestoreArray(z,&zarray);
265:   return(0);
266: }

268: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269: {
270:   PetscErrorCode    ierr;
271:   const PetscScalar *xarray;
272:   PetscScalar       *zarray;

275:   if (y != z) { VecCopy(y,z); }
276:   VecGetArrayRead(x,&xarray);
277:   VecGetArray(z,&zarray);
278:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
279:   VecRestoreArrayRead(x,&xarray);
280:   VecRestoreArray(z,&zarray);
281:   return(0);
282: }

284: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
285: {
286:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
287:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
288:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
289:   PetscScalar   sone=1.0,zero=0.0;
290:   PetscBLASInt  one=1;

293:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
294:   C->assembled = PETSC_TRUE;
295:   return(0);
296: }
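/* C = A*B is computed in a single p?gemm call; A, B and C are assumed to live
   on the same BLACS process grid, which holds for matrices created on the same
   communicator since the grid is cached as an MPI attribute (see the
   Petsc_ScaLAPACK_keyval comment at the top of this file). */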

298: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
299: {

303:   MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
304:   MatSetType(C,MATSCALAPACK);
305:   MatSetUp(C);
306:   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
307:   return(0);
308: }

310: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
311: {
312:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
313:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
314:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
315:   PetscScalar   sone=1.0,zero=0.0;
316:   PetscBLASInt  one=1;

319:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
320:   C->assembled = PETSC_TRUE;
321:   return(0);
322: }

324: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
325: {

329:   MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
330:   MatSetType(C,MATSCALAPACK);
331:   MatSetUp(C);
332:   return(0);
333: }

335: /* --------------------------------------- */
336: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
337: {
339:   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
340:   C->ops->productsymbolic = MatProductSymbolic_AB;
341:   return(0);
342: }

344: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
345: {
347:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
348:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
349:   return(0);
350: }

352: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353: {
355:   Mat_Product    *product = C->product;

358:   switch (product->type) {
359:     case MATPRODUCT_AB:
360:       MatProductSetFromOptions_ScaLAPACK_AB(C);
361:       break;
362:     case MATPRODUCT_ABt:
363:       MatProductSetFromOptions_ScaLAPACK_ABt(C);
364:       break;
365:     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
366:   }
367:   return(0);
368: }
369: /* --------------------------------------- */

371: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
372: {
373:   PetscErrorCode    ierr;
374:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
375:   PetscScalar       *darray,*d2d,v;
376:   const PetscInt    *ranges;
377:   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

380:   VecGetArray(D,&darray);

382:   if (A->rmap->N<=A->cmap->N) {   /* row version */

384:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
385:     PetscLayoutGetRanges(A->rmap,&ranges);
386:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
387:     dlld = PetscMax(1,A->rmap->n);
388:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

391:     /* allocate 2d vector */
392:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
393:     PetscCalloc1(lszd,&d2d);
394:     dlld = PetscMax(1,lszd);

396:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
397:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

400:     /* collect diagonal */
401:     for (j=1;j<=a->M;j++) {
402:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
403:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
404:     }

406:     /* redistribute d from a column of a 2d matrix */
407:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
408:     PetscFree(d2d);

410:   } else {   /* column version */

412:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
413:     PetscLayoutGetRanges(A->cmap,&ranges);
414:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
415:     dlld = 1;
416:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

419:     /* allocate 2d vector */
420:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
421:     PetscCalloc1(lszd,&d2d);

423:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
424:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

427:     /* collect diagonal */
428:     for (j=1;j<=a->N;j++) {
429:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
430:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
431:     }

433:     /* redistribute d from a row of a 2d matrix */
434:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
435:     PetscFree(d2d);
436:   }

438:   VecRestoreArray(D,&darray);
439:   VecAssemblyBegin(D);
440:   VecAssemblyEnd(D);
441:   return(0);
442: }

444: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
445: {
446:   PetscErrorCode    ierr;
447:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
448:   const PetscScalar *d;
449:   const PetscInt    *ranges;
450:   PetscScalar       *d2d;
451:   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

454:   if (R) {
455:     VecGetArrayRead(R,(const PetscScalar **)&d);
456:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
457:     PetscLayoutGetRanges(A->cmap,&ranges);
458:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
459:     dlld = 1;
460:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

463:     /* allocate 2d vector */
464:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
465:     PetscCalloc1(lszd,&d2d);

467:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
468:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

471:     /* redistribute d to a row of a 2d matrix */
472:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

474:     /* broadcast along process columns */
475:     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
476:     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

478:     /* local scaling */
479:     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

481:     PetscFree(d2d);
482:     VecRestoreArrayRead(R,(const PetscScalar **)&d);
483:   }
484:   if (L) {
485:     VecGetArrayRead(L,(const PetscScalar **)&d);
486:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
487:     PetscLayoutGetRanges(A->rmap,&ranges);
488:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
489:     dlld = PetscMax(1,A->rmap->n);
490:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

493:     /* allocate 2d vector */
494:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
495:     PetscCalloc1(lszd,&d2d);
496:     dlld = PetscMax(1,lszd);

498:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
499:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

502:     /* redistribute d to a column of a 2d matrix */
503:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

505:     /* broadcast along process rows */
506:     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
507:     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

509:     /* local scaling */
510:     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

512:     PetscFree(d2d);
513:     VecRestoreArrayRead(L,(const PetscScalar **)&d);
514:   }
515:   return(0);
516: }
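/* In both branches the scaling vector is first redistributed onto a single
   process row (or column) of the 2d grid with p?gemr2d, then broadcast to the
   remaining processes with the BLACS primitives Cdgebs2d/Cdgebr2d, after which
   each process scales its local block without further communication. */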

518: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
519: {
521:   *missing = PETSC_FALSE;
522:   return(0);
523: }

525: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
526: {
527:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
528:   PetscBLASInt  n,one=1;

531:   n = x->lld*x->locc;
532:   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
533:   return(0);
534: }

536: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
537: {
538:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539:   PetscBLASInt  i,n;
540:   PetscScalar   v;

543:   n = PetscMin(x->M,x->N);
544:   for (i=1;i<=n;i++) {
545:     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
546:     v += alpha;
547:     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
548:   }
549:   return(0);
550: }

552: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
553: {
555:   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
556:   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
557:   PetscBLASInt   one=1;
558:   PetscScalar    beta=1.0;

561:   MatScaLAPACKCheckDistribution(Y,1,X,3);
562:   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
563:   PetscObjectStateIncrease((PetscObject)Y);
564:   return(0);
565: }

567: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
568: {
570:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
571:   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

574:   PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
575:   PetscObjectStateIncrease((PetscObject)B);
576:   return(0);
577: }

579: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
580: {
581:   Mat            Bs;
582:   MPI_Comm       comm;
583:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

587:   PetscObjectGetComm((PetscObject)A,&comm);
588:   MatCreate(comm,&Bs);
589:   MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
590:   MatSetType(Bs,MATSCALAPACK);
591:   b = (Mat_ScaLAPACK*)Bs->data;
592:   b->M    = a->M;
593:   b->N    = a->N;
594:   b->mb   = a->mb;
595:   b->nb   = a->nb;
596:   b->rsrc = a->rsrc;
597:   b->csrc = a->csrc;
598:   MatSetUp(Bs);
599:   *B = Bs;
600:   if (op == MAT_COPY_VALUES) {
601:     PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
602:   }
603:   Bs->assembled = PETSC_TRUE;
604:   return(0);
605: }

607: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
608: {
610:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
611:   Mat            Bs = *B;
612:   PetscBLASInt   one=1;
613:   PetscScalar    sone=1.0,zero=0.0;
614: #if defined(PETSC_USE_COMPLEX)
615:   PetscInt       i,j;
616: #endif

619:   if (reuse == MAT_INITIAL_MATRIX) {
620:     MatCreate(PetscObjectComm((PetscObject)A),&Bs);
621:     MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
622:     MatSetType(Bs,MATSCALAPACK);
623:     MatSetUp(Bs);
624:     *B = Bs;
625:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
626:   b = (Mat_ScaLAPACK*)Bs->data;
627:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
628: #if defined(PETSC_USE_COMPLEX)
629:   /* undo conjugation */
630:   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
631: #endif
632:   Bs->assembled = PETSC_TRUE;
633:   return(0);
634: }

636: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
637: {
638:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
639:   PetscInt      i,j;

642:   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
643:   return(0);
644: }

646: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
647: {
649:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
650:   Mat            Bs = *B;
651:   PetscBLASInt   one=1;
652:   PetscScalar    sone=1.0,zero=0.0;

655:   if (reuse == MAT_INITIAL_MATRIX) {
656:     MatCreate(PetscObjectComm((PetscObject)A),&Bs);
657:     MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
658:     MatSetType(Bs,MATSCALAPACK);
659:     MatSetUp(Bs);
660:     *B = Bs;
661:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
662:   b = (Mat_ScaLAPACK*)Bs->data;
663:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
664:   Bs->assembled = PETSC_TRUE;
665:   return(0);
666: }

668: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
669: {
671:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
672:   PetscScalar    *x,*x2d;
673:   const PetscInt *ranges;
674:   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

677:   VecCopy(B,X);
678:   VecGetArray(X,&x);

680:   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
681:   PetscLayoutGetRanges(A->rmap,&ranges);
682:   PetscBLASIntCast(ranges[1],&mb);  /* x block size */
683:   xlld = PetscMax(1,A->rmap->n);
684:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));

687:   /* allocate 2d vector */
688:   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
689:   PetscMalloc1(lszx,&x2d);
690:   xlld = PetscMax(1,lszx);

692:   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
693:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));

696:   /* redistribute x as a column of a 2d matrix */
697:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

699:   /* call ScaLAPACK subroutine */
700:   switch (A->factortype) {
701:     case MAT_FACTOR_LU:
702:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
704:       break;
705:     case MAT_FACTOR_CHOLESKY:
706:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
708:       break;
709:     default:
710:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
711:   }

713:   /* redistribute x from a column of a 2d matrix */
714:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

716:   PetscFree(x2d);
717:   VecRestoreArray(X,&x);
718:   return(0);
719: }

721: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
722: {

726:   MatSolve_ScaLAPACK(A,B,X);
727:   VecAXPY(X,1,Y);
728:   return(0);
729: }

731: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
732: {
734:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
735:   PetscBool      flg1,flg2;
736:   PetscBLASInt   one=1,info;

739:   PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
740:   PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
741:   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
742:   MatScaLAPACKCheckDistribution(B,1,X,2);
743:   b = (Mat_ScaLAPACK*)B->data;
744:   x = (Mat_ScaLAPACK*)X->data;
745:   PetscArraycpy(x->loc,b->loc,b->lld*b->locc);

747:   switch (A->factortype) {
748:     case MAT_FACTOR_LU:
749:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
751:       break;
752:     case MAT_FACTOR_CHOLESKY:
753:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
755:       break;
756:     default:
757:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
758:   }
759:   return(0);
760: }

762: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
763: {
765:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
766:   PetscBLASInt   one=1,info;

769:   if (!a->pivots) {
770:     PetscMalloc1(a->locr+a->mb,&a->pivots);
 771:     PetscLogObjectMemory((PetscObject)A,(a->locr+a->mb)*sizeof(PetscBLASInt));
772:   }
773:   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
775:   A->factortype = MAT_FACTOR_LU;
776:   A->assembled  = PETSC_TRUE;

778:   PetscFree(A->solvertype);
779:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
780:   return(0);
781: }
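/* The pivot array is allocated with locr+mb entries, matching the p?getrf
   requirement that IPIV have dimension at least LOCr(M_A)+MB_A. */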

783: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
784: {

788:   MatCopy(A,F,SAME_NONZERO_PATTERN);
789:   MatLUFactor_ScaLAPACK(F,0,0,info);
790:   return(0);
791: }

793: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
794: {
 796:   /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
797:   return(0);
798: }

800: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
801: {
803:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
804:   PetscBLASInt   one=1,info;

807:   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
809:   A->factortype = MAT_FACTOR_CHOLESKY;
810:   A->assembled  = PETSC_TRUE;

812:   PetscFree(A->solvertype);
813:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
814:   return(0);
815: }

817: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
818: {

822:   MatCopy(A,F,SAME_NONZERO_PATTERN);
823:   MatCholeskyFactor_ScaLAPACK(F,0,info);
824:   return(0);
825: }

827: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
828: {
 830:   /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
831:   return(0);
832: }

834: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
835: {
837:   *type = MATSOLVERSCALAPACK;
838:   return(0);
839: }

841: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
842: {
843:   Mat            B;

847:   /* Create the factorization matrix */
848:   MatCreate(PetscObjectComm((PetscObject)A),&B);
849:   MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
850:   MatSetType(B,MATSCALAPACK);
851:   MatSetUp(B);
852:   B->factortype = ftype;
853:   PetscFree(B->solvertype);
854:   PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);

856:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
857:   *F = B;
858:   return(0);
859: }

861: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
862: {

866:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
867:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
868:   return(0);
869: }

871: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
872: {
874:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
875:   PetscBLASInt   one=1,lwork=0;
876:   const char     *ntype;
877:   PetscScalar    *work=NULL,dummy;

880:   switch (type){
881:     case NORM_1:
882:       ntype = "1";
883:       lwork = PetscMax(a->locr,a->locc);
884:       break;
885:     case NORM_FROBENIUS:
886:       ntype = "F";
887:       work  = &dummy;
888:       break;
889:     case NORM_INFINITY:
890:       ntype = "I";
891:       lwork = PetscMax(a->locr,a->locc);
892:       break;
893:     default:
894:       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
895:   }
896:   if (lwork) { PetscMalloc1(lwork,&work); }
897:   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
898:   if (lwork) { PetscFree(work); }
899:   return(0);
900: }
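/* p?lange needs a workspace of local dimension for the 1-norm and infinity
   norm (partial sums along columns/rows) but none for the Frobenius norm,
   hence the conditional allocation above; the dummy address merely supplies a
   valid pointer in the Frobenius case. */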

902: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
903: {
904:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

908:   PetscArrayzero(a->loc,a->lld*a->locc);
909:   return(0);
910: }

912: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
913: {
914:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
916:   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;

919:   if (rows) {
920:     n     = a->locr;
921:     nb    = a->mb;
922:     isrc  = a->rsrc;
923:     nproc = a->grid->nprow;
924:     iproc = a->grid->myrow;
925:     PetscMalloc1(n,&idx);
926:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
927:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
928:   }
929:   if (cols) {
930:     n     = a->locc;
931:     nb    = a->nb;
932:     isrc  = a->csrc;
933:     nproc = a->grid->npcol;
934:     iproc = a->grid->mycol;
935:     PetscMalloc1(n,&idx);
936:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
937:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
938:   }
939:   return(0);
940: }
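/* The index expression above is the local-to-global map of the block-cyclic
   distribution: local index i sits at offset i%nb inside local block i/nb;
   blocks are dealt cyclically, so that block's global block number is
   nproc*(i/nb) plus the process offset (nproc+iproc-isrc)%nproc, giving
   global = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb. */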

942: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
943: {
945:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
946:   Mat            Bmpi;
947:   MPI_Comm       comm;
948:   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
949:   const PetscInt *ranges,*branges,*cwork;
950:   const PetscScalar *vwork;
951:   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
952:   PetscScalar    *barray;
953:   PetscBool      differ=PETSC_FALSE;
954:   PetscMPIInt    size;

957:   PetscObjectGetComm((PetscObject)A,&comm);
958:   PetscLayoutGetRanges(A->rmap,&ranges);

960:   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
961:     MPI_Comm_size(comm,&size);
962:     PetscLayoutGetRanges((*B)->rmap,&branges);
963:     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
964:   }

966:   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
967:     MatCreate(comm,&Bmpi);
968:     m = PETSC_DECIDE;
969:     PetscSplitOwnershipEqual(comm,&m,&M);
970:     n = PETSC_DECIDE;
971:     PetscSplitOwnershipEqual(comm,&n,&N);
972:     MatSetSizes(Bmpi,m,n,M,N);
973:     MatSetType(Bmpi,MATDENSE);
974:     MatSetUp(Bmpi);

976:     /* create ScaLAPACK descriptor for B (1d block distribution) */
977:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
978:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
979:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

982:     /* redistribute matrix */
983:     MatDenseGetArray(Bmpi,&barray);
984:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
985:     MatDenseRestoreArray(Bmpi,&barray);
986:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
987:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);

989:     /* transfer rows of auxiliary matrix to the final matrix B */
990:     MatGetOwnershipRange(Bmpi,&rstart,&rend);
991:     for (i=rstart;i<rend;i++) {
992:       MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
993:       MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
994:       MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
995:     }
996:     MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
997:     MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
998:     MatDestroy(&Bmpi);

1000:   } else {  /* normal cases */

1002:     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1003:     else {
1004:       MatCreate(comm,&Bmpi);
1005:       m = PETSC_DECIDE;
1006:       PetscSplitOwnershipEqual(comm,&m,&M);
1007:       n = PETSC_DECIDE;
1008:       PetscSplitOwnershipEqual(comm,&n,&N);
1009:       MatSetSizes(Bmpi,m,n,M,N);
1010:       MatSetType(Bmpi,MATDENSE);
1011:       MatSetUp(Bmpi);
1012:     }

1014:     /* create ScaLAPACK descriptor for B (1d block distribution) */
1015:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
1016:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1017:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

1020:     /* redistribute matrix */
1021:     MatDenseGetArray(Bmpi,&barray);
1022:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1023:     MatDenseRestoreArray(Bmpi,&barray);

1025:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1026:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1027:     if (reuse == MAT_INPLACE_MATRIX) {
1028:       MatHeaderReplace(A,&Bmpi);
1029:     } else *B = Bmpi;
1030:   }
1031:   return(0);
1032: }
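/* The conversion relies on p?gemr2d to move data between A's 2d block-cyclic
   layout and a 1d layout matching the PETSc dense matrix: row block size
   ranges[1], a single column block of width N, and local leading dimension
   A->rmap->n. The MAT_REUSE_MATRIX path goes through an auxiliary dense
   matrix only when the target's row layout differs from A's. */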

1034: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1035: {
1037:   Mat_ScaLAPACK  *b;
1038:   Mat            Bmpi;
1039:   MPI_Comm       comm;
1040:   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1041:   const PetscInt *ranges;
1042:   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1043:   PetscScalar    *aarray;
1044:   PetscInt       lda;

1047:   PetscObjectGetComm((PetscObject)A,&comm);

1049:   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1050:   else {
1051:     MatCreate(comm,&Bmpi);
1052:     m = PETSC_DECIDE;
1053:     PetscSplitOwnershipEqual(comm,&m,&M);
1054:     n = PETSC_DECIDE;
1055:     PetscSplitOwnershipEqual(comm,&n,&N);
1056:     MatSetSizes(Bmpi,m,n,M,N);
1057:     MatSetType(Bmpi,MATSCALAPACK);
1058:     MatSetUp(Bmpi);
1059:   }
1060:   b = (Mat_ScaLAPACK*)Bmpi->data;

1062:   /* create ScaLAPACK descriptor for A (1d block distribution) */
1063:   PetscLayoutGetRanges(A->rmap,&ranges);
1064:   PetscBLASIntCast(ranges[1],&amb);  /* row block size */
1065:   MatDenseGetLDA(A,&lda);
1066:   lld = PetscMax(lda,1);  /* local leading dimension */
1067:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));

1070:   /* redistribute matrix */
1071:   MatDenseGetArray(A,&aarray);
1072:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1073:   MatDenseRestoreArray(A,&aarray);

1075:   MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1076:   MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1077:   if (reuse == MAT_INPLACE_MATRIX) {
1078:     MatHeaderReplace(A,&Bmpi);
1079:   } else *B = Bmpi;
1080:   return(0);
1081: }

1083: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1084: {
1085:   Mat               mat_scal;
1086:   PetscErrorCode    ierr;
1087:   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1088:   const PetscInt    *cols;
1089:   const PetscScalar *vals;

1092:   if (reuse == MAT_REUSE_MATRIX) {
1093:     mat_scal = *newmat;
1094:     MatZeroEntries(mat_scal);
1095:   } else {
1096:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1097:     m = PETSC_DECIDE;
1098:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1099:     n = PETSC_DECIDE;
1100:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1101:     MatSetSizes(mat_scal,m,n,M,N);
1102:     MatSetType(mat_scal,MATSCALAPACK);
1103:     MatSetUp(mat_scal);
1104:   }
1105:   for (row=rstart;row<rend;row++) {
1106:     MatGetRow(A,row,&ncols,&cols,&vals);
1107:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1108:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1109:   }
1110:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1111:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1113:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1114:   else *newmat = mat_scal;
1115:   return(0);
1116: }

1118: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1119: {
1120:   Mat               mat_scal;
1121:   PetscErrorCode    ierr;
1122:   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1123:   const PetscInt    *cols;
1124:   const PetscScalar *vals;
1125:   PetscScalar       v;

1128:   if (reuse == MAT_REUSE_MATRIX) {
1129:     mat_scal = *newmat;
1130:     MatZeroEntries(mat_scal);
1131:   } else {
1132:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1133:     m = PETSC_DECIDE;
1134:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1135:     n = PETSC_DECIDE;
1136:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1137:     MatSetSizes(mat_scal,m,n,M,N);
1138:     MatSetType(mat_scal,MATSCALAPACK);
1139:     MatSetUp(mat_scal);
1140:   }
1141:   MatGetRowUpperTriangular(A);
1142:   for (row=rstart;row<rend;row++) {
1143:     MatGetRow(A,row,&ncols,&cols,&vals);
1144:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1145:     for (j=0;j<ncols;j++) { /* lower triangular part */
1146:       if (cols[j] == row) continue;
1147:       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1148:       MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1149:     }
1150:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1151:   }
1152:   MatRestoreRowUpperTriangular(A);
1153:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1154:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1156:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1157:   else *newmat = mat_scal;
1158:   return(0);
1159: }

1161: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1162: {
1163:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1165:   PetscInt       sz=0;

1168:   PetscLayoutSetUp(A->rmap);
1169:   PetscLayoutSetUp(A->cmap);
1170:   if (!a->lld) a->lld = a->locr;

1172:   PetscFree(a->loc);
1173:   PetscIntMultError(a->lld,a->locc,&sz);
1174:   PetscCalloc1(sz,&a->loc);
1175:   PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));

1177:   A->preallocated = PETSC_TRUE;
1178:   return(0);
1179: }

1181: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1182: {
1183:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1184:   PetscErrorCode     ierr;
1185:   Mat_ScaLAPACK_Grid *grid;
1186:   PetscBool          flg;
1187:   MPI_Comm           icomm;

1190:   MatStashDestroy_Private(&A->stash);
1191:   PetscFree(a->loc);
1192:   PetscFree(a->pivots);
1193:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1194:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1195:   if (--grid->grid_refct == 0) {
1196:     Cblacs_gridexit(grid->ictxt);
1197:     Cblacs_gridexit(grid->ictxrow);
1198:     Cblacs_gridexit(grid->ictxcol);
1199:     PetscFree(grid);
1200:     MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1201:   }
1202:   PetscCommDestroy(&icomm);
1203:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1204:   PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1205:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1206:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1207:   PetscFree(A->data);
1208:   return(0);
1209: }

1211: PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1212: {
1214:   const PetscInt *ranges;
1215:   PetscMPIInt    size;
1216:   PetscInt       i,n;

1219:   MPI_Comm_size(map->comm,&size);
1220:   if (size>2) {
1221:     PetscLayoutGetRanges(map,&ranges);
1222:     n = ranges[1]-ranges[0];
1223:     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1224:     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one); consider using MatCreateScaLAPACK");
1225:   }
1226:   return(0);
1227: }
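/* The equal-local-size requirement stems from the 1d descriptors built in
   MatMult, MatSolve and the conversion routines above: they use ranges[1]
   (the local size of the first process) as a uniform block size, which only
   describes the true PETSc layout if every process except possibly the last
   owns that same number of rows (or columns). */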

1229: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1230: {
1231:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1233:   PetscBLASInt   info=0;

1236:   PetscLayoutSetUp(A->rmap);
1237:   PetscLayoutSetUp(A->cmap);

1239:   /* check that the layout is as enforced by MatCreateScaLAPACK */
1240:   MatScaLAPACKCheckLayout(A->rmap);
1241:   MatScaLAPACKCheckLayout(A->cmap);

1243:   /* compute local sizes */
1244:   PetscBLASIntCast(A->rmap->N,&a->M);
1245:   PetscBLASIntCast(A->cmap->N,&a->N);
1246:   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1247:   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1248:   a->lld  = PetscMax(1,a->locr);

1250:   /* allocate local array */
1251:   MatScaLAPACKSetPreallocation(A);

1253:   /* set up ScaLAPACK descriptor */
1254:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1256:   return(0);
1257: }

1259: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1260: {
1262:   PetscInt       nstash,reallocs;

1265:   if (A->nooffprocentries) return(0);
1266:   MatStashScatterBegin_Private(A,&A->stash,NULL);
1267:   MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1268:   PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
1269:   return(0);
1270: }

1272: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1273: {
1275:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1276:   PetscMPIInt    n;
1277:   PetscInt       i,flg,*row,*col;
1278:   PetscScalar    *val;
1279:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

1282:   if (A->nooffprocentries) return(0);
1283:   while (1) {
1284:     MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1285:     if (!flg) break;
1286:     for (i=0;i<n;i++) {
1287:       PetscBLASIntCast(row[i]+1,&gridx);
1288:       PetscBLASIntCast(col[i]+1,&gcidx);
1289:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1290:       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Something went wrong, received value does not belong to this process");
1291:       switch (A->insertmode) {
1292:         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1293:         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1294:         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1295:       }
1296:     }
1297:   }
1298:   MatStashScatterEnd_Private(&A->stash);
1299:   return(0);
1300: }

1302: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1303: {
1305:   Mat            Adense,As;
1306:   MPI_Comm       comm;

1309:   PetscObjectGetComm((PetscObject)newMat,&comm);
1310:   MatCreate(comm,&Adense);
1311:   MatSetType(Adense,MATDENSE);
1312:   MatLoad(Adense,viewer);
1313:   MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1314:   MatDestroy(&Adense);
1315:   MatHeaderReplace(newMat,&As);
1316:   return(0);
1317: }

1319: /* -------------------------------------------------------------------*/
1320: static struct _MatOps MatOps_Values = {
1321:        MatSetValues_ScaLAPACK,
1322:        0,
1323:        0,
1324:        MatMult_ScaLAPACK,
1325: /* 4*/ MatMultAdd_ScaLAPACK,
1326:        MatMultTranspose_ScaLAPACK,
1327:        MatMultTransposeAdd_ScaLAPACK,
1328:        MatSolve_ScaLAPACK,
1329:        MatSolveAdd_ScaLAPACK,
1330:        0,
1331: /*10*/ 0,
1332:        MatLUFactor_ScaLAPACK,
1333:        MatCholeskyFactor_ScaLAPACK,
1334:        0,
1335:        MatTranspose_ScaLAPACK,
1336: /*15*/ MatGetInfo_ScaLAPACK,
1337:        0,
1338:        MatGetDiagonal_ScaLAPACK,
1339:        MatDiagonalScale_ScaLAPACK,
1340:        MatNorm_ScaLAPACK,
1341: /*20*/ MatAssemblyBegin_ScaLAPACK,
1342:        MatAssemblyEnd_ScaLAPACK,
1343:        MatSetOption_ScaLAPACK,
1344:        MatZeroEntries_ScaLAPACK,
1345: /*24*/ 0,
1346:        MatLUFactorSymbolic_ScaLAPACK,
1347:        MatLUFactorNumeric_ScaLAPACK,
1348:        MatCholeskyFactorSymbolic_ScaLAPACK,
1349:        MatCholeskyFactorNumeric_ScaLAPACK,
1350: /*29*/ MatSetUp_ScaLAPACK,
1351:        0,
1352:        0,
1353:        0,
1354:        0,
1355: /*34*/ MatDuplicate_ScaLAPACK,
1356:        0,
1357:        0,
1358:        0,
1359:        0,
1360: /*39*/ MatAXPY_ScaLAPACK,
1361:        0,
1362:        0,
1363:        0,
1364:        MatCopy_ScaLAPACK,
1365: /*44*/ 0,
1366:        MatScale_ScaLAPACK,
1367:        MatShift_ScaLAPACK,
1368:        0,
1369:        0,
1370: /*49*/ 0,
1371:        0,
1372:        0,
1373:        0,
1374:        0,
1375: /*54*/ 0,
1376:        0,
1377:        0,
1378:        0,
1379:        0,
1380: /*59*/ 0,
1381:        MatDestroy_ScaLAPACK,
1382:        MatView_ScaLAPACK,
1383:        0,
1384:        0,
1385: /*64*/ 0,
1386:        0,
1387:        0,
1388:        0,
1389:        0,
1390: /*69*/ 0,
1391:        0,
1392:        MatConvert_ScaLAPACK_Dense,
1393:        0,
1394:        0,
1395: /*74*/ 0,
1396:        0,
1397:        0,
1398:        0,
1399:        0,
1400: /*79*/ 0,
1401:        0,
1402:        0,
1403:        0,
1404:        MatLoad_ScaLAPACK,
1405: /*84*/ 0,
1406:        0,
1407:        0,
1408:        0,
1409:        0,
1410: /*89*/ 0,
1411:        0,
1412:        MatMatMultNumeric_ScaLAPACK,
1413:        0,
1414:        0,
1415: /*94*/ 0,
1416:        0,
1417:        0,
1418:        MatMatTransposeMultNumeric_ScaLAPACK,
1419:        0,
1420: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1421:        0,
1422:        0,
1423:        MatConjugate_ScaLAPACK,
1424:        0,
1425: /*104*/0,
1426:        0,
1427:        0,
1428:        0,
1429:        0,
1430: /*109*/MatMatSolve_ScaLAPACK,
1431:        0,
1432:        0,
1433:        0,
1434:        MatMissingDiagonal_ScaLAPACK,
1435: /*114*/0,
1436:        0,
1437:        0,
1438:        0,
1439:        0,
1440: /*119*/0,
1441:        MatHermitianTranspose_ScaLAPACK,
1442:        0,
1443:        0,
1444:        0,
1445: /*124*/0,
1446:        0,
1447:        0,
1448:        0,
1449:        0,
1450: /*129*/0,
1451:        0,
1452:        0,
1453:        0,
1454:        0,
1455: /*134*/0,
1456:        0,
1457:        0,
1458:        0,
1459:        0,
1460:        0,
1461: /*140*/0,
1462:        0,
1463:        0,
1464:        0,
1465:        0,
1466: /*145*/0,
1467:        0,
1468:        0
1469: };

1471: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1472: {
1473:   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1474:   PetscInt           size=stash->size,nsends;
1475:   PetscErrorCode     ierr;
1476:   PetscInt           count,*sindices,**rindices,i,j,l;
1477:   PetscScalar        **rvalues,*svalues;
1478:   MPI_Comm           comm = stash->comm;
1479:   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1480:   PetscMPIInt        *sizes,*nlengths,nreceives;
1481:   PetscInt           *sp_idx,*sp_idy;
1482:   PetscScalar        *sp_val;
1483:   PetscMatStashSpace space,space_next;
1484:   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1485:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

1488:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1489:     InsertMode addv;
1490:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1491:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1492:     mat->insertmode = addv; /* in case this processor had no cache */
1493:   }

1495:   bs2 = stash->bs*stash->bs;

1497:   /*  first count number of contributors to each processor */
1498:   PetscCalloc1(size,&nlengths);
1499:   PetscMalloc1(stash->n+1,&owner);

1501:   i     = j    = 0;
1502:   space = stash->space_head;
1503:   while (space) {
1504:     space_next = space->next;
1505:     for (l=0; l<space->local_used; l++) {
1506:       PetscBLASIntCast(space->idx[l]+1,&gridx);
1507:       PetscBLASIntCast(space->idy[l]+1,&gcidx);
1508:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1509:       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1510:       nlengths[j]++; owner[i] = j;
1511:       i++;
1512:     }
1513:     space = space_next;
1514:   }

1516:   /* Now check what procs get messages - and compute nsends. */
1517:   PetscCalloc1(size,&sizes);
1518:   for (i=0, nsends=0; i<size; i++) {
1519:     if (nlengths[i]) {
1520:       sizes[i] = 1; nsends++;
1521:     }
1522:   }

1524:   {PetscMPIInt *onodes,*olengths;
1525:    /* Determine the number of messages to expect, their lengths, and the from-ids */
1526:    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1527:    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1528:    /* since row and column indices are clubbed together, lengths are multiplied by 2 */
1529:    for (i=0; i<nreceives; i++) olengths[i] *=2;
1530:    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1531:    /* values carry bs2 scalars per entry (and remove the earlier factor of 2) */
1532:    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1533:    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1534:    PetscFree(onodes);
1535:    PetscFree(olengths);}

1537:   /* do sends:
1538:       1) starts[i] gives the starting index in svalues for stuff going to
1539:          the ith processor
1540:   */
1541:   PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1542:   PetscMalloc1(2*nsends,&send_waits);
1543:   PetscMalloc2(size,&startv,size,&starti);
1544:   /* use 2 sends: the first with all_a, the next with all_i and all_j */
1545:   startv[0] = 0; starti[0] = 0;
1546:   for (i=1; i<size; i++) {
1547:     startv[i] = startv[i-1] + nlengths[i-1];
1548:     starti[i] = starti[i-1] + 2*nlengths[i-1];
1549:   }

1551:   i     = 0;
1552:   space = stash->space_head;
1553:   while (space) {
1554:     space_next = space->next;
1555:     sp_idx     = space->idx;
1556:     sp_idy     = space->idy;
1557:     sp_val     = space->val;
1558:     for (l=0; l<space->local_used; l++) {
1559:       j = owner[i];
1560:       if (bs2 == 1) {
1561:         svalues[startv[j]] = sp_val[l];
1562:       } else {
1563:         PetscInt    k;
1564:         PetscScalar *buf1,*buf2;
1565:         buf1 = svalues+bs2*startv[j];
1566:         buf2 = space->val + bs2*l;
1567:         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1568:       }
1569:       sindices[starti[j]]             = sp_idx[l];
1570:       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1571:       startv[j]++;
1572:       starti[j]++;
1573:       i++;
1574:     }
1575:     space = space_next;
1576:   }
1577:   startv[0] = 0;
1578:   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

1580:   for (i=0,count=0; i<size; i++) {
1581:     if (sizes[i]) {
1582:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1583:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1584:     }
1585:   }
1586: #if defined(PETSC_USE_INFO)
1587:   PetscInfo1(NULL,"No of messages: %d \n",nsends);
1588:   for (i=0; i<size; i++) {
1589:     if (sizes[i]) {
1590:       PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
1591:     }
1592:   }
1593: #endif
1594:   PetscFree(nlengths);
1595:   PetscFree(owner);
1596:   PetscFree2(startv,starti);
1597:   PetscFree(sizes);

1599:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1600:   PetscMalloc1(2*nreceives,&recv_waits);

1602:   for (i=0; i<nreceives; i++) {
1603:     recv_waits[2*i]   = recv_waits1[i];
1604:     recv_waits[2*i+1] = recv_waits2[i];
1605:   }
1606:   stash->recv_waits = recv_waits;

1608:   PetscFree(recv_waits1);
1609:   PetscFree(recv_waits2);

1611:   stash->svalues         = svalues;
1612:   stash->sindices        = sindices;
1613:   stash->rvalues         = rvalues;
1614:   stash->rindices        = rindices;
1615:   stash->send_waits      = send_waits;
1616:   stash->nsends          = nsends;
1617:   stash->nrecvs          = nreceives;
1618:   stash->reproduce_count = 0;
1619:   return(0);
1620: }
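
/*
   Illustration (not part of the library source): MatStashScatterBegin_ScaLAPACK packs,
   for each destination rank, two messages: the tag1 message carries 2*n integers
   (n row indices followed by n column indices, clubbed together) and the tag2 message
   carries bs2*n scalars. The sketch below only shows how a receiver could walk one
   such message pair; the function name is hypothetical and exists purely to make the
   wire layout concrete.
*/
static void MatStashDecodeSketch(const PetscInt *indices,PetscInt count,const PetscScalar *values,PetscInt bs2)
{
  PetscInt n = count/2,l,k;                        /* tag1 lengths were doubled when the receives were posted */
  for (l=0; l<n; l++) {
    PetscInt           row   = indices[l];         /* first half of the index buffer: row indices */
    PetscInt           col   = indices[n+l];       /* second half: column indices */
    const PetscScalar *block = values + bs2*l;     /* bs2 consecutive values per (row,col) pair */
    for (k=0; k<bs2; k++) {
      /* a real consumer would insert block[k] at (row,col); here we only touch the data */
      (void)row; (void)col; (void)block[k];
    }
  }
}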

1622: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1623: {
1625:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

1628:   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1629:   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1630:   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1631:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1632:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1633:   return(0);
1634: }

1636: /*@
1637:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1638:    the ScaLAPACK matrix

1640:    Logically Collective on A

1642:    Input Parameters:
1643: +  A  - a MATSCALAPACK matrix
1644: .  mb - the row block size
1645: -  nb - the column block size

1647:    Level: intermediate

1649: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1650: @*/
1651: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1652: {

1659:   PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1660:   return(0);
1661: }
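
/*
   Usage sketch (illustrative, not part of this file): block sizes must be set after the
   matrix type but before MatSetUp(), since the implementation above rejects changes once
   the matrix is preallocated. The function name and the sizes 100 and 32 are arbitrary;
   error checking is elided in the same style as the rest of this listing.
*/
static PetscErrorCode ExampleSetBlockSizes(void)
{
  Mat A;

  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
  MatSetType(A,MATSCALAPACK);
  MatScaLAPACKSetBlockSizes(A,32,PETSC_DECIDE);  /* nb=PETSC_DECIDE falls back to mb */
  MatSetUp(A);
  MatDestroy(&A);
  return(0);
}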

1663: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1664: {
1665:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

1668:   if (mb) *mb = a->mb;
1669:   if (nb) *nb = a->nb;
1670:   return(0);
1671: }

1673: /*@
1674:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1675:    the ScaLAPACK matrix

1677:    Not collective

1679:    Input Parameter:
1680: .  A  - a MATSCALAPACK matrix

1682:    Output Parameters:
1683: +  mb - the row block size
1684: -  nb - the column block size

1686:    Level: intermediate

1688: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1689: @*/
1690: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1691: {

1696:   PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1697:   return(0);
1698: }
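
/*
   Usage sketch (illustrative, not part of this file): query the block sizes actually in
   effect, e.g. after they were chosen via PETSC_DECIDE or -mat_scalapack_block_sizes.
   As the implementation above shows, either output pointer may be NULL if that value is
   not needed. The function name is arbitrary.
*/
static PetscErrorCode ExampleGetBlockSizes(Mat A)
{
  PetscInt mb,nb;

  MatScaLAPACKGetBlockSizes(A,&mb,&nb);
  PetscPrintf(PetscObjectComm((PetscObject)A),"block sizes: %D x %D\n",mb,nb);
  return(0);
}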

1700: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1701: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

1703: /*MC
1704:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1706:    Use ./configure --download-scalapack to build PETSc with ScaLAPACK support

1708:    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

1710:    Options Database Keys:
1711: +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1712: .  -mat_scalapack_grid_height - sets the grid height used for the 2D block-cyclic distribution of the internal matrix
1713: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by a comma)

1715:    Level: beginner

1717: .seealso: MATDENSE, MATELEMENTAL
1718: M*/
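
/*
   Usage sketch (illustrative, not part of this file): the options above are typically
   combined with MatSetFromOptions(), e.g. run with
     -mat_type scalapack -mat_scalapack_grid_height 2 -mat_scalapack_block_sizes 32,64
   where the grid height and block sizes shown are placeholders. The function name is
   arbitrary and error checking is elided in the style of this listing.
*/
static PetscErrorCode ExampleScaLAPACKFromOptions(PetscInt M,PetscInt N)
{
  Mat A;

  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
  MatSetFromOptions(A);  /* picks up -mat_type scalapack and the grid/block options */
  MatSetUp(A);
  MatDestroy(&A);
  return(0);
}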

1720: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1721: {
1722:   Mat_ScaLAPACK      *a;
1723:   PetscErrorCode     ierr;
1724:   PetscBool          flg,flg1;
1725:   Mat_ScaLAPACK_Grid *grid;
1726:   MPI_Comm           icomm;
1727:   PetscBLASInt       nprow,npcol,myrow,mycol;
1728:   PetscInt           optv1,k=2,array[2]={0,0};
1729:   PetscMPIInt        size;

1732:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1733:   A->insertmode = NOT_SET_VALUES;

1735:   MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1736:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1737:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1738:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1739:   A->stash.ScatterDestroy = NULL;

1741:   PetscNewLog(A,&a);
1742:   A->data = (void*)a;

1744:   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1745:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1746:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1747:     PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1748:   }
1749:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1750:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1751:   if (!flg) {
1752:     PetscNewLog(A,&grid);

1754:     MPI_Comm_size(icomm,&size);
1755:     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);  /* start from a nearly square grid, floor(sqrt(size)); the 0.001 guards against round-off */

1757:     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1758:     PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1759:     if (flg1) {
1760:       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1761:       grid->nprow = optv1;
1762:     }
1763:     PetscOptionsEnd();

1765:     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1766:     grid->npcol = size/grid->nprow;
1767:     PetscBLASIntCast(grid->nprow,&nprow);
1768:     PetscBLASIntCast(grid->npcol,&npcol);
1769:     grid->ictxt = Csys2blacs_handle(icomm);
1770:     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1771:     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1772:     grid->grid_refct = 1;
1773:     grid->nprow      = nprow;
1774:     grid->npcol      = npcol;
1775:     grid->myrow      = myrow;
1776:     grid->mycol      = mycol;
1777:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1778:     grid->ictxrow = Csys2blacs_handle(icomm);
1779:     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1780:     grid->ictxcol = Csys2blacs_handle(icomm);
1781:     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1782:     MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);

1784:   } else grid->grid_refct++;
1785:   PetscCommDestroy(&icomm);
1786:   a->grid = grid;
1787:   a->mb   = DEFAULT_BLOCKSIZE;
1788:   a->nb   = DEFAULT_BLOCKSIZE;

1790:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1791:   PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1792:   if (flg) {
1793:     a->mb = array[0];
1794:     a->nb = (k>1)? array[1]: a->mb;
1795:   }
1796:   PetscOptionsEnd();

1798:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1799:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1800:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1801:   PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1802:   return(0);
1803: }
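
/*
   Illustration (not part of the library source): ignoring the -mat_scalapack_grid_height
   override, the grid-shape selection above reduces to the pure function below. For
   example, size=6 yields a 2x3 grid, while size=7 (prime) falls back to a 1x7 grid.
   The function name is hypothetical.
*/
static void ExampleGridShape(PetscMPIInt size,PetscInt *nprow,PetscInt *npcol)
{
  *nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);  /* ~floor(sqrt(size)) */
  if (size % *nprow) *nprow = 1;                                /* not a divisor of size: use a 1d grid */
  *npcol = size / *nprow;
}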

1805: /*@C
1806:    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1807:    (2D block cyclic distribution).

1809:    Collective

1811:    Input Parameters:
1812: +  comm - MPI communicator
1813: .  mb   - row block size (or PETSC_DECIDE to have it set)
1814: .  nb   - column block size (or PETSC_DECIDE to have it set)
1815: .  M    - number of global rows
1816: .  N    - number of global columns
1817: .  rsrc - coordinate of process that owns the first row of the distributed matrix
1818: -  csrc - coordinate of process that owns the first column of the distributed matrix

1820:    Output Parameter:
1821: .  A - the matrix

1823:    Options Database Keys:
1824: .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by a comma)

1826:    It is recommended to use MatCreate(), MatSetType() and/or MatSetFromOptions(),
1827:    followed by MatXXXXSetPreallocation(), instead of calling this routine directly.
1828:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

1830:    Notes:
1831:    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1832:    is chosen.

1834:    Storage Information:
1835:    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1836:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1837:    significance and are thus ignored. The block sizes refer to those used by
1838:    the 2D block-cyclic distribution; they do not have the same meaning as in BAIJ.

1840:    Level: intermediate

1842: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1843: @*/
1844: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1845: {
1847:   Mat_ScaLAPACK  *a;
1848:   PetscInt       m,n;

1851:   MatCreate(comm,A);
1852:   MatSetType(*A,MATSCALAPACK);
1853:   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1854:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1855:   m = PETSC_DECIDE;
1856:   PetscSplitOwnershipEqual(comm,&m,&M);
1857:   n = PETSC_DECIDE;
1858:   PetscSplitOwnershipEqual(comm,&n,&N);
1859:   MatSetSizes(*A,m,n,M,N);
1860:   a = (Mat_ScaLAPACK*)(*A)->data;
1861:   PetscBLASIntCast(M,&a->M);
1862:   PetscBLASIntCast(N,&a->N);
1863:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1864:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1865:   PetscBLASIntCast(rsrc,&a->rsrc);
1866:   PetscBLASIntCast(csrc,&a->csrc);
1867:   MatSetUp(*A);
1868:   return(0);
1869: }
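
/*
   Usage sketch (illustrative, not part of this file): create a 1000x1000 ScaLAPACK
   matrix with default block sizes, with the first row and column owned by grid
   process (0,0). The dimensions are placeholders; global sizes must be given
   explicitly, since PETSC_DECIDE for M or N is rejected above. The function name is
   arbitrary and error checking is elided in the style of this listing.
*/
static PetscErrorCode ExampleCreateScaLAPACK(void)
{
  Mat A;

  MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,1000,1000,0,0,&A);
  /* assemble with MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd(), then use A */
  MatDestroy(&A);
  return(0);
}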