Actual source code: fdda.c

petsc-master 2019-07-21
Report Typos and Errors

  2:  #include <petsc/private/dmdaimpl.h>
  3:  #include <petscmat.h>

  5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
  6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
  8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

 10: /*
 11:    For ghost i that may be negative or greater than the upper bound this
 12:   maps it into the 0:m-1 range using periodicity
 13: */
 14: #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))

 16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
 17: {
 19:   PetscInt       i,j,nz,*fill;

 22:   if (!dfill) return(0);

 24:   /* count number nonzeros */
 25:   nz = 0;
 26:   for (i=0; i<w; i++) {
 27:     for (j=0; j<w; j++) {
 28:       if (dfill[w*i+j]) nz++;
 29:     }
 30:   }
 31:   PetscMalloc1(nz + w + 1,&fill);
 32:   /* construct modified CSR storage of nonzero structure */
 33:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 34:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 35:   nz = w + 1;
 36:   for (i=0; i<w; i++) {
 37:     fill[i] = nz;
 38:     for (j=0; j<w; j++) {
 39:       if (dfill[w*i+j]) {
 40:         fill[nz] = j;
 41:         nz++;
 42:       }
 43:     }
 44:   }
 45:   fill[w] = nz;

 47:   *rfill = fill;
 48:   return(0);
 49: }


 52: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
 53: {
 55:   PetscInt       nz;

 58:   if (!dfillsparse) return(0);

 60:   /* Determine number of non-zeros */
 61:   nz = (dfillsparse[w] - w - 1);

 63:   /* Allocate space for our copy of the given sparse matrix representation. */
 64:   PetscMalloc1(nz + w + 1,rfill);
 65:   PetscArraycpy(*rfill,dfillsparse,nz+w+1);
 66:   return(0);
 67: }


 70: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 71: {
 73:   PetscInt       i,k,cnt = 1;


 77:   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
 78:    columns to the left with any nonzeros in them plus 1 */
 79:   PetscCalloc1(dd->w,&dd->ofillcols);
 80:   for (i=0; i<dd->w; i++) {
 81:     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 82:   }
 83:   for (i=0; i<dd->w; i++) {
 84:     if (dd->ofillcols[i]) {
 85:       dd->ofillcols[i] = cnt++;
 86:     }
 87:   }
 88:   return(0);
 89: }



 93: /*@
 94:     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 95:     of the matrix returned by DMCreateMatrix().

 97:     Logically Collective on da

 99:     Input Parameter:
100: +   da - the distributed array
101: .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102: -   ofill - the fill pattern in the off-diagonal blocks


105:     Level: developer

107:     Notes:
108:     This only makes sense when you are doing multicomponent problems but using the
109:        MPIAIJ matrix format

111:            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112:        representing coupling and 0 entries for missing coupling. For example
113: $             dfill[9] = {1, 0, 0,
114: $                         1, 1, 0,
115: $                         0, 1, 1}
116:        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117:        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118:        diagonal block).

120:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121:      can be represented in the dfill, ofill format

123:    Contributed by Glenn Hammond

125: .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

127: @*/
128: PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129: {
130:   DM_DA          *dd = (DM_DA*)da->data;

134:   /* save the given dfill and ofill information */
135:   DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);
136:   DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);

138:   /* count nonzeros in ofill columns */
139:   DMDASetBlockFills_Private2(dd);

141:   return(0);
142: }


145: /*@
146:     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147:     of the matrix returned by DMCreateMatrix(), using sparse representations
148:     of fill patterns.

150:     Logically Collective on da

152:     Input Parameter:
153: +   da - the distributed array
154: .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155: -   ofill - the sparse fill pattern in the off-diagonal blocks


158:     Level: developer

160:     Notes: This only makes sense when you are doing multicomponent problems but using the
161:        MPIAIJ matrix format

163:            The format for dfill and ofill is a sparse representation of a
164:            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165:            for missing coupling.  The sparse representation is a 1 dimensional
166:            array of length nz + dof + 1, where nz is the number of non-zeros in
167:            the matrix.  The first dof entries in the array give the
168:            starting array indices of each row's items in the rest of the array,
169:            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170:            and the remaining nz items give the column indices of each of
171:            the 1s within the logical 2D matrix.  Each row's items within
172:            the array are the column indices of the 1s within that row
173:            of the 2D matrix.  PETSc developers may recognize that this is the
174:            same format as that computed by the DMDASetBlockFills_Private()
175:            function from a dense 2D matrix representation.

177:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178:      can be represented in the dfill, ofill format

180:    Contributed by Philip C. Roth

182: .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

184: @*/
185: PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186: {
187:   DM_DA          *dd = (DM_DA*)da->data;

191:   /* save the given dfill and ofill information */
192:   DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);
193:   DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);

195:   /* count nonzeros in ofill columns */
196:   DMDASetBlockFills_Private2(dd);

198:   return(0);
199: }


202: PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203: {
204:   PetscErrorCode   ierr;
205:   PetscInt         dim,m,n,p,nc;
206:   DMBoundaryType   bx,by,bz;
207:   MPI_Comm         comm;
208:   PetscMPIInt      size;
209:   PetscBool        isBAIJ;
210:   DM_DA            *dd = (DM_DA*)da->data;

213:   /*
214:                                   m
215:           ------------------------------------------------------
216:          |                                                     |
217:          |                                                     |
218:          |               ----------------------                |
219:          |               |                    |                |
220:       n  |           yn  |                    |                |
221:          |               |                    |                |
222:          |               .---------------------                |
223:          |             (xs,ys)     xn                          |
224:          |            .                                        |
225:          |         (gxs,gys)                                   |
226:          |                                                     |
227:           -----------------------------------------------------
228:   */

230:   /*
231:          nc - number of components per grid point
232:          col - number of colors needed in one direction for single component problem

234:   */
235:   DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);

237:   PetscObjectGetComm((PetscObject)da,&comm);
238:   MPI_Comm_size(comm,&size);
239:   if (ctype == IS_COLORING_LOCAL) {
240:     if (size == 1) {
241:       ctype = IS_COLORING_GLOBAL;
242:     } else if (dim > 1) {
243:       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244:         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245:       }
246:     }
247:   }

249:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250:      matrices is for the blocks, not the individual matrix elements  */
251:   PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);
252:   if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);}
253:   if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);}
254:   if (isBAIJ) {
255:     dd->w  = 1;
256:     dd->xs = dd->xs/nc;
257:     dd->xe = dd->xe/nc;
258:     dd->Xs = dd->Xs/nc;
259:     dd->Xe = dd->Xe/nc;
260:   }

262:   /*
263:      We do not provide a getcoloring function in the DMDA operations because
264:    the basic DMDA does not know about matrices. We think of DMDA as being more
265:    more low-level then matrices.
266:   */
267:   if (dim == 1) {
268:     DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);
269:   } else if (dim == 2) {
270:      DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);
271:   } else if (dim == 3) {
272:      DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);
273:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274:   if (isBAIJ) {
275:     dd->w  = nc;
276:     dd->xs = dd->xs*nc;
277:     dd->xe = dd->xe*nc;
278:     dd->Xs = dd->Xs*nc;
279:     dd->Xe = dd->Xe*nc;
280:   }
281:   return(0);
282: }

284: /* ---------------------------------------------------------------------------------*/

286: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287: {
288:   PetscErrorCode   ierr;
289:   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290:   PetscInt         ncolors;
291:   MPI_Comm         comm;
292:   DMBoundaryType bx,by;
293:   DMDAStencilType  st;
294:   ISColoringValue  *colors;
295:   DM_DA            *dd = (DM_DA*)da->data;

298:   /*
299:          nc - number of components per grid point
300:          col - number of colors needed in one direction for single component problem

302:   */
303:   DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);
304:   col  = 2*s + 1;
305:   DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
306:   DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
307:   PetscObjectGetComm((PetscObject)da,&comm);

309:   /* special case as taught to us by Paul Hovland */
310:   if (st == DMDA_STENCIL_STAR && s == 1) {
311:     DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);
312:   } else {

314:     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315:                                                             by 2*stencil_width + 1 (%d)\n", m, col);
316:     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317:                                                             by 2*stencil_width + 1 (%d)\n", n, col);
318:     if (ctype == IS_COLORING_GLOBAL) {
319:       if (!dd->localcoloring) {
320:         PetscMalloc1(nc*nx*ny,&colors);
321:         ii   = 0;
322:         for (j=ys; j<ys+ny; j++) {
323:           for (i=xs; i<xs+nx; i++) {
324:             for (k=0; k<nc; k++) {
325:               colors[ii++] = k + nc*((i % col) + col*(j % col));
326:             }
327:           }
328:         }
329:         ncolors = nc + nc*(col-1 + col*(col-1));
330:         ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
331:       }
332:       *coloring = dd->localcoloring;
333:     } else if (ctype == IS_COLORING_LOCAL) {
334:       if (!dd->ghostedcoloring) {
335:         PetscMalloc1(nc*gnx*gny,&colors);
336:         ii   = 0;
337:         for (j=gys; j<gys+gny; j++) {
338:           for (i=gxs; i<gxs+gnx; i++) {
339:             for (k=0; k<nc; k++) {
340:               /* the complicated stuff is to handle periodic boundaries */
341:               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342:             }
343:           }
344:         }
345:         ncolors = nc + nc*(col - 1 + col*(col-1));
346:         ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
347:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

349:         ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
350:       }
351:       *coloring = dd->ghostedcoloring;
352:     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353:   }
354:   ISColoringReference(*coloring);
355:   return(0);
356: }

358: /* ---------------------------------------------------------------------------------*/

360: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361: {
362:   PetscErrorCode   ierr;
363:   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364:   PetscInt         ncolors;
365:   MPI_Comm         comm;
366:   DMBoundaryType bx,by,bz;
367:   DMDAStencilType  st;
368:   ISColoringValue  *colors;
369:   DM_DA            *dd = (DM_DA*)da->data;

372:   /*
373:          nc - number of components per grid point
374:          col - number of colors needed in one direction for single component problem

376:   */
377:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
378:   col  = 2*s + 1;
379:   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380:                                                          by 2*stencil_width + 1\n");
381:   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382:                                                          by 2*stencil_width + 1\n");
383:   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384:                                                          by 2*stencil_width + 1\n");

386:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
387:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
388:   PetscObjectGetComm((PetscObject)da,&comm);

390:   /* create the coloring */
391:   if (ctype == IS_COLORING_GLOBAL) {
392:     if (!dd->localcoloring) {
393:       PetscMalloc1(nc*nx*ny*nz,&colors);
394:       ii   = 0;
395:       for (k=zs; k<zs+nz; k++) {
396:         for (j=ys; j<ys+ny; j++) {
397:           for (i=xs; i<xs+nx; i++) {
398:             for (l=0; l<nc; l++) {
399:               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400:             }
401:           }
402:         }
403:       }
404:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405:       ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);
406:     }
407:     *coloring = dd->localcoloring;
408:   } else if (ctype == IS_COLORING_LOCAL) {
409:     if (!dd->ghostedcoloring) {
410:       PetscMalloc1(nc*gnx*gny*gnz,&colors);
411:       ii   = 0;
412:       for (k=gzs; k<gzs+gnz; k++) {
413:         for (j=gys; j<gys+gny; j++) {
414:           for (i=gxs; i<gxs+gnx; i++) {
415:             for (l=0; l<nc; l++) {
416:               /* the complicated stuff is to handle periodic boundaries */
417:               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418:             }
419:           }
420:         }
421:       }
422:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423:       ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
424:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
425:     }
426:     *coloring = dd->ghostedcoloring;
427:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428:   ISColoringReference(*coloring);
429:   return(0);
430: }

432: /* ---------------------------------------------------------------------------------*/

434: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435: {
436:   PetscErrorCode   ierr;
437:   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438:   PetscInt         ncolors;
439:   MPI_Comm         comm;
440:   DMBoundaryType bx;
441:   ISColoringValue  *colors;
442:   DM_DA            *dd = (DM_DA*)da->data;

445:   /*
446:          nc - number of components per grid point
447:          col - number of colors needed in one direction for single component problem

449:   */
450:   DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);
451:   col  = 2*s + 1;

453:   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454:                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);

456:   DMDAGetCorners(da,&xs,0,0,&nx,0,0);
457:   DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);
458:   PetscObjectGetComm((PetscObject)da,&comm);

460:   /* create the coloring */
461:   if (ctype == IS_COLORING_GLOBAL) {
462:     if (!dd->localcoloring) {
463:       PetscMalloc1(nc*nx,&colors);
464:       if (dd->ofillcols) {
465:         PetscInt tc = 0;
466:         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
467:         i1 = 0;
468:         for (i=xs; i<xs+nx; i++) {
469:           for (l=0; l<nc; l++) {
470:             if (dd->ofillcols[l] && (i % col)) {
471:               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472:             } else {
473:               colors[i1++] = l;
474:             }
475:           }
476:         }
477:         ncolors = nc + 2*s*tc;
478:       } else {
479:         i1 = 0;
480:         for (i=xs; i<xs+nx; i++) {
481:           for (l=0; l<nc; l++) {
482:             colors[i1++] = l + nc*(i % col);
483:           }
484:         }
485:         ncolors = nc + nc*(col-1);
486:       }
487:       ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);
488:     }
489:     *coloring = dd->localcoloring;
490:   } else if (ctype == IS_COLORING_LOCAL) {
491:     if (!dd->ghostedcoloring) {
492:       PetscMalloc1(nc*gnx,&colors);
493:       i1   = 0;
494:       for (i=gxs; i<gxs+gnx; i++) {
495:         for (l=0; l<nc; l++) {
496:           /* the complicated stuff is to handle periodic boundaries */
497:           colors[i1++] = l + nc*(SetInRange(i,m) % col);
498:         }
499:       }
500:       ncolors = nc + nc*(col-1);
501:       ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
502:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
503:     }
504:     *coloring = dd->ghostedcoloring;
505:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506:   ISColoringReference(*coloring);
507:   return(0);
508: }

510: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511: {
512:   PetscErrorCode   ierr;
513:   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514:   PetscInt         ncolors;
515:   MPI_Comm         comm;
516:   DMBoundaryType bx,by;
517:   ISColoringValue  *colors;
518:   DM_DA            *dd = (DM_DA*)da->data;

521:   /*
522:          nc - number of components per grid point
523:          col - number of colors needed in one direction for single component problem

525:   */
526:   DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);
527:   DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
528:   DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
529:   PetscObjectGetComm((PetscObject)da,&comm);

531:   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532:   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");

534:   /* create the coloring */
535:   if (ctype == IS_COLORING_GLOBAL) {
536:     if (!dd->localcoloring) {
537:       PetscMalloc1(nc*nx*ny,&colors);
538:       ii   = 0;
539:       for (j=ys; j<ys+ny; j++) {
540:         for (i=xs; i<xs+nx; i++) {
541:           for (k=0; k<nc; k++) {
542:             colors[ii++] = k + nc*((3*j+i) % 5);
543:           }
544:         }
545:       }
546:       ncolors = 5*nc;
547:       ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
548:     }
549:     *coloring = dd->localcoloring;
550:   } else if (ctype == IS_COLORING_LOCAL) {
551:     if (!dd->ghostedcoloring) {
552:       PetscMalloc1(nc*gnx*gny,&colors);
553:       ii = 0;
554:       for (j=gys; j<gys+gny; j++) {
555:         for (i=gxs; i<gxs+gnx; i++) {
556:           for (k=0; k<nc; k++) {
557:             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558:           }
559:         }
560:       }
561:       ncolors = 5*nc;
562:       ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
563:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
564:     }
565:     *coloring = dd->ghostedcoloring;
566:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567:   return(0);
568: }

570: /* =========================================================================== */
571: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
572: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
574: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
576: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583: extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

585: /*@C
586:    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

588:    Logically Collective on mat

590:    Input Parameters:
591: +  mat - the matrix
592: -  da - the da

594:    Level: intermediate

596: @*/
597: PetscErrorCode MatSetupDM(Mat mat,DM da)
598: {

604:   PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
605:   return(0);
606: }

608: PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
609: {
610:   DM                da;
611:   PetscErrorCode    ierr;
612:   const char        *prefix;
613:   Mat               Anatural;
614:   AO                ao;
615:   PetscInt          rstart,rend,*petsc,i;
616:   IS                is;
617:   MPI_Comm          comm;
618:   PetscViewerFormat format;

621:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622:   PetscViewerGetFormat(viewer,&format);
623:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);

625:   PetscObjectGetComm((PetscObject)A,&comm);
626:   MatGetDM(A, &da);
627:   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

629:   DMDAGetAO(da,&ao);
630:   MatGetOwnershipRange(A,&rstart,&rend);
631:   PetscMalloc1(rend-rstart,&petsc);
632:   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633:   AOApplicationToPetsc(ao,rend-rstart,petsc);
634:   ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);

636:   /* call viewer on natural ordering */
637:   MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);
638:   ISDestroy(&is);
639:   PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);
640:   PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);
641:   PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);
642:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643:   MatView(Anatural,viewer);
644:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645:   MatDestroy(&Anatural);
646:   return(0);
647: }

649: PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650: {
651:   DM             da;
653:   Mat            Anatural,Aapp;
654:   AO             ao;
655:   PetscInt       rstart,rend,*app,i,m,n,M,N;
656:   IS             is;
657:   MPI_Comm       comm;

660:   PetscObjectGetComm((PetscObject)A,&comm);
661:   MatGetDM(A, &da);
662:   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

664:   /* Load the matrix in natural ordering */
665:   MatCreate(PetscObjectComm((PetscObject)A),&Anatural);
666:   MatSetType(Anatural,((PetscObject)A)->type_name);
667:   MatGetSize(A,&M,&N);
668:   MatGetLocalSize(A,&m,&n);
669:   MatSetSizes(Anatural,m,n,M,N);
670:   MatLoad(Anatural,viewer);

672:   /* Map natural ordering to application ordering and create IS */
673:   DMDAGetAO(da,&ao);
674:   MatGetOwnershipRange(Anatural,&rstart,&rend);
675:   PetscMalloc1(rend-rstart,&app);
676:   for (i=rstart; i<rend; i++) app[i-rstart] = i;
677:   AOPetscToApplication(ao,rend-rstart,app);
678:   ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);

680:   /* Do permutation and replace header */
681:   MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);
682:   MatHeaderReplace(A,&Aapp);
683:   ISDestroy(&is);
684:   MatDestroy(&Anatural);
685:   return(0);
686: }

688: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689: {
691:   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692:   Mat            A;
693:   MPI_Comm       comm;
694:   MatType        Atype;
695:   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
696:   MatType        mtype;
697:   PetscMPIInt    size;
698:   DM_DA          *dd = (DM_DA*)da->data;

701:   MatInitializePackage();
702:   mtype = da->mattype;

704:   /*
705:                                   m
706:           ------------------------------------------------------
707:          |                                                     |
708:          |                                                     |
709:          |               ----------------------                |
710:          |               |                    |                |
711:       n  |           ny  |                    |                |
712:          |               |                    |                |
713:          |               .---------------------                |
714:          |             (xs,ys)     nx                          |
715:          |            .                                        |
716:          |         (gxs,gys)                                   |
717:          |                                                     |
718:           -----------------------------------------------------
719:   */

721:   /*
722:          nc - number of components per grid point
723:          col - number of colors needed in one direction for single component problem

725:   */
726:   M   = dd->M;
727:   N   = dd->N;
728:   P   = dd->P;
729:   dim = da->dim;
730:   dof = dd->w;
731:   /* DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0); */
732:   DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);
733:   PetscObjectGetComm((PetscObject)da,&comm);
734:   MatCreate(comm,&A);
735:   MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);
736:   MatSetType(A,mtype);
737:   MatSetFromOptions(A);
738:   MatSetDM(A,da);
739:   if (da->structure_only) {
740:     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);
741:   }
742:   MatGetType(A,&Atype);
743:   /*
744:      We do not provide a getmatrix function in the DMDA operations because
745:    the basic DMDA does not know about matrices. We think of DMDA as being more
746:    more low-level than matrices. This is kind of cheating but, cause sometimes
747:    we think of DMDA has higher level than matrices.

749:      We could switch based on Atype (or mtype), but we do not since the
750:    specialized setting routines depend only on the particular preallocation
751:    details of the matrix, not the type itself.
752:   */
753:   PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);
754:   if (!aij) {
755:     PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);
756:   }
757:   if (!aij) {
758:     PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);
759:     if (!baij) {
760:       PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);
761:     }
762:     if (!baij) {
763:       PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);
764:       if (!sbaij) {
765:         PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);
766:       }
767:       if (!sbaij) {
768:         PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);
769:         if (!sell) {
770:           PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);
771:         }
772:       }
773:       if (!sell) {
774:         PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);
775:       }
776:     }
777:   }
778:   if (aij) {
779:     if (dim == 1) {
780:       if (dd->ofill) {
781:         DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);
782:       } else {
783:         DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);
784:       }
785:     } else if (dim == 2) {
786:       if (dd->ofill) {
787:         DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);
788:       } else {
789:         DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);
790:       }
791:     } else if (dim == 3) {
792:       if (dd->ofill) {
793:         DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);
794:       } else {
795:         DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);
796:       }
797:     }
798:   } else if (baij) {
799:     if (dim == 2) {
800:       DMCreateMatrix_DA_2d_MPIBAIJ(da,A);
801:     } else if (dim == 3) {
802:       DMCreateMatrix_DA_3d_MPIBAIJ(da,A);
803:     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
804:   } else if (sbaij) {
805:     if (dim == 2) {
806:       DMCreateMatrix_DA_2d_MPISBAIJ(da,A);
807:     } else if (dim == 3) {
808:       DMCreateMatrix_DA_3d_MPISBAIJ(da,A);
809:     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
810:   } else if (sell) {
811:      if (dim == 2) {
812:        DMCreateMatrix_DA_2d_MPISELL(da,A);
813:      } else if (dim == 3) {
814:        DMCreateMatrix_DA_3d_MPISELL(da,A);
815:      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
816:   } else if (is) {
817:     DMCreateMatrix_DA_IS(da,A);
818:   } else {
819:     ISLocalToGlobalMapping ltog;

821:     MatSetBlockSize(A,dof);
822:     MatSetUp(A);
823:     DMGetLocalToGlobalMapping(da,&ltog);
824:     MatSetLocalToGlobalMapping(A,ltog,ltog);
825:   }
826:   DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
827:   MatSetStencil(A,dim,dims,starts,dof);
828:   MatSetDM(A,da);
829:   MPI_Comm_size(comm,&size);
830:   if (size > 1) {
831:     /* change viewer to display matrix in natural ordering */
832:     MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
833:     MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
834:   }
835:   *J = A;
836:   return(0);
837: }

839: /* ---------------------------------------------------------------------------------*/
840: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

842: PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
843: {
844:   DM_DA                  *da = (DM_DA*)dm->data;
845:   Mat                    lJ;
846:   ISLocalToGlobalMapping ltog;
847:   IS                     is_loc_filt, is_glob;
848:   const PetscInt         *e_loc,*idx;
849:   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
850:   PetscBool              flg;
851:   PetscErrorCode         ierr;

853:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
854:      We need to filter the local indices that are represented through the DMDAGetElements decomposition
855:      This is because the size of the local matrices in MATIS is the local size of the l2g map */
857:   dof  = da->w;
858:   dim  = dm->dim;

860:   MatSetBlockSize(J,dof);

862:   /* get local elements indices in local DMDA numbering */
863:   DMDAGetElements(dm,&nel,&nen,&e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
864:   ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);
865:   DMDARestoreElements(dm,&nel,&nen,&e_loc);

867:   /* obtain a consistent local ordering for MATIS */
868:   ISSortRemoveDups(is_loc_filt);
869:   ISBlockGetLocalSize(is_loc_filt,&nb);
870:   DMGetLocalToGlobalMapping(dm,&ltog);
871:   ISLocalToGlobalMappingGetSize(ltog,&nv);
872:   PetscMalloc1(PetscMax(nb,nv/dof),&gidx);
873:   ISBlockGetIndices(is_loc_filt,&idx);
874:   ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);
875:   ISBlockRestoreIndices(is_loc_filt,&idx);
876:   ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);
877:   ISLocalToGlobalMappingCreateIS(is_glob,&ltog);
878:   ISDestroy(&is_glob);
879:   MatSetLocalToGlobalMapping(J,ltog,ltog);
880:   ISLocalToGlobalMappingDestroy(&ltog);

882:   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
883:   MatISGetLocalMat(J,&lJ);
884:   ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);
885:   ISDestroy(&is_loc_filt);
886:   ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);
887:   ISGetIndices(is_glob,&idx);
888:   ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);
889:   ISRestoreIndices(is_glob,&idx);
890:   ISDestroy(&is_glob);
891:   ISLocalToGlobalMappingDestroy(&ltog);
892:   ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);
893:   ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);
894:   ISDestroy(&is_loc_filt);
895:   MatSetLocalToGlobalMapping(lJ,ltog,ltog);
896:   ISLocalToGlobalMappingDestroy(&ltog);
897:   PetscFree(gidx);

899:   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
900:   flg = dm->prealloc_only;
901:   dm->prealloc_only = PETSC_TRUE;
902:   switch (dim) {
903:   case 1:
904:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
905:     DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);
906:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
907:     break;
908:   case 2:
909:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
910:     DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);
911:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
912:     break;
913:   case 3:
914:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
915:     DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);
916:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
917:     break;
918:   default:
919:     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
920:     break;
921:   }
922:   dm->prealloc_only = flg;
923:   return(0);
924: }

926: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
927: {
928:   PetscErrorCode         ierr;
929:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
930:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
931:   MPI_Comm               comm;
932:   PetscScalar            *values;
933:   DMBoundaryType         bx,by;
934:   ISLocalToGlobalMapping ltog;
935:   DMDAStencilType        st;

938:   /*
939:          nc - number of components per grid point
940:          col - number of colors needed in one direction for single component problem

942:   */
943:   DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);
944:   col  = 2*s + 1;
945:   DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
946:   DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
947:   PetscObjectGetComm((PetscObject)da,&comm);

949:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
950:   DMGetLocalToGlobalMapping(da,&ltog);

952:   MatSetBlockSize(J,nc);
953:   /* determine the matrix preallocation information */
954:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
955:   for (i=xs; i<xs+nx; i++) {

957:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
958:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

960:     for (j=ys; j<ys+ny; j++) {
961:       slot = i - gxs + gnx*(j - gys);

963:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
964:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

966:       cnt = 0;
967:       for (k=0; k<nc; k++) {
968:         for (l=lstart; l<lend+1; l++) {
969:           for (p=pstart; p<pend+1; p++) {
970:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
971:               cols[cnt++] = k + nc*(slot + gnx*l + p);
972:             }
973:           }
974:         }
975:         rows[k] = k + nc*(slot);
976:       }
977:       MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
978:     }
979:   }
980:   MatSetBlockSize(J,nc);
981:   MatSeqSELLSetPreallocation(J,0,dnz);
982:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
983:   MatPreallocateFinalize(dnz,onz);

985:   MatSetLocalToGlobalMapping(J,ltog,ltog);

987:   /*
988:     For each node in the grid: we get the neighbors in the local (on processor ordering
989:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
990:     PETSc ordering.
991:   */
992:   if (!da->prealloc_only) {
993:     PetscCalloc1(col*col*nc*nc,&values);
994:     for (i=xs; i<xs+nx; i++) {

996:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

999:       for (j=ys; j<ys+ny; j++) {
1000:         slot = i - gxs + gnx*(j - gys);

1002:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1005:         cnt = 0;
1006:         for (k=0; k<nc; k++) {
1007:           for (l=lstart; l<lend+1; l++) {
1008:             for (p=pstart; p<pend+1; p++) {
1009:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1010:                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1011:               }
1012:             }
1013:           }
1014:           rows[k] = k + nc*(slot);
1015:         }
1016:         MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1017:       }
1018:     }
1019:     PetscFree(values);
1020:     /* do not copy values to GPU since they are all zero and not yet needed there */
1021:     MatPinToCPU(J,PETSC_TRUE);
1022:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1023:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1024:     MatPinToCPU(J,PETSC_FALSE);
1025:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1026:   }
1027:   PetscFree2(rows,cols);
1028:   return(0);
1029: }

1031: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1032: {
1033:   PetscErrorCode         ierr;
1034:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1035:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1036:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1037:   MPI_Comm               comm;
1038:   PetscScalar            *values;
1039:   DMBoundaryType         bx,by,bz;
1040:   ISLocalToGlobalMapping ltog;
1041:   DMDAStencilType        st;

1044:   /*
1045:          nc - number of components per grid point
1046:          col - number of colors needed in one direction for single component problem

1048:   */
1049:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1050:   col  = 2*s + 1;
1051:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1052:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1053:   PetscObjectGetComm((PetscObject)da,&comm);

1055:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1056:   DMGetLocalToGlobalMapping(da,&ltog);

1058:   MatSetBlockSize(J,nc);
1059:   /* determine the matrix preallocation information */
1060:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1061:   for (i=xs; i<xs+nx; i++) {
1062:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1063:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1064:     for (j=ys; j<ys+ny; j++) {
1065:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1066:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1067:       for (k=zs; k<zs+nz; k++) {
1068:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1069:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1071:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1073:         cnt = 0;
1074:         for (l=0; l<nc; l++) {
1075:           for (ii=istart; ii<iend+1; ii++) {
1076:             for (jj=jstart; jj<jend+1; jj++) {
1077:               for (kk=kstart; kk<kend+1; kk++) {
1078:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1079:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1080:                 }
1081:               }
1082:             }
1083:           }
1084:           rows[l] = l + nc*(slot);
1085:         }
1086:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1087:       }
1088:     }
1089:   }
1090:   MatSetBlockSize(J,nc);
1091:   MatSeqSELLSetPreallocation(J,0,dnz);
1092:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
1093:   MatPreallocateFinalize(dnz,onz);
1094:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1096:   /*
1097:     For each node in the grid: we get the neighbors in the local (on processor ordering
1098:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1099:     PETSc ordering.
1100:   */
1101:   if (!da->prealloc_only) {
1102:     PetscCalloc1(col*col*col*nc*nc*nc,&values);
1103:     for (i=xs; i<xs+nx; i++) {
1104:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1105:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1106:       for (j=ys; j<ys+ny; j++) {
1107:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1108:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1109:         for (k=zs; k<zs+nz; k++) {
1110:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1111:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1113:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1115:           cnt = 0;
1116:           for (l=0; l<nc; l++) {
1117:             for (ii=istart; ii<iend+1; ii++) {
1118:               for (jj=jstart; jj<jend+1; jj++) {
1119:                 for (kk=kstart; kk<kend+1; kk++) {
1120:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1121:                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1122:                   }
1123:                 }
1124:               }
1125:             }
1126:             rows[l] = l + nc*(slot);
1127:           }
1128:           MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1129:         }
1130:       }
1131:     }
1132:     PetscFree(values);
1133:     /* do not copy values to GPU since they are all zero and not yet needed there */
1134:     MatPinToCPU(J,PETSC_TRUE);
1135:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1136:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1137:     MatPinToCPU(J,PETSC_FALSE);
1138:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1139:   }
1140:   PetscFree2(rows,cols);
1141:   return(0);
1142: }

1144: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1145: {
1146:   PetscErrorCode         ierr;
1147:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1148:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1149:   MPI_Comm               comm;
1150:   DMBoundaryType         bx,by;
1151:   ISLocalToGlobalMapping ltog,mltog;
1152:   DMDAStencilType        st;
1153:   PetscBool              removedups = PETSC_FALSE;

1156:   /*
1157:          nc - number of components per grid point
1158:          col - number of colors needed in one direction for single component problem

1160:   */
1161:   DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);
1162:   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1163:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1164:   }
1165:   col  = 2*s + 1;
1166:   /*
1167:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1168:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1169:   */
1170:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1171:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1172:   DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
1173:   DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
1174:   PetscObjectGetComm((PetscObject)da,&comm);

1176:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
1177:   DMGetLocalToGlobalMapping(da,&ltog);

1179:   MatSetBlockSize(J,nc);
1180:   /* determine the matrix preallocation information */
1181:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1182:   for (i=xs; i<xs+nx; i++) {

1184:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1185:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1187:     for (j=ys; j<ys+ny; j++) {
1188:       slot = i - gxs + gnx*(j - gys);

1190:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1191:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1193:       cnt = 0;
1194:       for (k=0; k<nc; k++) {
1195:         for (l=lstart; l<lend+1; l++) {
1196:           for (p=pstart; p<pend+1; p++) {
1197:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1198:               cols[cnt++] = k + nc*(slot + gnx*l + p);
1199:             }
1200:           }
1201:         }
1202:         rows[k] = k + nc*(slot);
1203:       }
1204:       if (removedups) {
1205:         MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1206:       } else {
1207:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1208:       }
1209:     }
1210:   }
1211:   MatSetBlockSize(J,nc);
1212:   MatSeqAIJSetPreallocation(J,0,dnz);
1213:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1214:   MatPreallocateFinalize(dnz,onz);
1215:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1216:   if (!mltog) {
1217:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1218:   }

1220:   /*
1221:     For each node in the grid: we get the neighbors in the local (on processor ordering
1222:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1223:     PETSc ordering.
1224:   */
1225:   if (!da->prealloc_only) {
1226:     for (i=xs; i<xs+nx; i++) {

1228:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1229:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1231:       for (j=ys; j<ys+ny; j++) {
1232:         slot = i - gxs + gnx*(j - gys);

1234:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1235:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1237:         cnt = 0;
1238:         for (l=lstart; l<lend+1; l++) {
1239:           for (p=pstart; p<pend+1; p++) {
1240:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1241:               cols[cnt++] = nc*(slot + gnx*l + p);
1242:               for (k=1; k<nc; k++) {
1243:                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1244:               }
1245:             }
1246:           }
1247:         }
1248:         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1249:         MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1250:       }
1251:     }
1252:     /* do not copy values to GPU since they are all zero and not yet needed there */
1253:     MatPinToCPU(J,PETSC_TRUE);
1254:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1255:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1256:     MatPinToCPU(J,PETSC_FALSE);
1257:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1258:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1259:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1260:     }
1261:   }
1262:   PetscFree2(rows,cols);
1263:   return(0);
1264: }

1266: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1267: {
1268:   PetscErrorCode         ierr;
1269:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1270:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1271:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1272:   DM_DA                  *dd = (DM_DA*)da->data;
1273:   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1274:   MPI_Comm               comm;
1275:   DMBoundaryType         bx,by;
1276:   ISLocalToGlobalMapping ltog;
1277:   DMDAStencilType        st;
1278:   PetscBool              removedups = PETSC_FALSE;

1281:   /*
1282:          nc - number of components per grid point
1283:          col - number of colors needed in one direction for single component problem

1285:   */
1286:   DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);
1287:   col  = 2*s + 1;
1288:   /*
1289:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1290:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1291:   */
1292:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1293:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1294:   DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
1295:   DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
1296:   PetscObjectGetComm((PetscObject)da,&comm);

1298:   PetscMalloc1(col*col*nc,&cols);
1299:   DMGetLocalToGlobalMapping(da,&ltog);

1301:   MatSetBlockSize(J,nc);
1302:   /* determine the matrix preallocation information */
1303:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1304:   for (i=xs; i<xs+nx; i++) {

1306:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1307:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1309:     for (j=ys; j<ys+ny; j++) {
1310:       slot = i - gxs + gnx*(j - gys);

1312:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1313:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1315:       for (k=0; k<nc; k++) {
1316:         cnt = 0;
1317:         for (l=lstart; l<lend+1; l++) {
1318:           for (p=pstart; p<pend+1; p++) {
1319:             if (l || p) {
1320:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1321:                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1322:               }
1323:             } else {
1324:               if (dfill) {
1325:                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1326:               } else {
1327:                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1328:               }
1329:             }
1330:           }
1331:         }
1332:         row    = k + nc*(slot);
1333:         maxcnt = PetscMax(maxcnt,cnt);
1334:         if (removedups) {
1335:           MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1336:         } else {
1337:           MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1338:         }
1339:       }
1340:     }
1341:   }
1342:   MatSeqAIJSetPreallocation(J,0,dnz);
1343:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1344:   MatPreallocateFinalize(dnz,onz);
1345:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1347:   /*
1348:     For each node in the grid: we get the neighbors in the local (on processor ordering
1349:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1350:     PETSc ordering.
1351:   */
1352:   if (!da->prealloc_only) {
1353:     for (i=xs; i<xs+nx; i++) {

1355:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1356:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1358:       for (j=ys; j<ys+ny; j++) {
1359:         slot = i - gxs + gnx*(j - gys);

1361:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1362:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1364:         for (k=0; k<nc; k++) {
1365:           cnt = 0;
1366:           for (l=lstart; l<lend+1; l++) {
1367:             for (p=pstart; p<pend+1; p++) {
1368:               if (l || p) {
1369:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1370:                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1371:                 }
1372:               } else {
1373:                 if (dfill) {
1374:                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1375:                 } else {
1376:                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1377:                 }
1378:               }
1379:             }
1380:           }
1381:           row  = k + nc*(slot);
1382:           MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1383:         }
1384:       }
1385:     }
1386:     /* do not copy values to GPU since they are all zero and not yet needed there */
1387:     MatPinToCPU(J,PETSC_TRUE);
1388:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1389:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1390:     MatPinToCPU(J,PETSC_FALSE);
1391:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1392:   }
1393:   PetscFree(cols);
1394:   return(0);
1395: }

1397: /* ---------------------------------------------------------------------------------*/

1399: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1400: {
1401:   PetscErrorCode         ierr;
1402:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1403:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1404:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1405:   MPI_Comm               comm;
1406:   DMBoundaryType         bx,by,bz;
1407:   ISLocalToGlobalMapping ltog,mltog;
1408:   DMDAStencilType        st;
1409:   PetscBool              removedups = PETSC_FALSE;

1412:   /*
1413:          nc - number of components per grid point
1414:          col - number of colors needed in one direction for single component problem

1416:   */
1417:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1418:   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1419:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1420:   }
1421:   col  = 2*s + 1;

1423:   /*
1424:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1425:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1426:   */
1427:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1428:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1429:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

1431:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1432:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1433:   PetscObjectGetComm((PetscObject)da,&comm);

1435:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1436:   DMGetLocalToGlobalMapping(da,&ltog);

1438:   MatSetBlockSize(J,nc);
1439:   /* determine the matrix preallocation information */
1440:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1441:   for (i=xs; i<xs+nx; i++) {
1442:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1443:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1444:     for (j=ys; j<ys+ny; j++) {
1445:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1446:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1447:       for (k=zs; k<zs+nz; k++) {
1448:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1449:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1451:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1453:         cnt = 0;
1454:         for (l=0; l<nc; l++) {
1455:           for (ii=istart; ii<iend+1; ii++) {
1456:             for (jj=jstart; jj<jend+1; jj++) {
1457:               for (kk=kstart; kk<kend+1; kk++) {
1458:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1459:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1460:                 }
1461:               }
1462:             }
1463:           }
1464:           rows[l] = l + nc*(slot);
1465:         }
1466:         if (removedups) {
1467:           MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1468:         } else {
1469:           MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1470:         }
1471:       }
1472:     }
1473:   }
1474:   MatSetBlockSize(J,nc);
1475:   MatSeqAIJSetPreallocation(J,0,dnz);
1476:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1477:   MatPreallocateFinalize(dnz,onz);
1478:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1479:   if (!mltog) {
1480:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1481:   }

1483:   /*
1484:     For each node in the grid: we get the neighbors in the local (on processor ordering
1485:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1486:     PETSc ordering.
1487:   */
1488:   if (!da->prealloc_only) {
1489:     for (i=xs; i<xs+nx; i++) {
1490:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1491:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1492:       for (j=ys; j<ys+ny; j++) {
1493:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1494:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1495:         for (k=zs; k<zs+nz; k++) {
1496:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1497:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1499:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1501:           cnt = 0;
1502:           for (kk=kstart; kk<kend+1; kk++) {
1503:             for (jj=jstart; jj<jend+1; jj++) {
1504:               for (ii=istart; ii<iend+1; ii++) {
1505:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1506:                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1507:                     for (l=1; l<nc; l++) {
1508:                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1509:                   }
1510:                 }
1511:               }
1512:             }
1513:           }
1514:           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1515:           MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1516:         }
1517:       }
1518:     }
1519:     /* do not copy values to GPU since they are all zero and not yet needed there */
1520:     MatPinToCPU(J,PETSC_TRUE);
1521:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1522:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1523:     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1524:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1525:     }
1526:     MatPinToCPU(J,PETSC_FALSE);
1527:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1528:   }
1529:   PetscFree2(rows,cols);
1530:   return(0);
1531: }

1533: /* ---------------------------------------------------------------------------------*/

1535: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1536: {
1537:   PetscErrorCode         ierr;
1538:   DM_DA                  *dd = (DM_DA*)da->data;
1539:   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1540:   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1541:   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1542:   DMBoundaryType         bx;
1543:   ISLocalToGlobalMapping ltog;
1544:   PetscMPIInt            rank,size;

1547:   MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);
1548:   MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);

1550:   /*
1551:          nc - number of components per grid point

1553:   */
1554:   DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);
1555:   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1556:   DMDAGetCorners(da,&xs,0,0,&nx,0,0);
1557:   DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);

1559:   MatSetBlockSize(J,nc);
1560:   PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);

1562:   /*
1563:         note should be smaller for first and last process with no periodic
1564:         does not handle dfill
1565:   */
1566:   cnt = 0;
1567:   /* coupling with process to the left */
1568:   for (i=0; i<s; i++) {
1569:     for (j=0; j<nc; j++) {
1570:       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1571:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1572:       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1573:         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1574:         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1575:       }
1576:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1577:       cnt++;
1578:     }
1579:   }
1580:   for (i=s; i<nx-s; i++) {
1581:     for (j=0; j<nc; j++) {
1582:       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1583:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1584:       cnt++;
1585:     }
1586:   }
1587:   /* coupling with process to the right */
1588:   for (i=nx-s; i<nx; i++) {
1589:     for (j=0; j<nc; j++) {
1590:       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1591:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1592:       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1593:         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1594:         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1595:       }
1596:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1597:       cnt++;
1598:     }
1599:   }

1601:   MatSeqAIJSetPreallocation(J,0,cols);
1602:   MatMPIAIJSetPreallocation(J,0,cols,0,ocols);
1603:   PetscFree2(cols,ocols);

1605:   DMGetLocalToGlobalMapping(da,&ltog);
1606:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1608:   /*
1609:     For each node in the grid: we get the neighbors in the local (on processor ordering
1610:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1611:     PETSc ordering.
1612:   */
1613:   if (!da->prealloc_only) {
1614:     PetscMalloc1(maxcnt,&cols);
1615:     row = xs*nc;
1616:     /* coupling with process to the left */
1617:     for (i=xs; i<xs+s; i++) {
1618:       for (j=0; j<nc; j++) {
1619:         cnt = 0;
1620:         if (rank) {
1621:           for (l=0; l<s; l++) {
1622:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1623:           }
1624:         }
1625:         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1626:           for (l=0; l<s; l++) {
1627:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1628:           }
1629:         }
1630:         if (dfill) {
1631:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1632:             cols[cnt++] = i*nc + dfill[k];
1633:           }
1634:         } else {
1635:           for (k=0; k<nc; k++) {
1636:             cols[cnt++] = i*nc + k;
1637:           }
1638:         }
1639:         for (l=0; l<s; l++) {
1640:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1641:         }
1642:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1643:         row++;
1644:       }
1645:     }
1646:     for (i=xs+s; i<xs+nx-s; i++) {
1647:       for (j=0; j<nc; j++) {
1648:         cnt = 0;
1649:         for (l=0; l<s; l++) {
1650:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1651:         }
1652:         if (dfill) {
1653:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1654:             cols[cnt++] = i*nc + dfill[k];
1655:           }
1656:         } else {
1657:           for (k=0; k<nc; k++) {
1658:             cols[cnt++] = i*nc + k;
1659:           }
1660:         }
1661:         for (l=0; l<s; l++) {
1662:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1663:         }
1664:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1665:         row++;
1666:       }
1667:     }
1668:     /* coupling with process to the right */
1669:     for (i=xs+nx-s; i<xs+nx; i++) {
1670:       for (j=0; j<nc; j++) {
1671:         cnt = 0;
1672:         for (l=0; l<s; l++) {
1673:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1674:         }
1675:         if (dfill) {
1676:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1677:             cols[cnt++] = i*nc + dfill[k];
1678:           }
1679:         } else {
1680:           for (k=0; k<nc; k++) {
1681:             cols[cnt++] = i*nc + k;
1682:           }
1683:         }
1684:         if (rank < size-1) {
1685:           for (l=0; l<s; l++) {
1686:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1687:           }
1688:         }
1689:         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1690:           for (l=0; l<s; l++) {
1691:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1692:           }
1693:         }
1694:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1695:         row++;
1696:       }
1697:     }
1698:     PetscFree(cols);
1699:     /* do not copy values to GPU since they are all zero and not yet needed there */
1700:     MatPinToCPU(J,PETSC_TRUE);
1701:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1702:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1703:     MatPinToCPU(J,PETSC_FALSE);
1704:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1705:   }
1706:   return(0);
1707: }

1709: /* ---------------------------------------------------------------------------------*/

1711: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1712: {
1713:   PetscErrorCode         ierr;
1714:   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1715:   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1716:   PetscInt               istart,iend;
1717:   DMBoundaryType         bx;
1718:   ISLocalToGlobalMapping ltog,mltog;

1721:   /*
1722:          nc - number of components per grid point
1723:          col - number of colors needed in one direction for single component problem

1725:   */
1726:   DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);
1727:   if (!isIS && bx == DM_BOUNDARY_NONE) {
1728:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1729:   }
1730:   col  = 2*s + 1;

1732:   DMDAGetCorners(da,&xs,0,0,&nx,0,0);
1733:   DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);

1735:   MatSetBlockSize(J,nc);
1736:   MatSeqAIJSetPreallocation(J,col*nc,0);
1737:   MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);

1739:   DMGetLocalToGlobalMapping(da,&ltog);
1740:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1741:   if (!mltog) {
1742:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1743:   }

1745:   /*
1746:     For each node in the grid: we get the neighbors in the local (on processor ordering
1747:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1748:     PETSc ordering.
1749:   */
1750:   if (!da->prealloc_only) {
1751:     PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1752:     for (i=xs; i<xs+nx; i++) {
1753:       istart = PetscMax(-s,gxs - i);
1754:       iend   = PetscMin(s,gxs + gnx - i - 1);
1755:       slot   = i - gxs;

1757:       cnt = 0;
1758:       for (i1=istart; i1<iend+1; i1++) {
1759:         cols[cnt++] = nc*(slot + i1);
1760:         for (l=1; l<nc; l++) {
1761:           cols[cnt] = 1 + cols[cnt-1];cnt++;
1762:         }
1763:       }
1764:       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1765:       MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1766:     }
1767:     /* do not copy values to GPU since they are all zero and not yet needed there */
1768:     MatPinToCPU(J,PETSC_TRUE);
1769:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1770:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1771:     if (!isIS && bx == DM_BOUNDARY_NONE) {
1772:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1773:     }
1774:     MatPinToCPU(J,PETSC_FALSE);
1775:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1776:     PetscFree2(rows,cols);
1777:   }
1778:   return(0);
1779: }

1781: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1782: {
1783:   PetscErrorCode         ierr;
1784:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1785:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1786:   PetscInt               istart,iend,jstart,jend,ii,jj;
1787:   MPI_Comm               comm;
1788:   PetscScalar            *values;
1789:   DMBoundaryType         bx,by;
1790:   DMDAStencilType        st;
1791:   ISLocalToGlobalMapping ltog;

1794:   /*
1795:      nc - number of components per grid point
1796:      col - number of colors needed in one direction for single component problem
1797:   */
1798:   DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);
1799:   col  = 2*s + 1;

1801:   DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
1802:   DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
1803:   PetscObjectGetComm((PetscObject)da,&comm);

1805:   PetscMalloc1(col*col*nc*nc,&cols);

1807:   DMGetLocalToGlobalMapping(da,&ltog);

1809:   /* determine the matrix preallocation information */
1810:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
1811:   for (i=xs; i<xs+nx; i++) {
1812:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1813:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1814:     for (j=ys; j<ys+ny; j++) {
1815:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1816:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1817:       slot   = i - gxs + gnx*(j - gys);

1819:       /* Find block columns in block row */
1820:       cnt = 0;
1821:       for (ii=istart; ii<iend+1; ii++) {
1822:         for (jj=jstart; jj<jend+1; jj++) {
1823:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1824:             cols[cnt++] = slot + ii + gnx*jj;
1825:           }
1826:         }
1827:       }
1828:       MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1829:     }
1830:   }
1831:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1832:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1833:   MatPreallocateFinalize(dnz,onz);

1835:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1837:   /*
1838:     For each node in the grid: we get the neighbors in the local (on processor ordering
1839:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1840:     PETSc ordering.
1841:   */
1842:   if (!da->prealloc_only) {
1843:     PetscCalloc1(col*col*nc*nc,&values);
1844:     for (i=xs; i<xs+nx; i++) {
1845:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1846:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1847:       for (j=ys; j<ys+ny; j++) {
1848:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1849:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1850:         slot = i - gxs + gnx*(j - gys);
1851:         cnt  = 0;
1852:         for (ii=istart; ii<iend+1; ii++) {
1853:           for (jj=jstart; jj<jend+1; jj++) {
1854:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1855:               cols[cnt++] = slot + ii + gnx*jj;
1856:             }
1857:           }
1858:         }
1859:         MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1860:       }
1861:     }
1862:     PetscFree(values);
1863:     /* do not copy values to GPU since they are all zero and not yet needed there */
1864:     MatPinToCPU(J,PETSC_TRUE);
1865:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1866:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1867:     MatPinToCPU(J,PETSC_FALSE);
1868:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1869:   }
1870:   PetscFree(cols);
1871:   return(0);
1872: }

1874: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1875: {
1876:   PetscErrorCode         ierr;
1877:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1878:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1879:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1880:   MPI_Comm               comm;
1881:   PetscScalar            *values;
1882:   DMBoundaryType         bx,by,bz;
1883:   DMDAStencilType        st;
1884:   ISLocalToGlobalMapping ltog;

1887:   /*
1888:          nc - number of components per grid point
1889:          col - number of colors needed in one direction for single component problem

1891:   */
1892:   DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);
1893:   col  = 2*s + 1;

1895:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1896:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1897:   PetscObjectGetComm((PetscObject)da,&comm);

1899:   PetscMalloc1(col*col*col,&cols);

1901:   DMGetLocalToGlobalMapping(da,&ltog);

1903:   /* determine the matrix preallocation information */
1904:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
1905:   for (i=xs; i<xs+nx; i++) {
1906:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1907:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1908:     for (j=ys; j<ys+ny; j++) {
1909:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1910:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1911:       for (k=zs; k<zs+nz; k++) {
1912:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1913:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1915:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1917:         /* Find block columns in block row */
1918:         cnt = 0;
1919:         for (ii=istart; ii<iend+1; ii++) {
1920:           for (jj=jstart; jj<jend+1; jj++) {
1921:             for (kk=kstart; kk<kend+1; kk++) {
1922:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1923:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1924:               }
1925:             }
1926:           }
1927:         }
1928:         MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1929:       }
1930:     }
1931:   }
1932:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1933:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1934:   MatPreallocateFinalize(dnz,onz);

1936:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1938:   /*
1939:     For each node in the grid: we get the neighbors in the local (on processor ordering
1940:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1941:     PETSc ordering.
1942:   */
1943:   if (!da->prealloc_only) {
1944:     PetscCalloc1(col*col*col*nc*nc,&values);
1945:     for (i=xs; i<xs+nx; i++) {
1946:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1947:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1948:       for (j=ys; j<ys+ny; j++) {
1949:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1950:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1951:         for (k=zs; k<zs+nz; k++) {
1952:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1953:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1955:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1957:           cnt = 0;
1958:           for (ii=istart; ii<iend+1; ii++) {
1959:             for (jj=jstart; jj<jend+1; jj++) {
1960:               for (kk=kstart; kk<kend+1; kk++) {
1961:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1962:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1963:                 }
1964:               }
1965:             }
1966:           }
1967:           MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1968:         }
1969:       }
1970:     }
1971:     PetscFree(values);
1972:     /* do not copy values to GPU since they are all zero and not yet needed there */
1973:     MatPinToCPU(J,PETSC_TRUE);
1974:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1975:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1976:     MatPinToCPU(J,PETSC_FALSE);
1977:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1978:   }
1979:   PetscFree(cols);
1980:   return(0);
1981: }

1983: /*
1984:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1985:   identify in the local ordering with periodic domain.
1986: */
1987: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1988: {
1990:   PetscInt       i,n;

1993:   ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);
1994:   ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);
1995:   for (i=0,n=0; i<*cnt; i++) {
1996:     if (col[i] >= *row) col[n++] = col[i];
1997:   }
1998:   *cnt = n;
1999:   return(0);
2000: }

2002: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2003: {
2004:   PetscErrorCode         ierr;
2005:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2006:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2007:   PetscInt               istart,iend,jstart,jend,ii,jj;
2008:   MPI_Comm               comm;
2009:   PetscScalar            *values;
2010:   DMBoundaryType         bx,by;
2011:   DMDAStencilType        st;
2012:   ISLocalToGlobalMapping ltog;

2015:   /*
2016:      nc - number of components per grid point
2017:      col - number of colors needed in one direction for single component problem
2018:   */
2019:   DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);
2020:   col  = 2*s + 1;

2022:   DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
2023:   DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
2024:   PetscObjectGetComm((PetscObject)da,&comm);

2026:   PetscMalloc1(col*col*nc*nc,&cols);

2028:   DMGetLocalToGlobalMapping(da,&ltog);

2030:   /* determine the matrix preallocation information */
2031:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
2032:   for (i=xs; i<xs+nx; i++) {
2033:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2034:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2035:     for (j=ys; j<ys+ny; j++) {
2036:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2037:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2038:       slot   = i - gxs + gnx*(j - gys);

2040:       /* Find block columns in block row */
2041:       cnt = 0;
2042:       for (ii=istart; ii<iend+1; ii++) {
2043:         for (jj=jstart; jj<jend+1; jj++) {
2044:           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2045:             cols[cnt++] = slot + ii + gnx*jj;
2046:           }
2047:         }
2048:       }
2049:       L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2050:       MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2051:     }
2052:   }
2053:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2054:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2055:   MatPreallocateFinalize(dnz,onz);

2057:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2059:   /*
2060:     For each node in the grid: we get the neighbors in the local (on processor ordering
2061:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2062:     PETSc ordering.
2063:   */
2064:   if (!da->prealloc_only) {
2065:     PetscCalloc1(col*col*nc*nc,&values);
2066:     for (i=xs; i<xs+nx; i++) {
2067:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2068:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2069:       for (j=ys; j<ys+ny; j++) {
2070:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2071:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2072:         slot   = i - gxs + gnx*(j - gys);

2074:         /* Find block columns in block row */
2075:         cnt = 0;
2076:         for (ii=istart; ii<iend+1; ii++) {
2077:           for (jj=jstart; jj<jend+1; jj++) {
2078:             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2079:               cols[cnt++] = slot + ii + gnx*jj;
2080:             }
2081:           }
2082:         }
2083:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2084:         MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2085:       }
2086:     }
2087:     PetscFree(values);
2088:     /* do not copy values to GPU since they are all zero and not yet needed there */
2089:     MatPinToCPU(J,PETSC_TRUE);
2090:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2091:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2092:     MatPinToCPU(J,PETSC_FALSE);
2093:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2094:   }
2095:   PetscFree(cols);
2096:   return(0);
2097: }

2099: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2100: {
2101:   PetscErrorCode         ierr;
2102:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2103:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2104:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2105:   MPI_Comm               comm;
2106:   PetscScalar            *values;
2107:   DMBoundaryType         bx,by,bz;
2108:   DMDAStencilType        st;
2109:   ISLocalToGlobalMapping ltog;

2112:   /*
2113:      nc - number of components per grid point
2114:      col - number of colors needed in one direction for single component problem
2115:   */
2116:   DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);
2117:   col  = 2*s + 1;

2119:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2120:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2121:   PetscObjectGetComm((PetscObject)da,&comm);

2123:   /* create the matrix */
2124:   PetscMalloc1(col*col*col,&cols);

2126:   DMGetLocalToGlobalMapping(da,&ltog);

2128:   /* determine the matrix preallocation information */
2129:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
2130:   for (i=xs; i<xs+nx; i++) {
2131:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2132:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2133:     for (j=ys; j<ys+ny; j++) {
2134:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2135:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2136:       for (k=zs; k<zs+nz; k++) {
2137:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2138:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2140:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2142:         /* Find block columns in block row */
2143:         cnt = 0;
2144:         for (ii=istart; ii<iend+1; ii++) {
2145:           for (jj=jstart; jj<jend+1; jj++) {
2146:             for (kk=kstart; kk<kend+1; kk++) {
2147:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2148:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2149:               }
2150:             }
2151:           }
2152:         }
2153:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2154:         MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2155:       }
2156:     }
2157:   }
2158:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2159:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2160:   MatPreallocateFinalize(dnz,onz);

2162:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2164:   /*
2165:     For each node in the grid: we get the neighbors in the local (on processor ordering
2166:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2167:     PETSc ordering.
2168:   */
2169:   if (!da->prealloc_only) {
2170:     PetscCalloc1(col*col*col*nc*nc,&values);
2171:     for (i=xs; i<xs+nx; i++) {
2172:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2173:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2174:       for (j=ys; j<ys+ny; j++) {
2175:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2176:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2177:         for (k=zs; k<zs+nz; k++) {
2178:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2179:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2181:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2183:           cnt = 0;
2184:           for (ii=istart; ii<iend+1; ii++) {
2185:             for (jj=jstart; jj<jend+1; jj++) {
2186:               for (kk=kstart; kk<kend+1; kk++) {
2187:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2188:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2189:                 }
2190:               }
2191:             }
2192:           }
2193:           L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2194:           MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2195:         }
2196:       }
2197:     }
2198:     PetscFree(values);
2199:     /* do not copy values to GPU since they are all zero and not yet needed there */
2200:     MatPinToCPU(J,PETSC_TRUE);
2201:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2202:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2203:     MatPinToCPU(J,PETSC_FALSE);
2204:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2205:   }
2206:   PetscFree(cols);
2207:   return(0);
2208: }

2210: /* ---------------------------------------------------------------------------------*/

2212: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2213: {
2214:   PetscErrorCode         ierr;
2215:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2216:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2217:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2218:   DM_DA                  *dd = (DM_DA*)da->data;
2219:   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2220:   MPI_Comm               comm;
2221:   PetscScalar            *values;
2222:   DMBoundaryType         bx,by,bz;
2223:   ISLocalToGlobalMapping ltog;
2224:   DMDAStencilType        st;
2225:   PetscBool              removedups = PETSC_FALSE;

2228:   /*
2229:          nc - number of components per grid point
2230:          col - number of colors needed in one direction for single component problem

2232:   */
2233:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
2234:   col  = 2*s + 1;
2235:   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2236:                  by 2*stencil_width + 1\n");
2237:   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2238:                  by 2*stencil_width + 1\n");
2239:   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2240:                  by 2*stencil_width + 1\n");

2242:   /*
2243:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2244:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2245:   */
2246:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2247:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2248:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

2250:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2251:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2252:   PetscObjectGetComm((PetscObject)da,&comm);

2254:   PetscMalloc1(col*col*col*nc,&cols);
2255:   DMGetLocalToGlobalMapping(da,&ltog);

2257:   /* determine the matrix preallocation information */
2258:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);

2260:   MatSetBlockSize(J,nc);
2261:   for (i=xs; i<xs+nx; i++) {
2262:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2263:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2264:     for (j=ys; j<ys+ny; j++) {
2265:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2266:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2267:       for (k=zs; k<zs+nz; k++) {
2268:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2269:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2271:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2273:         for (l=0; l<nc; l++) {
2274:           cnt = 0;
2275:           for (ii=istart; ii<iend+1; ii++) {
2276:             for (jj=jstart; jj<jend+1; jj++) {
2277:               for (kk=kstart; kk<kend+1; kk++) {
2278:                 if (ii || jj || kk) {
2279:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2280:                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2281:                   }
2282:                 } else {
2283:                   if (dfill) {
2284:                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2285:                   } else {
2286:                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2287:                   }
2288:                 }
2289:               }
2290:             }
2291:           }
2292:           row  = l + nc*(slot);
2293:           maxcnt = PetscMax(maxcnt,cnt);
2294:           if (removedups) {
2295:             MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2296:           } else {
2297:             MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2298:           }
2299:         }
2300:       }
2301:     }
2302:   }
2303:   MatSeqAIJSetPreallocation(J,0,dnz);
2304:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
2305:   MatPreallocateFinalize(dnz,onz);
2306:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2308:   /*
2309:     For each node in the grid: we get the neighbors in the local (on processor ordering
2310:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2311:     PETSc ordering.
2312:   */
2313:   if (!da->prealloc_only) {
2314:     PetscCalloc1(maxcnt,&values);
2315:     for (i=xs; i<xs+nx; i++) {
2316:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2317:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2318:       for (j=ys; j<ys+ny; j++) {
2319:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2320:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2321:         for (k=zs; k<zs+nz; k++) {
2322:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2323:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2325:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2327:           for (l=0; l<nc; l++) {
2328:             cnt = 0;
2329:             for (ii=istart; ii<iend+1; ii++) {
2330:               for (jj=jstart; jj<jend+1; jj++) {
2331:                 for (kk=kstart; kk<kend+1; kk++) {
2332:                   if (ii || jj || kk) {
2333:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2334:                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2335:                     }
2336:                   } else {
2337:                     if (dfill) {
2338:                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2339:                     } else {
2340:                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2341:                     }
2342:                   }
2343:                 }
2344:               }
2345:             }
2346:             row  = l + nc*(slot);
2347:             MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);
2348:           }
2349:         }
2350:       }
2351:     }
2352:     PetscFree(values);
2353:     /* do not copy values to GPU since they are all zero and not yet needed there */
2354:     MatPinToCPU(J,PETSC_TRUE);
2355:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2356:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2357:     MatPinToCPU(J,PETSC_FALSE);
2358:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2359:   }
2360:   PetscFree(cols);
2361:   return(0);
2362: }