Actual source code: fdda.c
#include <petsc/private/dmdaimpl.h>
#include <petscmat.h>
#include <petscbt.h>

extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

/*
   For a ghost index i that may be negative or greater than the upper bound, this
   maps it into the range 0:m-1 using periodicity
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
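/* e.g., with m = 10 this maps the ghost indices -1 -> 9 and 10 -> 0, and leaves 0:9 unchanged */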
static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
{
  PetscInt i, j, nz, *fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

  /* count the number of nonzeros */
  nz = 0;
  for (i = 0; i < w; i++) {
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) nz++;
    }
  }
  PetscCall(PetscMalloc1(nz + w + 1, &fill));
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;
  for (i = 0; i < w; i++) {
    fill[i] = nz;
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
{
  PetscInt nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

  /* Determine number of non-zeros */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation. */
  PetscCall(PetscMalloc1(nz + w + 1, rfill));
  PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;
  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da    - the `DMDA`
. dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
  dfill[9] = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}
.ve
  means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
  diagonal block).
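  As an illustrative sketch (not taken from the original source), such a diagonal pattern
  might be passed together with an off-diagonal pattern that couples each component only
  with itself\:
.vb
  PetscInt dfill[9] = {1, 0, 0,
                       1, 1, 0,
                       0, 1, 1};
  PetscInt ofill[9] = {1, 0, 0,
                       0, 1, 0,
                       0, 0, 1};
  PetscCall(DMDASetBlockFills(da, dfill, ofill));
.ve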
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Glenn Hammond

.seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the `DMDA`
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the (dof+1)st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.
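  As a concrete illustration (worked out here from the dense example in `DMDASetBlockFills()`
  and the construction in `DMDASetBlockFills_Private()` above, not taken verbatim from the
  original source), the 3-component dense pattern {1,0,0, 1,1,0, 0,1,1} has nz = 5 and
  dof = 3, so its sparse form is an array of length 5 + 3 + 1 = 9\:
.vb
  dfillsparse[9] = {4, 5, 7,  /* start of each row's column indices in this array */
                    9,        /* nz + dof + 1, the total length of the array      */
                    0,        /* row 0 couples with column 0                      */
                    0, 1,     /* row 1 couples with columns 0 and 1               */
                    1, 2};    /* row 2 couples with columns 1 and 2               */
.ve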
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Philip C. Roth

.seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                m
        ------------------------------------------------------
       |                                                      |
       |                                                      |
       |               ----------------------                 |
       |               |                    |                 |
    n  |           yn  |                    |                 |
       |               |                    |                 |
       |               .----------------------                |
       |             (xs,ys)     xn                           |
       |            .                                         |
       |         (gxs,gys)                                    |
       |                                                      |
        ------------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
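/*
   A minimal usage sketch (illustrative, not part of the original file): the colorings
   produced here are typically consumed by the finite-difference Jacobian machinery, e.g.

     ISColoring    coloring;
     MatFDColoring fdcoloring;
     PetscCall(DMCreateColoring(da, IS_COLORING_GLOBAL, &coloring));
     PetscCall(MatFDColoringCreate(J, coloring, &fdcoloring));
     PetscCall(ISColoringDestroy(&coloring));
*/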
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
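  /*
     Why 5 colors suffice (explanatory note added here, not in the original source): the
     formula k + nc*((3*j + i) % 5) used below changes the value of 3*j + i by +-1 or +-3
     at the four 5-point-stencil neighbors of (i,j); none of these offsets is 0 modulo 5,
     so a node never shares a color with any other node in its stencil.
  */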
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                m
        ------------------------------------------------------
       |                                                      |
       |                                                      |
       |               ----------------------                 |
       |               |                    |                 |
    n  |           ny  |                    |                 |
       |               |                    |                 |
       |               .----------------------                |
       |             (xs,ys)     nx                           |
       |            .                                         |
       |         (gxs,gys)                                    |
       |                                                      |
        ------------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating, because sometimes
     we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
    if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
    if (!aij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
      if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
      if (!baij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) {
          PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
          if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
        }
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}
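/*
   A minimal usage sketch (illustrative, not part of the original file): users normally reach
   this routine through the generic DM interface rather than calling it directly, e.g.

     Mat J;
     PetscCall(DMSetMatType(da, MATAIJ));
     PetscCall(DMCreateMatrix(da, &J));
*/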
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                  *da = (DM_DA *)dm->data;
  Mat                     lJ, P;
  ISLocalToGlobalMapping  ltog;
  IS                      is;
  PetscBT                 bt;
  const PetscInt         *e_loc, *idx;
  PetscInt                i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt                xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt                lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm                comm;
  PetscScalar            *values;
  DMBoundaryType          bx, by;
  ISLocalToGlobalMapping  ltog;
  DMDAStencilType         st;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt                xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt                m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt                istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm                comm;
  PetscScalar            *values;
  DMBoundaryType          bx, by, bz;
  ISLocalToGlobalMapping  ltog;
  DMDAStencilType         st;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1274: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1275: {
1276: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1277: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1278: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1279: MPI_Comm comm;
1280: DMBoundaryType bx, by, bz;
1281: ISLocalToGlobalMapping ltog, mltog;
1282: DMDAStencilType st;
1283: PetscBool removedups = PETSC_FALSE;
1285: PetscFunctionBegin;
1286: /*
1287: nc - number of components per grid point
1288: col - number of colors needed in one direction for single component problem
1289: */
1290: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1291: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1292: col = 2 * s + 1;
1294: /*
1295: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1296: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1297: */
1298: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1299: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1300: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1302: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1303: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1304: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1306: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1307: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1309: PetscCall(MatSetBlockSize(J, nc));
1310: /* determine the matrix preallocation information */
1311: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1312: for (i = xs; i < xs + nx; i++) {
1313: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1314: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1315: for (j = ys; j < ys + ny; j++) {
1316: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1317: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1318: for (k = zs; k < zs + nz; k++) {
1319: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1320: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1322: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1324: cnt = 0;
1325: for (l = 0; l < nc; l++) {
1326: for (ii = istart; ii < iend + 1; ii++) {
1327: for (jj = jstart; jj < jend + 1; jj++) {
1328: for (kk = kstart; kk < kend + 1; kk++) {
1329: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1330: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1331: }
1332: }
1333: }
1334: }
1335: rows[l] = l + nc * (slot);
1336: }
1337: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1338: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1339: }
1340: }
1341: }
1342: PetscCall(MatSetBlockSize(J, nc));
1343: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1344: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1345: MatPreallocateEnd(dnz, onz);
1346: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1347: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1349: /*
1350: For each node in the grid: we get the neighbors in the local (on processor ordering
1351: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1352: PETSc ordering.
1353: */
1354: if (!da->prealloc_only) {
1355: for (i = xs; i < xs + nx; i++) {
1356: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1357: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1358: for (j = ys; j < ys + ny; j++) {
1359: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1360: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1361: for (k = zs; k < zs + nz; k++) {
1362: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1363: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1365: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1367: cnt = 0;
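/* loop kk slowest and ii fastest so the column indices of each row come out in increasing order */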
1368: for (kk = kstart; kk < kend + 1; kk++) {
1369: for (jj = jstart; jj < jend + 1; jj++) {
1370: for (ii = istart; ii < iend + 1; ii++) {
1371: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1372: cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1373: for (l = 1; l < nc; l++) {
1374: cols[cnt] = 1 + cols[cnt - 1];
1375: cnt++;
1376: }
1377: }
1378: }
1379: }
1380: }
1381: rows[0] = nc * (slot);
1382: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1383: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1384: }
1385: }
1386: }
1387: /* do not copy values to GPU since they are all zero and not yet needed there */
1388: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1389: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1390: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1391: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1392: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1393: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1394: }
1395: PetscCall(PetscFree2(rows, cols));
1396: PetscFunctionReturn(PETSC_SUCCESS);
1397: }
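
/*
   A minimal usage sketch (illustration only; the grid size, dof, and fill values below are made up):
   this routine is normally reached through DMCreateMatrix() after the caller supplies fill patterns, e.g.

     PetscInt dfill[4] = {1, 0,
                          1, 1};
     PetscInt ofill[4] = {1, 0,
                          0, 1};
     DM       da;
     Mat      J;
     PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 64, 2, 1, NULL, &da));
     PetscCall(DMSetUp(da));
     PetscCall(DMDASetBlockFills(da, dfill, ofill));
     PetscCall(DMSetMatType(da, MATAIJ));
     PetscCall(DMCreateMatrix(da, &J));
*/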
1399: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1400: {
1401: DM_DA *dd = (DM_DA *)da->data;
1402: PetscInt xs, nx, i, j, gxs, gnx, row, k, l;
1403: PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1404: PetscInt *ofill = dd->ofill, *dfill = dd->dfill;
1405: DMBoundaryType bx;
1406: ISLocalToGlobalMapping ltog;
1407: PetscMPIInt rank, size;
1409: PetscFunctionBegin;
1410: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1411: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1413: /*
1414: nc - number of components per grid point
1415: */
1416: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1417: PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1418: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1419: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1421: PetscCall(MatSetBlockSize(J, nc));
1422: PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1424: /*
1425: Note: the counts could be smaller for the first and last process when the boundary is not periodic;
1426: the counting below assumes dfill is non-NULL.
1427: */
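/* cols[] accumulates, per row, the nonzeros kept in the diagonal (on-process) part and ocols[] those
   in the off-process part; ofill[j+1]-ofill[j] is the number of components that component j couples
   to in one off-diagonal block, dfill[j+1]-dfill[j] the number in the diagonal block */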
1428: cnt = 0;
1429: /* coupling with process to the left */
1430: for (i = 0; i < s; i++) {
1431: for (j = 0; j < nc; j++) {
1432: ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1433: cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1434: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1435: if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1436: else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1437: }
1438: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1439: cnt++;
1440: }
1441: }
1442: for (i = s; i < nx - s; i++) {
1443: for (j = 0; j < nc; j++) {
1444: cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1445: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1446: cnt++;
1447: }
1448: }
1449: /* coupling with process to the right */
1450: for (i = nx - s; i < nx; i++) {
1451: for (j = 0; j < nc; j++) {
1452: ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1453: cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1454: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1455: if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1456: else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1457: }
1458: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1459: cnt++;
1460: }
1461: }
1463: PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1464: PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1465: PetscCall(PetscFree2(cols, ocols));
1467: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1468: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1470: /*
1471: For each node in the grid we compute its stencil neighbors directly in the global natural
1472: ordering, which in 1d coincides with the global PETSc ordering, and insert them with
1473: MatSetValues().
1474: */
1475: if (!da->prealloc_only) {
1476: PetscCall(PetscMalloc1(maxcnt, &cols));
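/* row is the global index of the first degree of freedom owned by this process */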
1477: row = xs * nc;
1478: /* coupling with process to the left */
1479: for (i = xs; i < xs + s; i++) {
1480: for (j = 0; j < nc; j++) {
1481: cnt = 0;
1482: if (rank) {
1483: for (l = 0; l < s; l++) {
1484: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1485: }
1486: }
1487: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1488: for (l = 0; l < s; l++) {
1489: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1490: }
1491: }
1492: if (dfill) {
1493: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1494: } else {
1495: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1496: }
1497: for (l = 0; l < s; l++) {
1498: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1499: }
1500: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1501: row++;
1502: }
1503: }
1504: for (i = xs + s; i < xs + nx - s; i++) {
1505: for (j = 0; j < nc; j++) {
1506: cnt = 0;
1507: for (l = 0; l < s; l++) {
1508: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1509: }
1510: if (dfill) {
1511: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1512: } else {
1513: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1514: }
1515: for (l = 0; l < s; l++) {
1516: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1517: }
1518: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1519: row++;
1520: }
1521: }
1522: /* coupling with process to the right */
1523: for (i = xs + nx - s; i < xs + nx; i++) {
1524: for (j = 0; j < nc; j++) {
1525: cnt = 0;
1526: for (l = 0; l < s; l++) {
1527: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1528: }
1529: if (dfill) {
1530: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1531: } else {
1532: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1533: }
1534: if (rank < size - 1) {
1535: for (l = 0; l < s; l++) {
1536: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1537: }
1538: }
1539: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1540: for (l = 0; l < s; l++) {
1541: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1542: }
1543: }
1544: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1545: row++;
1546: }
1547: }
1548: PetscCall(PetscFree(cols));
1549: /* do not copy values to GPU since they are all zero and not yet needed there */
1550: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1551: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1552: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1553: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1554: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1555: }
1556: PetscFunctionReturn(PETSC_SUCCESS);
1557: }
1559: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1560: {
1561: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1562: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1563: PetscInt istart, iend;
1564: DMBoundaryType bx;
1565: ISLocalToGlobalMapping ltog, mltog;
1567: PetscFunctionBegin;
1568: /*
1569: nc - number of components per grid point
1570: col - number of colors needed in one direction for single component problem
1571: */
1572: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1573: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1574: col = 2 * s + 1;
1576: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1577: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1579: PetscCall(MatSetBlockSize(J, nc));
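/* each row couples to at most col = 2*s+1 stencil points, each contributing nc columns */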
1580: PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1581: PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1583: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1584: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1585: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1587: /*
1588: For each node in the grid we get its neighbors in the local ordering (the on-process
1589: ordering that includes the ghost points); MatSetValuesLocal() then maps those indices
1590: to the global PETSc ordering.
1591: */
1592: if (!da->prealloc_only) {
1593: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1594: for (i = xs; i < xs + nx; i++) {
1595: istart = PetscMax(-s, gxs - i);
1596: iend = PetscMin(s, gxs + gnx - i - 1);
1597: slot = i - gxs;
1599: cnt = 0;
1600: for (i1 = istart; i1 < iend + 1; i1++) {
1601: cols[cnt++] = nc * (slot + i1);
1602: for (l = 1; l < nc; l++) {
1603: cols[cnt] = 1 + cols[cnt - 1];
1604: cnt++;
1605: }
1606: }
1607: rows[0] = nc * (slot);
1608: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1609: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1610: }
1611: /* do not copy values to GPU since they are all zero and not yet needed there */
1612: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1613: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1614: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1615: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1616: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1617: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1618: PetscCall(PetscFree2(rows, cols));
1619: }
1620: PetscFunctionReturn(PETSC_SUCCESS);
1621: }
1623: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1624: {
1625: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1626: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1627: PetscInt istart, iend;
1628: DMBoundaryType bx;
1629: ISLocalToGlobalMapping ltog, mltog;
1631: PetscFunctionBegin;
1632: /*
1633: nc - number of components per grid point
1634: col - number of colors needed in one direction for single component problem
1635: */
1636: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1637: col = 2 * s + 1;
1639: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1640: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1642: PetscCall(MatSetBlockSize(J, nc));
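/* reserve one buffer for an upper bound on the total number of nonzeros; rows are then expected to
   be filled consecutively, starting from the first local row, with sorted column indices */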
1643: PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1645: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1646: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1647: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1649: /*
1650: For each node in the grid we get its neighbors in the local ordering (the on-process
1651: ordering that includes the ghost points); MatSetValuesLocal() then maps those indices
1652: to the global PETSc ordering.
1653: */
1654: if (!da->prealloc_only) {
1655: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1656: for (i = xs; i < xs + nx; i++) {
1657: istart = PetscMax(-s, gxs - i);
1658: iend = PetscMin(s, gxs + gnx - i - 1);
1659: slot = i - gxs;
1661: cnt = 0;
1662: for (i1 = istart; i1 < iend + 1; i1++) {
1663: cols[cnt++] = nc * (slot + i1);
1664: for (l = 1; l < nc; l++) {
1665: cols[cnt] = 1 + cols[cnt - 1];
1666: cnt++;
1667: }
1668: }
1669: rows[0] = nc * (slot);
1670: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1671: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1672: }
1673: /* do not copy values to GPU since they are all zero and not yet needed there */
1674: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1675: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1676: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1677: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1678: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1679: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1680: PetscCall(PetscFree2(rows, cols));
1681: }
1682: PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1683: PetscFunctionReturn(PETSC_SUCCESS);
1684: }
1686: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1687: {
1688: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1689: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1690: PetscInt istart, iend, jstart, jend, ii, jj;
1691: MPI_Comm comm;
1692: PetscScalar *values;
1693: DMBoundaryType bx, by;
1694: DMDAStencilType st;
1695: ISLocalToGlobalMapping ltog;
1697: PetscFunctionBegin;
1698: /*
1699: nc - number of components per grid point
1700: col - number of colors needed in one direction for single component problem
1701: */
1702: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1703: col = 2 * s + 1;
1705: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1706: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1707: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1709: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1711: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1713: /* determine the matrix preallocation information */
1714: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1715: for (i = xs; i < xs + nx; i++) {
1716: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1717: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1718: for (j = ys; j < ys + ny; j++) {
1719: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1720: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1721: slot = i - gxs + gnx * (j - gys);
1723: /* Find block columns in block row */
1724: cnt = 0;
1725: for (ii = istart; ii < iend + 1; ii++) {
1726: for (jj = jstart; jj < jend + 1; jj++) {
1727: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1728: cols[cnt++] = slot + ii + gnx * jj;
1729: }
1730: }
1731: }
1732: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1733: }
1734: }
1735: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1736: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1737: MatPreallocateEnd(dnz, onz);
1739: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1741: /*
1742: For each block node in the grid we get its block neighbors in the local ordering (the
1743: on-process ordering that includes the ghost points); MatSetValuesBlockedLocal() then maps
1744: those block indices to the global PETSc ordering.
1745: */
1746: if (!da->prealloc_only) {
1747: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1748: for (i = xs; i < xs + nx; i++) {
1749: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1750: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1751: for (j = ys; j < ys + ny; j++) {
1752: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1753: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1754: slot = i - gxs + gnx * (j - gys);
1755: cnt = 0;
1756: for (ii = istart; ii < iend + 1; ii++) {
1757: for (jj = jstart; jj < jend + 1; jj++) {
1758: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1759: cols[cnt++] = slot + ii + gnx * jj;
1760: }
1761: }
1762: }
1763: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1764: }
1765: }
1766: PetscCall(PetscFree(values));
1767: /* do not copy values to GPU since they are all zero and not yet needed there */
1768: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1769: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1770: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1771: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1772: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1773: }
1774: PetscCall(PetscFree(cols));
1775: PetscFunctionReturn(PETSC_SUCCESS);
1776: }
1778: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1779: {
1780: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1781: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1782: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1783: MPI_Comm comm;
1784: PetscScalar *values;
1785: DMBoundaryType bx, by, bz;
1786: DMDAStencilType st;
1787: ISLocalToGlobalMapping ltog;
1789: PetscFunctionBegin;
1790: /*
1791: nc - number of components per grid point
1792: col - number of colors needed in one direction for single component problem
1793: */
1794: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1795: col = 2 * s + 1;
1797: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1798: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1799: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
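/* block (node) column indices only, hence no factor of nc in the length */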
1801: PetscCall(PetscMalloc1(col * col * col, &cols));
1803: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1805: /* determine the matrix preallocation information */
1806: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1807: for (i = xs; i < xs + nx; i++) {
1808: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1809: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1810: for (j = ys; j < ys + ny; j++) {
1811: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1812: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1813: for (k = zs; k < zs + nz; k++) {
1814: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1815: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1817: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1819: /* Find block columns in block row */
1820: cnt = 0;
1821: for (ii = istart; ii < iend + 1; ii++) {
1822: for (jj = jstart; jj < jend + 1; jj++) {
1823: for (kk = kstart; kk < kend + 1; kk++) {
1824: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1825: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1826: }
1827: }
1828: }
1829: }
1830: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1831: }
1832: }
1833: }
1834: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1835: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1836: MatPreallocateEnd(dnz, onz);
1838: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1840: /*
1841: For each block node in the grid we get its block neighbors in the local ordering (the
1842: on-process ordering that includes the ghost points); MatSetValuesBlockedLocal() then maps
1843: those block indices to the global PETSc ordering.
1844: */
1845: if (!da->prealloc_only) {
1846: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1847: for (i = xs; i < xs + nx; i++) {
1848: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1849: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1850: for (j = ys; j < ys + ny; j++) {
1851: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1852: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1853: for (k = zs; k < zs + nz; k++) {
1854: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1855: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1857: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1859: cnt = 0;
1860: for (ii = istart; ii < iend + 1; ii++) {
1861: for (jj = jstart; jj < jend + 1; jj++) {
1862: for (kk = kstart; kk < kend + 1; kk++) {
1863: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1864: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1865: }
1866: }
1867: }
1868: }
1869: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1870: }
1871: }
1872: }
1873: PetscCall(PetscFree(values));
1874: /* do not copy values to GPU since they are all zero and not yet needed there */
1875: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1876: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1877: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1878: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1879: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1880: }
1881: PetscCall(PetscFree(cols));
1882: PetscFunctionReturn(PETSC_SUCCESS);
1883: }
1885: /*
1886: This helper is for SBAIJ preallocation: it discards the lower-triangular entries, which are
1887: difficult to identify in the local ordering with a periodic domain.
1888: */
1889: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1890: {
1891: PetscInt i, n;
1893: PetscFunctionBegin;
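/* map the block row and block columns to global block indices, then keep only the columns on or
   above the diagonal */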
1894: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1895: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1896: for (i = 0, n = 0; i < *cnt; i++) {
1897: if (col[i] >= *row) col[n++] = col[i];
1898: }
1899: *cnt = n;
1900: PetscFunctionReturn(PETSC_SUCCESS);
1901: }
1903: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1904: {
1905: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1906: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1907: PetscInt istart, iend, jstart, jend, ii, jj;
1908: MPI_Comm comm;
1909: PetscScalar *values;
1910: DMBoundaryType bx, by;
1911: DMDAStencilType st;
1912: ISLocalToGlobalMapping ltog;
1914: PetscFunctionBegin;
1915: /*
1916: nc - number of components per grid point
1917: col - number of colors needed in one direction for single component problem
1918: */
1919: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1920: col = 2 * s + 1;
1922: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1923: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1924: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1926: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1928: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1930: /* determine the matrix preallocation information */
1931: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1932: for (i = xs; i < xs + nx; i++) {
1933: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1934: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1935: for (j = ys; j < ys + ny; j++) {
1936: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1937: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1938: slot = i - gxs + gnx * (j - gys);
1940: /* Find block columns in block row */
1941: cnt = 0;
1942: for (ii = istart; ii < iend + 1; ii++) {
1943: for (jj = jstart; jj < jend + 1; jj++) {
1944: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1945: }
1946: }
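/* SBAIJ stores only the upper triangle, so drop the lower-triangular block columns before counting */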
1947: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1948: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1949: }
1950: }
1951: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1952: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1953: MatPreallocateEnd(dnz, onz);
1955: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1957: /*
1958: For each node in the grid we get its block neighbors in the local ordering (the on-process
1959: ordering that includes the ghost points); L2GFilterUpperTriangular() then maps them to global
1960: block indices and they are inserted with MatSetValuesBlocked().
1961: */
1962: if (!da->prealloc_only) {
1963: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1964: for (i = xs; i < xs + nx; i++) {
1965: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1966: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1967: for (j = ys; j < ys + ny; j++) {
1968: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1969: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1970: slot = i - gxs + gnx * (j - gys);
1972: /* Find block columns in block row */
1973: cnt = 0;
1974: for (ii = istart; ii < iend + 1; ii++) {
1975: for (jj = jstart; jj < jend + 1; jj++) {
1976: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1977: }
1978: }
1979: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1980: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1981: }
1982: }
1983: PetscCall(PetscFree(values));
1984: /* do not copy values to GPU since they are all zero and not yet needed there */
1985: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1986: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1987: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1988: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1989: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1990: }
1991: PetscCall(PetscFree(cols));
1992: PetscFunctionReturn(PETSC_SUCCESS);
1993: }
1995: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1996: {
1997: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1998: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1999: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2000: MPI_Comm comm;
2001: PetscScalar *values;
2002: DMBoundaryType bx, by, bz;
2003: DMDAStencilType st;
2004: ISLocalToGlobalMapping ltog;
2006: PetscFunctionBegin;
2007: /*
2008: nc - number of components per grid point
2009: col - number of colors needed in one direction for single component problem
2010: */
2011: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2012: col = 2 * s + 1;
2014: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2015: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2016: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2018: /* create the matrix */
2019: PetscCall(PetscMalloc1(col * col * col, &cols));
2021: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2023: /* determine the matrix preallocation information */
2024: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2025: for (i = xs; i < xs + nx; i++) {
2026: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2027: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2028: for (j = ys; j < ys + ny; j++) {
2029: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2030: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2031: for (k = zs; k < zs + nz; k++) {
2032: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2033: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2035: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2037: /* Find block columns in block row */
2038: cnt = 0;
2039: for (ii = istart; ii < iend + 1; ii++) {
2040: for (jj = jstart; jj < jend + 1; jj++) {
2041: for (kk = kstart; kk < kend + 1; kk++) {
2042: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2043: }
2044: }
2045: }
2046: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2047: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2048: }
2049: }
2050: }
2051: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2052: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2053: MatPreallocateEnd(dnz, onz);
2055: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2057: /*
2058: For each node in the grid we get its block neighbors in the local ordering (the on-process
2059: ordering that includes the ghost points); L2GFilterUpperTriangular() then maps them to global
2060: block indices and they are inserted with MatSetValuesBlocked().
2061: */
2062: if (!da->prealloc_only) {
2063: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2064: for (i = xs; i < xs + nx; i++) {
2065: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2066: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2067: for (j = ys; j < ys + ny; j++) {
2068: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2069: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2070: for (k = zs; k < zs + nz; k++) {
2071: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2072: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2074: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2076: cnt = 0;
2077: for (ii = istart; ii < iend + 1; ii++) {
2078: for (jj = jstart; jj < jend + 1; jj++) {
2079: for (kk = kstart; kk < kend + 1; kk++) {
2080: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2081: }
2082: }
2083: }
2084: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2085: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2086: }
2087: }
2088: }
2089: PetscCall(PetscFree(values));
2090: /* do not copy values to GPU since they are all zero and not yet needed there */
2091: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2092: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2093: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2094: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2095: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2096: }
2097: PetscCall(PetscFree(cols));
2098: PetscFunctionReturn(PETSC_SUCCESS);
2099: }
2101: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2102: {
2103: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2104: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2105: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2106: DM_DA *dd = (DM_DA *)da->data;
2107: PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2108: MPI_Comm comm;
2109: PetscScalar *values;
2110: DMBoundaryType bx, by, bz;
2111: ISLocalToGlobalMapping ltog;
2112: DMDAStencilType st;
2113: PetscBool removedups = PETSC_FALSE;
2115: PetscFunctionBegin;
2116: /*
2117: nc - number of components per grid point
2118: col - number of colors needed in one direction for single component problem
2119: */
2120: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2121: col = 2 * s + 1;
2123: /*
2124: With a single process in a skinny periodic dimension the code would otherwise count some nonzero columns
2125: multiple times, because "wrapping" around the end of the domain revisits entries already counted from the other direction.
2126: */
2127: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2128: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2129: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2131: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2132: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2133: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2135: PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2136: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2138: /* determine the matrix preallocation information */
2139: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2141: PetscCall(MatSetBlockSize(J, nc));
2142: for (i = xs; i < xs + nx; i++) {
2143: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2144: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2145: for (j = ys; j < ys + ny; j++) {
2146: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2147: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2148: for (k = zs; k < zs + nz; k++) {
2149: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2150: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2152: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
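/* the stencil center (ii == jj == kk == 0) couples through dfill (dense when NULL); every other
   stencil point couples through ofill */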
2154: for (l = 0; l < nc; l++) {
2155: cnt = 0;
2156: for (ii = istart; ii < iend + 1; ii++) {
2157: for (jj = jstart; jj < jend + 1; jj++) {
2158: for (kk = kstart; kk < kend + 1; kk++) {
2159: if (ii || jj || kk) {
2160: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2161: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2162: }
2163: } else {
2164: if (dfill) {
2165: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2166: } else {
2167: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2168: }
2169: }
2170: }
2171: }
2172: }
2173: row = l + nc * (slot);
2174: maxcnt = PetscMax(maxcnt, cnt);
2175: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2176: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2177: }
2178: }
2179: }
2180: }
2181: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2182: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2183: MatPreallocateEnd(dnz, onz);
2184: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2186: /*
2187: For each node in the grid we get its neighbors in the local ordering (the on-process
2188: ordering that includes the ghost points); MatSetValuesLocal() then maps those indices
2189: to the global PETSc ordering.
2190: */
2191: if (!da->prealloc_only) {
2192: PetscCall(PetscCalloc1(maxcnt, &values));
2193: for (i = xs; i < xs + nx; i++) {
2194: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2195: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2196: for (j = ys; j < ys + ny; j++) {
2197: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2198: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2199: for (k = zs; k < zs + nz; k++) {
2200: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2201: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2203: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2205: for (l = 0; l < nc; l++) {
2206: cnt = 0;
2207: for (ii = istart; ii < iend + 1; ii++) {
2208: for (jj = jstart; jj < jend + 1; jj++) {
2209: for (kk = kstart; kk < kend + 1; kk++) {
2210: if (ii || jj || kk) {
2211: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2212: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2213: }
2214: } else {
2215: if (dfill) {
2216: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2217: } else {
2218: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2219: }
2220: }
2221: }
2222: }
2223: }
2224: row = l + nc * (slot);
2225: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2226: }
2227: }
2228: }
2229: }
2230: PetscCall(PetscFree(values));
2231: /* do not copy values to GPU since they are all zero and not yet needed there */
2232: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2233: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2234: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2235: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2236: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2237: }
2238: PetscCall(PetscFree(cols));
2239: PetscFunctionReturn(PETSC_SUCCESS);
2240: }