Actual source code: ex3.c

petsc-master 2020-09-20
Report Typos and Errors
  1: static char help[] = "Solve a toy 3D problem on a staggered grid\n\n";
  2: /*

  4:   To demonstrate the basic functionality of DMStag, solves an isoviscous
  5:   incompressible Stokes problem on a rectangular 3D domain.

  7:   u_{xx} + u_{yy} + u_{zz} - p_x = f^x
  8:   v_{xx} + v_{yy} + u_{zz} - p_y = f^y
  9:   w_{xx} + w_{yy} + w_{zz} - p_y = f^z
 10:   u_x    + v_y    + w_z          = g

 12:   g = 0 for the physical case.

 14:   Boundary conditions give prescribed flow perpendicular to the boundaries,
 15:   and zero derivative perpendicular to them (free slip). This involves
 16:   using a modifed stencil at the boundaries. Another option would be to
 17:   use DM_BOUNDARY_GHOSTED in DMStagCreate3d() and a matrix-free operator (MATSHELL)
 18:   making use of the uniformly-available ghost layer.

 20:   Use the -pinpressure option to fix a pressure node, instead of providing
 21:   a constant-pressure nullspace. This allows use of direct solvers, e.g. to
 22:   use UMFPACK,

 24:      ./ex3 -pinpressure 1 -pc_type lu -pc_factor_mat_solver_type umfpack

 26: */
 27: #include <petscdm.h>
 28: #include <petscksp.h>
 29: #include <petscdmstag.h>

 31: /* Shorter, more convenient names for DMStagStencilLocation entries */
 32: #define BACK_DOWN_LEFT   DMSTAG_BACK_DOWN_LEFT
 33: #define BACK_DOWN        DMSTAG_BACK_DOWN
 34: #define BACK_DOWN_RIGHT  DMSTAG_BACK_DOWN_RIGHT
 35: #define BACK_LEFT        DMSTAG_BACK_LEFT
 36: #define BACK             DMSTAG_BACK
 37: #define BACK_RIGHT       DMSTAG_BACK_RIGHT
 38: #define BACK_UP_LEFT     DMSTAG_BACK_UP_LEFT
 39: #define BACK_UP          DMSTAG_BACK_UP
 40: #define BACK_UP_RIGHT    DMSTAG_BACK_UP_RIGHT
 41: #define DOWN_LEFT        DMSTAG_DOWN_LEFT
 42: #define DOWN             DMSTAG_DOWN
 43: #define DOWN_RIGHT       DMSTAG_DOWN_RIGHT
 44: #define LEFT             DMSTAG_LEFT
 45: #define ELEMENT          DMSTAG_ELEMENT
 46: #define RIGHT            DMSTAG_RIGHT
 47: #define UP_LEFT          DMSTAG_UP_LEFT
 48: #define UP               DMSTAG_UP
 49: #define UP_RIGHT         DMSTAG_UP_RIGHT
 50: #define FRONT_DOWN_LEFT  DMSTAG_FRONT_DOWN_LEFT
 51: #define FRONT_DOWN       DMSTAG_FRONT_DOWN
 52: #define FRONT_DOWN_RIGHT DMSTAG_FRONT_DOWN_RIGHT
 53: #define FRONT_LEFT       DMSTAG_FRONT_LEFT
 54: #define FRONT            DMSTAG_FRONT
 55: #define FRONT_RIGHT      DMSTAG_FRONT_RIGHT
 56: #define FRONT_UP_LEFT    DMSTAG_FRONT_UP_LEFT
 57: #define FRONT_UP         DMSTAG_FRONT_UP
 58: #define FRONT_UP_RIGHT   DMSTAG_FRONT_UP_RIGHT

 60: static PetscErrorCode CreateReferenceSolution(DM,Vec*);
 61: static PetscErrorCode CreateSystem(DM,Mat*,Vec*,PetscBool);
 62: static PetscErrorCode AttachNullspace(DM,Mat);
 63: static PetscErrorCode CheckSolution(Vec,Vec);

 65: /* Manufactured solution. Chosen to be higher order than can be solved exactly,
 66: and to have a zero derivative for flow parallel to the boundaries. That is,
 67: d(ux)/dy = 0 at the top and bottom boundaries, and d(uy)/dx = 0 at the right
 68: and left boundaries.
 69: These expressions could be made more interesting with more z terms,
 70: and convergence could be confirmed.  */

 72: static PetscScalar uxRef(PetscScalar x,PetscScalar y, PetscScalar z) {return 0.0*x + y*y - 2.0*y*y*y + y*y*y*y + 0.0*z;}
 73: static PetscScalar uyRef(PetscScalar x,PetscScalar y, PetscScalar z) {return x*x - 2.0*x*x*x + x*x*x*x +0.0*y + 0.0*z;}
 74: static PetscScalar uzRef(PetscScalar x,PetscScalar y, PetscScalar z) {return 0.0*x + 0.0*y + 0.0*z + 1.0;}
 75: static PetscScalar pRef (PetscScalar x,PetscScalar y, PetscScalar z) {return -1.0*(x-0.5) + -3.0/2.0*y*y + 0.5 -2.0*(z-1.0);} /* zero integral */
 76: static PetscScalar fx   (PetscScalar x,PetscScalar y, PetscScalar z) {return 0.0*x + 2.0 -12.0*y + 12.0*y*y + 0.0*z + 1.0;}
 77: static PetscScalar fy   (PetscScalar x,PetscScalar y, PetscScalar z) {return 2.0 -12.0*x + 12.0*x*x + 3.0*y + 0.0*z;}
 78: static PetscScalar fz   (PetscScalar x,PetscScalar y, PetscScalar z) {return 0.0*x + 0.0*y + 0.0*z + 2.0;}
 79: static PetscScalar g    (PetscScalar x,PetscScalar y, PetscScalar z) {return 0.0*x + 0.0*y + 0.0*z + 0.0;}

 81: int main(int argc,char **argv)
 82: {
 84:   DM             dmSol;
 85:   Vec            sol,solRef,rhs;
 86:   Mat            A;
 87:   KSP            ksp;
 88:   PC             pc;
 89:   PetscBool      pinPressure;

 91:   /* Initialize PETSc and process command line arguments */
 92:   PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
 93:   pinPressure = PETSC_TRUE;
 94:   PetscOptionsGetBool(NULL,NULL,"-pinpressure",&pinPressure,NULL);

 96:   /* Create 3D DMStag for the solution, and set up. */
 97:   {
 98:     const PetscInt dof0 = 0, dof1 = 0,dof2 = 1, dof3 = 1; /* 1 dof on each face and element center */
 99:     const PetscInt stencilWidth = 1;
100:     DMStagCreate3d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,4,5,6,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,dof0,dof1,dof2,dof3,DMSTAG_STENCIL_BOX,stencilWidth,NULL,NULL,NULL,&dmSol);
101:     DMSetFromOptions(dmSol);
102:     DMSetUp(dmSol);
103:     DMStagSetUniformCoordinatesExplicit(dmSol,0.0,1.0,0.0,1.0,0.0,1.0);
104:     /* Note: also see ex2.c, where another, more efficient option is demonstrated,
105:        using DMStagSetUniformCoordinatesProduct() */
106:   }

108:   /* Compute (manufactured) reference solution */
109:   CreateReferenceSolution(dmSol,&solRef);

111:   /* Assemble system */
112:   CreateSystem(dmSol,&A,&rhs,pinPressure);

114:   /* Attach a constant-pressure nullspace to the operator */
115:   if (!pinPressure) {
116:     AttachNullspace(dmSol,A);
117:   }

119:   /* Solve */
120:   DMCreateGlobalVector(dmSol,&sol);
121:   KSPCreate(PETSC_COMM_WORLD,&ksp);
122:   KSPSetType(ksp,KSPFGMRES);
123:   KSPSetOperators(ksp,A,A);
124:   KSPGetPC(ksp,&pc);
125:   PCSetType(pc,PCFIELDSPLIT);
126:   PCFieldSplitSetDetectSaddlePoint(pc,PETSC_TRUE);
127:   KSPSetFromOptions(ksp);
128:   KSPSolve(ksp,rhs,sol);

130:   /* Check Solution */
131:   CheckSolution(sol,solRef);

133:   /* Clean up and finalize PETSc */
134:   KSPDestroy(&ksp);
135:   VecDestroy(&sol);
136:   VecDestroy(&solRef);
137:   VecDestroy(&rhs);
138:   MatDestroy(&A);
139:   DMDestroy(&dmSol);
140:   PetscFinalize();
141:   return ierr;
142: }

144: static PetscErrorCode CreateSystem(DM dmSol,Mat *pA,Vec *pRhs, PetscBool pinPressure)
145: {
146:   PetscErrorCode    ierr;
147:   Vec               rhs,coordLocal;
148:   Mat               A;
149:   PetscInt          startx,starty,startz,N[3],nx,ny,nz,ex,ey,ez,d;
150:   PetscInt          icp[3],icux[3],icuy[3],icuz[3],icux_right[3],icuy_up[3],icuz_front[3];
151:   PetscReal         hx,hy,hz;
152:   DM                dmCoord;
153:   PetscScalar       ****arrCoord;

156:   DMCreateMatrix(dmSol,pA);
157:   A = *pA;
158:   DMCreateGlobalVector(dmSol,pRhs);
159:   rhs = *pRhs;

161:   DMStagGetCorners(dmSol,&startx,&starty,&startz,&nx,&ny,&nz,NULL,NULL,NULL);
162:   DMStagGetGlobalSizes(dmSol,&N[0],&N[1],&N[2]);
163:   if (N[0] < 2 || N[1] < 2 || N[2] < 2) SETERRQ(PetscObjectComm((PetscObject)dmSol),PETSC_ERR_ARG_SIZ,"This example requires at least two elements in each dimensions");
164:   hx = 1.0/N[0]; hy = 1.0/N[1]; hz = 1.0/N[2];
165:   DMGetCoordinateDM(dmSol,&dmCoord);
166:   DMGetCoordinatesLocal(dmSol,&coordLocal);
167:   DMStagVecGetArrayRead(dmCoord,coordLocal,&arrCoord);
168:   for (d=0; d<3; ++d) {
169:     DMStagGetLocationSlot(dmCoord,ELEMENT,d,&icp[d]);
170:     DMStagGetLocationSlot(dmCoord,LEFT,   d,&icux[d]);
171:     DMStagGetLocationSlot(dmCoord,DOWN,   d,&icuy[d]);
172:     DMStagGetLocationSlot(dmCoord,BACK,   d,&icuz[d]);
173:     DMStagGetLocationSlot(dmCoord,RIGHT,  d,&icux_right[d]);
174:     DMStagGetLocationSlot(dmCoord,UP,     d,&icuy_up[d]);
175:     DMStagGetLocationSlot(dmCoord,FRONT,  d,&icuz_front[d]);
176:   }

178:   /* Loop over all local elements. Note that it may be more efficient in real
179:      applications to loop over each boundary separately */
180:   for (ez = startz; ez<startz+nz; ++ez) { /* With DMStag, always iterate x fastest, y second fastest, z slowest */
181:     for (ey = starty; ey<starty+ny; ++ey) {
182:       for (ex = startx; ex<startx+nx; ++ex) {

184:         if (ex == N[0]-1) {
185:           /* Right Boundary velocity Dirichlet */
186:           DMStagStencil row;
187:           PetscScalar   valRhs;
188:           const PetscScalar valA = 1.0;
189:           row.i = ex; row.j = ey; row.k = ez; row.loc = RIGHT; row.c = 0;
190:           DMStagMatSetValuesStencil(dmSol,A,1,&row,1,&row,&valA,INSERT_VALUES);
191:           valRhs = uxRef(arrCoord[ez][ey][ex][icux_right[0]], arrCoord[ez][ey][ex][icux_right[1]], arrCoord[ez][ey][ex][icux_right[2]]);
192:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
193:         }
194:         if (ey == N[1]-1) {
195:           /* Top boundary velocity Dirichlet */
196:           DMStagStencil row;
197:           PetscScalar   valRhs;
198:           const PetscScalar valA = 1.0;
199:           row.i = ex; row.j = ey; row.k = ez; row.loc = UP; row.c = 0;
200:           DMStagMatSetValuesStencil(dmSol,A,1,&row,1,&row,&valA,INSERT_VALUES);
201:           valRhs = uyRef(arrCoord[ez][ey][ex][icuy_up[0]],arrCoord[ez][ey][ex][icuy_up[1]],arrCoord[ez][ey][ex][icuy_up[2]]);
202:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
203:         }
204:         if (ez == N[2]-1) {
205:           /* Front boundary velocity Dirichlet */
206:           DMStagStencil row;
207:           PetscScalar   valRhs;
208:           const PetscScalar valA = 1.0;
209:           row.i = ex; row.j = ey; row.k = ez; row.loc = FRONT; row.c = 0;
210:           DMStagMatSetValuesStencil(dmSol,A,1,&row,1,&row,&valA,INSERT_VALUES);
211:           valRhs = uzRef(arrCoord[ez][ey][ex][icuz_front[0]],arrCoord[ez][ey][ex][icuz_front[1]],arrCoord[ez][ey][ex][icuz_front[2]]);
212:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
213:         }

215:         /* Equation on left face of this element */
216:         if (ex == 0) {
217:           /* Left velocity Dirichlet */
218:           DMStagStencil row;
219:           PetscScalar   valRhs;
220:           const PetscScalar valA = 1.0;
221:           row.i = ex; row.j = ey; row.k = ez; row.loc = LEFT; row.c = 0;
222:           DMStagMatSetValuesStencil(dmSol,A,1,&row,1,&row,&valA,INSERT_VALUES);
223:           valRhs = uxRef(arrCoord[ez][ey][ex][icux[0]],arrCoord[ez][ey][ex][icux[1]],arrCoord[ez][ey][ex][icux[2]]);
224:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
225:         } else {
226:           /* X-momentum interior equation : (u_xx + u_yy + u_zz) - p_x = f^x */
227:           DMStagStencil row,col[9];
228:           PetscScalar   valA[9],valRhs;
229:           PetscInt      nEntries;

231:           row.i = ex; row.j = ey; row.k = ez; row.loc = LEFT; row.c = 0;
232:           if (ey == 0) {
233:             if (ez == 0) {
234:               nEntries = 7;
235:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -1.0 / (hy*hy) -1.0 / (hz*hz);
236:               /* Missing down term */
237:               col[1].i = ex  ; col[1].j = ey+1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
238:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
239:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
240:               /* Missing back term */
241:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
242:               col[5].i = ex-1; col[5].j = ey  ;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hx;
243:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hx;
244:             } else if (ez == N[2]-1) {
245:               nEntries = 7;
246:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -1.0 / (hy*hy) -1.0 / (hz*hz);
247:               /* Missing down term */
248:               col[1].i = ex  ; col[1].j = ey+1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
249:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
250:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
251:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
252:               /* Missing front term */
253:               col[5].i = ex-1; col[5].j = ey  ;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hx;
254:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hx;
255:             } else {
256:               nEntries = 8;
257:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -1.0 / (hy*hy) -2.0 / (hz*hz);
258:               /* Missing down term */
259:               col[1].i = ex  ; col[1].j = ey+1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
260:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
261:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
262:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
263:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = LEFT;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
264:               col[6].i = ex-1; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hx;
265:               col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hx;
266:             }
267:           } else if (ey == N[1]-1) {
268:             if (ez == 0) {
269:               nEntries = 7;
270:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
271:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
272:               /* Missing up term */
273:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
274:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
275:               /* Missing back entry */
276:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
277:               col[5].i = ex-1; col[5].j = ey  ;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hx;
278:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hx;
279:             } else if (ez == N[2]-1) {
280:               nEntries = 7;
281:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
282:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
283:               /* Missing up term */
284:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
285:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
286:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
287:               /* Missing front term */
288:               col[5].i = ex-1; col[5].j = ey  ;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hx;
289:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hx;
290:             } else {
291:               nEntries = 8;
292:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
293:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
294:               /* Missing up term */
295:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
296:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
297:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
298:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = LEFT;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
299:               col[6].i = ex-1; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hx;
300:               col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hx;
301:             }
302:           } else if (ez == 0) {
303:             nEntries = 8;
304:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
305:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
306:             col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
307:             col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
308:             col[4].i = ex+1; col[4].j = ey  ;  col[4].k = ez  ; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hx*hx);
309:             /* Missing back term */
310:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = LEFT;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
311:             col[6].i = ex-1; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hx;
312:             col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hx;
313:           } else if (ez == N[2]-1) {
314:             nEntries = 8;
315:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
316:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
317:             col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
318:             col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
319:             col[4].i = ex+1; col[4].j = ey  ;  col[4].k = ez  ; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hx*hx);
320:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = LEFT;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
321:             /* Missing front term */
322:             col[6].i = ex-1; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hx;
323:             col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hx;
324:           } else {
325:             nEntries = 9;
326:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = LEFT;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
327:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = LEFT;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
328:             col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = LEFT;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
329:             col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = LEFT;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
330:             col[4].i = ex+1; col[4].j = ey  ;  col[4].k = ez  ; col[4].loc = LEFT;    col[4].c  = 0; valA[4] =  1.0 / (hx*hx);
331:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = LEFT;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
332:             col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez+1; col[6].loc = LEFT;    col[6].c  = 0; valA[6] =  1.0 / (hz*hz);
333:             col[7].i = ex-1; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] =  1.0 / hx;
334:             col[8].i = ex  ; col[8].j = ey  ;  col[8].k = ez  ; col[8].loc = ELEMENT; col[8].c  = 0; valA[8] = -1.0 / hx;
335:           }
336:           DMStagMatSetValuesStencil(dmSol,A,1,&row,nEntries,col,valA,INSERT_VALUES);
337:           valRhs = fx(arrCoord[ez][ey][ex][icux[0]], arrCoord[ez][ey][ex][icux[1]], arrCoord[ez][ey][ex][icux[2]]);
338:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
339:         }

341:         /* Equation on bottom face of this element */
342:         if (ey == 0) {
343:           /* Bottom boundary velocity Dirichlet */
344:           DMStagStencil row;
345:           PetscScalar   valRhs;
346:           const PetscScalar valA = 1.0;
347:           row.i = ex; row.j = ey; row.k = ez; row.loc = DOWN; row.c = 0;
348:           DMStagMatSetValuesStencil(dmSol,A,1,&row,1,&row,&valA,INSERT_VALUES);
349:           valRhs = uyRef(arrCoord[ez][ey][ex][icuy[0]],arrCoord[ez][ey][ex][icuy[1]],arrCoord[ez][ey][ex][icuy[2]]);
350:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
351:         } else {
352:           /* Y-momentum equation, (v_xx + v_yy + v_zz) - p_y = f^y */
353:           DMStagStencil row,col[9];
354:           PetscScalar   valA[9],valRhs;
355:           PetscInt      nEntries;

357:           row.i = ex; row.j = ey; row.k = ez; row.loc = DOWN; row.c = 0;
358:           if (ex ==0) {
359:             if (ez == 0) {
360:               nEntries = 7;
361:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
362:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
363:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
364:               /* Left term missing */
365:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
366:               /* Back term missing */
367:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
368:               col[5].i = ex  ; col[5].j = ey-1;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hy;
369:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hy;
370:             } else if (ez == N[2]-1) {
371:               nEntries = 7;
372:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
373:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
374:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
375:               /* Left term missing */
376:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
377:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
378:               /* Front term missing */
379:               col[5].i = ex  ; col[5].j = ey-1;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hy;
380:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hy;
381:             } else {
382:               nEntries = 8;
383:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
384:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
385:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
386:               /* Left term missing */
387:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
388:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
389:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = DOWN;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
390:               col[6].i = ex  ; col[6].j = ey-1;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hy;
391:               col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hy;
392:             }
393:           } else if (ex == N[0]-1) {
394:             if (ez == 0) {
395:               nEntries = 7;
396:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
397:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
398:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
399:               col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
400:               /* Right term missing */
401:               /* Back term missing */
402:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
403:               col[5].i = ex  ; col[5].j = ey-1;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hy;
404:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hy;
405:             } else if (ez == N[2]-1) {
406:               nEntries = 7;
407:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
408:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
409:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
410:               col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
411:               /* Right term missing */
412:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
413:               /* Front term missing */
414:               col[5].i = ex  ; col[5].j = ey-1;  col[5].k = ez  ; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hy;
415:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hy;
416:             } else {
417:               nEntries = 8;
418:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
419:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
420:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
421:               col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
422:               /* Right term missing */
423:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
424:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = DOWN;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
425:               col[6].i = ex  ; col[6].j = ey-1;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hy;
426:               col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hy;
427:             }
428:           } else if (ez == 0) {
429:             nEntries = 8;
430:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
431:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
432:             col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
433:             col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
434:             col[4].i = ex+1; col[4].j = ey  ;  col[4].k = ez  ; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hx*hx);
435:             /* Back term missing */
436:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = DOWN;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
437:             col[6].i = ex  ; col[6].j = ey-1;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hy;
438:             col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hy;
439:           } else if (ez == N[2]-1) {
440:             nEntries = 8;
441:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -1.0 / (hz*hz);
442:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
443:             col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
444:             col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
445:             col[4].i = ex+1; col[4].j = ey  ;  col[4].k = ez  ; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hx*hx);
446:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = DOWN;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
447:             /* Front term missing */
448:             col[6].i = ex  ; col[6].j = ey-1;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hy;
449:             col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hy;
450:           } else {
451:             nEntries = 9;
452:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = DOWN;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
453:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = DOWN;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
454:             col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = DOWN;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
455:             col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = DOWN;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
456:             col[4].i = ex+1; col[4].j = ey  ;  col[4].k = ez  ; col[4].loc = DOWN;    col[4].c  = 0; valA[4] =  1.0 / (hx*hx);
457:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = DOWN;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
458:             col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez+1; col[6].loc = DOWN;    col[6].c  = 0; valA[6] =  1.0 / (hz*hz);
459:             col[7].i = ex  ; col[7].j = ey-1;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] =  1.0 / hy;
460:             col[8].i = ex  ; col[8].j = ey  ;  col[8].k = ez  ; col[8].loc = ELEMENT; col[8].c  = 0; valA[8] = -1.0 / hy;
461:           }
462:           DMStagMatSetValuesStencil(dmSol,A,1,&row,nEntries,col,valA,INSERT_VALUES);
463:           valRhs = fy(arrCoord[ez][ey][ex][icuy[0]],arrCoord[ez][ey][ex][icuy[1]],arrCoord[ez][ey][ex][icuy[2]]);
464:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
465:         }

467:         /* Equation on back face of this element */
468:         if (ez == 0) {
469:           /* Back boundary velocity Dirichlet */
470:           DMStagStencil row;
471:           PetscScalar   valRhs;
472:           const PetscScalar valA = 1.0;
473:           row.i = ex; row.j = ey; row.k = ez; row.loc = BACK; row.c = 0;
474:           DMStagMatSetValuesStencil(dmSol,A,1,&row,1,&row,&valA,INSERT_VALUES);
475:           valRhs = uzRef(arrCoord[ez][ey][ex][icuz[0]],arrCoord[ez][ey][ex][icuz[1]],arrCoord[ez][ey][ex][icuz[2]]);
476:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
477:         } else {
478:           /* Z-momentum equation, (w_xx + w_yy + w_zz) - p_z = f^z */
479:           DMStagStencil row,col[9];
480:           PetscScalar   valA[9],valRhs;
481:           PetscInt      nEntries;

483:           row.i = ex; row.j = ey; row.k = ez; row.loc = BACK; row.c = 0;
484:           if (ex == 0) {
485:             if (ey == 0) {
486:               nEntries = 7;
487:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -1.0 / (hy*hy) -2.0 / (hz*hz);
488:               /* Down term missing */
489:               col[1].i = ex  ; col[1].j = ey+1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
490:               /* Left term missing */
491:               col[2].i = ex+1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
492:               col[3].i = ex  ; col[3].j = ey  ;  col[3].k = ez-1; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hz*hz);
493:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
494:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hz;
495:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hz;
496:             } else if (ey == N[1]-1) {
497:               nEntries = 7;
498:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -1.0 / (hy*hy) -2.0 / (hz*hz);
499:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
500:               /* Up term missing */
501:               /* Left term missing */
502:               col[2].i = ex+1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
503:               col[3].i = ex  ; col[3].j = ey  ;  col[3].k = ez-1; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hz*hz);
504:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
505:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hz;
506:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hz;
507:             } else {
508:               nEntries = 8;
509:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
510:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
511:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
512:               /* Left term missing */
513:               col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
514:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
515:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = BACK;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
516:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez-1; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hz;
517:               col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hz;
518:             }
519:           } else if (ex == N[0]-1) {
520:             if (ey == 0) {
521:               nEntries = 7;
522:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -1.0 / (hy*hy) -2.0 / (hz*hz);
523:               /* Down term missing */
524:               col[1].i = ex  ; col[1].j = ey+1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
525:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
526:               /* Right term missing */
527:               col[3].i = ex  ; col[3].j = ey  ;  col[3].k = ez-1; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hz*hz);
528:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
529:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hz;
530:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hz;
531:             } else if (ey == N[1]-1) {
532:               nEntries = 7;
533:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -1.0 / (hy*hy) -2.0 / (hz*hz);
534:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
535:               /* Up term missing */
536:               col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
537:               /* Right term missing */
538:               col[3].i = ex  ; col[3].j = ey  ;  col[3].k = ez-1; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hz*hz);
539:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez+1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
540:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = ELEMENT; col[5].c  = 0; valA[5] =  1.0 / hz;
541:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez  ; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] = -1.0 / hz;
542:             } else {
543:               nEntries = 8;
544:               col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -1.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
545:               col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
546:               col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
547:               col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
548:               /* Right term missing */
549:               col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
550:               col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = BACK;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
551:               col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez-1; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hz;
552:               col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hz;
553:             }
554:           } else if (ey == 0) {
555:             nEntries = 8;
556:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -1.0 / (hy*hy) -2.0 / (hz*hz);
557:             /* Down term missing */
558:             col[1].i = ex  ; col[1].j = ey+1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
559:             col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
560:             col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
561:             col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
562:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = BACK;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
563:             col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez-1; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hz;
564:             col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hz;
565:           } else if (ey == N[1]-1) {
566:             nEntries = 8;
567:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -1.0 / (hy*hy) -2.0 / (hz*hz);
568:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
569:             /* Up term missing */
570:             col[2].i = ex-1; col[2].j = ey  ;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hx*hx);
571:             col[3].i = ex+1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
572:             col[4].i = ex  ; col[4].j = ey  ;  col[4].k = ez-1; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hz*hz);
573:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez+1; col[5].loc = BACK;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
574:             col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez-1; col[6].loc = ELEMENT; col[6].c  = 0; valA[6] =  1.0 / hz;
575:             col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez  ; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] = -1.0 / hz;
576:           } else {
577:             nEntries = 9;
578:             col[0].i = ex  ; col[0].j = ey  ;  col[0].k = ez  ; col[0].loc = BACK;    col[0].c  = 0; valA[0] = -2.0 / (hx*hx) + -2.0 / (hy*hy) -2.0 / (hz*hz);
579:             col[1].i = ex  ; col[1].j = ey-1;  col[1].k = ez  ; col[1].loc = BACK;    col[1].c  = 0; valA[1] =  1.0 / (hy*hy);
580:             col[2].i = ex  ; col[2].j = ey+1;  col[2].k = ez  ; col[2].loc = BACK;    col[2].c  = 0; valA[2] =  1.0 / (hy*hy);
581:             col[3].i = ex-1; col[3].j = ey  ;  col[3].k = ez  ; col[3].loc = BACK;    col[3].c  = 0; valA[3] =  1.0 / (hx*hx);
582:             col[4].i = ex+1; col[4].j = ey  ;  col[4].k = ez  ; col[4].loc = BACK;    col[4].c  = 0; valA[4] =  1.0 / (hx*hx);
583:             col[5].i = ex  ; col[5].j = ey  ;  col[5].k = ez-1; col[5].loc = BACK;    col[5].c  = 0; valA[5] =  1.0 / (hz*hz);
584:             col[6].i = ex  ; col[6].j = ey  ;  col[6].k = ez+1; col[6].loc = BACK;    col[6].c  = 0; valA[6] =  1.0 / (hz*hz);
585:             col[7].i = ex  ; col[7].j = ey  ;  col[7].k = ez-1; col[7].loc = ELEMENT; col[7].c  = 0; valA[7] =  1.0 / hz;
586:             col[8].i = ex  ; col[8].j = ey  ;  col[8].k = ez  ; col[8].loc = ELEMENT; col[8].c  = 0; valA[8] = -1.0 / hz;
587:           }
588:           DMStagMatSetValuesStencil(dmSol,A,1,&row,nEntries,col,valA,INSERT_VALUES);
589:           valRhs = fz(arrCoord[ez][ey][ex][icuz[0]],arrCoord[ez][ey][ex][icuz[1]],arrCoord[ez][ey][ex][icuz[2]]);
590:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
591:         }

593:         /* P equation : u_x + v_y + w_z = g
594:            Note that this includes an explicit zero on the diagonal. This is only needed for
595:            direct solvers (not required if using an iterative solver and setting the constant-pressure nullspace) */
596:         if (pinPressure && ex == 0 && ey == 0 && ez == 0) { /* Pin the first pressure node, if requested */
597:           DMStagStencil row;
598:           PetscScalar valA,valRhs;
599:           row.i = ex; row.j = ey; row.k = ez; row.loc  = ELEMENT; row.c = 0;
600:           valA = 1.0;
601:           DMStagMatSetValuesStencil(dmSol,A,1,&row,1,&row,&valA,INSERT_VALUES);
602:           valRhs = pRef(arrCoord[ez][ey][ex][icp[0]],arrCoord[ez][ey][ex][icp[1]],arrCoord[ez][ey][ex][icp[2]]);
603:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
604:         } else {
605:           DMStagStencil row,col[7];
606:           PetscScalar   valA[7],valRhs;

608:           row.i    = ex; row.j    = ey; row.k    = ez; row.loc    = ELEMENT; row.c    = 0;
609:           col[0].i = ex; col[0].j = ey; col[0].k = ez; col[0].loc = LEFT;    col[0].c = 0; valA[0] = -1.0 / hx;
610:           col[1].i = ex; col[1].j = ey; col[1].k = ez; col[1].loc = RIGHT;   col[1].c = 0; valA[1] =  1.0 / hx;
611:           col[2].i = ex; col[2].j = ey; col[2].k = ez; col[2].loc = DOWN;    col[2].c = 0; valA[2] = -1.0 / hy;
612:           col[3].i = ex; col[3].j = ey; col[3].k = ez; col[3].loc = UP;      col[3].c = 0; valA[3] =  1.0 / hy;
613:           col[4].i = ex; col[4].j = ey; col[4].k = ez; col[4].loc = BACK;    col[4].c = 0; valA[4] = -1.0 / hz;
614:           col[5].i = ex; col[5].j = ey; col[5].k = ez; col[5].loc = FRONT;   col[5].c = 0; valA[5] =  1.0 / hz;
615:           col[6]   = row;                                                                  valA[6] =  0.0;
616:           DMStagMatSetValuesStencil(dmSol,A,1,&row,7,col,valA,INSERT_VALUES);
617:           valRhs = g(arrCoord[ez][ey][ex][icp[0]],arrCoord[ez][ey][ex][icp[1]],arrCoord[ez][ey][ex][icp[2]]);
618:           DMStagVecSetValuesStencil(dmSol,rhs,1,&row,&valRhs,INSERT_VALUES);
619:         }
620:       }
621:     }
622:   }
623:   DMStagVecRestoreArrayRead(dmCoord,coordLocal,&arrCoord);
624:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
625:   VecAssemblyBegin(rhs);
626:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
627:   VecAssemblyEnd(rhs);

629:   return(0);
630: }

632: /* Create a pressure-only DMStag and use it to generate a nullspace vector
633:    - Create a compatible DMStag with one dof per element (and nothing else).
634:    - Create a constant vector and normalize it
635:    - Migrate it to a vector on the original dmSol (making use of the fact
636:    that this will fill in zeros for "extra" dof)
637:    - Set the nullspace for the operator
638:    - Destroy everything (the operator keeps the references it needs) */
639: static PetscErrorCode AttachNullspace(DM dmSol,Mat A)
640: {
642:   DM             dmPressure;
643:   Vec            constantPressure,basis;
644:   PetscReal      nrm;
645:   MatNullSpace   matNullSpace;

648:   DMStagCreateCompatibleDMStag(dmSol,0,0,0,1,&dmPressure);
649:   DMGetGlobalVector(dmPressure,&constantPressure);
650:   VecSet(constantPressure,1.0);
651:   VecNorm(constantPressure,NORM_2,&nrm);
652:   VecScale(constantPressure,1.0/nrm);
653:   DMCreateGlobalVector(dmSol,&basis);
654:   DMStagMigrateVec(dmPressure,constantPressure,dmSol,basis);
655:   MatNullSpaceCreate(PetscObjectComm((PetscObject)dmSol),PETSC_FALSE,1,&basis,&matNullSpace);
656:   VecDestroy(&basis);
657:   VecDestroy(&constantPressure);
658:   MatSetNullSpace(A,matNullSpace);
659:   MatNullSpaceDestroy(&matNullSpace);
660:   return(0);
661: }

663: static PetscErrorCode CreateReferenceSolution(DM dmSol,Vec *pSolRef)
664: {
665:   PetscErrorCode    ierr;
666:   PetscInt          start[3],n[3],nExtra[3],ex,ey,ez,d;
667:   PetscInt          ip,iux,iuy,iuz,icp[3],icux[3],icuy[3],icuz[3];
668:   Vec               solRef,solRefLocal,coord,coordLocal;
669:   DM                dmCoord;
670:   PetscScalar       ****arrSol,****arrCoord;

673:   DMCreateGlobalVector(dmSol,pSolRef);
674:   solRef = *pSolRef;
675:   DMStagGetCorners(dmSol,&start[0],&start[1],&start[2],&n[0],&n[1],&n[2],&nExtra[0],&nExtra[1],&nExtra[2]);
676:   DMGetCoordinateDM(dmSol,&dmCoord);
677:   DMGetCoordinates(dmSol,&coord);
678:   DMGetLocalVector(dmCoord,&coordLocal);
679:   DMGlobalToLocal(dmCoord,coord,INSERT_VALUES,coordLocal);
680:   DMStagGetLocationSlot(dmSol,ELEMENT,0,&ip);
681:   DMStagGetLocationSlot(dmSol,LEFT,   0,&iux);
682:   DMStagGetLocationSlot(dmSol,DOWN,   0,&iuy);
683:   DMStagGetLocationSlot(dmSol,BACK,   0,&iuz);
684:   for (d=0; d<3; ++d) {
685:     DMStagGetLocationSlot(dmCoord,ELEMENT,d,&icp[d]);
686:     DMStagGetLocationSlot(dmCoord,LEFT,   d,&icux[d]);
687:     DMStagGetLocationSlot(dmCoord,DOWN,   d,&icuy[d]);
688:     DMStagGetLocationSlot(dmCoord,BACK,   d,&icuz[d]);
689:   }
690:   DMStagVecGetArrayRead(dmCoord,coordLocal,&arrCoord);
691:   DMGetLocalVector(dmSol,&solRefLocal);
692:   DMStagVecGetArray(dmSol,solRefLocal,&arrSol);
693:   for (ez=start[2]; ez<start[2] + n[2] + nExtra[2]; ++ez) {
694:     for (ey=start[1]; ey<start[1] + n[1] + nExtra[1]; ++ey) {
695:       for (ex=start[0]; ex<start[0] + n[0] + nExtra[0]; ++ex) {
696:         if (ex < start[0] + n[0] && ey < start[1] + n[1]) {
697:           arrSol[ez][ey][ex][iuz] = uzRef(
698:               arrCoord[ez][ey][ex][icuz[0]],
699:               arrCoord[ez][ey][ex][icuz[1]],
700:               arrCoord[ez][ey][ex][icuz[2]]);
701:         }
702:         if (ex < start[0] + n[0] && ey < start[2] + n[2]) {
703:           arrSol[ez][ey][ex][iuy] = uyRef(
704:               arrCoord[ez][ey][ex][icuy[0]],
705:               arrCoord[ez][ey][ex][icuy[1]],
706:               arrCoord[ez][ey][ex][icuy[2]]);
707:         }
708:         if (ex < start[1] + n[1] && ey < start[2] + n[2]) {
709:           arrSol[ez][ey][ex][iux] = uxRef(
710:               arrCoord[ez][ey][ex][icux[0]],
711:               arrCoord[ez][ey][ex][icux[1]],
712:               arrCoord[ez][ey][ex][icux[2]]);
713:         }
714:         if (ex < start[0] + n[0] && ey < start[1] + n[1] && ez < start[2] + n[2]) {
715:           arrSol[ez][ey][ex][ip]  = pRef(
716:               arrCoord[ez][ey][ex][icp[0]],
717:               arrCoord[ez][ey][ex][icp[1]],
718:               arrCoord[ez][ey][ex][icp[2]]);
719:         }
720:       }
721:     }
722:   }
723:   DMStagVecRestoreArrayRead(dmCoord,coordLocal,&arrCoord);
724:   DMStagVecRestoreArray(dmSol,solRefLocal,&arrSol);
725:   DMLocalToGlobal(dmSol,solRefLocal,INSERT_VALUES,solRef);
726:   DMRestoreLocalVector(dmCoord,&coordLocal);
727:   DMRestoreLocalVector(dmSol,&solRefLocal);
728:   return(0);
729: }

731: static PetscErrorCode CheckSolution(Vec sol,Vec solRef)
732: {
734:   Vec            diff;
735:   PetscReal      normsolRef,errAbs,errRel;

738:   VecDuplicate(sol,&diff);
739:   VecCopy(sol,diff);
740:   VecAXPY(diff,-1.0,solRef);
741:   VecNorm(diff,NORM_2,&errAbs);
742:   VecNorm(solRef,NORM_2,&normsolRef);
743:   errRel = errAbs/normsolRef;
744:   PetscPrintf(PETSC_COMM_WORLD,"Error (abs): %g\nError (rel): %g\n",(double)errAbs,(double)errRel);
745:   VecDestroy(&diff);
746:   return(0);
747: }

749: /*TEST

751:    test:
752:       suffix: 1
753:       requires: mumps
754:       nsize: 27
755:       args: -ksp_monitor_short -ksp_converged_reason -stag_ranks_x 3 -stag_ranks_y 3 -stag_ranks_z 3 -pc_fieldsplit_schur_fact_type diag -fieldsplit_0_ksp_type preonly -fieldsplit_1_pc_type none -fieldsplit_0_pc_type lu -fieldsplit_1_ksp_type gmres -fieldsplit_1_ksp_max_it 20

757:    test:
758:       suffix: 2
759:       requires: !single
760:       nsize: 4
761:       args: -ksp_monitor_short -ksp_converged_reason -pc_fieldsplit_schur_fact_type diag -fieldsplit_0_ksp_type preonly -fieldsplit_1_pc_type none -fieldsplit_0_pc_type gamg -fieldsplit_0_mg_levels_ksp_max_it 3 -fieldsplit_1_ksp_type gmres -fieldsplit_1_ksp_max_it 20 -fieldsplit_0_pc_gamg_esteig_ksp_type gmres

763:    test:
764:       suffix: direct_umfpack
765:       requires: suitesparse
766:       nsize: 1
767:       args: -pinpressure 1 -stag_grid_x 5 -stag_grid_y 3 -stag_grid_z 4 -ksp_monitor_short -pc_type lu -pc_factor_mat_solver_type umfpack

769: TEST*/