Actual source code: ex62.c

petsc-3.3-p7 2013-05-11
  1: static char help[] = "Stokes Problem in 2d and 3d with simplicial finite elements.\n\
2: We solve the Stokes problem in a rectangular\n\
3: domain, using a parallel unstructured mesh (DMCOMPLEX) to discretize it.\n\n\n";

5: /*
6: The isoviscous Stokes problem, which we discretize using the finite
7: element method on an unstructured mesh. The weak form equations are

9:   < \nabla v, \nabla u + {\nabla u}^T > - < \nabla\cdot v, p > + < v, f > = 0
10:   < q, \nabla\cdot v >                                                    = 0

12: We start with homogeneous Dirichlet conditions. We will expand this as the set
13: of test problems is developed.

15: Discretization:

17: We use a Python script to generate a tabulation of the finite element basis
18: functions at quadrature points, which we put in a C header file. The generic
19: command would be:

21:     bin/pythonscripts/PetscGenerateFEMQuadrature.py dim order dim 1 laplacian dim order 1 1 gradient src/snes/examples/tutorials/ex62.h

23: We can currently generate an arbitrary order Lagrange element. The underlying
24: FIAT code is capable of handling more exotic elements, but these have not been
25: tested with this code.

27: Field Data:

29:   Sieve data is organized by point, and the closure operation just stacks up the
30: data from each sieve point in the closure. Thus, for a P_2-P_1 Stokes element, we
31: have

33:   cl{e} = {f e_0 e_1 e_2 v_0 v_1 v_2}
34:   x     = [u_{e_0} v_{e_0} u_{e_1} v_{e_1} u_{e_2} v_{e_2} u_{v_0} v_{v_0} p_{v_0} u_{v_1} v_{v_1} p_{v_1} u_{v_2} v_{v_2} p_{v_2}]

36: The problem here is that we would like to loop over each field separately for
37: integration. Therefore, the closure visitor in DMComplexVecGetClosure() reorders
38: the data so that each field is contiguous

40:   x'    = [u_{e_0} v_{e_0} u_{e_1} v_{e_1} u_{e_2} v_{e_2} u_{v_0} v_{v_0} u_{v_1} v_{v_1} u_{v_2} v_{v_2} p_{v_0} p_{v_1} p_{v_2}]

42: Likewise, DMComplexVecSetClosure() takes data partitioned by field, and correctly
43: puts it into the Sieve ordering.

45: Next Steps:

47: - Refine and show convergence of correct order automatically (use femTest.py)
48: - Fix InitialGuess for arbitrary disc (means making dual application work again)
49: - Redo slides from GUCASTutorial for this new example
50: - Sparsify Jacobian
51:   - How do we get sparsity? I think by chopping up elemMat into blocks, and setting individual blocks
52:   - Maybe we just have MatSetClosure() handle this by ignoring blocks which do not interact

54: Possible new examples:

56: - Hexahedral Stokes example
57:   - Improve DMDA conversion to get edges
58: - A Finite Volume problem
59: - Make new SNES F90 example that solves two-domain Laplace with different coefficient, reads from Exodus file
60: */

62: #include <petscdmcomplex.h>
63: #include <petscsnes.h>

65: /*------------------------------------------------------------------------------
66:   This code can be generated using 'bin/pythonscripts/PetscGenerateFEMQuadrature.py dim order dim 1 laplacian dim order 1 1 gradient src/snes/examples/tutorials/ex62.h'
67:  -----------------------------------------------------------------------------*/
68: #include "ex62.h"

70: const PetscInt numFields     = 2;
71: const PetscInt numComponents = NUM_BASIS_COMPONENTS_0+NUM_BASIS_COMPONENTS_1;

73: typedef enum {NEUMANN, DIRICHLET} BCType;
74: typedef enum {RUN_FULL, RUN_TEST} RunType;

76: typedef struct {
77:   DM            dm;                /* REQUIRED in order to use SNES evaluation functions */
78:   PetscInt      debug;             /* The debugging level */
79:   PetscMPIInt   rank;              /* The process rank */
80:   PetscMPIInt   numProcs;          /* The number of processes */
81:   RunType       runType;           /* Whether to run tests, or solve the full problem */
82:   PetscBool     jacobianMF;        /* Whether to calculate the Jacobian action on the fly */
83:   PetscLogEvent createMeshEvent, residualEvent, jacobianEvent, integrateResCPUEvent, integrateJacCPUEvent, integrateJacActionCPUEvent;
84:   PetscBool     showInitial, showResidual, showJacobian, showSolution;
85:   /* Domain and mesh definition */
86:   PetscInt      dim;               /* The topological mesh dimension */
87:   PetscBool     interpolate;       /* Generate intermediate mesh elements */
88:   PetscReal     refinementLimit;   /* The largest allowable cell volume */
89:   char          partitioner[2048]; /* The graph partitioner */
91:   PetscQuadrature q[/*numFields*/2]; /* C89 Sucks Sucks Sucks Sucks */
92:   /* GPU partitioning */
93:   PetscInt      numBatches;        /* The number of cell batches per kernel */
94:   PetscInt      numBlocks;         /* The number of concurrent blocks per kernel */
95:   /* Problem definition */
96:   /* C89 Sucks Sucks Sucks Sucks: numFields = 2, numComponents = NUM_BASIS_COMPONENTS_0+NUM_BASIS_COMPONENTS_1 */
97:   void        (*f0Funcs[2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]); /* The f_0 functions f0_u(x,y,z), and f0_p(x,y,z) */
98:   void        (*f1Funcs[2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]); /* The f_1 functions f1_u(x,y,z), and f1_p(x,y,z) */
99:   void        (*g0Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]); /* The g_0 functions g0_uu(x,y,z), g0_up(x,y,z), g0_pu(x,y,z), and g0_pp(x,y,z) */
100:   void        (*g1Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]); /* The g_1 functions g1_uu(x,y,z), g1_up(x,y,z), g1_pu(x,y,z), and g1_pp(x,y,z) */
101:   void        (*g2Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g2[]); /* The g_2 functions g2_uu(x,y,z), g2_up(x,y,z), g2_pu(x,y,z), and g2_pp(x,y,z) */
102:   void        (*g3Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g3[]); /* The g_3 functions g3_uu(x,y,z), g3_up(x,y,z), g3_pu(x,y,z), and g3_pp(x,y,z) */
103:   PetscScalar (*exactFuncs[NUM_BASIS_COMPONENTS_0+NUM_BASIS_COMPONENTS_1])(const PetscReal x[]); /* The exact solution function u(x,y,z), v(x,y,z), and p(x,y,z) */
104:   BCType        bcType;            /* The type of boundary conditions */
105: } AppCtx;

107: typedef struct {
108:   DM      dm;
109:   Vec     u; /* The base vector for the Jacbobian action J(u) x */
110:   Mat     J; /* Preconditioner for testing */
111:   AppCtx *user;
112: } JacActionCtx;

114: PetscScalar zero(const PetscReal coords[]) {
115:   return 0.0;
116: }

118: /*
119:   In 2D we use exact solution:

121:     u = x^2 + y^2
122:     v = 2 x^2 - 2xy
123:     p = x + y - 1
124:     f_x = f_y = 3

126:   so that

128:     -\Delta u + \nabla p + f = <-4, -4> + <1, 1> + <3, 3> = 0
129:     \nabla \cdot u           = 2x - 2x                    = 0
130: */
131: PetscScalar quadratic_u_2d(const PetscReal x[]) {
132:   return x[0]*x[0] + x[1]*x[1];
133: };

135: PetscScalar quadratic_v_2d(const PetscReal x[]) {
136:   return 2.0*x[0]*x[0] - 2.0*x[0]*x[1];
137: };

139: PetscScalar linear_p_2d(const PetscReal x[]) {
140:   return x[0] + x[1] - 1.0;
141: };

143: void f0_u(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) {
144:   const PetscInt Ncomp = NUM_BASIS_COMPONENTS_0;
145:   PetscInt       comp;

147:   for(comp = 0; comp < Ncomp; ++comp) {
148:     f0[comp] = 3.0;
149:   }
150: }

152: /* gradU[comp*dim+d] = {u_x, u_y, v_x, v_y} or {u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z}
153:    u[Ncomp]          = {p} */
154: void f1_u(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) {
155:   const PetscInt dim   = SPATIAL_DIM_0;
156:   const PetscInt Ncomp = NUM_BASIS_COMPONENTS_0;
157:   PetscInt       comp, d;

159:   for(comp = 0; comp < Ncomp; ++comp) {
160:     for(d = 0; d < dim; ++d) {
163:     }
164:     f1[comp*dim+comp] -= u[Ncomp];
165:   }
166: }

168: /* gradU[comp*dim+d] = {u_x, u_y, v_x, v_y} or {u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z} */
169: void f0_p(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) {
170:   const PetscInt dim = SPATIAL_DIM_0;
171:   PetscInt       d;

173:   f0[0] = 0.0;
174:   for(d = 0; d < dim; ++d) {
176:   }
177: }

179: void f1_p(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) {
180:   const PetscInt dim = SPATIAL_DIM_0;
181:   PetscInt       d;

183:   for(d = 0; d < dim; ++d) {
184:     f1[d] = 0.0;
185:   }
186: }

188: /* < q, \nabla\cdot v >
189:    NcompI = 1, NcompJ = dim */
190: void g1_pu(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]) {
191:   const PetscInt dim = SPATIAL_DIM_0;
192:   PetscInt       d;

194:   for(d = 0; d < dim; ++d) {
195:     g1[d*dim+d] = 1.0; /* \frac{\partial\phi^{u_d}}{\partial x_d} */
196:   }
197: }

199: /* -< \nabla\cdot v, p >
200:     NcompI = dim, NcompJ = 1 */
201: void g2_up(PetscScalar u[], const PetscScalar gradU[], PetscScalar g2[]) {
202:   const PetscInt dim = SPATIAL_DIM_0;
203:   PetscInt       d;

205:   for(d = 0; d < dim; ++d) {
206:     g2[d*dim+d] = -1.0; /* \frac{\partial\psi^{u_d}}{\partial x_d} */
207:   }
208: }

210: /* < \nabla v, \nabla u + {\nabla u}^T >
211:    This just gives \nabla u, give the perdiagonal for the transpose */
212: void g3_uu(PetscScalar u[], const PetscScalar gradU[], PetscScalar g3[]) {
213:   const PetscInt dim   = SPATIAL_DIM_0;
214:   const PetscInt Ncomp = NUM_BASIS_COMPONENTS_0;
215:   PetscInt       compI, d;

217:   for(compI = 0; compI < Ncomp; ++compI) {
218:     for(d = 0; d < dim; ++d) {
219:       g3[((compI*Ncomp+compI)*dim+d)*dim+d] = 1.0;
220:     }
221:   }
222: }

224: /*
225:   In 3D we use exact solution:

227:     u = x^2 + y^2
228:     v = y^2 + z^2
229:     w = x^2 + y^2 - 2(x+y)z
230:     p = x + y + z - 3/2
231:     f_x = f_y = f_z = 3

233:   so that

235:     -\Delta u + \nabla p + f = <-4, -4, -4> + <1, 1, 1> + <3, 3, 3> = 0
236:     \nabla \cdot u           = 2x + 2y - 2(x + y)                   = 0
237: */
238: PetscScalar quadratic_u_3d(const PetscReal x[]) {
239:   return x[0]*x[0] + x[1]*x[1];
240: };

242: PetscScalar quadratic_v_3d(const PetscReal x[]) {
243:   return x[1]*x[1] + x[2]*x[2];
244: };

246: PetscScalar quadratic_w_3d(const PetscReal x[]) {
247:   return x[0]*x[0] + x[1]*x[1] - 2.0*(x[0] + x[1])*x[2];
248: };

250: PetscScalar linear_p_3d(const PetscReal x[]) {
251:   return x[0] + x[1] + x[2] - 1.5;
252: };

256: PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) {
257:   const char    *bcTypes[2]  = {"neumann", "dirichlet"};
258:   const char    *runTypes[2] = {"full", "test"};
259:   PetscInt       bc, run;

263:   options->debug           = 0;
264:   options->runType         = RUN_FULL;
265:   options->dim             = 2;
266:   options->interpolate     = PETSC_FALSE;
267:   options->refinementLimit = 0.0;
268:   options->bcType          = DIRICHLET;
269:   options->numBatches      = 1;
270:   options->numBlocks       = 1;
271:   options->jacobianMF      = PETSC_FALSE;
272:   options->showResidual    = PETSC_FALSE;
273:   options->showResidual    = PETSC_FALSE;
274:   options->showJacobian    = PETSC_FALSE;
275:   options->showSolution    = PETSC_TRUE;

277:   MPI_Comm_size(comm, &options->numProcs);
278:   MPI_Comm_rank(comm, &options->rank);
279:   PetscOptionsBegin(comm, "", "Stokes Problem Options", "DMCOMPLEX");
280:   PetscOptionsInt("-debug", "The debugging level", "ex62.c", options->debug, &options->debug, PETSC_NULL);
281:   run = options->runType;
282:   PetscOptionsEList("-run_type", "The run type", "ex62.c", runTypes, 2, runTypes[options->runType], &run, PETSC_NULL);
283:   options->runType = (RunType) run;
284:   PetscOptionsInt("-dim", "The topological mesh dimension", "ex62.c", options->dim, &options->dim, PETSC_NULL);
285:   PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex62.c", options->interpolate, &options->interpolate, PETSC_NULL);
286:   PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex62.c", options->refinementLimit, &options->refinementLimit, PETSC_NULL);
287:   PetscStrcpy(options->partitioner, "chaco");
288:   PetscOptionsString("-partitioner", "The graph partitioner", "pflotran.cxx", options->partitioner, options->partitioner, 2048, PETSC_NULL);
289:   bc = options->bcType;
290:   PetscOptionsEList("-bc_type","Type of boundary condition","ex62.c",bcTypes,2,bcTypes[options->bcType],&bc,PETSC_NULL);
291:   options->bcType = (BCType) bc;
292:   PetscOptionsInt("-gpu_batches", "The number of cell batches per kernel", "ex62.c", options->numBatches, &options->numBatches, PETSC_NULL);
293:   PetscOptionsInt("-gpu_blocks", "The number of concurrent blocks per kernel", "ex62.c", options->numBlocks, &options->numBlocks, PETSC_NULL);
294:   PetscOptionsBool("-jacobian_mf", "Calculate the action of the Jacobian on the fly", "ex62.c", options->jacobianMF, &options->jacobianMF, PETSC_NULL);
295:   PetscOptionsBool("-show_initial", "Output the initial guess for verification", "ex62.c", options->showInitial, &options->showInitial, PETSC_NULL);
296:   PetscOptionsBool("-show_residual", "Output the residual for verification", "ex62.c", options->showResidual, &options->showResidual, PETSC_NULL);
297:   PetscOptionsBool("-show_jacobian", "Output the Jacobian for verification", "ex62.c", options->showJacobian, &options->showJacobian, PETSC_NULL);
298:   PetscOptionsBool("-show_solution", "Output the solution for verification", "ex62.c", options->showSolution, &options->showSolution, PETSC_NULL);
299:   PetscOptionsEnd();

301:   PetscLogEventRegister("CreateMesh",          DM_CLASSID,   &options->createMeshEvent);
302:   PetscLogEventRegister("Residual",            SNES_CLASSID, &options->residualEvent);
303:   PetscLogEventRegister("IntegResBatchCPU",    SNES_CLASSID, &options->integrateResCPUEvent);
304:   PetscLogEventRegister("IntegJacBatchCPU",    SNES_CLASSID, &options->integrateJacCPUEvent);
305:   PetscLogEventRegister("IntegJacActBatchCPU", SNES_CLASSID, &options->integrateJacActionCPUEvent);
306:   PetscLogEventRegister("Jacobian",            SNES_CLASSID, &options->jacobianEvent);
307:   return(0);
308: };

312: PetscErrorCode VecChop(Vec v, PetscReal tol)
313: {
314:   PetscScalar   *a;
315:   PetscInt       n, i;

319:   VecGetLocalSize(v, &n);
320:   VecGetArray(v, &a);
321:   for(i = 0; i < n; ++i) {
322:     if (PetscAbsScalar(a[i]) < tol) a[i] = 0.0;
323:   }
324:   VecRestoreArray(v, &a);
325:   return(0);
326: }

330: PetscErrorCode MatChop(Mat A, PetscReal tol)
331: {
332:   PetscScalar   *newVals;
333:   PetscInt      *newCols;
334:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

338:   MatGetOwnershipRange(A, &rStart, &rEnd);
339:   for(r = rStart; r < rEnd; ++r) {
340:     PetscInt ncols;

342:     MatGetRow(A, r, &ncols, PETSC_NULL, PETSC_NULL);
343:     colMax = PetscMax(colMax, ncols);
344:     MatRestoreRow(A, r, &ncols, PETSC_NULL, PETSC_NULL);
345:   }
346:   numRows = rEnd - rStart;
347:   MPI_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PETSC_COMM_WORLD);
348:   PetscMalloc2(colMax,PetscInt,&newCols,colMax,PetscScalar,&newVals);
349:   for(r = rStart; r < rStart+maxRows; ++r) {
350:     const PetscScalar *vals;
351:     const PetscInt    *cols;
352:     PetscInt           ncols, c;

354:     if (r < rEnd) {
355:       MatGetRow(A, r, &ncols, &cols, &vals);
356:       for(c = 0; c < ncols; ++c) {
357:         newCols[c] = cols[c];
358:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
359:       }
360:       MatRestoreRow(A, r, &ncols, &cols, &vals);
361:       MatSetValues(A, 1, &r, ncols, newCols, newVals, INSERT_VALUES);
362:     }
363:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
364:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
365:   }
366:   PetscFree2(newCols,newVals);
367:   return(0);
368: }

372: PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
373: {
374:   PetscInt       dim             = user->dim;
375:   PetscBool      interpolate     = user->interpolate;
376:   PetscReal      refinementLimit = user->refinementLimit;
377:   const char    *partitioner     = user->partitioner;

381:   PetscLogEventBegin(user->createMeshEvent,0,0,0,0);
382:   DMComplexCreateBoxMesh(comm, dim, interpolate, dm);
383:   {
384:     DM refinedMesh     = PETSC_NULL;
385:     DM distributedMesh = PETSC_NULL;

387:     /* Refine mesh using a volume constraint */
388:     DMComplexSetRefinementLimit(*dm, refinementLimit);
389:     DMRefine(*dm, comm, &refinedMesh);
390:     if (refinedMesh) {
391:       DMDestroy(dm);
392:       *dm  = refinedMesh;
393:     }
394:     /* Distribute mesh over processes */
395:     DMComplexDistribute(*dm, partitioner, &distributedMesh);
396:     if (distributedMesh) {
397:       DMDestroy(dm);
398:       *dm  = distributedMesh;
399:     }
400:   }
401:   DMSetFromOptions(*dm);
402:   PetscLogEventEnd(user->createMeshEvent,0,0,0,0);
403:   user->dm = *dm;
404:   return(0);
405: }

414:   user->q[0].numBasisFuncs = NUM_BASIS_FUNCTIONS_0;
415:   user->q[0].numComponents = NUM_BASIS_COMPONENTS_0;
416:   user->q[0].basis         = Basis_0;
417:   user->q[0].basisDer      = BasisDerivatives_0;
421:   user->q[1].numBasisFuncs = NUM_BASIS_FUNCTIONS_1;
422:   user->q[1].numComponents = NUM_BASIS_COMPONENTS_1;
423:   user->q[1].basis         = Basis_1;
424:   user->q[1].basisDer      = BasisDerivatives_1;
425:   return(0);
426: }

430: /*
431:   There is a problem here with uninterpolated meshes. The index in numDof[] is not dimension in this case,
432:   but sieve depth.
433: */
434: PetscErrorCode SetupSection(DM dm, AppCtx *user) {
435:   PetscSection   section;
436:   PetscInt       dim                = user->dim;
437:   PetscInt       numBC              = 0;
438:   PetscInt       numComp[/*numFields*/2] = {NUM_BASIS_COMPONENTS_0, NUM_BASIS_COMPONENTS_1}; /* C89 Sucks Sucks Sucks Sucks */
439:   PetscInt       bcFields[1]        = {0};
440:   IS             bcPoints[1]        = {PETSC_NULL};
441:   PetscInt       numDof[/*numFields*/2*(SPATIAL_DIM_0+1)]; /* C89 Sucks Sucks Sucks Sucks */
442:   PetscInt       f, d;

446:   if (dim != SPATIAL_DIM_0) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_0);
447:   if (dim != SPATIAL_DIM_1) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_1);
448:   for(d = 0; d <= dim; ++d) {
449:     numDof[0*(dim+1)+d] = numDof_0[d];
450:     numDof[1*(dim+1)+d] = numDof_1[d];
451:   }
452:   for(f = 0; f < numFields; ++f) {
453:     for(d = 1; d < dim; ++d) {
454:       if ((numDof[f*(dim+1)+d] > 0) && !user->interpolate) SETERRQ(((PetscObject) dm)->comm, PETSC_ERR_ARG_WRONG, "Mesh must be interpolated when unknowns are specified on edges or faces.");
455:     }
456:   }
457:   if (user->bcType == DIRICHLET) {
458:     numBC = 1;
459:     DMComplexGetStratumIS(dm, "marker", 1, &bcPoints[0]);
460:   }
461:   DMComplexCreateSection(dm, dim, numFields, numComp, numDof, numBC, bcFields, bcPoints, &section);
462:   PetscSectionSetFieldName(section, 0, "velocity");
463:   PetscSectionSetFieldName(section, 1, "pressure");
464:   DMSetDefaultSection(dm, section);
465:   if (user->bcType == DIRICHLET) {
466:     ISDestroy(&bcPoints[0]);
467:   }
468:   return(0);
469: }

473: PetscErrorCode SetupExactSolution(AppCtx *user) {
475:   user->f0Funcs[0] = f0_u;
476:   user->f0Funcs[1] = f0_p;
477:   user->f1Funcs[0] = f1_u;
478:   user->f1Funcs[1] = f1_p;
479:   user->g0Funcs[0] = PETSC_NULL;
480:   user->g0Funcs[1] = PETSC_NULL;
481:   user->g0Funcs[2] = PETSC_NULL;
482:   user->g0Funcs[3] = PETSC_NULL;
483:   user->g1Funcs[0] = PETSC_NULL;
484:   user->g1Funcs[1] = PETSC_NULL;
485:   user->g1Funcs[2] = g1_pu;      /* < q, \nabla\cdot v > */
486:   user->g1Funcs[3] = PETSC_NULL;
487:   user->g2Funcs[0] = PETSC_NULL;
488:   user->g2Funcs[1] = g2_up;      /* < \nabla\cdot v, p > */
489:   user->g2Funcs[2] = PETSC_NULL;
490:   user->g2Funcs[3] = PETSC_NULL;
491:   user->g3Funcs[0] = g3_uu;      /* < \nabla v, \nabla u + {\nabla u}^T > */
492:   user->g3Funcs[1] = PETSC_NULL;
493:   user->g3Funcs[2] = PETSC_NULL;
494:   user->g3Funcs[3] = PETSC_NULL;
495:   switch(user->dim) {
496:   case 2:
499:     user->exactFuncs[2] = linear_p_2d;
500:     break;
501:   case 3:
505:     user->exactFuncs[3] = linear_p_3d;
506:     break;
507:   default:
508:     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid dimension %d", user->dim);
509:   }
510:   return(0);
511: }

515: PetscErrorCode ComputeError(Vec X, PetscReal *error, AppCtx *user) {
516:   PetscScalar   (**exactFuncs)(const PetscReal []) = user->exactFuncs;
517:   const PetscInt   debug         = user->debug;
518:   const PetscInt   dim           = user->dim;
519:   Vec              localX;
520:   PetscReal       *coords, *v0, *J, *invJ, detJ;
521:   PetscReal        localError;
522:   PetscInt         cStart, cEnd, c, field, fieldOffset, comp;
523:   PetscErrorCode   ierr;

526:   DMGetLocalVector(user->dm, &localX);
527:   DMGlobalToLocalBegin(user->dm, X, INSERT_VALUES, localX);
528:   DMGlobalToLocalEnd(user->dm, X, INSERT_VALUES, localX);
529:   PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
530:   DMComplexGetHeightStratum(user->dm, 0, &cStart, &cEnd);
531:   for(c = cStart; c < cEnd; ++c) {
532:     const PetscScalar *x;
533:     PetscReal          elemError = 0.0;

535:     DMComplexComputeCellGeometry(user->dm, c, v0, J, invJ, &detJ);
536:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
537:     DMComplexVecGetClosure(user->dm, PETSC_NULL, localX, c, &x);

539:     for(field = 0, comp = 0, fieldOffset = 0; field < numFields; ++field) {
543:       const PetscInt   numBasisFuncs = user->q[field].numBasisFuncs;
544:       const PetscInt   numBasisComps = user->q[field].numComponents;
545:       const PetscReal *basis         = user->q[field].basis;
546:       PetscInt         q, d, e, fc, f;

548:       if (debug) {
549:         char title[1024];
550:         PetscSNPrintf(title, 1023, "Solution for Field %d", field);
551:         DMPrintCellVector(c, title, numBasisFuncs*numBasisComps, &x[fieldOffset]);
552:       }
553:       for(q = 0; q < numQuadPoints; ++q) {
554:         for(d = 0; d < dim; d++) {
555:           coords[d] = v0[d];
556:           for(e = 0; e < dim; e++) {
557:             coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
558:           }
559:         }
560:         for(fc = 0; fc < numBasisComps; ++fc) {
561:           const PetscScalar funcVal     = (*exactFuncs[comp+fc])(coords);
562:           PetscReal         interpolant = 0.0;
563:           for(f = 0; f < numBasisFuncs; ++f) {
564:             const PetscInt fidx = f*numBasisComps+fc;
565:             interpolant += x[fieldOffset+fidx]*basis[q*numBasisFuncs*numBasisComps+fidx];
566:           }
567:           if (debug) {PetscPrintf(PETSC_COMM_SELF, "    elem %d field %d error %g\n", c, field, PetscSqr(interpolant - funcVal)*quadWeights[q]*detJ);}
568:           elemError += PetscSqr(interpolant - funcVal)*quadWeights[q]*detJ;
569:         }
570:       }
571:       comp        += numBasisComps;
572:       fieldOffset += numBasisFuncs*numBasisComps;
573:     }
574:     if (debug) {PetscPrintf(PETSC_COMM_SELF, "  elem %d error %g\n", c, elemError);}
575:     localError += elemError;
576:   }
577:   PetscFree4(coords,v0,J,invJ);
578:   DMRestoreLocalVector(user->dm, &localX);
579:   MPI_Allreduce(&localError, error, 1, MPIU_REAL, MPI_SUM, PETSC_COMM_WORLD);
580:   *error = PetscSqrtReal(*error);
581:   return(0);
582: }

586: /*
587:   DMComputeVertexFunction - This calls a function with the coordinates of each vertex, and stores the result in a vector.

589:   Input Parameters:
590: + dm - The DM
591: . mode - The insertion mode for values
592: . numComp - The number of components (functions)
593: - func - The coordinate functions to evaluate

595:   Output Parameter:
596: . X - vector
597: */
598: PetscErrorCode DMComputeVertexFunction(DM dm, InsertMode mode, Vec X, PetscInt numComp, PetscScalar (**funcs)(const PetscReal []), AppCtx *user)
599: {
600:   Vec            localX, coordinates;
601:   PetscSection   section, cSection;
602:   PetscInt       vStart, vEnd, v, c;
603:   PetscScalar   *values;

607:   DMGetLocalVector(dm, &localX);
608:   DMComplexGetDepthStratum(dm, 0, &vStart, &vEnd);
609:   DMGetDefaultSection(dm, &section);
610:   DMComplexGetCoordinateSection(dm, &cSection);
611:   DMComplexGetCoordinateVec(dm, &coordinates);
612:   PetscMalloc(numComp * sizeof(PetscScalar), &values);
613:   for(v = vStart; v < vEnd; ++v) {
614:     PetscScalar *coords;

616:     VecGetValuesSection(coordinates, cSection, v, &coords);
617:     for(c = 0; c < numComp; ++c) {
618:       values[c] = (*funcs[c])(coords);
619:     }
620:     VecSetValuesSection(localX, section, v, values, mode);
621:   }
622:   /* Temporary, msut be replaced by a projection on the finite element basis */
623:   {
624:     PetscScalar *coordsE;
625:     PetscInt     eStart = 0, eEnd = 0, e, depth, dim;

627:     PetscSectionGetDof(cSection, vStart, &dim);
628:     DMComplexGetLabelSize(dm, "depth", &depth);
629:     --depth;
630:     if (depth > 1) {DMComplexGetDepthStratum(dm, 1, &eStart, &eEnd);}
631:     PetscMalloc(dim * sizeof(PetscScalar),&coordsE);
632:     for(e = eStart; e < eEnd; ++e) {
633:       const PetscInt *cone;
634:       PetscInt        coneSize, d;
635:       PetscScalar    *coordsA, *coordsB;

637:       DMComplexGetConeSize(dm, e, &coneSize);
638:       DMComplexGetCone(dm, e, &cone);
639:       if (coneSize != 2) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Cone size %d for point %d should be 2", coneSize, e);
640:       VecGetValuesSection(coordinates, cSection, cone[0], &coordsA);
641:       VecGetValuesSection(coordinates, cSection, cone[1], &coordsB);
642:       for(d = 0; d < dim; ++d) {
643:         coordsE[d] = 0.5*(coordsA[d] + coordsB[d]);
644:       }
645:       for(c = 0; c < numComp; ++c) {
646:         values[c] = (*funcs[c])(coordsE);
647:       }
648:       VecSetValuesSection(localX, section, e, values, mode);
649:     }
650:     PetscFree(coordsE);
651:   }

653:   PetscFree(values);
654:   if (user->showInitial) {
655:     PetscInt p;

657:     PetscPrintf(PETSC_COMM_WORLD, "Local function\n");
658:     for(p = 0; p < user->numProcs; ++p) {
659:       if (p == user->rank) {VecView(localX, PETSC_VIEWER_STDOUT_SELF);}
660:       PetscBarrier((PetscObject) dm);
661:     }
662:   }
663:   DMLocalToGlobalBegin(dm, localX, mode, X);
664:   DMLocalToGlobalEnd(dm, localX, mode, X);
665:   DMRestoreLocalVector(dm, &localX);
666: #if 0
667:   const PetscInt localDof = this->_mesh->sizeWithBC(s, *cells->begin());
668:   PetscReal      detJ;

670:   PetscMalloc(localDof * sizeof(PetscScalar), &values);
671:   PetscMalloc2(dim,PetscReal,&v0,dim*dim,PetscReal,&J);
672:   ALE::ISieveVisitor::PointRetriever<PETSC_MESH_TYPE::sieve_type> pV((int) pow(this->_mesh->getSieve()->getMaxConeSize(), dim+1)+1, true);

674:   for(PetscInt c = cStart; c < cEnd; ++c) {
675:     ALE::ISieveTraversal<PETSC_MESH_TYPE::sieve_type>::orientedClosure(*this->_mesh->getSieve(), c, pV);
676:     const PETSC_MESH_TYPE::point_type *oPoints = pV.getPoints();
677:     const int                          oSize   = pV.getSize();
678:     int                                v       = 0;

680:     DMComplexComputeCellGeometry(dm, c, v0, J, PETSC_NULL, &detJ);
681:     for(PetscInt cl = 0; cl < oSize; ++cl) {
682:       const PetscInt fDim;

684:       PetscSectionGetDof(oPoints[cl], &fDim);
685:       if (pointDim) {
686:         for(PetscInt d = 0; d < fDim; ++d, ++v) {
687:           values[v] = (*this->_options.integrate)(v0, J, v, initFunc);
688:         }
689:       }
690:     }
691:     DMComplexVecSetClosure(dm, PETSC_NULL, localX, c, values);
692:     pV.clear();
693:   }
694:   PetscFree2(v0,J);
695:   PetscFree(values);
696: #endif
697:   return(0);
698: }

702: PetscErrorCode CreatePressureNullSpace(DM dm, AppCtx *user, MatNullSpace *nullSpace) {
703:   Vec            pressure, localP;

707:   DMGetGlobalVector(dm, &pressure);
708:   DMGetLocalVector(dm, &localP);
709:   VecSet(pressure, 0.0);
710:   /* Put a constant in for all pressures
711:      Could change this to project the constant function onto the pressure space (when that is finished) */
712:   {
713:     PetscSection section;
714:     PetscInt     pStart, pEnd, p;
715:     PetscScalar *a;

717:     DMGetDefaultSection(dm, &section);
718:     PetscSectionGetChart(section, &pStart, &pEnd);
719:     VecGetArray(localP, &a);
720:     for(p = pStart; p < pEnd; ++p) {
721:       PetscInt fDim, off, d;

723:       PetscSectionGetFieldDof(section, p, 1, &fDim);
724:       PetscSectionGetFieldOffset(section, p, 1, &off);
725:       for(d = 0; d < fDim; ++d) {
726:         a[off+d] = 1.0;
727:       }
728:     }
729:     VecRestoreArray(localP, &a);
730:   }
731:   DMLocalToGlobalBegin(dm, localP, INSERT_VALUES, pressure);
732:   DMLocalToGlobalEnd(dm, localP, INSERT_VALUES, pressure);
733:   DMRestoreLocalVector(dm, &localP);
734:   VecNormalize(pressure, PETSC_NULL);
735:   if (user->debug) {
736:     PetscPrintf(((PetscObject) dm)->comm, "Pressure Null Space\n");
737:     VecView(pressure, PETSC_VIEWER_STDOUT_WORLD);
738:   }
739:   MatNullSpaceCreate(((PetscObject) dm)->comm, PETSC_FALSE, 1, &pressure, nullSpace);
740:   DMRestoreGlobalVector(dm, &pressure);
741:   return(0);
742: }

746: PetscErrorCode IntegrateResidualBatchCPU(PetscInt Ne, PetscInt numFields, PetscInt field, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscQuadrature quad[], void (*f0_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]), void (*f1_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]), PetscScalar elemVec[], AppCtx *user) {
747:   const PetscInt debug   = user->debug;
748:   const PetscInt dim     = SPATIAL_DIM_0;
749:   PetscInt       cOffset = 0;
750:   PetscInt       eOffset = 0, e;

754:   PetscLogEventBegin(user->integrateResCPUEvent,0,0,0,0);
755:   for(e = 0; e < Ne; ++e) {
756:     const PetscReal  detJ = jacobianDeterminants[e];
757:     const PetscReal *invJ = &jacobianInverses[e*dim*dim];
761:     PetscInt         q, f;

763:     if (Nq > NUM_QUADRATURE_POINTS_0) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_LIB, "Number of quadrature points %d should be <= %d", Nq, NUM_QUADRATURE_POINTS_0);
764:     if (debug > 1) {
765:       PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
766:       DMPrintCellMatrix(e, "invJ", dim, dim, invJ);
767:     }
768:     for(q = 0; q < Nq; ++q) {
769:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
770:       PetscScalar      u[dim+1];
772:       PetscInt         fOffset            = 0;
773:       PetscInt         dOffset            = cOffset;
774:       const PetscInt   Ncomp       = quad[field].numComponents;
776:       PetscInt         d, f, i;

778:       for(d = 0; d <= dim; ++d)        {u[d]     = 0.0;}
779:       for(d = 0; d < dim*(dim+1); ++d) {gradU[d] = 0.0;}
780:       for(f = 0; f < numFields; ++f) {
781:         const PetscInt   Nb       = quad[f].numBasisFuncs;
782:         const PetscInt   Ncomp    = quad[f].numComponents;
783:         const PetscReal *basis    = quad[f].basis;
784:         const PetscReal *basisDer = quad[f].basisDer;
785:         PetscInt         b, comp;

787:         for(b = 0; b < Nb; ++b) {
788:           for(comp = 0; comp < Ncomp; ++comp) {
789:             const PetscInt cidx = b*Ncomp+comp;
790:             PetscScalar    realSpaceDer[dim];
791:             PetscInt       d, g;

793:             u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
794:             for(d = 0; d < dim; ++d) {
795:               realSpaceDer[d] = 0.0;
796:               for(g = 0; g < dim; ++g) {
797:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
798:               }
800:             }
801:           }
802:         }
803:         if (debug > 1) {
804:           PetscInt d;
805:           for(comp = 0; comp < Ncomp; ++comp) {
806:             PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, u[fOffset+comp]);
807:             for(d = 0; d < dim; ++d) {
809:             }
810:           }
811:         }
812:         fOffset += Ncomp;
813:         dOffset += Nb*Ncomp;
814:       }

817:       for(i = 0; i < Ncomp; ++i) {
819:       }
821:       for(i = 0; i < Ncomp*dim; ++i) {
823:       }
824:       if (debug > 1) {
825:         PetscInt c,d;
826:         for(c = 0; c < Ncomp; ++c) {
827:           PetscPrintf(PETSC_COMM_SELF, "    f0[%d]: %g\n", c, f0[q*Ncomp+c]);
828:           for(d = 0; d < dim; ++d) {
829:             PetscPrintf(PETSC_COMM_SELF, "    f1[%d]_%c: %g\n", c, 'x'+d, f1[(q*Ncomp + c)*dim+d]);
830:           }
831:         }
832:       }
833:       if (q == Nq-1) {cOffset = dOffset;}
834:     }
835:     for(f = 0; f < numFields; ++f) {
837:       const PetscInt   Nb       = quad[f].numBasisFuncs;
838:       const PetscInt   Ncomp    = quad[f].numComponents;
839:       const PetscReal *basis    = quad[f].basis;
840:       const PetscReal *basisDer = quad[f].basisDer;
841:       PetscInt         b, comp;

843:       if (f == field) {
844:       for(b = 0; b < Nb; ++b) {
845:         for(comp = 0; comp < Ncomp; ++comp) {
846:           const PetscInt cidx = b*Ncomp+comp;
847:           PetscInt       q;

849:           elemVec[eOffset+cidx] = 0.0;
850:           for(q = 0; q < Nq; ++q) {
851:             PetscScalar realSpaceDer[dim];
852:             PetscInt    d, g;

854:             elemVec[eOffset+cidx] += basis[q*Nb*Ncomp+cidx]*f0[q*Ncomp+comp];
855:             for(d = 0; d < dim; ++d) {
856:               realSpaceDer[d] = 0.0;
857:               for(g = 0; g < dim; ++g) {
858:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
859:               }
860:               elemVec[eOffset+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
861:             }
862:           }
863:         }
864:       }
865:       if (debug > 1) {
866:         PetscInt b, comp;

868:         for(b = 0; b < Nb; ++b) {
869:           for(comp = 0; comp < Ncomp; ++comp) {
870:             PetscPrintf(PETSC_COMM_SELF, "    elemVec[%d,%d]: %g\n", b, comp, elemVec[eOffset+b*Ncomp+comp]);
871:           }
872:         }
873:       }
874:       }
875:       eOffset += Nb*Ncomp;
876:     }
877:   }
878:   /* PetscLogFlops((((2+(2+2*dim)*dim)*Ncomp*Nb+(2+2)*dim*Ncomp)*Nq + (2+2*dim)*dim*Nq*Ncomp*Nb)*Ne); */
879:   PetscLogEventEnd(user->integrateResCPUEvent,0,0,0,0);
880:   return(0);
881: };

885: /*
886:   FormFunctionLocal - Form the local residual F from the local input X

888:   Input Parameters:
889: + dm - The mesh
890: . X  - Local input vector
891: - user - The user context

893:   Output Parameter:
894: . F  - Local output vector

896:   Note:
897:   We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator,
898:   like a GPU, or vectorize on a multicore machine.

900: .seealso: FormJacobianLocal()
901: */
902: PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user)
903: {
904:   const PetscInt   debug = user->debug;
905:   const PetscInt   dim   = user->dim;
906:   PetscReal       *coords, *v0, *J, *invJ, *detJ;
907:   PetscScalar     *elemVec;
908:   PetscInt         cStart, cEnd, c, field;
909:   PetscErrorCode   ierr;

912:   PetscLogEventBegin(user->residualEvent,0,0,0,0);
913:   VecSet(F, 0.0);
914:   PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
915:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
916:   const PetscInt numCells = cEnd - cStart;
917:   PetscInt       cellDof  = 0;
918:   PetscScalar   *u;

920:   for(field = 0; field < numFields; ++field) {
921:     cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents;
922:   }
923:   PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);
924:   for(c = cStart; c < cEnd; ++c) {
925:     const PetscScalar *x;
926:     PetscInt           i;

928:     DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
929:     if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
930:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);

932:     for(i = 0; i < cellDof; ++i) {
933:       u[c*cellDof+i] = x[i];
934:     }
935:   }
936:   for(field = 0; field < numFields; ++field) {
938:     const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
939:     void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field];
940:     void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field];
941:     /* Conforming batches */
943:     PetscInt numBlocks  = 1;
944:     PetscInt batchSize  = numBlocks * blockSize;
945:     PetscInt numBatches = user->numBatches;
946:     PetscInt numChunks  = numCells / (numBatches*batchSize);
947:     IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);
948:     /* Remainder */
949:     PetscInt numRemainder = numCells % (numBatches * batchSize);
950:     PetscInt offset       = numCells - numRemainder;
951:     IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
952:                                      user->q, f0, f1, &elemVec[offset*cellDof], user);
953:   }
954:   for(c = cStart; c < cEnd; ++c) {
955:     if (debug) {DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);}
956:     DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);
957:   }
958:   PetscFree4(u,invJ,detJ,elemVec);
959:   PetscFree3(coords,v0,J);
960:   if (user->showResidual) {
961:     PetscInt p;

963:     PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
964:     for(p = 0; p < user->numProcs; ++p) {
965:       if (p == user->rank) {
966:         Vec f;

968:         VecDuplicate(F, &f);
969:         VecCopy(F, f);
970:         VecChop(f, 1.0e-10);
971:         VecView(f, PETSC_VIEWER_STDOUT_SELF);
972:         VecDestroy(&f);
973:       }
974:       PetscBarrier((PetscObject) dm);
975:     }
976:   }
977:   PetscLogEventEnd(user->residualEvent,0,0,0,0);
978:   return(0);
979: }

983: /*
984: Loop over batch of elements (e):
985:   Loop over element vector entries (f,fc --> i):
986:     Sum over element matrix columns entries (g,gc --> j):
987:       Loop over quadrature points (q):
988:         Make u_q and gradU_q (loops over fields,Nb,Ncomp)
989:           elemVec[i] += \psi^{fc}_f(q) g0_{fc,gc}(u, \nabla u) \phi^{gc}_g(q)
990:                       + \psi^{fc}_f(q) \cdot g1_{fc,gc,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
991:                       + \nabla\psi^{fc}_f(q) \cdot g2_{fc,gc,df}(u, \nabla u) \phi^{gc}_g(q)
992:                       + \nabla\psi^{fc}_f(q) \cdot g3_{fc,gc,df,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
993: */
994: PetscErrorCode IntegrateJacobianActionBatchCPU(PetscInt Ne, PetscInt numFields, PetscInt fieldI, const PetscScalar coefficients[], const PetscScalar argCoefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscQuadrature quad[], void (**g0_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (**g1_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), void (**g2_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (**g3_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), PetscScalar elemVec[], AppCtx *user) {
995:   const PetscReal *basisI    = quad[fieldI].basis;
996:   const PetscReal *basisDerI = quad[fieldI].basisDer;
997:   const PetscInt   debug   = user->debug;
998:   const PetscInt   dim     = SPATIAL_DIM_0;
999:   PetscInt         cellDof = 0; /* Total number of dof on a cell */
1000:   PetscInt         cOffset = 0; /* Offset into coefficients[], argCoefficients[], elemVec[] for element e */
1001:   PetscInt         offsetI = 0; /* Offset into an element vector for fieldI */
1002:   PetscInt         fieldJ, offsetJ, field, e;
1003:   PetscErrorCode   ierr;

1006:   for(field = 0; field < numFields; ++field) {
1007:     if (field == fieldI) {offsetI = cellDof;}
1009:   }
1010:   PetscLogEventBegin(user->integrateJacActionCPUEvent,0,0,0,0);
1011:   for(e = 0; e < Ne; ++e) {
1012:     const PetscReal  detJ    = jacobianDeterminants[e];
1013:     const PetscReal *invJ    = &jacobianInverses[e*dim*dim];
1014:     const PetscInt   Nb_i    = quad[fieldI].numBasisFuncs;
1015:     const PetscInt   Ncomp_i = quad[fieldI].numComponents;
1016:     PetscInt         f, fc, g, gc;

1018:     for(f = 0; f < Nb_i; ++f) {
1021:       PetscInt         q;

1023:       for(fc = 0; fc < Ncomp_i; ++fc) {
1024:         const PetscInt fidx = f*Ncomp_i+fc; /* Test function basis index */
1025:         const PetscInt i    = offsetI+fidx; /* Element vector row */
1026:         elemVec[cOffset+i] = 0.0;
1027:       }
1028:       for(q = 0; q < Nq; ++q) {
1029:         PetscScalar u[dim+1];
1031:         PetscInt    fOffset            = 0;       /* Offset into u[] for field_q (like offsetI) */
1032:         PetscInt    dOffset            = cOffset; /* Offset into coefficients[] for field_q */
1033:         PetscInt    field_q, d;
1034:         PetscScalar g0[dim*dim];         /* Ncomp_i*Ncomp_j */
1035:         PetscScalar g1[dim*dim*dim];     /* Ncomp_i*Ncomp_j*dim */
1036:         PetscScalar g2[dim*dim*dim];     /* Ncomp_i*Ncomp_j*dim */
1037:         PetscScalar g3[dim*dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim*dim */
1038:         PetscInt    c;

1040:         if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
1041:         for(d = 0; d <= dim; ++d)        {u[d]     = 0.0;}
1042:         for(d = 0; d < dim*(dim+1); ++d) {gradU[d] = 0.0;}
1043:         for(field_q = 0; field_q < numFields; ++field_q) {
1044:           const PetscInt   Nb          = quad[field_q].numBasisFuncs;
1045:           const PetscInt   Ncomp       = quad[field_q].numComponents;
1046:           const PetscReal *basis       = quad[field_q].basis;
1047:           const PetscReal *basisDer    = quad[field_q].basisDer;
1048:           PetscInt         b, comp;

1050:           for(b = 0; b < Nb; ++b) {
1051:             for(comp = 0; comp < Ncomp; ++comp) {
1052:               const PetscInt cidx = b*Ncomp+comp;
1053:               PetscScalar    realSpaceDer[dim];
1054:               PetscInt       d1, d2;

1056:               u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
1057:               for(d1 = 0; d1 < dim; ++d1) {
1058:                 realSpaceDer[d1] = 0.0;
1059:                 for(d2 = 0; d2 < dim; ++d2) {
1060:                   realSpaceDer[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
1061:                 }
1063:               }
1064:             }
1065:           }
1066:           if (debug > 1) {
1067:             for(comp = 0; comp < Ncomp; ++comp) {
1068:               PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, u[fOffset+comp]);
1069:               for(d = 0; d < dim; ++d) {
1071:               }
1072:             }
1073:           }
1074:           fOffset += Ncomp;
1075:           dOffset += Nb*Ncomp;
1076:         }

1078:         for(fieldJ = 0, offsetJ = 0; fieldJ < numFields; offsetJ += quad[fieldJ].numBasisFuncs*quad[fieldJ].numComponents,  ++fieldJ) {
1079:           const PetscReal *basisJ    = quad[fieldJ].basis;
1080:           const PetscReal *basisDerJ = quad[fieldJ].basisDer;
1081:           const PetscInt   Nb_j      = quad[fieldJ].numBasisFuncs;
1082:           const PetscInt   Ncomp_j   = quad[fieldJ].numComponents;

1084:           for(g = 0; g < Nb_j; ++g) {
1085:             if ((Ncomp_i > dim) || (Ncomp_j > dim)) SETERRQ3(PETSC_COMM_WORLD, PETSC_ERR_LIB, "Number of components %d and %d should be <= %d", Ncomp_i, Ncomp_j, dim);
1086:             PetscMemzero(g0, Ncomp_i*Ncomp_j         * sizeof(PetscScalar));
1087:             PetscMemzero(g1, Ncomp_i*Ncomp_j*dim     * sizeof(PetscScalar));
1088:             PetscMemzero(g2, Ncomp_i*Ncomp_j*dim     * sizeof(PetscScalar));
1089:             PetscMemzero(g3, Ncomp_i*Ncomp_j*dim*dim * sizeof(PetscScalar));
1090:             if (g0_func[fieldI*numFields+fieldJ]) {
1092:               for(c = 0; c < Ncomp_i*Ncomp_j; ++c) {
1094:               }
1095:             }
1096:             if (g1_func[fieldI*numFields+fieldJ]) {
1098:               for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1100:               }
1101:             }
1102:             if (g2_func[fieldI*numFields+fieldJ]) {
1104:               for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1106:               }
1107:             }
1108:             if (g3_func[fieldI*numFields+fieldJ]) {
1110:               for(c = 0; c < Ncomp_i*Ncomp_j*dim*dim; ++c) {
1112:               }
1113:             }

1115:             for(fc = 0; fc < Ncomp_i; ++fc) {
1116:               const PetscInt fidx = f*Ncomp_i+fc; /* Test function basis index */
1117:               const PetscInt i    = offsetI+fidx; /* Element matrix row */
1118:               for(gc = 0; gc < Ncomp_j; ++gc) {
1119:                 const PetscInt gidx  = g*Ncomp_j+gc; /* Trial function basis index */
1120:                 const PetscInt j     = offsetJ+gidx; /* Element matrix column */
1121:                 PetscScalar    entry = 0.0;          /* The (i,j) entry in the element matrix */
1122:                 PetscScalar    realSpaceDerI[dim];
1123:                 PetscScalar    realSpaceDerJ[dim];
1124:                 PetscInt       d, d2;

1126:                 for(d = 0; d < dim; ++d) {
1127:                   realSpaceDerI[d] = 0.0;
1128:                   realSpaceDerJ[d] = 0.0;
1129:                   for(d2 = 0; d2 < dim; ++d2) {
1130:                     realSpaceDerI[d] += invJ[d2*dim+d]*basisDerI[(q*Nb_i*Ncomp_i+fidx)*dim+d2];
1131:                     realSpaceDerJ[d] += invJ[d2*dim+d]*basisDerJ[(q*Nb_j*Ncomp_j+gidx)*dim+d2];
1132:                   }
1133:                 }
1134:                 entry += basisI[q*Nb_i*Ncomp_i+fidx]*g0[fc*Ncomp_j+gc]*basisJ[q*Nb_j*Ncomp_j+gidx];
1135:                 for(d = 0; d < dim; ++d) {
1136:                   entry += basisI[q*Nb_i*Ncomp_i+fidx]*g1[(fc*Ncomp_j+gc)*dim+d]*realSpaceDerJ[d];
1137:                   entry += realSpaceDerI[d]*g2[(fc*Ncomp_j+gc)*dim+d]*basisJ[q*Nb_j*Ncomp_j+gidx];
1138:                   for(d2 = 0; d2 < dim; ++d2) {
1139:                     entry += realSpaceDerI[d]*g3[((fc*Ncomp_j+gc)*dim+d)*dim+d2]*realSpaceDerJ[d2];
1140:                   }
1141:                 }
1142:                 elemVec[cOffset+i] += entry*argCoefficients[cOffset+j];
1143:               }
1144:             }
1145:           }
1146:         }
1147:       }
1148:     }
1149:     if (debug > 1) {
1150:       PetscInt fc, f;

1152:       PetscPrintf(PETSC_COMM_SELF, "Element %d action vector for field %d\n", e, fieldI);
1153:       for(fc = 0; fc < Ncomp_i; ++fc) {
1154:         for(f = 0; f < Nb_i; ++f) {
1155:           const PetscInt i = offsetI + f*Ncomp_i+fc;
1156:           PetscPrintf(PETSC_COMM_SELF, "    argCoef[%d,%d]: %g\n", f, fc, argCoefficients[cOffset+i]);
1157:         }
1158:       }
1159:       for(fc = 0; fc < Ncomp_i; ++fc) {
1160:         for(f = 0; f < Nb_i; ++f) {
1161:           const PetscInt i = offsetI + f*Ncomp_i+fc;
1162:           PetscPrintf(PETSC_COMM_SELF, "    elemVec[%d,%d]: %g\n", f, fc, elemVec[cOffset+i]);
1163:         }
1164:       }
1165:     }
1166:     cOffset += cellDof;
1167:   }
1168:   PetscLogEventEnd(user->integrateJacActionCPUEvent,0,0,0,0);
1169:   return(0);
1170: }

1174: /*
1175:   FormJacobianActionLocal - Form the local portion of the action of Jacobian matrix J on the local input X.

1177:   Input Parameters:
1178: + dm - The mesh
1179: . J  - The Jacobian shell matrix
1180: . X  - Local input vector
1181: - user - The user context

1183:   Output Parameter:
1184: . F  - Local output vector

1186:   Note:
1187:   We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator,
1188:   like a GPU, or vectorize on a multicore machine.

1190: .seealso: FormJacobianLocal()
1191: */
1192: PetscErrorCode FormJacobianActionLocal(DM dm, Mat Jac, Vec X, Vec F, AppCtx *user)
1193: {
1194:   const PetscInt   debug = user->debug;
1195:   const PetscInt   dim   = user->dim;
1196:   JacActionCtx    *jctx;
1197:   PetscReal       *coords, *v0, *J, *invJ, *detJ;
1198:   PetscScalar     *elemVec;
1199:   PetscInt         cStart, cEnd, c, field;
1200:   PetscErrorCode   ierr;

1203:   PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
1204:   MatShellGetContext(Jac, &jctx);
1205:   VecSet(F, 0.0);
1206:   PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
1207:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
1208:   const PetscInt numCells = cEnd - cStart;
1209:   PetscInt       cellDof  = 0;
1210:   PetscScalar   *u, *a;

1212:   for(field = 0; field < numFields; ++field) {
1213:     cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents;
1214:   }
1215:   PetscMalloc5(numCells*cellDof,PetscScalar,&u,numCells*cellDof,PetscScalar,&a,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);
1216:   for(c = cStart; c < cEnd; ++c) {
1217:     const PetscScalar *x;
1218:     PetscInt           i;

1220:     DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
1221:     if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
1222:     DMComplexVecGetClosure(dm, PETSC_NULL, jctx->u, c, &x);
1223:     for(i = 0; i < cellDof; ++i) {
1224:       u[c*cellDof+i] = x[i];
1225:     }
1226:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
1227:     for(i = 0; i < cellDof; ++i) {
1228:       a[c*cellDof+i] = x[i];
1229:     }
1230:   }
1231:   for(field = 0; field < numFields; ++field) {
1233:     const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
1234:     /* Conforming batches */
1236:     PetscInt numBlocks  = 1;
1237:     PetscInt batchSize  = numBlocks * blockSize;
1238:     PetscInt numBatches = user->numBatches;
1239:     PetscInt numChunks  = numCells / (numBatches*batchSize);
1240:     IntegrateJacobianActionBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, a, invJ, detJ, user->q, user->g0Funcs, user->g1Funcs, user->g2Funcs, user->g3Funcs, elemVec, user);
1241:     /* Remainder */
1242:     PetscInt numRemainder = numCells % (numBatches * batchSize);
1243:     PetscInt offset       = numCells - numRemainder;
1244:     IntegrateJacobianActionBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &a[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
1245:                                            user->q, user->g0Funcs, user->g1Funcs, user->g2Funcs, user->g3Funcs, &elemVec[offset*cellDof], user);
1246:   }
1247:   for(c = cStart; c < cEnd; ++c) {
1248:     if (debug) {DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);}
1249:     DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);
1250:   }
1251:   PetscFree5(u,a,invJ,detJ,elemVec);
1252:   PetscFree3(coords,v0,J);
1253:   if (0) {
1254:     PetscInt p;

1256:     PetscPrintf(PETSC_COMM_WORLD, "Jacobian Action:\n");
1257:     for(p = 0; p < user->numProcs; ++p) {
1258:       if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
1259:       PetscBarrier((PetscObject) dm);
1260:     }
1261:   }
1262:   PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
1263:   return(0);
1264: }

1268: PetscErrorCode FormJacobianAction(Mat J, Vec X,  Vec Y)
1269: {
1270:   JacActionCtx    *ctx;
1271:   DM               dm;
1272:   Vec              dummy, localX, localY;
1273:   PetscInt         N, n;
1274:   PetscErrorCode   ierr;

1280:   MatShellGetContext(J, &ctx);
1281:   dm = ctx->dm;

1283:   /* determine whether X = localX */
1284:   DMGetLocalVector(dm, &dummy);
1285:   DMGetLocalVector(dm, &localX);
1286:   DMGetLocalVector(dm, &localY);
1287:   /* TODO: THIS dummy restore is necessary here so that the first available local vector has boundary conditions in it
1288:    I think the right thing to do is have the user put BC into a local vector and give it to us
1289:   */
1290:   DMRestoreLocalVector(dm, &dummy);
1291:   VecGetSize(X, &N);
1292:   VecGetSize(localX, &n);

1294:   if (n != N){ /* X != localX */
1295:     VecSet(localX, 0.0);
1296:     DMGlobalToLocalBegin(dm, X, INSERT_VALUES, localX);
1297:     DMGlobalToLocalEnd(dm, X, INSERT_VALUES, localX);
1298:   } else {
1299:     DMRestoreLocalVector(dm, &localX);
1300:     localX = X;
1301:   }
1302:   FormJacobianActionLocal(dm, J, localX, localY, ctx->user);
1303:   if (n != N){
1304:     DMRestoreLocalVector(dm, &localX);
1305:   }
1306:   VecSet(Y, 0.0);
1309:   DMRestoreLocalVector(dm, &localY);
1310:   if (0) {
1311:     Vec       r;
1312:     PetscReal norm;

1314:     VecDuplicate(X, &r);
1315:     MatMult(ctx->J, X, r);
1316:     VecAXPY(r, -1.0, Y);
1317:     VecNorm(r, NORM_2, &norm);
1318:     if (norm > 1.0e-8) {
1319:       PetscPrintf(PETSC_COMM_WORLD, "Jacobian Action Input:\n");
1320:       VecView(X, PETSC_VIEWER_STDOUT_WORLD);
1321:       PetscPrintf(PETSC_COMM_WORLD, "Jacobian Action Result:\n");
1322:       VecView(Y, PETSC_VIEWER_STDOUT_WORLD);
1323:       PetscPrintf(PETSC_COMM_WORLD, "Difference:\n");
1324:       VecView(r, PETSC_VIEWER_STDOUT_WORLD);
1325:       SETERRQ1(((PetscObject) J)->comm, PETSC_ERR_ARG_WRONG, "The difference with assembled multiply is too large %g", norm);
1326:     }
1327:     VecDestroy(&r);
1328:   }
1329:   return(0);
1330: }

1334: /*
1335: Loop over batch of elements (e):
1336:   Loop over element matrix entries (f,fc,g,gc --> i,j):
1337:     Loop over quadrature points (q):
1338:       Make u_q and gradU_q (loops over fields,Nb,Ncomp)
1339:         elemMat[i,j] += \psi^{fc}_f(q) g0_{fc,gc}(u, \nabla u) \phi^{gc}_g(q)
1340:                       + \psi^{fc}_f(q) \cdot g1_{fc,gc,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
1341:                       + \nabla\psi^{fc}_f(q) \cdot g2_{fc,gc,df}(u, \nabla u) \phi^{gc}_g(q)
1342:                       + \nabla\psi^{fc}_f(q) \cdot g3_{fc,gc,df,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
1343: */
1344: PetscErrorCode IntegrateJacobianBatchCPU(PetscInt Ne, PetscInt numFields, PetscInt fieldI, PetscInt fieldJ, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscQuadrature quad[], void (*g0_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (*g1_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), void (*g2_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (*g3_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), PetscScalar elemMat[], AppCtx *user) {
1345:   const PetscReal *basisI    = quad[fieldI].basis;
1346:   const PetscReal *basisDerI = quad[fieldI].basisDer;
1347:   const PetscReal *basisJ    = quad[fieldJ].basis;
1348:   const PetscReal *basisDerJ = quad[fieldJ].basisDer;
1349:   const PetscInt   debug   = user->debug;
1350:   const PetscInt   dim     = SPATIAL_DIM_0;
1351:   PetscInt         cellDof = 0; /* Total number of dof on a cell */
1352:   PetscInt         cOffset = 0; /* Offset into coefficients[] for element e */
1353:   PetscInt         eOffset = 0; /* Offset into elemMat[] for element e */
1354:   PetscInt         offsetI = 0; /* Offset into an element vector for fieldI */
1355:   PetscInt         offsetJ = 0; /* Offset into an element vector for fieldJ */
1356:   PetscInt         field, e;
1357:   PetscErrorCode   ierr;

1360:   for(field = 0; field < numFields; ++field) {
1361:     if (field == fieldI) {offsetI = cellDof;}
1362:     if (field == fieldJ) {offsetJ = cellDof;}
1364:   }
1365:   PetscLogEventBegin(user->integrateJacCPUEvent,0,0,0,0);
1366:   for(e = 0; e < Ne; ++e) {
1367:     const PetscReal  detJ    = jacobianDeterminants[e];
1368:     const PetscReal *invJ    = &jacobianInverses[e*dim*dim];
1369:     const PetscInt   Nb_i    = quad[fieldI].numBasisFuncs;
1370:     const PetscInt   Ncomp_i = quad[fieldI].numComponents;
1371:     const PetscInt   Nb_j    = quad[fieldJ].numBasisFuncs;
1372:     const PetscInt   Ncomp_j = quad[fieldJ].numComponents;
1373:     PetscInt         f, g;

1375:     for(f = 0; f < Nb_i; ++f) {
1376:       for(g = 0; g < Nb_j; ++g) {
1379:         PetscInt         q;

1381:         for(q = 0; q < Nq; ++q) {
1382:           PetscScalar u[dim+1];
1384:           PetscInt    fOffset            = 0;       /* Offset into u[] for field_q (like offsetI) */
1385:           PetscInt    dOffset            = cOffset; /* Offset into coefficients[] for field_q */
1386:           PetscInt    field_q, d;
1387:           PetscScalar g0[dim*dim];         /* Ncomp_i*Ncomp_j */
1388:           PetscScalar g1[dim*dim*dim];     /* Ncomp_i*Ncomp_j*dim */
1389:           PetscScalar g2[dim*dim*dim];     /* Ncomp_i*Ncomp_j*dim */
1390:           PetscScalar g3[dim*dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim*dim */
1391:           PetscInt    fc, gc, c;

1393:           if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
1394:           for(d = 0; d <= dim; ++d)        {u[d]     = 0.0;}
1395:           for(d = 0; d < dim*(dim+1); ++d) {gradU[d] = 0.0;}
1396:           for(field_q = 0; field_q < numFields; ++field_q) {
1397:             const PetscInt   Nb          = quad[field_q].numBasisFuncs;
1398:             const PetscInt   Ncomp       = quad[field_q].numComponents;
1399:             const PetscReal *basis       = quad[field_q].basis;
1400:             const PetscReal *basisDer    = quad[field_q].basisDer;
1401:             PetscInt         b, comp;

1403:             for(b = 0; b < Nb; ++b) {
1404:               for(comp = 0; comp < Ncomp; ++comp) {
1405:                 const PetscInt cidx = b*Ncomp+comp;
1406:                 PetscScalar    realSpaceDer[dim];
1407:                 PetscInt       d1, d2;

1409:                 u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
1410:                 for(d1 = 0; d1 < dim; ++d1) {
1411:                   realSpaceDer[d1] = 0.0;
1412:                   for(d2 = 0; d2 < dim; ++d2) {
1413:                     realSpaceDer[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
1414:                   }
1416:                 }
1417:               }
1418:             }
1419:             if (debug > 1) {
1420:               for(comp = 0; comp < Ncomp; ++comp) {
1421:                 PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, u[fOffset+comp]);
1422:                 for(d = 0; d < dim; ++d) {
1424:                 }
1425:               }
1426:             }
1427:             fOffset += Ncomp;
1428:             dOffset += Nb*Ncomp;
1429:           }

1431:           if ((Ncomp_i > dim) || (Ncomp_j > dim)) SETERRQ3(PETSC_COMM_WORLD, PETSC_ERR_LIB, "Number of components %d and %d should be <= %d", Ncomp_i, Ncomp_j, dim);
1432:           PetscMemzero(g0, Ncomp_i*Ncomp_j         * sizeof(PetscScalar));
1433:           PetscMemzero(g1, Ncomp_i*Ncomp_j*dim     * sizeof(PetscScalar));
1434:           PetscMemzero(g2, Ncomp_i*Ncomp_j*dim     * sizeof(PetscScalar));
1435:           PetscMemzero(g3, Ncomp_i*Ncomp_j*dim*dim * sizeof(PetscScalar));
1436:           if (g0_func) {
1438:             for(c = 0; c < Ncomp_i*Ncomp_j; ++c) {
1440:             }
1441:           }
1442:           if (g1_func) {
1444:             for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1446:             }
1447:           }
1448:           if (g2_func) {
1450:             for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1452:             }
1453:           }
1454:           if (g3_func) {
1456:             for(c = 0; c < Ncomp_i*Ncomp_j*dim*dim; ++c) {
1458:             }
1459:           }

1461:           for(fc = 0; fc < Ncomp_i; ++fc) {
1462:             const PetscInt fidx = f*Ncomp_i+fc; /* Test function basis index */
1463:             const PetscInt i    = offsetI+fidx; /* Element matrix row */
1464:             for(gc = 0; gc < Ncomp_j; ++gc) {
1465:               const PetscInt gidx = g*Ncomp_j+gc; /* Trial function basis index */
1466:               const PetscInt j    = offsetJ+gidx; /* Element matrix column */
1467:               PetscScalar    realSpaceDerI[dim];
1468:               PetscScalar    realSpaceDerJ[dim];
1469:               PetscInt       d, d2;

1471:               for(d = 0; d < dim; ++d) {
1472:                 realSpaceDerI[d] = 0.0;
1473:                 realSpaceDerJ[d] = 0.0;
1474:                 for(d2 = 0; d2 < dim; ++d2) {
1475:                   realSpaceDerI[d] += invJ[d2*dim+d]*basisDerI[(q*Nb_i*Ncomp_i+fidx)*dim+d2];
1476:                   realSpaceDerJ[d] += invJ[d2*dim+d]*basisDerJ[(q*Nb_j*Ncomp_j+gidx)*dim+d2];
1477:                 }
1478:               }
1479:               elemMat[eOffset+i*cellDof+j] += basisI[q*Nb_i*Ncomp_i+fidx]*g0[fc*Ncomp_j+gc]*basisJ[q*Nb_j*Ncomp_j+gidx];
1480:               for(d = 0; d < dim; ++d) {
1481:                 elemMat[eOffset+i*cellDof+j] += basisI[q*Nb_i*Ncomp_i+fidx]*g1[(fc*Ncomp_j+gc)*dim+d]*realSpaceDerJ[d];
1482:                 elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g2[(fc*Ncomp_j+gc)*dim+d]*basisJ[q*Nb_j*Ncomp_j+gidx];
1483:                 for(d2 = 0; d2 < dim; ++d2) {
1484:                   elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g3[((fc*Ncomp_j+gc)*dim+d)*dim+d2]*realSpaceDerJ[d2];
1485:                 }
1486:               }
1487:             }
1488:           }
1489:         }
1490:       }
1491:     }
1492:     if (debug > 1) {
1493:       PetscInt fc, f, gc, g;

1495:       PetscPrintf(PETSC_COMM_SELF, "Element matrix for fields %d and %d\n", fieldI, fieldJ);
1496:       for(fc = 0; fc < Ncomp_i; ++fc) {
1497:         for(f = 0; f < Nb_i; ++f) {
1498:           const PetscInt i = offsetI + f*Ncomp_i+fc;
1499:           for(gc = 0; gc < Ncomp_j; ++gc) {
1500:             for(g = 0; g < Nb_j; ++g) {
1501:               const PetscInt j = offsetJ + g*Ncomp_j+gc;
1502:               PetscPrintf(PETSC_COMM_SELF, "    elemMat[%d,%d,%d,%d]: %g\n", f, fc, g, gc, elemMat[eOffset+i*cellDof+j]);
1503:             }
1504:           }
1505:           PetscPrintf(PETSC_COMM_SELF, "\n");
1506:         }
1507:       }
1508:     }
1509:     cOffset += cellDof;
1510:     eOffset += cellDof*cellDof;
1511:   }
1512:   PetscLogEventEnd(user->integrateJacCPUEvent,0,0,0,0);
1513:   return(0);
1514: }

1518: /*
1519:   FormJacobianLocal - Form the local portion of the Jacobian matrix J from the local input X.

1521:   Input Parameters:
1522: + dm - The mesh
1523: . X  - Local input vector
1524: - user - The user context

1526:   Output Parameter:
1527: . Jac  - Jacobian matrix

1529:   Note:
1530:   We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator,
1531:   like a GPU, or vectorize on a multicore machine.

1533: .seealso: FormFunctionLocal()
1534: */
1535: PetscErrorCode FormJacobianLocal(DM dm, Vec X, Mat Jac, Mat JacP, AppCtx *user)
1536: {
1537:   const PetscInt debug = user->debug;
1538:   const PetscInt dim   = user->dim;
1539:   PetscReal     *v0, *J, *invJ, *detJ;
1540:   PetscScalar   *elemMat, *u;
1541:   PetscInt       numCells, cStart, cEnd, c, field, fieldI;
1542:   PetscInt       cellDof = 0;

1546:   PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
1547:   MatZeroEntries(JacP);
1548:   PetscMalloc2(dim,PetscReal,&v0,dim*dim,PetscReal,&J);
1549:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
1550:   numCells = cEnd - cStart;
1551:   for(field = 0; field < numFields; ++field) {
1552:     cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents;
1553:   }
1554:   PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof*cellDof,PetscScalar,&elemMat);
1555:   for(c = cStart; c < cEnd; ++c) {
1556:     const PetscScalar *x;
1557:     PetscInt           i;

1559:     DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
1560:     if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
1561:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);

1563:     for(i = 0; i < cellDof; ++i) {
1564:       u[c*cellDof+i] = x[i];
1565:     }
1566:   }
1567:   PetscMemzero(elemMat, numCells*cellDof*cellDof * sizeof(PetscScalar));
1568:   for(fieldI = 0; fieldI < numFields; ++fieldI) {
1570:     const PetscInt numBasisFuncs = user->q[fieldI].numBasisFuncs;
1571:     PetscInt       fieldJ;

1573:     for(fieldJ = 0; fieldJ < numFields; ++fieldJ) {
1574:       void (*g0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]) = user->g0Funcs[fieldI*numFields+fieldJ];
1575:       void (*g1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]) = user->g1Funcs[fieldI*numFields+fieldJ];
1576:       void (*g2)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g2[]) = user->g2Funcs[fieldI*numFields+fieldJ];
1577:       void (*g3)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g3[]) = user->g3Funcs[fieldI*numFields+fieldJ];
1578:       /* Conforming batches */
1580:       PetscInt numBlocks  = 1;
1581:       PetscInt batchSize  = numBlocks * blockSize;
1582:       PetscInt numBatches = user->numBatches;
1583:       PetscInt numChunks  = numCells / (numBatches*batchSize);
1584:       IntegrateJacobianBatchCPU(numChunks*numBatches*batchSize, numFields, fieldI, fieldJ, u, invJ, detJ, user->q, g0, g1, g2, g3, elemMat, user);
1585:       /* Remainder */
1586:       PetscInt numRemainder = numCells % (numBatches * batchSize);
1587:       PetscInt offset       = numCells - numRemainder;
1588:       IntegrateJacobianBatchCPU(numRemainder, numFields, fieldI, fieldJ, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
1589:                                        user->q, g0, g1, g2, g3, &elemMat[offset*cellDof*cellDof], user);
1590:     }
1591:   }
1592:   for(c = cStart; c < cEnd; ++c) {
1593:     if (debug) {DMPrintCellMatrix(c, "Jacobian", cellDof, cellDof, &elemMat[c*cellDof*cellDof]);}
1594:     DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, JacP, c, &elemMat[c*cellDof*cellDof], ADD_VALUES);
1595:   }
1596:   PetscFree4(u,invJ,detJ,elemMat);
1597:   PetscFree2(v0,J);

1599:   /* Assemble matrix, using the 2-step process:
1600:        MatAssemblyBegin(), MatAssemblyEnd(). */
1601:   MatAssemblyBegin(JacP, MAT_FINAL_ASSEMBLY);
1602:   MatAssemblyEnd(JacP, MAT_FINAL_ASSEMBLY);

1604:   if (user->showJacobian) {
1605:     PetscPrintf(PETSC_COMM_WORLD, "Jacobian:\n");
1606:     MatChop(JacP, 1.0e-10);
1607:     MatView(JacP, PETSC_VIEWER_STDOUT_WORLD);
1608:   }
1609:   PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
1610:   if (user->jacobianMF) {
1611:     JacActionCtx *jctx;

1613:     MatShellGetContext(Jac, &jctx);
1614:     VecCopy(X, jctx->u);
1615:   }
1616:   return(0);
1617: }

1621: int main(int argc, char **argv)
1622: {
1623:   SNES           snes;                 /* nonlinear solver */
1624:   Vec            u,r;                  /* solution, residual vectors */
1625:   Mat            A,J;                  /* Jacobian matrix */
1626:   MatNullSpace   nullSpace;            /* May be necessary for pressure */
1627:   AppCtx         user;                 /* user-defined work context */
1628:   JacActionCtx   userJ;                /* context for Jacobian MF action */
1629:   PetscInt       its;                  /* iterations for convergence */
1630:   PetscReal      error = 0.0;          /* L_2 error in the solution */

1633:   PetscInitialize(&argc, &argv, PETSC_NULL, help);
1634:   ProcessOptions(PETSC_COMM_WORLD, &user);
1635:   SNESCreate(PETSC_COMM_WORLD, &snes);
1636:   CreateMesh(PETSC_COMM_WORLD, &user, &user.dm);
1637:   SNESSetDM(snes, user.dm);

1639:   SetupExactSolution(&user);
1641:   SetupSection(user.dm, &user);

1643:   DMCreateGlobalVector(user.dm, &u);
1644:   VecDuplicate(u, &r);

1646:   DMCreateMatrix(user.dm, MATAIJ, &J);
1647:   if (user.jacobianMF) {
1648:     PetscInt M, m, N, n;

1650:     MatGetSize(J, &M, &N);
1651:     MatGetLocalSize(J, &m, &n);
1652:     MatCreate(PETSC_COMM_WORLD, &A);
1653:     MatSetSizes(A, m, n, M, N);
1654:     MatSetType(A, MATSHELL);
1655:     MatSetUp(A);
1656:     MatShellSetOperation(A, MATOP_MULT, (void (*)(void)) FormJacobianAction);
1657:     userJ.dm   = user.dm;
1658:     userJ.J    = J;
1659:     userJ.user = &user;
1660:     DMCreateLocalVector(user.dm, &userJ.u);
1661:     MatShellSetContext(A, &userJ);
1662:   } else {
1663:     A = J;
1664:   }
1665:   SNESSetJacobian(snes, A, J, SNESDMComputeJacobian, &user);
1666:   CreatePressureNullSpace(user.dm, &user, &nullSpace);
1667:   MatSetNullSpace(J, nullSpace);
1668:   if (A != J) {
1669:     MatSetNullSpace(A, nullSpace);
1670:   }

1672:   DMSetLocalFunction(user.dm, (DMLocalFunction1) FormFunctionLocal);
1673:   DMSetLocalJacobian(user.dm, (DMLocalJacobian1) FormJacobianLocal);
1674:   SNESSetFunction(snes, r, SNESDMComputeFunction, &user);
1675:   SNESSetFromOptions(snes);

1677:   {
1678:     KSP ksp; PC pc; Vec crd_vec;
1679:     const PetscScalar *v;
1680:     PetscInt i,k,j,mlocal;
1681:     PetscReal *coords;

1683:     SNESGetKSP( snes, &ksp );
1684:     KSPGetPC( ksp, &pc );
1685:     DMComplexGetCoordinateVec( user.dm, &crd_vec );
1686:     VecGetLocalSize(crd_vec,&mlocal);
1687:     PetscMalloc(SPATIAL_DIM_0*mlocal*sizeof *coords,&coords);
1689:     for(k=j=0; j<mlocal; j++)
1690:       for(i=0; i<SPATIAL_DIM_0; i++,k++)
1691:         coords[k] = PetscRealPart(v[k]);
1693:     PCSetCoordinates( pc, SPATIAL_DIM_0, mlocal, coords );
1694:     PetscFree( coords );
1695:   }

1697:   DMComputeVertexFunction(user.dm, INSERT_ALL_VALUES, u, numComponents, user.exactFuncs, &user);
1698:   if (user.runType == RUN_FULL) {
1699:     PetscScalar (*initialGuess[numComponents])(const PetscReal x[]);
1700:     PetscInt c;

1702:     for(c = 0; c < numComponents; ++c) {initialGuess[c] = zero;}
1703:     DMComputeVertexFunction(user.dm, INSERT_VALUES, u, numComponents, initialGuess, &user);
1704:     if (user.debug) {
1705:       PetscPrintf(PETSC_COMM_WORLD, "Initial guess\n");
1706:       VecView(u, PETSC_VIEWER_STDOUT_WORLD);
1707:     }
1708:     SNESSolve(snes, PETSC_NULL, u);
1709:     SNESGetIterationNumber(snes, &its);
1710:     PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %D\n", its);
1711:     ComputeError(u, &error, &user);
1712:     PetscPrintf(PETSC_COMM_WORLD, "L_2 Error: %.3g\n", error);
1713:     if (user.showSolution) {
1714:       PetscPrintf(PETSC_COMM_WORLD, "Solution\n");
1715:       VecChop(u, 3.0e-9);
1716:       VecView(u, PETSC_VIEWER_STDOUT_WORLD);
1717:     }
1718:   } else {
1719:     PetscReal res = 0.0;

1721:     /* Check discretization error */
1722:     PetscPrintf(PETSC_COMM_WORLD, "Initial guess\n");
1723:     VecView(u, PETSC_VIEWER_STDOUT_WORLD);
1724:     ComputeError(u, &error, &user);
1725:     PetscPrintf(PETSC_COMM_WORLD, "L_2 Error: %g\n", error);
1726:     /* Check residual */
1727:     SNESDMComputeFunction(snes, u, r, &user);
1728:     PetscPrintf(PETSC_COMM_WORLD, "Initial Residual\n");
1729:     VecChop(r, 1.0e-10);
1730:     VecView(r, PETSC_VIEWER_STDOUT_WORLD);
1731:     VecNorm(r, NORM_2, &res);
1732:     PetscPrintf(PETSC_COMM_WORLD, "L_2 Residual: %g\n", res);
1733:     /* Check Jacobian */
1734:     {
1735:       Vec          b;
1736:       MatStructure flag;
1737:       PetscBool    isNull;

1739:       SNESDMComputeJacobian(snes, u, &A, &A, &flag, &user);
1740:       MatNullSpaceTest(nullSpace, J, &isNull);
1741:       if (!isNull) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "The null space calculated for the system operator is invalid.");
1742:       VecDuplicate(u, &b);
1743:       VecSet(r, 0.0);
1744:       SNESDMComputeFunction(snes, r, b, &user);
1745:       MatMult(A, u, r);
1746:       VecAXPY(r, 1.0, b);
1747:       VecDestroy(&b);
1748:       PetscPrintf(PETSC_COMM_WORLD, "Au - b = Au + F(0)\n");
1749:       VecChop(r, 1.0e-10);
1750:       VecView(r, PETSC_VIEWER_STDOUT_WORLD);
1751:       VecNorm(r, NORM_2, &res);
1752:       PetscPrintf(PETSC_COMM_WORLD, "Linear L_2 Residual: %g\n", res);
1753:     }
1754:   }

1756:   if (user.runType == RUN_FULL) {
1757:     PetscViewer viewer;

1759:     PetscViewerCreate(PETSC_COMM_WORLD, &viewer);
1760:     /*PetscViewerSetType(viewer, PETSCVIEWERDRAW);
1761:       PetscViewerDrawSetInfo(viewer, PETSC_NULL, "Solution", PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE); */
1762:     PetscViewerSetType(viewer, PETSCVIEWERASCII);
1763:     PetscViewerFileSetName(viewer, "ex62_sol.vtk");
1764:     PetscViewerSetFormat(viewer, PETSC_VIEWER_ASCII_VTK);
1765:     DMView(user.dm, viewer);
1766:     VecView(u, viewer);
1767:     PetscViewerDestroy(&viewer);
1768:   }

1770:   MatNullSpaceDestroy(&nullSpace);
1771:   if (user.jacobianMF) {
1772:     VecDestroy(&userJ.u);
1773:   }
1774:   if (A != J) {
1775:     MatDestroy(&A);
1776:   }
1777:   MatDestroy(&J);
1778:   VecDestroy(&u);
1779:   VecDestroy(&r);
1780:   SNESDestroy(&snes);
1781:   DMDestroy(&user.dm);
1782:   PetscFinalize();
1783:   return 0;
1784: }