Actual source code: ex52.c

petsc-3.4.4 2014-03-13
  1: static const char help[] = "Testbed for FEM operations on the GPU.\n\n";

  3: /*
  4: #if 0
  5:  - Automate testing of different:
  6:    - elements, batches, blocks, operators
  7:  - Process log_summary with Python (use existing tools)
  8:  - Put results in PyTables

 10: PYLITH:
 11:  - Redo setup/cleanup in PyLith
 12:  - Test with examples/3d/hex8/step01.cfg (Brad is making a flag)
 13:  - Code up Jacobian (looks like original code)
 14:  - Consider taking in coordinates rather than J^{-T}/|J|

 16: MUST CHECK WITH:
 17:  - 3D elements
 18:  - Different quadrature (1pt and 3pt)
 19:  - Different basis (linear and quadratic)
 20: #endif
 21: */

 23: #include<petscdmplex.h>
 24: #include<petscsnes.h>

 26: typedef enum {LAPLACIAN, ELASTICITY} OpType;

 28: typedef struct {
 29:   DM            dm;                /* REQUIRED in order to use SNES evaluation functions */
 30:   PetscInt      debug;             /* The debugging level */
 31:   PetscMPIInt   rank;              /* The process rank */
 32:   PetscMPIInt   numProcs;          /* The number of processes */
 33:   PetscInt      dim;               /* The topological mesh dimension */
 34:   PetscBool     interpolate;       /* Generate intermediate mesh elements */
 35:   PetscReal     refinementLimit;   /* The largest allowable cell volume */
 36:   char          partitioner[2048]; /* The graph partitioner */
 37:   PetscBool     computeFunction;   /* The flag for computing a residual */
 38:   PetscBool     computeJacobian;   /* The flag for computing a Jacobian */
 39:   PetscBool     batch;             /* The flag for batch assembly */
 40:   PetscBool     gpu;               /* The flag for GPU integration */
 41:   OpType        op;                /* The type of PDE operator (should use FFC/Ignition here) */
 42:   PetscBool     showResidual, showJacobian;
 43:   PetscLogEvent createMeshEvent, residualEvent, residualBatchEvent, jacobianEvent, jacobianBatchEvent, integrateBatchCPUEvent, integrateBatchGPUEvent, integrateGPUOnlyEvent;
 44:   /* GPU partitioning */
 45:   PetscInt      numBatches;        /* The number of cell batches per kernel */
 46:   PetscInt      numBlocks;         /* The number of concurrent blocks per kernel */
 47:   /* Element quadrature */
 48:   PetscQuadrature q;
 49: } AppCtx;

 51: /*------------------------------------------------------------------------------
 52:   This code can be generated using 'bin/pythonscripts/PetscGenerateFEMQuadrature.py dim 1 numComp 1 laplacian src/snes/examples/tutorials/ex52.h'
 53:  -----------------------------------------------------------------------------*/
 54: #include "ex52.h"

 56: void quadratic_2d(const PetscReal x[], PetscScalar u[])
 57: {
 58:   u[0] = x[0]*x[0] + x[1]*x[1];
 59: };

 61: void quadratic_2d_elas(const PetscReal x[], PetscScalar u[])
 62: {
 63:   u[0] = x[0]*x[0] + x[1]*x[1];
 64:   u[1] = x[0]*x[0] + x[1]*x[1];
 65: };

 69: PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
 70: {
 71:   const char     *opTypes[2] = {"laplacian", "elasticity"};
 72:   PetscInt       op;

 76:   options->debug           = 0;
 77:   options->dim             = 2;
 78:   options->interpolate     = PETSC_FALSE;
 79:   options->refinementLimit = 0.0;
 80:   options->computeFunction = PETSC_FALSE;
 81:   options->computeJacobian = PETSC_FALSE;
 82:   options->batch           = PETSC_FALSE;
 83:   options->gpu             = PETSC_FALSE;
 84:   options->numBatches      = 1;
 85:   options->numBlocks       = 1;
 86:   options->op              = LAPLACIAN;
 87:   options->showResidual    = PETSC_TRUE;
 88:   options->showJacobian    = PETSC_TRUE;

 90:   MPI_Comm_size(comm, &options->numProcs);
 91:   MPI_Comm_rank(comm, &options->rank);
 92:   PetscOptionsBegin(comm, "", "Bratu Problem Options", "DMPLEX");
 93:   PetscOptionsInt("-debug", "The debugging level", "ex52.c", options->debug, &options->debug, NULL);
 94:   PetscOptionsInt("-dim", "The topological mesh dimension", "ex52.c", options->dim, &options->dim, NULL);
 95:   PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex52.c", options->interpolate, &options->interpolate, NULL);
 96:   PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex52.c", options->refinementLimit, &options->refinementLimit, NULL);
 97:   PetscStrcpy(options->partitioner, "chaco");
 98:   PetscOptionsString("-partitioner", "The graph partitioner", "ex52.c", options->partitioner, options->partitioner, 2048, NULL);
 99:   PetscOptionsBool("-compute_function", "Compute the residual", "ex52.c", options->computeFunction, &options->computeFunction, NULL);
100:   PetscOptionsBool("-compute_jacobian", "Compute the Jacobian", "ex52.c", options->computeJacobian, &options->computeJacobian, NULL);
101:   PetscOptionsBool("-batch", "Use the batch assembly method", "ex52.c", options->batch, &options->batch, NULL);
102:   PetscOptionsBool("-gpu", "Use the GPU for integration method", "ex52.c", options->gpu, &options->gpu, NULL);
103:   PetscOptionsInt("-gpu_batches", "The number of cell batches per kernel", "ex52.c", options->numBatches, &options->numBatches, NULL);
104:   PetscOptionsInt("-gpu_blocks", "The number of concurrent blocks per kernel", "ex52.c", options->numBlocks, &options->numBlocks, NULL);

106:   op = options->op;

108:   PetscOptionsEList("-op_type","Type of PDE operator","ex52.c",opTypes,2,opTypes[options->op],&op,NULL);

110:   options->op = (OpType) op;

112:   PetscOptionsBool("-show_residual", "Output the residual for verification", "ex52.c", options->showResidual, &options->showResidual, NULL);
113:   PetscOptionsBool("-show_jacobian", "Output the Jacobian for verification", "ex52.c", options->showJacobian, &options->showJacobian, NULL);
114:   PetscOptionsEnd();

116:   PetscLogEventRegister("CreateMesh",    DM_CLASSID,   &options->createMeshEvent);
117:   PetscLogEventRegister("Residual",      SNES_CLASSID, &options->residualEvent);
118:   PetscLogEventRegister("ResidualBatch", SNES_CLASSID, &options->residualBatchEvent);
119:   PetscLogEventRegister("Jacobian",      SNES_CLASSID, &options->jacobianEvent);
120:   PetscLogEventRegister("JacobianBatch", SNES_CLASSID, &options->jacobianBatchEvent);
121:   PetscLogEventRegister("IntegBatchCPU", SNES_CLASSID, &options->integrateBatchCPUEvent);
122:   PetscLogEventRegister("IntegBatchGPU", SNES_CLASSID, &options->integrateBatchGPUEvent);
123:   PetscLogEventRegister("IntegGPUOnly",  SNES_CLASSID, &options->integrateGPUOnlyEvent);
124:   return(0);
125: };

129: PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
130: {
131:   PetscInt       dim             = user->dim;
132:   PetscBool      interpolate     = user->interpolate;
133:   PetscReal      refinementLimit = user->refinementLimit;
134:   const char     *partitioner    = user->partitioner;

138:   PetscLogEventBegin(user->createMeshEvent,0,0,0,0);
139:   DMPlexCreateBoxMesh(comm, dim, interpolate, dm);
140:   {
141:     DM refinedMesh     = NULL;
142:     DM distributedMesh = NULL;

144:     /* Refine mesh using a volume constraint */
145:     DMPlexSetRefinementLimit(*dm, refinementLimit);
146:     DMRefine(*dm, comm, &refinedMesh);
147:     if (refinedMesh) {
148:       DMDestroy(dm);
149:       *dm  = refinedMesh;
150:     }
151:     /* Distribute mesh over processes */
152:     DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);
153:     if (distributedMesh) {
154:       DMDestroy(dm);
155:       *dm  = distributedMesh;
156:     }
157:   }
158:   DMSetFromOptions(*dm);
159:   PetscLogEventEnd(user->createMeshEvent,0,0,0,0);

161:   user->dm = *dm;
162:   return(0);
163: }

167: PetscErrorCode SetupQuadrature(AppCtx *user)
168: {
170:   switch (user->dim) {
171:   case 2:
172:   case 3:
173:     user->q.numQuadPoints = NUM_QUADRATURE_POINTS_0;
174:     user->q.quadPoints    = points_0;
175:     user->q.quadWeights   = weights_0;
176:     user->q.numBasisFuncs = NUM_BASIS_FUNCTIONS_0;
177:     user->q.numComponents = NUM_BASIS_COMPONENTS_0;
178:     user->q.basis         = Basis_0;
179:     user->q.basisDer      = BasisDerivatives_0;
180:     break;
181:   default:
182:     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid dimension %d", user->dim);
183:   }
184:   return(0);
185: }

189: PetscErrorCode SetupSection(DM dm, AppCtx *user)
190: {
191:   PetscSection   section;
192:   /* These can be generated using config/PETSc/FEM.py */
193:   PetscInt       dim        = user->dim;
194:   PetscInt       numBC      = 0;
195:   PetscInt       numComp[1] = {NUM_BASIS_COMPONENTS_0};

199:   if (dim != SPATIAL_DIM_0) SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_0);
200:   DMPlexCreateSection(dm, dim, 1, numComp, numDof_0, numBC, NULL, NULL, &section);
201:   DMSetDefaultSection(dm, section);
202:   return(0);
203: }

207: /*
208:   FormInitialGuess - Forms initial approximation.

210:   Input Parameters:
211: + user - user-defined application context
212: - guessFunc - The coordinate function to use for the guess

214:   Output Parameter:
215: . X - vector
216: */
217: PetscErrorCode FormInitialGuess(Vec X, void (*guessFunc)(const PetscReal [], PetscScalar []), InsertMode mode, AppCtx *user)
218: {
219:   Vec            localX, coordinates;
220:   PetscSection   section, cSection;
221:   PetscInt       vStart, vEnd, v;

225:   DMGetLocalVector(user->dm, &localX);
226:   DMPlexGetDepthStratum(user->dm, 0, &vStart, &vEnd);
227:   DMGetDefaultSection(user->dm, &section);
228:   DMPlexGetCoordinateSection(user->dm, &cSection);
229:   DMGetCoordinatesLocal(user->dm, &coordinates);
230:   for (v = vStart; v < vEnd; ++v) {
231:     PetscScalar values[3];
232:     PetscScalar *coords;

234:     VecGetValuesSection(coordinates, cSection, v, &coords);
235:     (*guessFunc)(coords, values);
236:     VecSetValuesSection(localX, section, v, values, mode);
237:   }

239:   DMLocalToGlobalBegin(user->dm, localX, INSERT_VALUES, X);
240:   DMLocalToGlobalEnd(user->dm, localX, INSERT_VALUES, X);
241:   DMRestoreLocalVector(user->dm, &localX);
242:   return(0);
243: }

247: /*
248:  dm - The mesh
249:  X  - Local intput vector
250:  F  - Local output vector
251:  */
252: PetscErrorCode FormFunctionLocalLaplacian(DM dm, Vec X, Vec F, AppCtx *user)
253: {
254:   /*PetscScalar    (*rhsFunc)(const PetscReal []) = user->rhsFunc; */
255:   const PetscInt  debug         = user->debug;
256:   const PetscInt  dim           = user->dim;
257:   const PetscInt  numQuadPoints = user->q.numQuadPoints;
258:   const PetscReal *quadPoints   = user->q.quadPoints;
259:   const PetscReal *quadWeights  = user->q.quadWeights;
260:   const PetscInt  numBasisFuncs = user->q.numBasisFuncs;
261:   const PetscReal *basis        = user->q.basis;
262:   const PetscReal *basisDer     = user->q.basisDer;
263:   PetscReal       *coords, *v0, *J, *invJ, detJ;
264:   PetscScalar     *realSpaceDer, *fieldGrad, *elemVec;
265:   PetscInt        cStart, cEnd, c, p;
266:   PetscErrorCode  ierr;

269:   PetscLogEventBegin(user->residualEvent,0,0,0,0);
270:   VecSet(F, 0.0);
271:   PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec);
272:   PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
273:   DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
274:   for (c = cStart; c < cEnd; ++c) {
275:     PetscScalar *x;
276:     PetscInt     q, f, d, e;

278:     PetscMemzero(elemVec, numBasisFuncs * sizeof(PetscScalar));
279:     DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
280:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
281:     DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
282:     if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs, x);}

284:     for (q = 0; q < numQuadPoints; ++q) {
285:       PetscScalar fieldVal = 0.0;

287:       if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
288:       for (d = 0; d < dim; ++d) {
289:         fieldGrad[d] = 0.0;
290:         coords[d]    = v0[d];
291:         for (e = 0; e < dim; ++e) coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
292:         if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "    coords[%d] %g\n", d, coords[d]);}
293:       }
294:       for (f = 0; f < numBasisFuncs; ++f) {
295:         fieldVal += x[f]*basis[q*numBasisFuncs+f];

297:         for (d = 0; d < dim; ++d) {
298:           realSpaceDer[d] = 0.0;
299:           for (e = 0; e < dim; ++e) realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
300:           fieldGrad[d] += realSpaceDer[d]*x[f];
301:         }
302:       }
303:       if (debug > 1) {
304:         for (d = 0; d < dim; ++d) PetscPrintf(PETSC_COMM_SELF, "    fieldGrad[%d] %g\n", d, fieldGrad[d]);
305:       }
306:       const PetscScalar funcVal = 0.0; /* (*rhsFunc)(coords); */
307:       for (f = 0; f < numBasisFuncs; ++f) {
308:         /* Constant term: -f(x) */
309:         elemVec[f] -= basis[q*numBasisFuncs+f]*funcVal*quadWeights[q]*detJ;
310:         /* Linear term: -\Delta u */
311:         PetscScalar product = 0.0;
312:         for (d = 0; d < dim; ++d) {
313:           realSpaceDer[d] = 0.0;
314:           for (e = 0; e < dim; ++e) realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
315:           product += realSpaceDer[d]*fieldGrad[d];
316:         }
317:         elemVec[f] += product*quadWeights[q]*detJ;
318:         /* Nonlinear term: -\lambda e^{u} */
319:         elemVec[f] -= basis[q*numBasisFuncs+f]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
320:       }
321:     }
322:     DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
323:     if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs, elemVec);}
324:     DMPlexVecSetClosure(dm, NULL, F, c, elemVec, ADD_VALUES);
325:   }
326:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
327:   PetscFree3(realSpaceDer,fieldGrad,elemVec);
328:   PetscFree4(coords,v0,J,invJ);

330:   PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
331:   for (p = 0; p < user->numProcs; ++p) {
332:     if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
333:     PetscBarrier((PetscObject) dm);
334:   }
335:   PetscLogEventEnd(user->residualEvent,0,0,0,0);
336:   return(0);
337: }

341: /*
342:  dm - The mesh
343:  X  - Local intput vector
344:  F  - Local output vector
345:  */
346: PetscErrorCode FormFunctionLocalElasticity(DM dm, Vec X, Vec F, AppCtx *user)
347: {
348:   /*PetscScalar    (*rhsFunc)(const PetscReal []) = user->rhsFunc;*/
349:   const PetscInt  debug         = user->debug;
350:   const PetscInt  dim           = user->dim;
351:   const PetscInt  numQuadPoints = user->q.numQuadPoints;
352:   const PetscReal *quadPoints   = user->q.quadPoints;
353:   const PetscReal *quadWeights  = user->q.quadWeights;
354:   const PetscInt  numBasisFuncs = user->q.numBasisFuncs;
355:   const PetscInt  numBasisComps = user->q.numComponents;
356:   const PetscReal *basis        = user->q.basis;
357:   const PetscReal *basisDer     = user->q.basisDer;
358:   PetscReal       *coords, *v0, *J, *invJ, detJ;
359:   PetscScalar     *realSpaceDer, *fieldGrad, *elemVec;
360:   PetscInt        cStart, cEnd, c, p;
361:   PetscErrorCode  ierr;

364:   PetscLogEventBegin(user->residualEvent,0,0,0,0);
365:   VecSet(F, 0.0);
366:   PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim*numBasisComps,PetscScalar,&fieldGrad,numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
367:   PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
368:   DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
369:   for (c = cStart; c < cEnd; ++c) {
370:     PetscScalar *x;
371:     PetscInt     q, d, e, f, comp;

373:     PetscMemzero(elemVec, numBasisFuncs*numBasisComps * sizeof(PetscScalar));
374:     DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
375:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
376:     DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
377:     if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs*numBasisComps, x);}

379:     for (q = 0; q < numQuadPoints; ++q) {
380:       PetscScalar fieldVal[3] = {0.0,0.0,0.0};

382:       if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
383:       for (d = 0; d < dim; ++d) {
384:         for (comp = 0; comp < numBasisComps; ++comp) fieldGrad[comp*dim+d] = 0.0;
385:         coords[d] = v0[d];
386:         for (e = 0; e < dim; ++e) coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
387:         if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "    coords[%d] %g\n", d, coords[d]);}
388:       }
389:       for (f = 0; f < numBasisFuncs; ++f) {
390:         for (comp = 0; comp < numBasisComps; ++comp) {
391:           const PetscInt cidx = f*numBasisComps+comp;

393:           fieldVal[comp] += x[cidx]*basis[q*numBasisFuncs*numBasisComps+cidx];

395:           for (d = 0; d < dim; ++d) {
396:             realSpaceDer[d] = 0.0;
397:             for (e = 0; e < dim; ++e) realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
398:             fieldGrad[comp*dim+d] += realSpaceDer[d]*x[cidx];
399:           }
400:         }
401:       }
402:       if (debug > 1) {
403:         for (comp = 0; comp < numBasisComps; ++comp) {
404:           for (d = 0; d < dim; ++d) {
405:             PetscPrintf(PETSC_COMM_SELF, "    field %d gradient[%d] %g\n", comp, d, fieldGrad[comp*dim+d]);
406:           }
407:         }
408:       }
409:       const PetscScalar funcVal[3] = {0.0, 0.0, 0.0}; /* (*rhsFunc)(coords); */
410:       for (f = 0; f < numBasisFuncs; ++f) {
411:         for (comp = 0; comp < numBasisComps; ++comp) {
412:           const PetscInt cidx = f*numBasisComps+comp;

414:           /* Constant term: -f(x) */
415:           elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*funcVal[comp]*quadWeights[q]*detJ;
416:           /* Linear term: -\Delta u */
417:           PetscScalar product = 0.0;
418:           for (d = 0; d < dim; ++d) {
419:             realSpaceDer[d] = 0.0;
420:             for (e = 0; e < dim; ++e) {
421:               realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
422:             }
423:             product += realSpaceDer[d]*0.5*(fieldGrad[comp*dim+d] + fieldGrad[d*dim+comp]);
424:           }
425:           elemVec[cidx] += product*quadWeights[q]*detJ;
426:           /* Nonlinear term: -\lambda e^{u} */
427:           elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*0.0*PetscExpScalar(fieldVal[comp])*quadWeights[q]*detJ;
428:         }
429:       }
430:     }
431:     DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
432:     if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, elemVec);}
433:     DMPlexVecSetClosure(dm, NULL, F, c, elemVec, ADD_VALUES);
434:   }
435:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
436:   PetscFree3(realSpaceDer,fieldGrad,elemVec);
437:   PetscFree4(coords,v0,J,invJ);

439:   PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
440:   for (p = 0; p < user->numProcs; ++p) {
441:     if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
442:     PetscBarrier((PetscObject) dm);
443:   }
444:   PetscLogEventEnd(user->residualEvent,0,0,0,0);
445:   return(0);
446: }

448: PETSC_EXTERN PetscErrorCode IntegrateElementBatchGPU(PetscInt Ne, PetscInt Nbatch, PetscInt Nbc, PetscInt Nbl, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscScalar elemVec[], PetscLogEvent event, PetscInt debug);

450: void f1_laplacian(PetscScalar u, const PetscScalar gradU[], PetscScalar f1[])
451: {
452:   const PetscInt dim = SPATIAL_DIM_0;
453:   PetscInt       d;

455:   for (d = 0; d < dim; ++d) f1[d] = gradU[d];
456:   return;
457: }

461: PetscErrorCode IntegrateLaplacianBatchCPU(PetscInt Ne, PetscInt Nb, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user)
462: {
463:   const PetscInt debug = user->debug;
464:   const PetscInt dim   = SPATIAL_DIM_0;
465:   PetscInt       e;

469:   PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
470:   for (e = 0; e < Ne; ++e) {
471:     const PetscReal detJ  = jacobianDeterminants[e];
472:     const PetscReal *invJ = &jacobianInverses[e*dim*dim];
473:     PetscScalar     f1[SPATIAL_DIM_0 /*1*/]; /* Nq = 1 */
474:     PetscInt        q, b, d, f;

476:     if (debug > 1) {
477:       PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
478:       PetscPrintf(PETSC_COMM_SELF, "  invJ: %g %g %g %g\n", invJ[0], invJ[1], invJ[2], invJ[3]);
479:     }
480:     for (q = 0; q < Nq; ++q) {
481:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
482:       PetscScalar u = 0.0;
483:       PetscScalar gradU[SPATIAL_DIM_0];

485:       for (d = 0; d < dim; ++d) gradU[d] = 0.0;
486:       for (b = 0; b < Nb; ++b) {
487:         PetscScalar realSpaceDer[SPATIAL_DIM_0];

489:         u += coefficients[e*Nb+b]*basisTabulation[q*Nb+b];
490:         for (d = 0; d < dim; ++d) {
491:           realSpaceDer[d] = 0.0;
492:           for (f = 0; f < dim; ++f) {
493:             realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
494:           }
495:           gradU[d] += coefficients[e*Nb+b]*realSpaceDer[d];
496:         }
497:       }
498:       f1_laplacian(u, gradU, &f1[q*dim]);
499:       f1[q*dim+0] *= detJ*quadWeights[q];
500:       f1[q*dim+1] *= detJ*quadWeights[q];
501:       if (debug > 1) {
502:         PetscPrintf(PETSC_COMM_SELF, "    f1[%d]: %g\n", 0, f1[q*dim+0]);
503:         PetscPrintf(PETSC_COMM_SELF, "    f1[%d]: %g\n", 1, f1[q*dim+1]);
504:       }
505:     }
506:     for (b = 0; b < Nb; ++b) {
507:       elemVec[e*Nb+b] = 0.0;
508:       for (q = 0; q < Nq; ++q) {
509:         PetscScalar realSpaceDer[SPATIAL_DIM_0];

511:         for (d = 0; d < dim; ++d) {
512:           realSpaceDer[d] = 0.0;
513:           for (f = 0; f < dim; ++f) {
514:             realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
515:           }
516:           elemVec[e*Nb+b] += realSpaceDer[d]*f1[q*dim+d];
517:         }
518:       }
519:     }
520:   }
521:   PetscLogFlops((((2+(2+2*dim)*dim)*Nb+2*dim)*Nq + (2+2*dim)*dim*Nq*Nb)*Ne);
522:   PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
523:   return(0);
524: };

526: void f1_elasticity(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[])
527: {
528:   const PetscInt dim   = SPATIAL_DIM_0;
529:   const PetscInt Ncomp = dim;
530:   PetscInt       comp, d;

532:   for (comp = 0; comp < Ncomp; ++comp) {
533:     for (d = 0; d < dim; ++d) {
534:       f1[comp*dim+d] = 0.5*(gradU[comp*dim+d] + gradU[d*dim+comp]);
535:     }
536:   }
537:   return;
538: }

542: PetscErrorCode IntegrateElasticityBatchCPU(PetscInt Ne, PetscInt Nb, PetscInt Ncomp, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user)
543: {
544:   const PetscInt debug = user->debug;
545:   const PetscInt dim   = SPATIAL_DIM_0;
546:   PetscInt       e;

550:   PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
551:   for (e = 0; e < Ne; ++e) {
552:     const PetscReal detJ  = jacobianDeterminants[e];
553:     const PetscReal *invJ = &jacobianInverses[e*dim*dim];
554:     PetscScalar     f1[SPATIAL_DIM_0*SPATIAL_DIM_0 /*1*/]; /* Nq = 1 */
555:     PetscInt        q, b, comp, i;

557:     if (debug > 1) {
558:       PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
559:       PetscPrintf(PETSC_COMM_SELF, "  invJ:");
560:       for (i = 0; i < dim*dim; ++i) {
561:         PetscPrintf(PETSC_COMM_SELF, " %g", invJ[i]);
562:       }
563:       PetscPrintf(PETSC_COMM_SELF, "\n");
564:     }
565:     for (q = 0; q < Nq; ++q) {
566:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
567:       PetscScalar u[SPATIAL_DIM_0];
568:       PetscScalar gradU[SPATIAL_DIM_0*SPATIAL_DIM_0];

570:       for (i = 0; i < dim; ++i) u[i] = 0.0;
571:       for (i = 0; i < dim*dim; ++i) gradU[i] = 0.0;
572:       for (b = 0; b < Nb; ++b) {
573:         for (comp = 0; comp < Ncomp; ++comp) {
574:           const PetscInt cidx = b*Ncomp+comp;
575:           PetscScalar    realSpaceDer[SPATIAL_DIM_0];
576:           PetscInt       d, f;

578:           u[comp] += coefficients[e*Nb*Ncomp+cidx]*basisTabulation[q*Nb*Ncomp+cidx];
579:           for (d = 0; d < dim; ++d) {
580:             realSpaceDer[d] = 0.0;
581:             for (f = 0; f < dim; ++f) {
582:               realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
583:             }
584:             gradU[comp*dim+d] += coefficients[e*Nb*Ncomp+cidx]*realSpaceDer[d];
585:           }
586:         }
587:       }
588:       f1_elasticity(u, gradU, &f1[q*Ncomp*dim]);
589:       for (i = 0; i < Ncomp*dim; ++i) {
590:         f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
591:       }
592:       if (debug > 1) {
593:         PetscInt c, d;
594:         for (c = 0; c < Ncomp; ++c) {
595:           for (d = 0; d < dim; ++d) {
596:             PetscPrintf(PETSC_COMM_SELF, "    gradU[%d]_%c: %g f1[%d]_%c: %g\n", c, 'x'+d, gradU[c*dim+d], c, 'x'+d, f1[(q*Ncomp + c)*dim+d]);
597:           }
598:         }
599:       }
600:     }
601:     for (b = 0; b < Nb; ++b) {
602:       for (comp = 0; comp < Ncomp; ++comp) {
603:         const PetscInt cidx = b*Ncomp+comp;
604:         PetscInt       q, d, f;

606:         elemVec[e*Nb*Ncomp+cidx] = 0.0;
607:         for (q = 0; q < Nq; ++q) {
608:           PetscScalar realSpaceDer[SPATIAL_DIM_0];

610:           for (d = 0; d < dim; ++d) {
611:             realSpaceDer[d] = 0.0;
612:             for (f = 0; f < dim; ++f) {
613:               realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
614:             }
615:             elemVec[e*Nb*Ncomp+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
616:           }
617:         }
618:       }
619:     }
620:   }
621:   PetscLogFlops((((2+(2+2*dim)*dim)*Ncomp*Nb+(2+2)*dim*Ncomp)*Nq + (2+2*dim)*dim*Nq*Ncomp*Nb)*Ne);
622:   PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
623:   return(0);
624: };

628: /*
629:  dm - The mesh
630:  X  - Local intput vector
631:  F  - Local output vector
632:  */
633: PetscErrorCode FormFunctionLocalBatch(DM dm, Vec X, Vec F, AppCtx *user)
634: {
635:   /* PetscScalar    (*rhsFunc)(const PetscReal []) = user->rhsFunc; */
636:   const PetscInt  debug         = user->debug;
637:   const PetscInt  dim           = user->dim;
638:   const PetscInt  numQuadPoints = user->q.numQuadPoints;
639:   const PetscReal *quadPoints   = user->q.quadPoints;
640:   const PetscReal *quadWeights  = user->q.quadWeights;
641:   const PetscInt  numBasisFuncs = user->q.numBasisFuncs;
642:   const PetscInt  numBasisComps = user->q.numComponents;
643:   const PetscReal *basis        = user->q.basis;
644:   const PetscReal *basisDer     = user->q.basisDer;
645:   PetscReal       *coords, *v0, *J, *invJ, *detJ;
646:   PetscScalar     *elemVec;
647:   PetscInt        cStart, cEnd, c;
648:   PetscErrorCode  ierr;

651:   PetscLogEventBegin(user->residualBatchEvent,0,0,0,0);
652:   VecSet(F, 0.0);
653:   /* PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec); */
654:   PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
655:   DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
656:   const PetscInt numCells = cEnd - cStart;
657:   PetscScalar    *u;

659:   PetscMalloc4(numCells*numBasisFuncs*numBasisComps,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
660:   for (c = cStart; c < cEnd; ++c) {
661:     PetscScalar *x;
662:     PetscInt     f;

664:     DMPlexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
665:     DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);

667:     for (f = 0; f < numBasisFuncs*numBasisComps; ++f) {
668:       u[c*numBasisFuncs*numBasisComps+f] = x[f];
669:     }
670:     DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
671:   }
672:   /* Conforming batches */
673:   PetscInt blockSize  = numBasisFuncs*numQuadPoints;
674:   PetscInt numBlocks  = user->numBlocks;
675:   PetscInt batchSize  = numBlocks * blockSize;
676:   PetscInt numBatches = user->numBatches;
677:   PetscInt numChunks  = numCells / (numBatches*batchSize);
678:   if (user->gpu) {
679:     PetscLogEventBegin(user->integrateBatchGPUEvent,0,0,0,0);
680:     IntegrateElementBatchGPU(numChunks*numBatches*batchSize, numBatches, batchSize, numBlocks, u, invJ, detJ, elemVec, user->integrateGPUOnlyEvent, user->debug);
681:     PetscLogFlops((((2+(2+2*dim)*dim)*numBasisComps*numBasisFuncs+(2+2)*dim*numBasisComps)*numQuadPoints + (2+2*dim)*dim*numQuadPoints*numBasisComps*numBasisFuncs)*numChunks*numBatches*batchSize);
682:     PetscLogEventEnd(user->integrateBatchGPUEvent,0,0,0,0);
683:   } else {
684:     switch (user->op) {
685:     case LAPLACIAN:
686:       IntegrateLaplacianBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
687:     case ELASTICITY:
688:       IntegrateElasticityBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, numBasisComps, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
689:     default:
690:       SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
691:     }
692:   }
693:   /* Remainder */
694:   PetscInt numRemainder = numCells % (numBatches * batchSize);
695:   PetscInt offset       = numCells - numRemainder;
696:   switch (user->op) {
697:   case LAPLACIAN:
698:     IntegrateLaplacianBatchCPU(numRemainder, numBasisFuncs, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
699:                                       numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
700:   case ELASTICITY:
701:     IntegrateElasticityBatchCPU(numRemainder, numBasisFuncs, numBasisComps, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
702:                                        numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
703:   default:
704:     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
705:   }
706:   for (c = cStart; c < cEnd; ++c) {
707:     if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, &elemVec[c*numBasisFuncs*numBasisComps]);}
708:     DMPlexVecSetClosure(dm, NULL, F, c, &elemVec[c*numBasisFuncs*numBasisComps], ADD_VALUES);
709:   }
710:   PetscFree4(u,invJ,detJ,elemVec);
711:   PetscFree3(coords,v0,J);
712:   if (user->showResidual) {
713:     PetscInt p;
714:     PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
715:     for (p = 0; p < user->numProcs; ++p) {
716:       if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
717:       PetscBarrier((PetscObject) dm);
718:     }
719:   }
720:   PetscLogEventEnd(user->residualBatchEvent,0,0,0,0);
721:   return(0);
722: }

726: /*
727:   dm  - The mesh
728:   X   - The local input vector
729:   Jac - The output matrix
730: */
731: PetscErrorCode FormJacobianLocalLaplacian(DM dm, Vec X, Mat Jac, Mat JacPre, MatStructure *flag, AppCtx *user)
732: {
733:   const PetscInt  debug         = user->debug;
734:   const PetscInt  dim           = user->dim;
735:   const PetscInt  numQuadPoints = user->q.numQuadPoints;
736:   const PetscReal *quadWeights  = user->q.quadWeights;
737:   const PetscInt  numBasisFuncs = user->q.numBasisFuncs;
738:   const PetscReal *basis        = user->q.basis;
739:   const PetscReal *basisDer     = user->q.basisDer;
740:   PetscReal       *v0, *J, *invJ, detJ;
741:   PetscScalar     *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
742:   PetscInt        cStart, cEnd, c;
743:   PetscErrorCode  ierr;

746:   PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
747:   MatZeroEntries(Jac);
748:   PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
749:   PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
750:   DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
751:   for (c = cStart; c < cEnd; ++c) {
752:     PetscScalar *x;
753:     PetscInt     q;

755:     PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
756:     DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
757:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
758:     DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);

760:     for (q = 0; q < numQuadPoints; ++q) {
761:       PetscScalar fieldVal = 0.0;
762:       PetscInt    f, g, d, e;

764:       for (f = 0; f < numBasisFuncs; ++f) fieldVal += x[f]*basis[q*numBasisFuncs+f];
765:       for (f = 0; f < numBasisFuncs; ++f) {
766:         for (d = 0; d < dim; ++d) {
767:           realSpaceTestDer[d] = 0.0;
768:           for (e = 0; e < dim; ++e) {
769:             realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
770:           }
771:         }
772:         for (g = 0; g < numBasisFuncs; ++g) {
773:           for (d = 0; d < dim; ++d) {
774:             realSpaceBasisDer[d] = 0.0;
775:             for (e = 0; e < dim; ++e) {
776:               realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
777:             }
778:           }
779:           /* Linear term: -\Delta u */
780:           PetscScalar product = 0.0;
781:           for (d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
782:           elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
783:           /* Nonlinear term: -\lambda e^{u} */
784:           elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
785:         }
786:       }
787:     }
788:     DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
789:     if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
790:     DMPlexMatSetClosure(dm, NULL, NULL, Jac, c, elemMat, ADD_VALUES);
791:   }
792:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
793:   PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
794:   PetscFree3(v0,J,invJ);

796:   /* Assemble matrix, using the 2-step process:
797:        MatAssemblyBegin(), MatAssemblyEnd(). */
798:   MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
799:   MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
800:   /* Tell the matrix we will never add a new nonzero location to the
801:      matrix. If we do, it will generate an error. */
802:   MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
803:   PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
804:   return(0);
805: }

809: /*
810:   dm  - The mesh
811:   X   - The local input vector
812:   Jac - The output matrix
813: */
814: PetscErrorCode FormJacobianLocalElasticity(DM dm, Vec X, Mat Jac, Mat JacPre, MatStructure *flag, AppCtx *user)
815: {
816:   const PetscInt  debug         = user->debug;
817:   const PetscInt  dim           = user->dim;
818:   const PetscInt  numQuadPoints = user->q.numQuadPoints;
819:   const PetscReal *quadWeights  = user->q.quadWeights;
820:   const PetscInt  numBasisFuncs = user->q.numBasisFuncs;
821:   const PetscReal *basis        = user->q.basis;
822:   const PetscReal *basisDer     = user->q.basisDer;
823:   PetscReal       *v0, *J, *invJ, detJ;
824:   PetscScalar     *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
825:   PetscInt        cStart, cEnd, c;
826:   PetscErrorCode  ierr;

829:   PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
830:   MatZeroEntries(Jac);
831:   PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
832:   PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
833:   DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
834:   for (c = cStart; c < cEnd; ++c) {
835:     PetscScalar *x;
836:     PetscInt     q;

838:     PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
839:     DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
840:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
841:     DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);

843:     for (q = 0; q < numQuadPoints; ++q) {
844:       PetscScalar fieldVal = 0.0;
845:       PetscInt    f, g, d, e;

847:       for (f = 0; f < numBasisFuncs; ++f) fieldVal += x[f]*basis[q*numBasisFuncs+f];

849:       for (f = 0; f < numBasisFuncs; ++f) {
850:         for (d = 0; d < dim; ++d) {
851:           realSpaceTestDer[d] = 0.0;
852:           for (e = 0; e < dim; ++e) {
853:             realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
854:           }
855:         }
856:         for (g = 0; g < numBasisFuncs; ++g) {
857:           for (d = 0; d < dim; ++d) {
858:             realSpaceBasisDer[d] = 0.0;
859:             for (e = 0; e < dim; ++e) {
860:               realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
861:             }
862:           }
863:           /* Linear term: -\Delta u */
864:           PetscScalar product = 0.0;
865:           for (d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
866:           elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
867:           /* Nonlinear term: -\lambda e^{u} */
868:           elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
869:         }
870:       }
871:     }
872:     DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
873:     if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
874:     DMPlexMatSetClosure(dm, NULL, NULL, Jac, c, elemMat, ADD_VALUES);
875:   }
876:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
877:   PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
878:   PetscFree3(v0,J,invJ);

880:   /* Assemble matrix, using the 2-step process:
881:        MatAssemblyBegin(), MatAssemblyEnd(). */
882:   MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
883:   MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
884:   /* Tell the matrix we will never add a new nonzero location to the
885:      matrix. If we do, it will generate an error. */
886:   MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
887:   PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
888:   return(0);
889: }

893: /*
894:   dm  - The mesh
895:   X   - The local input vector
896:   Jac - The output matrix
897: */
898: PetscErrorCode FormJacobianLocalBatch(DM dm, Vec X, Mat Jac, Mat JacPre, MatStructure *flag, AppCtx *user)
899: {
900:   const PetscInt  debug         = user->debug;
901:   const PetscInt  dim           = user->dim;
902:   const PetscInt  numQuadPoints = user->q.numQuadPoints;
903:   const PetscReal *quadWeights  = user->q.quadWeights;
904:   const PetscInt  numBasisFuncs = user->q.numBasisFuncs;
905:   const PetscReal *basis        = user->q.basis;
906:   const PetscReal *basisDer     = user->q.basisDer;
907:   PetscReal       *v0, *J, *invJ, detJ;
908:   PetscScalar     *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
909:   PetscInt        cStart, cEnd, c;
910:   PetscErrorCode  ierr;

913:   PetscLogEventBegin(user->jacobianBatchEvent,0,0,0,0);
914:   MatZeroEntries(Jac);
915:   PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
916:   PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
917:   DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
918:   for (c = cStart; c < cEnd; ++c) {
919:     PetscScalar *x;
920:     PetscInt     q;

922:     PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
923:     DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
924:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
925:     DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);

927:     for (q = 0; q < numQuadPoints; ++q) {
928:       PetscScalar fieldVal = 0.0;
929:       PetscInt    f, g, d, e;

931:       for (f = 0; f < numBasisFuncs; ++f) fieldVal += x[f]*basis[q*numBasisFuncs+f];

933:       for (f = 0; f < numBasisFuncs; ++f) {
934:         for (d = 0; d < dim; ++d) {
935:           realSpaceTestDer[d] = 0.0;
936:           for (e = 0; e < dim; ++e) {
937:             realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
938:           }
939:         }
940:         for (g = 0; g < numBasisFuncs; ++g) {
941:           for (d = 0; d < dim; ++d) {
942:             realSpaceBasisDer[d] = 0.0;
943:             for (e = 0; e < dim; ++e) {
944:               realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
945:             }
946:           }
947:           /* Linear term: -\Delta u */
948:           PetscScalar product = 0.0;
949:           for (d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
950:           elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
951:           /* Nonlinear term: -\lambda e^{u} */
952:           elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
953:         }
954:       }
955:     }
956:     DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
957:     if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
958:     DMPlexMatSetClosure(dm, NULL, NULL, Jac, c, elemMat, ADD_VALUES);
959:   }
960:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
961:   PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
962:   PetscFree3(v0,J,invJ);

964:   /* Assemble matrix, using the 2-step process:
965:        MatAssemblyBegin(), MatAssemblyEnd(). */
966:   MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
967:   MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
968:   /* Tell the matrix we will never add a new nonzero location to the
969:      matrix. If we do, it will generate an error. */
970:   MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
971:   PetscLogEventEnd(user->jacobianBatchEvent,0,0,0,0);
972:   return(0);
973: }

977: int main(int argc, char **argv)
978: {
979:   DM             dm;
980:   SNES           snes;
981:   AppCtx         user;

984:   PetscInitialize(&argc, &argv, NULL, help);
985: #if !defined(PETSC_HAVE_CUDA)
986:   SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires CUDA support.");
987: #endif
988:   ProcessOptions(PETSC_COMM_WORLD, &user);
989:   CreateMesh(PETSC_COMM_WORLD, &user, &dm);
990:   SetupQuadrature(&user);
991:   SetupSection(dm, &user);

993:   SNESCreate(PETSC_COMM_WORLD, &snes);
994:   SNESSetDM(snes, dm);
995:   if (user.computeFunction) {
996:     Vec X, F;

998:     DMGetGlobalVector(dm, &X);
999:     DMGetGlobalVector(dm, &F);
1000:     if (user.batch) {
1001:       switch (user.op) {
1002:       case LAPLACIAN:
1003:         FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);break;
1004:       case ELASTICITY:
1005:         FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);break;
1006:       default:
1007:         SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1008:       }
1009:       DMSNESSetFunctionLocal(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*))FormFunctionLocalBatch, &user);
1010:     } else {
1011:       switch (user.op) {
1012:       case LAPLACIAN:
1013:         FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);
1014:         DMSNESSetFunctionLocal(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*))FormFunctionLocalLaplacian, &user);break;
1015:       case ELASTICITY:
1016:         FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);
1017:         DMSNESSetFunctionLocal(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*))FormFunctionLocalElasticity, &user);break;
1018:       default:
1019:         SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1020:       }
1021:     }
1022:     SNESComputeFunction(snes, X, F);
1023:     DMRestoreGlobalVector(dm, &X);
1024:     DMRestoreGlobalVector(dm, &F);
1025:   }
1026:   if (user.computeJacobian) {
1027:     Vec          X;
1028:     Mat          J;
1029:     MatStructure flag;

1031:     DMGetGlobalVector(dm, &X);
1032:     DMCreateMatrix(dm, MATAIJ, &J);
1033:     if (user.batch) {
1034:       DMSNESSetJacobianLocal(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, MatStructure*, void*))FormJacobianLocalBatch, &user);
1035:     } else {
1036:       switch (user.op) {
1037:       case LAPLACIAN:
1038:         DMSNESSetJacobianLocal(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, MatStructure*, void*))FormJacobianLocalLaplacian, &user);break;
1039:       case ELASTICITY:
1040:         DMSNESSetJacobianLocal(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, MatStructure*, void*))FormJacobianLocalElasticity, &user);break;
1041:       default:
1042:         SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1043:       }
1044:     }
1045:     SNESComputeJacobian(snes, X, &J, &J, &flag);
1046:     MatDestroy(&J);
1047:     DMRestoreGlobalVector(dm, &X);
1048:   }
1049:   SNESDestroy(&snes);
1050:   DMDestroy(&dm);
1051:   PetscFinalize();
1052:   return 0;
1053: }