Actual source code: ex14.c

petsc-3.5.4 2015-05-23
Report Typos and Errors
  1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
  2: \n\
  3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
  4: using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
  5: to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
  6: boundary conditions in the x- and y-directions.\n\
  7: \n\
  8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
  9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
 10: \n\
 11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
 12: \n\n";

 14: /*
 15: The equations for horizontal velocity (u,v) are

 17:   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
 18:   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0

 20: where

 22:   eta = B/2 (epsilon + gamma)^((p-2)/2)

 24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
 25: written in terms of the second invariant

 27:   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2

 29: The surface boundary conditions are the natural conditions.  The basal boundary conditions
 30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.

 32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).

 34: The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
 35: map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
 36: the Jacobian of the coordinate transformation from a reference element to the physical element.

 38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
 39: specially so that columns are never distributed, and are always contiguous in memory.
 40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
 41: and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
 42: use compatible domain decomposition relative to the 3D DMDAs.

 44: */

 46: #include <petscts.h>
 47: #include <petscdm.h>
 48: #include <petscdmda.h>
 49: #include <petscdmcomposite.h>
 50: #include <ctype.h>              /* toupper() */

 52: #if defined __SSE2__
 53: #  include <emmintrin.h>
 54: #endif

 56: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
 57: #define USE_SSE2_KERNELS (!defined NO_SSE2                              \
 58:                           && !defined PETSC_USE_COMPLEX                 \
 59:                           && !defined PETSC_USE_REAL_SINGLE           \
 60:                           && defined __SSE2__)

 62: #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
 63: #  if defined __cplusplus       /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
 64: #    define restrict
 65: #  else
 66: #    define restrict PETSC_RESTRICT
 67: #  endif
 68: #endif

 70: static PetscClassId THI_CLASSID;

 72: typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
 73: static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
 74: static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
 75: static const PetscReal HexQNodes[]    = {-0.57735026918962573, 0.57735026918962573};
 76: #define G 0.57735026918962573
 77: #define H (0.5*(1.+G))
 78: #define L (0.5*(1.-G))
 79: #define M (-0.5)
 80: #define P (0.5)
 81: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
 82: static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
 83:                                                    {0,H,0,0,0,L,0,0},
 84:                                                    {0,0,H,0,0,0,L,0},
 85:                                                    {0,0,0,H,0,0,0,L},
 86:                                                    {L,0,0,0,H,0,0,0},
 87:                                                    {0,L,0,0,0,H,0,0},
 88:                                                    {0,0,L,0,0,0,H,0},
 89:                                                    {0,0,0,L,0,0,0,H}};
 90: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
 91:   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
 92:   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
 93:   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
 94:   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
 95:   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
 96:   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
 97:   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
 98:   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
 99: /* Stanndard Gauss */
100: static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
101:                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
102:                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
103:                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
104:                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
105:                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
106:                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
107:                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
108: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
109:   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
110:   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
111:   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
112:   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
113:   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
114:   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
115:   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
116:   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
117: static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
118: /* Standard 2x2 Gauss quadrature for the bottom layer. */
119: static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
120:                                             {L*H,H*H,H*L,L*L},
121:                                             {L*L,H*L,H*H,L*H},
122:                                             {H*L,L*L,L*H,H*H}};
123: static const PetscReal QuadQDeriv[4][4][2] = {
124:   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
125:   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
126:   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
127:   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
128: #undef G
129: #undef H
130: #undef L
131: #undef M
132: #undef P

134: #define HexExtract(x,i,j,k,n) do {              \
135:     (n)[0] = (x)[i][j][k];                      \
136:     (n)[1] = (x)[i+1][j][k];                    \
137:     (n)[2] = (x)[i+1][j+1][k];                  \
138:     (n)[3] = (x)[i][j+1][k];                    \
139:     (n)[4] = (x)[i][j][k+1];                    \
140:     (n)[5] = (x)[i+1][j][k+1];                  \
141:     (n)[6] = (x)[i+1][j+1][k+1];                \
142:     (n)[7] = (x)[i][j+1][k+1];                  \
143:   } while (0)

145: #define HexExtractRef(x,i,j,k,n) do {           \
146:     (n)[0] = &(x)[i][j][k];                     \
147:     (n)[1] = &(x)[i+1][j][k];                   \
148:     (n)[2] = &(x)[i+1][j+1][k];                 \
149:     (n)[3] = &(x)[i][j+1][k];                   \
150:     (n)[4] = &(x)[i][j][k+1];                   \
151:     (n)[5] = &(x)[i+1][j][k+1];                 \
152:     (n)[6] = &(x)[i+1][j+1][k+1];               \
153:     (n)[7] = &(x)[i][j+1][k+1];                 \
154:   } while (0)

156: #define QuadExtract(x,i,j,n) do {               \
157:     (n)[0] = (x)[i][j];                         \
158:     (n)[1] = (x)[i+1][j];                       \
159:     (n)[2] = (x)[i+1][j+1];                     \
160:     (n)[3] = (x)[i][j+1];                       \
161:   } while (0)

163: static PetscScalar Sqr(PetscScalar a) {return a*a;}

165: static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
166: {
167:   PetscInt i;
168:   dz[0] = dz[1] = dz[2] = 0;
169:   for (i=0; i<8; i++) {
170:     dz[0] += dphi[i][0] * zn[i];
171:     dz[1] += dphi[i][1] * zn[i];
172:     dz[2] += dphi[i][2] * zn[i];
173:   }
174: }

176: static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
177: {
178:   const PetscReal
179:     jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
180:   ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
181:   ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
182:   PetscInt i;

184:   for (i=0; i<8; i++) {
185:     const PetscReal *dphir = HexQDeriv[q][i];
186:     phi[i] = HexQInterp[q][i];
187:     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
188:     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
189:     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
190:   }
191:   *jw = 1.0 * jdet;
192: }

194: typedef struct _p_THI   *THI;
195: typedef struct _n_Units *Units;

197: typedef struct {
198:   PetscScalar u,v;
199: } Node;

201: typedef struct {
202:   PetscScalar b;                /* bed */
203:   PetscScalar h;                /* thickness */
204:   PetscScalar beta2;            /* friction */
205: } PrmNode;

207: #define FieldSize(ntype) ((PetscInt)(sizeof(ntype)/sizeof(PetscScalar)))
208: #define FieldOffset(ntype,member) ((PetscInt)(offsetof(ntype,member)/sizeof(PetscScalar)))
209: #define FieldIndex(ntype,i,member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype,member)))
210: #define NODE_SIZE FieldSize(Node)
211: #define PRMNODE_SIZE FieldSize(PrmNode)

213: typedef struct {
214:   PetscReal min,max,cmin,cmax;
215: } PRange;

217: struct _p_THI {
218:   PETSCHEADER(int);
219:   void      (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
220:   PetscInt  nlevels;
221:   PetscInt  zlevels;
222:   PetscReal Lx,Ly,Lz;           /* Model domain */
223:   PetscReal alpha;              /* Bed angle */
224:   Units     units;
225:   PetscReal dirichlet_scale;
226:   PetscReal ssa_friction_scale;
227:   PetscReal inertia;
228:   PRange    eta;
229:   PRange    beta2;
230:   struct {
231:     PetscReal Bd2,eps,exponent,glen_n;
232:   } viscosity;
233:   struct {
234:     PetscReal irefgam,eps2,exponent;
235:   } friction;
236:   struct {
237:     PetscReal rate,exponent,refvel;
238:   } erosion;
239:   PetscReal rhog;
240:   PetscBool no_slip;
241:   PetscBool verbose;
242:   MatType   mattype;
243:   char      *monitor_basename;
244:   PetscInt  monitor_interval;
245: };

247: struct _n_Units {
248:   /* fundamental */
249:   PetscReal meter;
250:   PetscReal kilogram;
251:   PetscReal second;
252:   /* derived */
253:   PetscReal Pascal;
254:   PetscReal year;
255: };

257: static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
258: {
259:   const PetscScalar zm1 = zm-1,
260:     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
261:               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
262:               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
263:               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
264:               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
265:               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
266:               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
267:               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
268:   PetscInt i;
269:   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
270: }

274: /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
275: static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2],PetscReal hx,PetscReal hy,const PrmNode pn[4],PrmNode dp[4][2])
276: {
278:   PetscInt       q,i,f;
279:   const PetscScalar (*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
280:   PetscScalar (*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;

283:   PetscMemzero(dpg,4*sizeof(dpg[0]));
284:   for (q=0; q<4; q++) {
285:     for (i=0; i<4; i++) {
286:       for (f=0; f<PRMNODE_SIZE; f++) {
287:         dpg[q][0][f] += dphi[q][i][0]/hx * pg[i][f];
288:         dpg[q][1][f] += dphi[q][i][1]/hy * pg[i][f];
289:       }
290:     }
291:   }
292:   return(0);
293: }

295: static inline PetscReal StaggeredMidpoint2D(PetscScalar a,PetscScalar b,PetscScalar c,PetscScalar d)
296: {return 0.5*PetscRealPart(0.75*a + 0.75*b + 0.25*c + 0.25*d);}
297: static inline PetscReal UpwindFlux1D(PetscReal u,PetscReal hL,PetscReal hR)
298: {return (u > 0) ? hL*u : hR*u;}

300: #define UpwindFluxXW(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i-1][j][k].u, x3[i-1][j+dj][k].u,x3[i][k+dj][k].u), \
301:                                                     PetscRealPart(0.75*x2[i-1][j  ].h+0.25*x2[i-1][j+dj].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h))
302: #define UpwindFluxXE(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i+1][j][k].u, x3[i+1][j+dj][k].u,x3[i][k+dj][k].u), \
303:                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h), PetscRealPart(0.75*x2[i+1][j  ].h+0.25*x2[i+1][j+dj].h))
304: #define UpwindFluxYS(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j-1][k].v, x3[i+di][j-1][k].v,x3[i+di][j][k].v), \
305:                                                     PetscRealPart(0.75*x2[i  ][j-1].h+0.25*x2[i+di][j-1].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h))
306: #define UpwindFluxYN(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j+1][k].v, x3[i+di][j+1][k].v,x3[i+di][j][k].v), \
307:                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h), PetscRealPart(0.75*x2[i  ][j+1].h+0.25*x2[i+di][j+1].h))

309: static void PrmNodeGetFaceMeasure(const PrmNode **p,PetscInt i,PetscInt j,PetscScalar h[])
310: {
311:   /* West */
312:   h[0] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j-1].h,p[i][j-1].h);
313:   h[1] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j+1].h,p[i][j+1].h);
314:   /* East */
315:   h[2] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j+1].h,p[i][j+1].h);
316:   h[3] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j-1].h,p[i][j-1].h);
317:   /* South */
318:   h[4] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i+1][j-1].h,p[i+1][j].h);
319:   h[5] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i-1][j-1].h,p[i-1][j].h);
320:   /* North */
321:   h[6] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i-1][j+1].h,p[i-1][j].h);
322:   h[7] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i+1][j+1].h,p[i+1][j].h);
323: }

325: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
326: static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
327: {
328:   Units units = thi->units;
329:   PetscReal s = -x*PetscSinReal(thi->alpha);
330:   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
331:   p->h = s - p->b;
332:   p->beta2 = -1e-10;             /* This value is not used, but it should not be huge because that would change the finite difference step size  */
333: }

335: static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
336: {
337:   Units units = thi->units;
338:   PetscReal s = -x*PetscSinReal(thi->alpha);
339:   p->b = s - 1000*units->meter;
340:   p->h = s - p->b;
341:   /* tau_b = beta2 v   is a stress (Pa).
342:    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
343:   p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
344: }

346: /* These are just toys */

348: /* From Fred Herman */
349: static void THIInitialize_HOM_F(THI thi,PetscReal x,PetscReal y,PrmNode *p)
350: {
351:   Units units = thi->units;
352:   PetscReal s = -x*PetscSinReal(thi->alpha);
353:   p->b = s - 1000*units->meter + 100*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx);/* * sin(y*2*PETSC_PI/thi->Ly); */
354:   p->h = s - p->b;
355:   p->h = (1-(atan((x-thi->Lx/2)/1.)+PETSC_PI/2.)/PETSC_PI)*500*units->meter+1*units->meter;
356:   s = PetscRealPart(p->b + p->h);
357:   p->beta2 = -1e-10;
358:   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
359: }

361: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
362: static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
363: {
364:   Units units = thi->units;
365:   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
366:   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
367:   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
368:   p->h = s - p->b;
369:   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
370: }

372: /* Like Z, but with 200 meter cliffs */
373: static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
374: {
375:   Units units = thi->units;
376:   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
377:   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
378:   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
379:   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
380:   p->h = s - p->b;
381:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
382: }

384: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
385: static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
386: {
387:   Units units = thi->units;
388:   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
389:   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
390:   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
391:   p->h = s - p->b;
392:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
393: }

395: static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
396: {
397:   if (thi->friction.irefgam == 0) {
398:     Units units = thi->units;
399:     thi->friction.irefgam = 1./(0.5*PetscSqr(100 * units->meter / units->year));
400:     thi->friction.eps2 = 0.5*PetscSqr(1.e-4 / thi->friction.irefgam);
401:   }
402:   if (thi->friction.exponent == 0) {
403:     *beta2  = rbeta2;
404:     *dbeta2 = 0;
405:   } else {
406:     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
407:     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
408:   }
409: }

411: static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
412: {
413:   PetscReal Bd2,eps,exponent;
414:   if (thi->viscosity.Bd2 == 0) {
415:     Units units = thi->units;
416:     const PetscReal
417:       n = thi->viscosity.glen_n,                        /* Glen exponent */
418:       p = 1. + 1./n,                                    /* for Stokes */
419:       A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
420:       B = PetscPowReal(A,-1./n);                                 /* hardness parameter */
421:     thi->viscosity.Bd2      = B/2;
422:     thi->viscosity.exponent = (p-2)/2;
423:     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
424:   }
425:   Bd2      = thi->viscosity.Bd2;
426:   exponent = thi->viscosity.exponent;
427:   eps      = thi->viscosity.eps;
428:   *eta     = Bd2 * PetscPowReal(eps + gam,exponent);
429:   *deta    = exponent * (*eta) / (eps + gam);
430: }

432: static void THIErosion(THI thi,const Node *vel,PetscScalar *erate,Node *derate)
433: {
434:   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel),
435:                     rate    = -thi->erosion.rate*PetscPowScalar(magref2, 0.5*thi->erosion.exponent);
436:   if (erate) *erate = rate;
437:   if (derate) {
438:     if (thi->erosion.exponent == 1) {
439:       derate->u = 0;
440:       derate->v = 0;
441:     } else {
442:       derate->u = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
443:       derate->v = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
444:     }
445:   }
446: }

448: static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
449: {
450:   if (x < *min) *min = x;
451:   if (x > *max) *max = x;
452: }

454: static void PRangeClear(PRange *p)
455: {
456:   p->cmin = p->min = 1e100;
457:   p->cmax = p->max = -1e100;
458: }

462: static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
463: {

466:   p->cmin = min;
467:   p->cmax = max;
468:   if (min < p->min) p->min = min;
469:   if (max > p->max) p->max = max;
470:   return(0);
471: }

475: static PetscErrorCode THIDestroy(THI *thi)
476: {

480:   if (--((PetscObject)(*thi))->refct > 0) return(0);
481:   PetscFree((*thi)->units);
482:   PetscFree((*thi)->mattype);
483:   PetscFree((*thi)->monitor_basename);
484:   PetscHeaderDestroy(thi);
485:   return(0);
486: }

490: static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
491: {
492:   static PetscBool registered = PETSC_FALSE;
493:   THI              thi;
494:   Units            units;
495:   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
496:   PetscErrorCode   ierr;

499:   *inthi = 0;
500:   if (!registered) {
501:     PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);
502:     registered = PETSC_TRUE;
503:   }
504:   PetscHeaderCreate(thi,_p_THI,0,THI_CLASSID,"THI","Toy Hydrostatic Ice","THI",comm,THIDestroy,0);

506:   PetscNew(&thi->units);

508:   units           = thi->units;
509:   units->meter    = 1e-2;
510:   units->second   = 1e-7;
511:   units->kilogram = 1e-12;

513:   PetscOptionsBegin(comm,NULL,"Scaled units options","");
514:   {
515:     PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);
516:     PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);
517:     PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);
518:   }
519:   PetscOptionsEnd();
520:   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
521:   units->year   = 31556926. * units->second, /* seconds per year */

523:   thi->Lx              = 10.e3;
524:   thi->Ly              = 10.e3;
525:   thi->Lz              = 1000;
526:   thi->nlevels         = 1;
527:   thi->dirichlet_scale = 1;
528:   thi->verbose         = PETSC_FALSE;

530:   thi->viscosity.glen_n = 3.;
531:   thi->erosion.rate     = 1e-3; /* m/a */
532:   thi->erosion.exponent = 1.;
533:   thi->erosion.refvel   = 1.;   /* m/a */

535:   PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");
536:   {
537:     QuadratureType quad       = QUAD_GAUSS;
538:     char           homexp[]   = "A";
539:     char           mtype[256] = MATSBAIJ;
540:     PetscReal      L,m = 1.0;
541:     PetscBool      flg;
542:     L    = thi->Lx;
543:     PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);
544:     if (flg) thi->Lx = thi->Ly = L;
545:     PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);
546:     PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);
547:     PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);
548:     PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);
549:     switch (homexp[0] = toupper(homexp[0])) {
550:     case 'A':
551:       thi->initialize = THIInitialize_HOM_A;
552:       thi->no_slip    = PETSC_TRUE;
553:       thi->alpha      = 0.5;
554:       break;
555:     case 'C':
556:       thi->initialize = THIInitialize_HOM_C;
557:       thi->no_slip    = PETSC_FALSE;
558:       thi->alpha      = 0.1;
559:       break;
560:     case 'F':
561:       thi->initialize = THIInitialize_HOM_F;
562:       thi->no_slip    = PETSC_FALSE;
563:       thi->alpha      = 0.5;
564:       break;
565:     case 'X':
566:       thi->initialize = THIInitialize_HOM_X;
567:       thi->no_slip    = PETSC_FALSE;
568:       thi->alpha      = 0.3;
569:       break;
570:     case 'Y':
571:       thi->initialize = THIInitialize_HOM_Y;
572:       thi->no_slip    = PETSC_FALSE;
573:       thi->alpha      = 0.5;
574:       break;
575:     case 'Z':
576:       thi->initialize = THIInitialize_HOM_Z;
577:       thi->no_slip    = PETSC_FALSE;
578:       thi->alpha      = 0.5;
579:       break;
580:     default:
581:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
582:     }
583:     PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);
584:     switch (quad) {
585:     case QUAD_GAUSS:
586:       HexQInterp = HexQInterp_Gauss;
587:       HexQDeriv  = HexQDeriv_Gauss;
588:       break;
589:     case QUAD_LOBATTO:
590:       HexQInterp = HexQInterp_Lobatto;
591:       HexQDeriv  = HexQDeriv_Lobatto;
592:       break;
593:     }
594:     PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);
595:     PetscOptionsReal("-thi_viscosity_glen_n","Exponent in Glen flow law, 1=linear, infty=ideal plastic",NULL,thi->viscosity.glen_n,&thi->viscosity.glen_n,NULL);
596:     PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);
597:     thi->friction.exponent = (m-1)/2;
598:     PetscOptionsReal("-thi_erosion_rate","Rate of erosion relative to sliding velocity at reference velocity (m/a)",NULL,thi->erosion.rate,&thi->erosion.rate,NULL);
599:     PetscOptionsReal("-thi_erosion_exponent","Power of sliding velocity appearing in erosion relation",NULL,thi->erosion.exponent,&thi->erosion.exponent,NULL);
600:     PetscOptionsReal("-thi_erosion_refvel","Reference sliding velocity for erosion (m/a)",NULL,thi->erosion.refvel,&thi->erosion.refvel,NULL);
601:     thi->erosion.rate   *= units->meter / units->year;
602:     thi->erosion.refvel *= units->meter / units->year;
603:     PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);
604:     PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);
605:     PetscOptionsReal("-thi_inertia","Coefficient of accelaration term in velocity system, physical is almost zero",NULL,thi->inertia,&thi->inertia,NULL);
606:     PetscOptionsInt("-thi_nlevels","Number of levels of refinement","",thi->nlevels,&thi->nlevels,NULL);
607:     PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);
608:     PetscStrallocpy(mtype,&thi->mattype);
609:     PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);
610:     PetscOptionsString("-thi_monitor","Basename to write state files to",NULL,monitor_basename,monitor_basename,sizeof(monitor_basename),&flg);
611:     if (flg) {
612:       PetscStrallocpy(monitor_basename,&thi->monitor_basename);
613:       thi->monitor_interval = 1;
614:       PetscOptionsInt("-thi_monitor_interval","Frequency at which to write state files",NULL,thi->monitor_interval,&thi->monitor_interval,NULL);
615:     }
616:   }
617:   PetscOptionsEnd();

619:   /* dimensionalize */
620:   thi->Lx    *= units->meter;
621:   thi->Ly    *= units->meter;
622:   thi->Lz    *= units->meter;
623:   thi->alpha *= PETSC_PI / 180;

625:   PRangeClear(&thi->eta);
626:   PRangeClear(&thi->beta2);

628:   {
629:     PetscReal u       = 1000*units->meter/(3e7*units->second),
630:               gradu   = u / (100*units->meter),eta,deta,
631:               rho     = 910 * units->kilogram/PetscPowRealInt(units->meter,3),
632:               grav    = 9.81 * units->meter/PetscSqr(units->second),
633:               driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
634:     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
635:     thi->rhog = rho * grav;
636:     if (thi->verbose) {
637:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",units->meter,units->second,units->kilogram,units->Pascal);
638:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",thi->Lx,thi->Ly,thi->Lz,rho*grav*1e3*units->meter,driving);
639:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",u,gradu,eta,2*eta*gradu,2*eta*gradu/driving);
640:       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
641:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",1e-3*u,1e-3*gradu,eta,2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving);
642:     }
643:   }

645:   *inthi = thi;
646:   return(0);
647: }

651: /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
652:  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
653:  * the horizontal. */
654: static PetscErrorCode THIFixGhosts(THI thi,DM da3,DM da2,Vec X3,Vec X2)
655: {
657:   DMDALocalInfo  info;
658:   PrmNode        **x2;
659:   PetscInt       i,j;

662:   DMDAGetLocalInfo(da3,&info);
663:   /* VecView(X2,PETSC_VIEWER_STDOUT_WORLD); */
664:   DMDAVecGetArray(da2,X2,&x2);
665:   for (i=info.gzs; i<info.gzs+info.gzm; i++) {
666:     if (i > -1 && i < info.mz) continue;
667:     for (j=info.gys; j<info.gys+info.gym; j++) {
668:       x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i<0 ? 1.0 : -1.0);
669:     }
670:   }
671:   DMDAVecRestoreArray(da2,X2,&x2);
672:   /* VecView(X2,PETSC_VIEWER_STDOUT_WORLD); */
673:   return(0);
674: }

678: static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,PrmNode **p)
679: {
680:   PetscInt       i,j,xs,xm,ys,ym,mx,my;

684:   DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);
685:   DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);
686:   for (i=xs; i<xs+xm; i++) {
687:     for (j=ys; j<ys+ym; j++) {
688:       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
689:       thi->initialize(thi,xx,yy,&p[i][j]);
690:     }
691:   }
692:   return(0);
693: }

697: static PetscErrorCode THIInitial(THI thi,DM pack,Vec X)
698: {
699:   DM             da3,da2;
700:   PetscInt       i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
701:   PetscReal      hx,hy;
702:   PrmNode        **prm;
703:   Node           ***x;
704:   Vec            X3g,X2g,X2;

708:   DMCompositeGetEntries(pack,&da3,&da2);
709:   DMCompositeGetAccess(pack,X,&X3g,&X2g);
710:   DMGetLocalVector(da2,&X2);

712:   DMDAGetInfo(da3,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);
713:   DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm);
714:   DMDAVecGetArray(da3,X3g,&x);
715:   DMDAVecGetArray(da2,X2,&prm);

717:   THIInitializePrm(thi,da2,prm);

719:   hx = thi->Lx / mx;
720:   hy = thi->Ly / my;
721:   for (i=xs; i<xs+xm; i++) {
722:     for (j=ys; j<ys+ym; j++) {
723:       for (k=zs; k<zs+zm; k++) {
724:         const PetscScalar zm1      = zm-1,
725:                           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
726:                           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
727:         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
728:         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
729:       }
730:     }
731:   }

733:   DMDAVecRestoreArray(da3,X3g,&x);
734:   DMDAVecRestoreArray(da2,X2,&prm);

736:   DMLocalToGlobalBegin(da2,X2,INSERT_VALUES,X2g);
737:   DMLocalToGlobalEnd  (da2,X2,INSERT_VALUES,X2g);
738:   DMRestoreLocalVector(da2,&X2);

740:   DMCompositeRestoreAccess(pack,X,&X3g,&X2g);
741:   return(0);
742: }

744: static void PointwiseNonlinearity(THI thi,const Node n[restrict 8],const PetscReal phi[restrict 3],PetscReal dphi[restrict 8][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict 3],PetscScalar dv[restrict 3],PetscReal *eta,PetscReal *deta)
745: {
746:   PetscInt    l,ll;
747:   PetscScalar gam;

749:   du[0] = du[1] = du[2] = 0;
750:   dv[0] = dv[1] = dv[2] = 0;
751:   *u    = 0;
752:   *v    = 0;
753:   for (l=0; l<8; l++) {
754:     *u += phi[l] * n[l].u;
755:     *v += phi[l] * n[l].v;
756:     for (ll=0; ll<3; ll++) {
757:       du[ll] += dphi[l][ll] * n[l].u;
758:       dv[ll] += dphi[l][ll] * n[l].v;
759:     }
760:   }
761:   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
762:   THIViscosity(thi,PetscRealPart(gam),eta,deta);
763: }

767: static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const Node ***xdot,Node ***f,THI thi)
768: {
769:   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
770:   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;

774:   xs = info->zs;
775:   ys = info->ys;
776:   xm = info->zm;
777:   ym = info->ym;
778:   zm = info->xm;
779:   hx = thi->Lx / info->mz;
780:   hy = thi->Ly / info->my;

782:   etamin   = 1e100;
783:   etamax   = 0;
784:   beta2min = 1e100;
785:   beta2max = 0;

787:   for (i=xs; i<xs+xm; i++) {
788:     for (j=ys; j<ys+ym; j++) {
789:       PrmNode pn[4],dpn[4][2];
790:       QuadExtract(prm,i,j,pn);
791:       QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn);
792:       for (k=0; k<zm-1; k++) {
793:         PetscInt  ls = 0;
794:         Node      n[8],ndot[8],*fn[8];
795:         PetscReal zn[8],etabase = 0;
796:         PrmHexGetZ(pn,k,zm,zn);
797:         HexExtract(x,i,j,k,n);
798:         HexExtract(xdot,i,j,k,ndot);
799:         HexExtractRef(f,i,j,k,fn);
800:         if (thi->no_slip && k == 0) {
801:           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
802:           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
803:           ls = 4;
804:         }
805:         for (q=0; q<8; q++) {
806:           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
807:           PetscScalar du[3],dv[3],u,v,udot=0,vdot=0;
808:           for (l=ls; l<8; l++) {
809:             udot += HexQInterp[q][l]*ndot[l].u;
810:             vdot += HexQInterp[q][l]*ndot[l].v;
811:           }
812:           HexGrad(HexQDeriv[q],zn,dz);
813:           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
814:           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
815:           jw /= thi->rhog;      /* scales residuals to be O(1) */
816:           if (q == 0) etabase = eta;
817:           RangeUpdate(&etamin,&etamax,eta);
818:           for (l=ls; l<8; l++) { /* test functions */
819:             const PetscScalar ds[2] = {dpn[q%4][0].h+dpn[q%4][0].b, dpn[q%4][1].h+dpn[q%4][1].b};
820:             const PetscReal   pp    = phi[l],*dp = dphi[l];
821:             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
822:             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
823:             fn[l]->u += pp*jw*udot*thi->inertia*pp;
824:             fn[l]->v += pp*jw*vdot*thi->inertia*pp;
825:           }
826:         }
827:         if (k == 0) { /* we are on a bottom face */
828:           if (thi->no_slip) {
829:             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
830:             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
831:             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
832:             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
833:             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
834:             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
835:             * assembled matrix (see the similar block in THIJacobianLocal).
836:             *
837:             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
838:             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
839:             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
840:             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
841:             */
842:             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1.);
843:             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
844:             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
845:             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
846:           } else {              /* Integrate over bottom face to apply boundary condition */
847:             for (q=0; q<4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
848:               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
849:               PetscScalar     u  =0,v=0,rbeta2=0;
850:               PetscReal       beta2,dbeta2;
851:               for (l=0; l<4; l++) {
852:                 u     += phi[l]*n[l].u;
853:                 v     += phi[l]*n[l].v;
854:                 rbeta2 += phi[l]*pn[l].beta2;
855:               }
856:               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
857:               RangeUpdate(&beta2min,&beta2max,beta2);
858:               for (l=0; l<4; l++) {
859:                 const PetscReal pp = phi[l];
860:                 fn[ls+l]->u += pp*jw*beta2*u;
861:                 fn[ls+l]->v += pp*jw*beta2*v;
862:               }
863:             }
864:           }
865:         }
866:       }
867:     }
868:   }

870:   PRangeMinMax(&thi->eta,etamin,etamax);
871:   PRangeMinMax(&thi->beta2,beta2min,beta2max);
872:   return(0);
873: }

877: static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const PrmNode **prmdot,PrmNode **f,THI thi)
878: {
879:   PetscInt xs,ys,xm,ym,zm,i,j,k;

882:   xs = info->zs;
883:   ys = info->ys;
884:   xm = info->zm;
885:   ym = info->ym;
886:   zm = info->xm;

888:   for (i=xs; i<xs+xm; i++) {
889:     for (j=ys; j<ys+ym; j++) {
890:       PetscScalar div = 0,erate,h[8];
891:       PrmNodeGetFaceMeasure(prm,i,j,h);
892:       for (k=0; k<zm; k++) {
893:         PetscScalar weight = (k==0 || k == zm-1) ? 0.5/(zm-1) : 1.0/(zm-1);
894:         if (0) {                /* centered flux */
895:           div += (- weight*h[0] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j-1][k].u,x[i][j-1][k].u)
896:                   - weight*h[1] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j+1][k].u,x[i][j+1][k].u)
897:                   + weight*h[2] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j+1][k].u,x[i][j+1][k].u)
898:                   + weight*h[3] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j-1][k].u,x[i][j-1][k].u)
899:                   - weight*h[4] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i+1][j-1][k].v,x[i+1][j][k].v)
900:                   - weight*h[5] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i-1][j-1][k].v,x[i-1][j][k].v)
901:                   + weight*h[6] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i-1][j+1][k].v,x[i-1][j][k].v)
902:                   + weight*h[7] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i+1][j+1][k].v,x[i+1][j][k].v));
903:         } else {                /* Upwind flux */
904:           div += weight*(-UpwindFluxXW(x,prm,h,i,j,k, 1)
905:                          -UpwindFluxXW(x,prm,h,i,j,k,-1)
906:                          +UpwindFluxXE(x,prm,h,i,j,k, 1)
907:                          +UpwindFluxXE(x,prm,h,i,j,k,-1)
908:                          -UpwindFluxYS(x,prm,h,i,j,k, 1)
909:                          -UpwindFluxYS(x,prm,h,i,j,k,-1)
910:                          +UpwindFluxYN(x,prm,h,i,j,k, 1)
911:                          +UpwindFluxYN(x,prm,h,i,j,k,-1));
912:         }
913:       }
914:       /* printf("div[%d][%d] %g\n",i,j,div); */
915:       THIErosion(thi,&x[i][j][0],&erate,NULL);
916:       f[i][j].b     = prmdot[i][j].b - erate;
917:       f[i][j].h     = prmdot[i][j].h + div;
918:       f[i][j].beta2 = prmdot[i][j].beta2;
919:     }
920:   }
921:   return(0);
922: }

926: static PetscErrorCode THIFunction(TS ts,PetscReal t,Vec X,Vec Xdot,Vec F,void *ctx)
927: {
929:   THI            thi = (THI)ctx;
930:   DM             pack,da3,da2;
931:   Vec            X3,X2,Xdot3,Xdot2,F3,F2,F3g,F2g;
932:   const Node     ***x3,***xdot3;
933:   const PrmNode  **x2,**xdot2;
934:   Node           ***f3;
935:   PrmNode        **f2;
936:   DMDALocalInfo  info3;

939:   TSGetDM(ts,&pack);
940:   DMCompositeGetEntries(pack,&da3,&da2);
941:   DMDAGetLocalInfo(da3,&info3);
942:   DMCompositeGetLocalVectors(pack,&X3,&X2);
943:   DMCompositeGetLocalVectors(pack,&Xdot3,&Xdot2);
944:   DMCompositeScatter(pack,X,X3,X2);
945:   THIFixGhosts(thi,da3,da2,X3,X2);
946:   DMCompositeScatter(pack,Xdot,Xdot3,Xdot2);

948:   DMGetLocalVector(da3,&F3);
949:   DMGetLocalVector(da2,&F2);
950:   VecZeroEntries(F3);

952:   DMDAVecGetArray(da3,X3,&x3);
953:   DMDAVecGetArray(da2,X2,&x2);
954:   DMDAVecGetArray(da3,Xdot3,&xdot3);
955:   DMDAVecGetArray(da2,Xdot2,&xdot2);
956:   DMDAVecGetArray(da3,F3,&f3);
957:   DMDAVecGetArray(da2,F2,&f2);

959:   THIFunctionLocal_3D(&info3,x3,x2,xdot3,f3,thi);
960:   THIFunctionLocal_2D(&info3,x3,x2,xdot2,f2,thi);

962:   DMDAVecRestoreArray(da3,X3,&x3);
963:   DMDAVecRestoreArray(da2,X2,&x2);
964:   DMDAVecRestoreArray(da3,Xdot3,&xdot3);
965:   DMDAVecRestoreArray(da2,Xdot2,&xdot2);
966:   DMDAVecRestoreArray(da3,F3,&f3);
967:   DMDAVecRestoreArray(da2,F2,&f2);

969:   DMCompositeRestoreLocalVectors(pack,&X3,&X2);
970:   DMCompositeRestoreLocalVectors(pack,&Xdot3,&Xdot2);

972:   VecZeroEntries(F);
973:   DMCompositeGetAccess(pack,F,&F3g,&F2g);
974:   DMLocalToGlobalBegin(da3,F3,ADD_VALUES,F3g);
975:   DMLocalToGlobalEnd  (da3,F3,ADD_VALUES,F3g);
976:   DMLocalToGlobalBegin(da2,F2,INSERT_VALUES,F2g);
977:   DMLocalToGlobalEnd  (da2,F2,INSERT_VALUES,F2g);

979:   if (thi->verbose) {
980:     PetscViewer viewer;
981:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi),&viewer);
982:     PetscViewerASCIIPrintf(viewer,"3D_Velocity residual (bs=2):\n");
983:     PetscViewerASCIIPushTab(viewer);
984:     VecView(F3,viewer);
985:     PetscViewerASCIIPopTab(viewer);
986:     PetscViewerASCIIPrintf(viewer,"2D_Fields residual (bs=3):\n");
987:     PetscViewerASCIIPushTab(viewer);
988:     VecView(F2,viewer);
989:     PetscViewerASCIIPopTab(viewer);
990:   }

992:   DMCompositeRestoreAccess(pack,F,&F3g,&F2g);

994:   DMRestoreLocalVector(da3,&F3);
995:   DMRestoreLocalVector(da2,&F2);
996:   return(0);
997: }

1001: static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
1002: {
1004:   PetscReal      nrm;
1005:   PetscInt       m;
1006:   PetscMPIInt    rank;

1009:   MatNorm(B,NORM_FROBENIUS,&nrm);
1010:   MatGetSize(B,&m,0);
1011:   MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);
1012:   if (!rank) {
1013:     PetscScalar val0,val2;
1014:     MatGetValue(B,0,0,&val0);
1015:     MatGetValue(B,2,2,&val2);
1016:     PetscViewerASCIIPrintf(viewer,"Matrix dim %8d  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n",m,nrm,PetscRealPart(val0),PetscRealPart(val2),thi->eta.cmin,thi->eta.cmax,thi->beta2.cmin,thi->beta2.cmax);
1017:   }
1018:   return(0);
1019: }

1023: static PetscErrorCode THISurfaceStatistics(DM pack,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
1024: {
1026:   DM             da3,da2;
1027:   Vec            X3,X2;
1028:   Node           ***x;
1029:   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
1030:   PetscReal      umin = 1e100,umax=-1e100;
1031:   PetscScalar    usum =0.0,gusum;

1034:   DMCompositeGetEntries(pack,&da3,&da2);
1035:   DMCompositeGetAccess(pack,X,&X3,&X2);
1036:   *min = *max = *mean = 0;
1037:   DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1038:   DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm);
1039:   if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_SELF,1,"Unexpected decomposition");
1040:   DMDAVecGetArray(da3,X3,&x);
1041:   for (i=xs; i<xs+xm; i++) {
1042:     for (j=ys; j<ys+ym; j++) {
1043:       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
1044:       RangeUpdate(&umin,&umax,u);
1045:       usum += u;
1046:     }
1047:   }
1048:   DMDAVecRestoreArray(da3,X3,&x);
1049:   DMCompositeRestoreAccess(pack,X,&X3,&X2);

1051:   MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da3));
1052:   MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da3));
1053:   MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da3));
1054:   *mean = PetscRealPart(gusum) / (mx*my);
1055:   return(0);
1056: }

1060: static PetscErrorCode THISolveStatistics(THI thi,TS ts,PetscInt coarsened,const char name[])
1061: {
1062:   MPI_Comm       comm;
1063:   DM             pack;
1064:   Vec            X,X3,X2;

1068:   PetscObjectGetComm((PetscObject)thi,&comm);
1069:   TSGetDM(ts,&pack);
1070:   TSGetSolution(ts,&X);
1071:   DMCompositeGetAccess(pack,X,&X3,&X2);
1072:   PetscPrintf(comm,"Solution statistics after solve: %s\n",name);
1073:   {
1074:     PetscInt            its,lits;
1075:     SNESConvergedReason reason;
1076:     SNES                snes;
1077:     TSGetSNES(ts,&snes);
1078:     SNESGetIterationNumber(snes,&its);
1079:     SNESGetConvergedReason(snes,&reason);
1080:     SNESGetLinearSolveIterations(snes,&lits);
1081:     PetscPrintf(comm,"%s: Number of SNES iterations = %d, total linear iterations = %d\n",SNESConvergedReasons[reason],its,lits);
1082:   }
1083:   {
1084:     PetscReal   nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
1085:     PetscInt    i,j,m;
1086:     PetscScalar *x;
1087:     VecNorm(X3,NORM_2,&nrm2);
1088:     VecGetLocalSize(X3,&m);
1089:     VecGetArray(X3,&x);
1090:     for (i=0; i<m; i+=2) {
1091:       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
1092:       tmin[0] = PetscMin(u,tmin[0]);
1093:       tmin[1] = PetscMin(v,tmin[1]);
1094:       tmin[2] = PetscMin(c,tmin[2]);
1095:       tmax[0] = PetscMax(u,tmax[0]);
1096:       tmax[1] = PetscMax(v,tmax[1]);
1097:       tmax[2] = PetscMax(c,tmax[2]);
1098:     }
1099:     VecRestoreArray(X,&x);
1100:     MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));
1101:     MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));
1102:     /* Dimensionalize to meters/year */
1103:     nrm2 *= thi->units->year / thi->units->meter;
1104:     for (j=0; j<3; j++) {
1105:       min[j] *= thi->units->year / thi->units->meter;
1106:       max[j] *= thi->units->year / thi->units->meter;
1107:     }
1108:     PetscPrintf(comm,"|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n",nrm2,min[0],max[0],min[1],max[1],min[2],max[2]);
1109:     {
1110:       PetscReal umin,umax,umean;
1111:       THISurfaceStatistics(pack,X,&umin,&umax,&umean);
1112:       umin  *= thi->units->year / thi->units->meter;
1113:       umax  *= thi->units->year / thi->units->meter;
1114:       umean *= thi->units->year / thi->units->meter;
1115:       PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",umin,umax,umean);
1116:     }
1117:     /* These values stay nondimensional */
1118:     PetscPrintf(comm,"Global eta range   [%g, %g], converged range [%g, %g]\n",thi->eta.min,thi->eta.max,thi->eta.cmin,thi->eta.cmax);
1119:     PetscPrintf(comm,"Global beta2 range [%g, %g], converged range [%g, %g]\n",thi->beta2.min,thi->beta2.max,thi->beta2.cmin,thi->beta2.cmax);
1120:   }
1121:   PetscPrintf(comm,"\n");
1122:   DMCompositeRestoreAccess(pack,X,&X3,&X2);
1123:   return(0);
1124: }

1126: static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info,PetscInt i,PetscInt j,PetscInt k)
1127: {return ((i-info->gzs)*info->gym + (j-info->gys))*info->gxm + (k-info->gxs);}
1128: static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info,PetscInt i,PetscInt j)
1129: {return (i-info->gzs)*info->gym + (j-info->gys);}

1133: static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,Mat B,Mat Bcpl,THI thi)
1134: {
1135:   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1136:   PetscReal      hx,hy;

1140:   xs = info->zs;
1141:   ys = info->ys;
1142:   xm = info->zm;
1143:   ym = info->ym;
1144:   zm = info->xm;
1145:   hx = thi->Lx / info->mz;
1146:   hy = thi->Ly / info->my;

1148:   for (i=xs; i<xs+xm; i++) {
1149:     for (j=ys; j<ys+ym; j++) {
1150:       PrmNode pn[4],dpn[4][2];
1151:       QuadExtract(prm,i,j,pn);
1152:       QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn);
1153:       for (k=0; k<zm-1; k++) {
1154:         Node        n[8];
1155:         PetscReal   zn[8],etabase = 0;
1156:         PetscScalar Ke[8*NODE_SIZE][8*NODE_SIZE],Kcpl[8*NODE_SIZE][4*PRMNODE_SIZE];
1157:         PetscInt    ls = 0;

1159:         PrmHexGetZ(pn,k,zm,zn);
1160:         HexExtract(x,i,j,k,n);
1161:         PetscMemzero(Ke,sizeof(Ke));
1162:         PetscMemzero(Kcpl,sizeof(Kcpl));
1163:         if (thi->no_slip && k == 0) {
1164:           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1165:           ls = 4;
1166:         }
1167:         for (q=0; q<8; q++) {
1168:           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
1169:           PetscScalar du[3],dv[3],u,v;
1170:           HexGrad(HexQDeriv[q],zn,dz);
1171:           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1172:           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1173:           jw /= thi->rhog;      /* residuals are scaled by this factor */
1174:           if (q == 0) etabase = eta;
1175:           for (l=ls; l<8; l++) { /* test functions */
1176:             const PetscReal pp=phi[l],*restrict dp = dphi[l];
1177:             for (ll=ls; ll<8; ll++) { /* trial functions */
1178:               const PetscReal *restrict dpl = dphi[ll];
1179:               PetscScalar dgdu,dgdv;
1180:               dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1181:               dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1182:               /* Picard part */
1183:               Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1184:               Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1185:               Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1186:               Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1187:               /* extra Newton terms */
1188:               Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1189:               Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1190:               Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1191:               Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1192:               /* inertial part */
1193:               Ke[l*2+0][ll*2+0] += pp*jw*thi->inertia*pp;
1194:               Ke[l*2+1][ll*2+1] += pp*jw*thi->inertia*pp;
1195:             }
1196:             for (ll=0; ll<4; ll++) { /* Trial functions for surface/bed */
1197:               const PetscReal dpl[] = {QuadQDeriv[q%4][ll][0]/hx, QuadQDeriv[q%4][ll][1]/hy}; /* surface = h + b */
1198:               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[0];
1199:               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[0];
1200:               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[1];
1201:               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[1];
1202:             }
1203:           }
1204:         }
1205:         if (k == 0) { /* on a bottom face */
1206:           if (thi->no_slip) {
1207:             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1);
1208:             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1209:             Ke[0][0] = thi->dirichlet_scale*diagu;
1210:             Ke[0][1] = 0;
1211:             Ke[1][0] = 0;
1212:             Ke[1][1] = thi->dirichlet_scale*diagv;
1213:           } else {
1214:             for (q=0; q<4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1215:               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
1216:               PetscScalar     u  =0,v=0,rbeta2=0;
1217:               PetscReal       beta2,dbeta2;
1218:               for (l=0; l<4; l++) {
1219:                 u      += phi[l]*n[l].u;
1220:                 v      += phi[l]*n[l].v;
1221:                 rbeta2 += phi[l]*pn[l].beta2;
1222:               }
1223:               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1224:               for (l=0; l<4; l++) {
1225:                 const PetscReal pp = phi[l];
1226:                 for (ll=0; ll<4; ll++) {
1227:                   const PetscReal ppl = phi[ll];
1228:                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1229:                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1230:                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1231:                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1232:                 }
1233:               }
1234:             }
1235:           }
1236:         }
1237:         {
1238:           const PetscInt rc3blocked[8] = {
1239:             DMDALocalIndex3D(info,i+0,j+0,k+0),
1240:             DMDALocalIndex3D(info,i+1,j+0,k+0),
1241:             DMDALocalIndex3D(info,i+1,j+1,k+0),
1242:             DMDALocalIndex3D(info,i+0,j+1,k+0),
1243:             DMDALocalIndex3D(info,i+0,j+0,k+1),
1244:             DMDALocalIndex3D(info,i+1,j+0,k+1),
1245:             DMDALocalIndex3D(info,i+1,j+1,k+1),
1246:             DMDALocalIndex3D(info,i+0,j+1,k+1)
1247:           },col2blocked[PRMNODE_SIZE*4] = {
1248:             DMDALocalIndex2D(info,i+0,j+0),
1249:             DMDALocalIndex2D(info,i+1,j+0),
1250:             DMDALocalIndex2D(info,i+1,j+1),
1251:             DMDALocalIndex2D(info,i+0,j+1)
1252:           };
1253: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1254:           for (l=0; l<8; l++) {
1255:             for (ll=l+1; ll<8; ll++) {
1256:               Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1257:               Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1258:               Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1259:               Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1260:             }
1261:           }
1262: #endif
1263:           MatSetValuesBlockedLocal(B,8,rc3blocked,8,rc3blocked,&Ke[0][0],ADD_VALUES); /* velocity-velocity coupling can use blocked insertion */
1264:           {                     /* The off-diagonal part cannot (yet) */
1265:             PetscInt row3scalar[NODE_SIZE*8],col2scalar[PRMNODE_SIZE*4];
1266:             for (l=0; l<8; l++) for (ll=0; ll<NODE_SIZE; ll++) row3scalar[l*NODE_SIZE+ll] = rc3blocked[l]*NODE_SIZE+ll;
1267:             for (l=0; l<4; l++) for (ll=0; ll<PRMNODE_SIZE; ll++) col2scalar[l*PRMNODE_SIZE+ll] = col2blocked[l]*PRMNODE_SIZE+ll;
1268:             MatSetValuesLocal(Bcpl,8*NODE_SIZE,row3scalar,4*PRMNODE_SIZE,col2scalar,&Kcpl[0][0],ADD_VALUES);
1269:           }
1270:         }
1271:       }
1272:     }
1273:   }
1274:   return(0);
1275: }

1279: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,const Node ***x3,const PrmNode **x2,const PrmNode **xdot2,PetscReal a,Mat B22,Mat B21,THI thi)
1280: {
1282:   PetscInt       xs,ys,xm,ym,zm,i,j,k;

1285:   xs = info->zs;
1286:   ys = info->ys;
1287:   xm = info->zm;
1288:   ym = info->ym;
1289:   zm = info->xm;

1291:   if (zm > 1024) SETERRQ(((PetscObject)info->da)->comm,PETSC_ERR_SUP,"Need to allocate more space");
1292:   for (i=xs; i<xs+xm; i++) {
1293:     for (j=ys; j<ys+ym; j++) {
1294:       {                         /* Self-coupling */
1295:         const PetscInt    row[]  = {DMDALocalIndex2D(info,i,j)};
1296:         const PetscInt    col[]  = {DMDALocalIndex2D(info,i,j)};
1297:         const PetscScalar vals[] = {
1298:           a,0,0,
1299:           0,a,0,
1300:           0,0,a
1301:         };
1302:         MatSetValuesBlockedLocal(B22,1,row,1,col,vals,INSERT_VALUES);
1303:       }
1304:       for (k=0; k<zm; k++) {    /* Coupling to velocity problem */
1305:         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1306:         const PetscInt row[]  = {FieldIndex(PrmNode,DMDALocalIndex2D(info,i,j),h)};
1307:         const PetscInt cols[] = {
1308:           FieldIndex(Node,DMDALocalIndex3D(info,i-1,j,k),u),
1309:           FieldIndex(Node,DMDALocalIndex3D(info,i  ,j,k),u),
1310:           FieldIndex(Node,DMDALocalIndex3D(info,i+1,j,k),u),
1311:           FieldIndex(Node,DMDALocalIndex3D(info,i,j-1,k),v),
1312:           FieldIndex(Node,DMDALocalIndex3D(info,i,j  ,k),v),
1313:           FieldIndex(Node,DMDALocalIndex3D(info,i,j+1,k),v)
1314:         };
1315:         const PetscScalar
1316:           w  = (k && k<zm-1) ? 0.5 : 0.25,
1317:           hW = w*(x2[i-1][j  ].h+x2[i  ][j  ].h)/(zm-1.),
1318:           hE = w*(x2[i  ][j  ].h+x2[i+1][j  ].h)/(zm-1.),
1319:           hS = w*(x2[i  ][j-1].h+x2[i  ][j  ].h)/(zm-1.),
1320:           hN = w*(x2[i  ][j  ].h+x2[i  ][j+1].h)/(zm-1.);
1321:         PetscScalar *vals,
1322:                      vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0),
1323:                                       ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW),
1324:                                       ((PetscRealPart(x3[i][j][k].u) > 0) ?  0  : +hE),
1325:                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0),
1326:                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS),
1327:                                       ((PetscRealPart(x3[i][j][k].v) > 0) ?  0  : +hN)},
1328:                      vals_centered[] = {-0.5*hW, 0.5*(-hW+hE), 0.5*hE,
1329:                                         -0.5*hS, 0.5*(-hS+hN), 0.5*hN};
1330:         vals = 1 ? vals_upwind : vals_centered;
1331:         if (k == 0) {
1332:           Node derate;
1333:           THIErosion(thi,&x3[i][j][0],NULL,&derate);
1334:           vals[1] -= derate.u;
1335:           vals[4] -= derate.v;
1336:         }
1337:         MatSetValuesLocal(B21,1,row,6,cols,vals,INSERT_VALUES);
1338:       }
1339:     }
1340:   }
1341:   return(0);
1342: }

1346: static PetscErrorCode THIJacobian(TS ts,PetscReal t,Vec X,Vec Xdot,PetscReal a,Mat A,Mat B,void *ctx)
1347: {
1349:   THI            thi = (THI)ctx;
1350:   DM             pack,da3,da2;
1351:   Vec            X3,X2,Xdot2;
1352:   Mat            B11,B12,B21,B22;
1353:   DMDALocalInfo  info3;
1354:   IS             *isloc;
1355:   const Node     ***x3;
1356:   const PrmNode  **x2,**xdot2;

1359:   TSGetDM(ts,&pack);
1360:   DMCompositeGetEntries(pack,&da3,&da2);
1361:   DMDAGetLocalInfo(da3,&info3);
1362:   DMCompositeGetLocalVectors(pack,&X3,&X2);
1363:   DMCompositeGetLocalVectors(pack,NULL,&Xdot2);
1364:   DMCompositeScatter(pack,X,X3,X2);
1365:   THIFixGhosts(thi,da3,da2,X3,X2);
1366:   DMCompositeScatter(pack,Xdot,NULL,Xdot2);

1368:   MatZeroEntries(B);

1370:   DMCompositeGetLocalISs(pack,&isloc);
1371:   MatGetLocalSubMatrix(B,isloc[0],isloc[0],&B11);
1372:   MatGetLocalSubMatrix(B,isloc[0],isloc[1],&B12);
1373:   MatGetLocalSubMatrix(B,isloc[1],isloc[0],&B21);
1374:   MatGetLocalSubMatrix(B,isloc[1],isloc[1],&B22);

1376:   DMDAVecGetArray(da3,X3,&x3);
1377:   DMDAVecGetArray(da2,X2,&x2);
1378:   DMDAVecGetArray(da2,Xdot2,&xdot2);

1380:   THIJacobianLocal_Momentum(&info3,x3,x2,B11,B12,thi);

1382:   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1383:   MatAssemblyBegin(*B,MAT_FLUSH_ASSEMBLY);
1384:   MatAssemblyEnd(*B,MAT_FLUSH_ASSEMBLY);

1386:   THIJacobianLocal_2D(&info3,x3,x2,xdot2,a,B22,B21,thi);

1388:   DMDAVecRestoreArray(da3,X3,&x3);
1389:   DMDAVecRestoreArray(da2,X2,&x2);
1390:   DMDAVecRestoreArray(da2,Xdot2,&xdot2);

1392:   MatRestoreLocalSubMatrix(B,isloc[0],isloc[0],&B11);
1393:   MatRestoreLocalSubMatrix(B,isloc[0],isloc[1],&B12);
1394:   MatRestoreLocalSubMatrix(B,isloc[1],isloc[0],&B21);
1395:   MatRestoreLocalSubMatrix(B,isloc[1],isloc[1],&B22);
1396:   ISDestroy(&isloc[0]);
1397:   ISDestroy(&isloc[1]);
1398:   PetscFree(isloc);

1400:   DMCompositeRestoreLocalVectors(pack,&X3,&X2);
1401:   DMCompositeRestoreLocalVectors(pack,0,&Xdot2);

1403:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1404:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1405:   if (A != B) {
1406:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1407:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1408:   }
1409:   if (thi->verbose) {THIMatrixStatistics(thi,*B,PETSC_VIEWER_STDOUT_WORLD);}
1410:   return(0);
1411: }

1415: /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1416:  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1417:  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1418:  */
1419: static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM pack,Vec X,const char filename[],const char filename2[])
1420: {
1421:   const PetscInt dof   = NODE_SIZE,dof2 = PRMNODE_SIZE;
1422:   Units          units = thi->units;
1423:   MPI_Comm       comm;
1425:   PetscViewer    viewer3,viewer2;
1426:   PetscMPIInt    rank,size,tag,nn,nmax,nn2,nmax2;
1427:   PetscInt       mx,my,mz,r,range[6];
1428:   PetscScalar    *x,*x2;
1429:   DM             da3,da2;
1430:   Vec            X3,X2;

1433:   PetscObjectGetComm((PetscObject)thi,&comm);
1434:   DMCompositeGetEntries(pack,&da3,&da2);
1435:   DMCompositeGetAccess(pack,X,&X3,&X2);
1436:   DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1437:   MPI_Comm_size(comm,&size);
1438:   MPI_Comm_rank(comm,&rank);
1439:   PetscViewerASCIIOpen(comm,filename,&viewer3);
1440:   PetscViewerASCIIOpen(comm,filename2,&viewer2);
1441:   PetscViewerASCIIPrintf(viewer3,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1442:   PetscViewerASCIIPrintf(viewer2,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1443:   PetscViewerASCIIPrintf(viewer3,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,mz-1,0,my-1,0,mx-1);
1444:   PetscViewerASCIIPrintf(viewer2,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,0,0,my-1,0,mx-1);

1446:   DMDAGetCorners(da3,range,range+1,range+2,range+3,range+4,range+5);
1447:   PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);
1448:   MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);
1449:   PetscMPIIntCast(range[4]*range[5]*dof2,&nn2);
1450:   MPI_Reduce(&nn2,&nmax2,1,MPI_INT,MPI_MAX,0,comm);
1451:   tag  = ((PetscObject)viewer3)->tag;
1452:   VecGetArray(X3,&x);
1453:   VecGetArray(X2,&x2);
1454:   if (!rank) {
1455:     PetscScalar *array,*array2;
1456:     PetscMalloc2(nmax,&array,nmax2,&array2);
1457:     for (r=0; r<size; r++) {
1458:       PetscInt i,j,k,f,xs,xm,ys,ym,zs,zm;
1459:       Node     *y3;
1460:       PetscScalar (*y2)[PRMNODE_SIZE];
1461:       MPI_Status status;
1462:       if (r) {
1463:         MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);
1464:       }
1465:       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1466:       if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,1,"should not happen");
1467:       if (r) {
1468:         MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);
1469:         MPI_Get_count(&status,MPIU_SCALAR,&nn);
1470:         if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,1,"corrupt da3 send");
1471:         y3   = (Node*)array;
1472:         MPI_Recv(array2,nmax2,MPIU_SCALAR,r,tag,comm,&status);
1473:         MPI_Get_count(&status,MPIU_SCALAR,&nn2);
1474:         if (nn2 != xm*ym*dof2) SETERRQ(PETSC_COMM_SELF,1,"corrupt da2 send");
1475:         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1476:       } else {
1477:         y3 = (Node*)x;
1478:         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1479:       }
1480:       PetscViewerASCIIPrintf(viewer3,"    <Piece Extent=\"%d %d %d %d %d %d\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);
1481:       PetscViewerASCIIPrintf(viewer2,"    <Piece Extent=\"%d %d %d %d %d %d\">\n",0,0,ys,ys+ym-1,xs,xs+xm-1);

1483:       PetscViewerASCIIPrintf(viewer3,"      <Points>\n");
1484:       PetscViewerASCIIPrintf(viewer2,"      <Points>\n");
1485:       PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1486:       PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1487:       for (i=xs; i<xs+xm; i++) {
1488:         for (j=ys; j<ys+ym; j++) {
1489:           PetscReal
1490:             xx = thi->Lx*i/mx,
1491:             yy = thi->Ly*j/my,
1492:             b  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,b)]),
1493:             h  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,h)]);
1494:           for (k=zs; k<zs+zm; k++) {
1495:             PetscReal zz = b + h*k/(mz-1.);
1496:             PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",xx,yy,zz);
1497:           }
1498:           PetscViewerASCIIPrintf(viewer2,"%f %f %f\n",xx,yy,(double)0.0);
1499:         }
1500:       }
1501:       PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");
1502:       PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n");
1503:       PetscViewerASCIIPrintf(viewer3,"      </Points>\n");
1504:       PetscViewerASCIIPrintf(viewer2,"      </Points>\n");

1506:       {                         /* Velocity and rank (3D) */
1507:         PetscViewerASCIIPrintf(viewer3,"      <PointData>\n");
1508:         PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1509:         for (i=0; i<nn/dof; i++) {
1510:           PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",PetscRealPart(y3[i].u)*units->year/units->meter,PetscRealPart(y3[i].v)*units->year/units->meter,0.0);
1511:         }
1512:         PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");

1514:         PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");
1515:         for (i=0; i<nn; i+=dof) {
1516:           PetscViewerASCIIPrintf(viewer3,"%d\n",r);
1517:         }
1518:         PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");
1519:         PetscViewerASCIIPrintf(viewer3,"      </PointData>\n");
1520:       }

1522:       {                         /* 2D */
1523:         PetscViewerASCIIPrintf(viewer2,"      <PointData>\n");
1524:         for (f=0; f<PRMNODE_SIZE; f++) {
1525:           const char *fieldname;
1526:           DMDAGetFieldName(da2,f,&fieldname);
1527:           PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n",fieldname);
1528:           for (i=0; i<nn2/PRMNODE_SIZE; i++) {
1529:             PetscViewerASCIIPrintf(viewer2,"%g\n",y2[i][f]);
1530:           }
1531:           PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n");
1532:         }
1533:         PetscViewerASCIIPrintf(viewer2,"      </PointData>\n");
1534:       }

1536:       PetscViewerASCIIPrintf(viewer3,"    </Piece>\n");
1537:       PetscViewerASCIIPrintf(viewer2,"    </Piece>\n");
1538:     }
1539:     PetscFree2(array,array2);
1540:   } else {
1541:     MPI_Send(range,6,MPIU_INT,0,tag,comm);
1542:     MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm);
1543:     MPI_Send(x2,nn2,MPIU_SCALAR,0,tag,comm);
1544:   }
1545:   VecRestoreArray(X3,&x);
1546:   VecRestoreArray(X2,&x2);
1547:   PetscViewerASCIIPrintf(viewer3,"  </StructuredGrid>\n");
1548:   PetscViewerASCIIPrintf(viewer2,"  </StructuredGrid>\n");

1550:   DMCompositeRestoreAccess(pack,X,&X3,&X2);
1551:   PetscViewerASCIIPrintf(viewer3,"</VTKFile>\n");
1552:   PetscViewerASCIIPrintf(viewer2,"</VTKFile>\n");
1553:   PetscViewerDestroy(&viewer3);
1554:   PetscViewerDestroy(&viewer2);
1555:   return(0);
1556: }

1560: static PetscErrorCode THITSMonitor(TS ts,PetscInt step,PetscReal t,Vec X,void *ctx)
1561: {
1563:   THI            thi = (THI)ctx;
1564:   DM             pack;
1565:   char           filename3[PETSC_MAX_PATH_LEN],filename2[PETSC_MAX_PATH_LEN];

1568:   PetscPrintf(PetscObjectComm((PetscObject)ts),"%3D: t=%g\n",step,(double)t);
1569:   if (thi->monitor_interval && step % thi->monitor_interval) return(0);
1570:   TSGetDM(ts,&pack);
1571:   PetscSNPrintf(filename3,sizeof(filename3),"%s-3d-%03d.vts",thi->monitor_basename,step);
1572:   PetscSNPrintf(filename2,sizeof(filename2),"%s-2d-%03d.vts",thi->monitor_basename,step);
1573:   THIDAVecView_VTK_XML(thi,pack,X,filename3,filename2);
1574:   return(0);
1575: }


1580: static PetscErrorCode THICreateDM3d(THI thi,DM *dm3d)
1581: {
1582:   MPI_Comm       comm;
1583:   PetscInt       M    = 3,N = 3,P = 2;
1584:   DM             da;

1588:   PetscObjectGetComm((PetscObject)thi,&comm);
1589:   PetscOptionsBegin(comm,NULL,"Grid resolution options","");
1590:   {
1591:     PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);
1592:     N    = M;
1593:     PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);
1594:     PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);
1595:   }
1596:   PetscOptionsEnd();
1597:   DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);
1598:   DMDASetFieldName(da,0,"x-velocity");
1599:   DMDASetFieldName(da,1,"y-velocity");
1600:   *dm3d = da;
1601:   return(0);
1602: }

1606: int main(int argc,char *argv[])
1607: {
1608:   MPI_Comm       comm;
1609:   DM             pack,da3,da2;
1610:   TS             ts;
1611:   THI            thi;
1612:   Vec            X;
1613:   Mat            B;
1614:   PetscInt       i,steps;
1615:   PetscReal      ftime;

1618:   PetscInitialize(&argc,&argv,0,help);
1619:   comm = PETSC_COMM_WORLD;

1621:   THICreate(comm,&thi);
1622:   THICreateDM3d(thi,&da3);
1623:   {
1624:     PetscInt        Mx,My,mx,my,s;
1625:     DMDAStencilType st;
1626:     DMDAGetInfo(da3,0, 0,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);
1627:     DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2);
1628:   }

1630:   PetscObjectSetName((PetscObject)da3,"3D_Velocity");
1631:   DMSetOptionsPrefix(da3,"f3d_");
1632:   DMDASetFieldName(da3,0,"u");
1633:   DMDASetFieldName(da3,1,"v");
1634:   PetscObjectSetName((PetscObject)da2,"2D_Fields");
1635:   DMSetOptionsPrefix(da2,"f2d_");
1636:   DMDASetFieldName(da2,0,"b");
1637:   DMDASetFieldName(da2,1,"h");
1638:   DMDASetFieldName(da2,2,"beta2");
1639:   DMCompositeCreate(comm,&pack);
1640:   DMCompositeAddDM(pack,da3);
1641:   DMCompositeAddDM(pack,da2);
1642:   DMDestroy(&da3);
1643:   DMDestroy(&da2);
1644:   DMSetUp(pack);
1645:   DMCreateMatrix(pack,&B);
1646:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1647:   MatSetOptionsPrefix(B,"thi_");

1649:   for (i=0; i<thi->nlevels; i++) {
1650:     PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
1651:     PetscInt  Mx,My,Mz;
1652:     DMCompositeGetEntries(pack,&da3,&da2);
1653:     DMDAGetInfo(da3,0, &Mz,&My,&Mx, 0,0,0, 0,0,0,0,0,0);
1654:     PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %d domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",i,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1));
1655:   }

1657:   DMCreateGlobalVector(pack,&X);
1658:   THIInitial(thi,pack,X);

1660:   TSCreate(comm,&ts);
1661:   TSSetDM(ts,pack);
1662:   TSSetProblemType(ts,TS_NONLINEAR);
1663:   TSMonitorSet(ts,THITSMonitor,thi,NULL);
1664:   TSSetType(ts,TSTHETA);
1665:   TSSetIFunction(ts,NULL,THIFunction,thi);
1666:   TSSetIJacobian(ts,B,B,THIJacobian,thi);
1667:   TSSetDuration(ts,100,10.0);
1668:   TSSetSolution(ts,X);
1669:   TSSetInitialTimeStep(ts,0.,1e-3);
1670:   TSSetFromOptions(ts);

1672:   TSSolve(ts,X);
1673:   TSGetSolveTime(ts,&ftime);
1674:   TSGetTimeStepNumber(ts,&steps);
1675:   PetscPrintf(PETSC_COMM_WORLD,"Steps %D  final time %g\n",steps,(double)ftime);

1677:   if (0) {THISolveStatistics(thi,ts,0,"Full");}

1679:   {
1680:     PetscBool flg;
1681:     char      filename[PETSC_MAX_PATH_LEN] = "";
1682:     PetscOptionsGetString(NULL,"-o",filename,sizeof(filename),&flg);
1683:     if (flg) {
1684:       THIDAVecView_VTK_XML(thi,pack,X,filename,NULL);
1685:     }
1686:   }

1688:   VecDestroy(&X);
1689:   MatDestroy(&B);
1690:   DMDestroy(&pack);
1691:   TSDestroy(&ts);
1692:   THIDestroy(&thi);
1693:   PetscFinalize();
1694:   return 0;
1695: }