Actual source code: ex48.c

petsc-master 2016-12-07
Report Typos and Errors
  1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
  2: \n\
  3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
  4: using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
  5: to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
  6: boundary conditions in the x- and y-directions.\n\
  7: \n\
  8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
  9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
 10: \n\
 11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
 12: \n\n";

 14: /*
 15: The equations for horizontal velocity (u,v) are

 17:   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
 18:   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0

 20: where

 22:   eta = B/2 (epsilon + gamma)^((p-2)/2)

 24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
 25: written in terms of the second invariant

 27:   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2

 29: The surface boundary conditions are the natural conditions.  The basal boundary conditions
 30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.

 32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).

 34: The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
 35: map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
 36: the Jacobian of the coordinate transformation from a reference element to the physical element.

 38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
 39: specially so that columns are never distributed, and are always contiguous in memory.
 40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
 41: and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
 42: use compatible domain decomposition relative to the 3D DMDAs.

 44: There are two compile-time options:

 46:   NO_SSE2:
 47:     If the host supports SSE2, we use integration code that has been vectorized with SSE2
 48:     intrinsics, unless this macro is defined.  The intrinsics speed up integration by about
 49:     30% on my architecture (P8700, gcc-4.5 snapshot).

 51:   COMPUTE_LOWER_TRIANGULAR:
 52:     The element matrices we assemble are lower-triangular so it is not necessary to compute
 53:     all entries explicitly.  If this macro is defined, the lower-triangular entries are
 54:     computed explicitly.

 56: */

 58: #if defined(PETSC_APPLE_FRAMEWORK)
 59: #import <PETSc/petscsnes.h>
 60: #import <PETSc/petsc/private/dmdaimpl.h>     /* There is not yet a public interface to manipulate dm->ops */
 61: #else
 62:  #include <petscsnes.h>
 63: #include <petsc/private/dmdaimpl.h>     /* There is not yet a public interface to manipulate dm->ops */
 64: #endif
 65: #include <ctype.h>              /* toupper() */

 67: #if defined(__cplusplus)
 68: /*  c++ cannot handle  [_restrict_] notation like C does */
 69: #undef PETSC_RESTRICT
 70: #define PETSC_RESTRICT
 71: #endif

 73: #if defined __SSE2__
 74: #  include <emmintrin.h>
 75: #endif

 77: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
 78: #if !defined NO_SSE2                           \
 79:      && !defined PETSC_USE_COMPLEX             \
 80:      && !defined PETSC_USE_REAL_SINGLE         \
 81:      && !defined PETSC_USE_REAL___FLOAT128     \
 82:      && defined __SSE2__
 83: #define USE_SSE2_KERNELS 1
 84: #else
 85: #define USE_SSE2_KERNELS 0
 86: #endif

 88: static PetscClassId THI_CLASSID;

 90: typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
 91: static const char      *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
 92: PETSC_UNUSED static const PetscReal HexQWeights[8]     = {1,1,1,1,1,1,1,1};
 93: PETSC_UNUSED static const PetscReal HexQNodes[]        = {-0.57735026918962573, 0.57735026918962573};
 94: #define G 0.57735026918962573
 95: #define H (0.5*(1.+G))
 96: #define L (0.5*(1.-G))
 97: #define M (-0.5)
 98: #define P (0.5)
 99: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
100: static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
101:                                                    {0,H,0,0,0,L,0,0},
102:                                                    {0,0,H,0,0,0,L,0},
103:                                                    {0,0,0,H,0,0,0,L},
104:                                                    {L,0,0,0,H,0,0,0},
105:                                                    {0,L,0,0,0,H,0,0},
106:                                                    {0,0,L,0,0,0,H,0},
107:                                                    {0,0,0,L,0,0,0,H}};
108: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
109:   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
110:   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
111:   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
112:   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
113:   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
114:   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
115:   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
116:   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
117: /* Stanndard Gauss */
118: static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
119:                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
120:                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
121:                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
122:                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
123:                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
124:                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
125:                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
126: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
127:   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
128:   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
129:   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
130:   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
131:   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
132:   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
133:   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
134:   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
135: static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
136: /* Standard 2x2 Gauss quadrature for the bottom layer. */
137: static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
138:                                             {L*H,H*H,H*L,L*L},
139:                                             {L*L,H*L,H*H,L*H},
140:                                             {H*L,L*L,L*H,H*H}};
141: static const PetscReal QuadQDeriv[4][4][2] = {
142:   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
143:   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
144:   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
145:   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
146: #undef G
147: #undef H
148: #undef L
149: #undef M
150: #undef P

152: #define HexExtract(x,i,j,k,n) do {              \
153:     (n)[0] = (x)[i][j][k];                      \
154:     (n)[1] = (x)[i+1][j][k];                    \
155:     (n)[2] = (x)[i+1][j+1][k];                  \
156:     (n)[3] = (x)[i][j+1][k];                    \
157:     (n)[4] = (x)[i][j][k+1];                    \
158:     (n)[5] = (x)[i+1][j][k+1];                  \
159:     (n)[6] = (x)[i+1][j+1][k+1];                \
160:     (n)[7] = (x)[i][j+1][k+1];                  \
161:   } while (0)

163: #define HexExtractRef(x,i,j,k,n) do {           \
164:     (n)[0] = &(x)[i][j][k];                     \
165:     (n)[1] = &(x)[i+1][j][k];                   \
166:     (n)[2] = &(x)[i+1][j+1][k];                 \
167:     (n)[3] = &(x)[i][j+1][k];                   \
168:     (n)[4] = &(x)[i][j][k+1];                   \
169:     (n)[5] = &(x)[i+1][j][k+1];                 \
170:     (n)[6] = &(x)[i+1][j+1][k+1];               \
171:     (n)[7] = &(x)[i][j+1][k+1];                 \
172:   } while (0)

174: #define QuadExtract(x,i,j,n) do {               \
175:     (n)[0] = (x)[i][j];                         \
176:     (n)[1] = (x)[i+1][j];                       \
177:     (n)[2] = (x)[i+1][j+1];                     \
178:     (n)[3] = (x)[i][j+1];                       \
179:   } while (0)

181: static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
182: {
183:   PetscInt i;
184:   dz[0] = dz[1] = dz[2] = 0;
185:   for (i=0; i<8; i++) {
186:     dz[0] += dphi[i][0] * zn[i];
187:     dz[1] += dphi[i][1] * zn[i];
188:     dz[2] += dphi[i][2] * zn[i];
189:   }
190: }

192: static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw)
193: {
194:   const PetscReal jac[3][3]  = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}};
195:   const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}};
196:   const PetscReal jdet       = jac[0][0]*jac[1][1]*jac[2][2];
197:   PetscInt        i;

199:   for (i=0; i<8; i++) {
200:     const PetscReal *dphir = HexQDeriv[q][i];
201:     phi[i]     = HexQInterp[q][i];
202:     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
203:     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
204:     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
205:   }
206:   *jw = 1.0 * jdet;
207: }

209: typedef struct _p_THI   *THI;
210: typedef struct _n_Units *Units;

212: typedef struct {
213:   PetscScalar u,v;
214: } Node;

216: typedef struct {
217:   PetscScalar b;                /* bed */
218:   PetscScalar h;                /* thickness */
219:   PetscScalar beta2;            /* friction */
220: } PrmNode;

222: typedef struct {
223:   PetscReal min,max,cmin,cmax;
224: } PRange;

226: typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode;

228: struct _p_THI {
229:   PETSCHEADER(int);
230:   void      (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
231:   PetscInt  zlevels;
232:   PetscReal Lx,Ly,Lz;           /* Model domain */
233:   PetscReal alpha;              /* Bed angle */
234:   Units     units;
235:   PetscReal dirichlet_scale;
236:   PetscReal ssa_friction_scale;
237:   PRange    eta;
238:   PRange    beta2;
239:   struct {
240:     PetscReal Bd2,eps,exponent;
241:   } viscosity;
242:   struct {
243:     PetscReal irefgam,eps2,exponent,refvel,epsvel;
244:   } friction;
245:   PetscReal rhog;
246:   PetscBool no_slip;
247:   PetscBool tridiagonal;
248:   PetscBool coarse2d;
249:   PetscBool verbose;
250:   MatType   mattype;
251: };

253: struct _n_Units {
254:   /* fundamental */
255:   PetscReal meter;
256:   PetscReal kilogram;
257:   PetscReal second;
258:   /* derived */
259:   PetscReal Pascal;
260:   PetscReal year;
261: };

263: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI);
264: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI);
265: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI);

267: static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
268: {
269:   const PetscScalar zm1    = zm-1,
270:                     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
271:                               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
272:                               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
273:                               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
274:                               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
275:                               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
276:                               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
277:                               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
278:   PetscInt i;
279:   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
280: }

282: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
283: static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
284: {
285:   Units     units = thi->units;
286:   PetscReal s     = -x*PetscSinReal(thi->alpha);

288:   p->b     = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
289:   p->h     = s - p->b;
290:   p->beta2 = 1e30;
291: }

293: static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
294: {
295:   Units     units = thi->units;
296:   PetscReal s     = -x*PetscSinReal(thi->alpha);

298:   p->b = s - 1000*units->meter;
299:   p->h = s - p->b;
300:   /* tau_b = beta2 v   is a stress (Pa) */
301:   p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter;
302: }

304: /* These are just toys */

306: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
307: static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
308: {
309:   Units     units = thi->units;
310:   PetscReal x     = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
311:   PetscReal r     = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
312:   p->b     = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
313:   p->h     = s - p->b;
314:   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
315: }

317: /* Like Z, but with 200 meter cliffs */
318: static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
319: {
320:   Units     units = thi->units;
321:   PetscReal x     = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
322:   PetscReal r     = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);

324:   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
325:   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
326:   p->h     = s - p->b;
327:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
328: }

330: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
331: static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
332: {
333:   Units     units = thi->units;
334:   PetscReal x     = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
335:   PetscReal r     = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);

337:   p->b     = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
338:   p->h     = s - p->b;
339:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
340: }

342: static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
343: {
344:   if (thi->friction.irefgam == 0) {
345:     Units units = thi->units;
346:     thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year));
347:     thi->friction.eps2    = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
348:   }
349:   if (thi->friction.exponent == 0) {
350:     *beta2  = rbeta2;
351:     *dbeta2 = 0;
352:   } else {
353:     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
354:     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
355:   }
356: }

358: static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
359: {
360:   PetscReal Bd2,eps,exponent;
361:   if (thi->viscosity.Bd2 == 0) {
362:     Units units = thi->units;
363:     const PetscReal
364:       n = 3.,                                           /* Glen exponent */
365:       p = 1. + 1./n,                                    /* for Stokes */
366:       A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
367:       B = PetscPowReal(A,-1./n);                                 /* hardness parameter */
368:     thi->viscosity.Bd2      = B/2;
369:     thi->viscosity.exponent = (p-2)/2;
370:     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
371:   }
372:   Bd2      = thi->viscosity.Bd2;
373:   exponent = thi->viscosity.exponent;
374:   eps      = thi->viscosity.eps;
375:   *eta     = Bd2 * PetscPowReal(eps + gam,exponent);
376:   *deta    = exponent * (*eta) / (eps + gam);
377: }

379: static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
380: {
381:   if (x < *min) *min = x;
382:   if (x > *max) *max = x;
383: }

385: static void PRangeClear(PRange *p)
386: {
387:   p->cmin = p->min = 1e100;
388:   p->cmax = p->max = -1e100;
389: }

393: static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
394: {

397:   p->cmin = min;
398:   p->cmax = max;
399:   if (min < p->min) p->min = min;
400:   if (max > p->max) p->max = max;
401:   return(0);
402: }

406: static PetscErrorCode THIDestroy(THI *thi)
407: {

411:   if (!*thi) return(0);
412:   if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; return(0);}
413:   PetscFree((*thi)->units);
414:   PetscFree((*thi)->mattype);
415:   PetscHeaderDestroy(thi);
416:   return(0);
417: }

421: static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
422: {
423:   static PetscBool registered = PETSC_FALSE;
424:   THI              thi;
425:   Units            units;
426:   PetscErrorCode   ierr;

429:   *inthi = 0;
430:   if (!registered) {
431:     PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);
432:     registered = PETSC_TRUE;
433:   }
434:   PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0);

436:   PetscNew(&thi->units);
437:   units           = thi->units;
438:   units->meter    = 1e-2;
439:   units->second   = 1e-7;
440:   units->kilogram = 1e-12;

442:   PetscOptionsBegin(comm,NULL,"Scaled units options","");
443:   {
444:     PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);
445:     PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);
446:     PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);
447:   }
448:   PetscOptionsEnd();
449:   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
450:   units->year   = 31556926. * units->second; /* seconds per year */

452:   thi->Lx              = 10.e3;
453:   thi->Ly              = 10.e3;
454:   thi->Lz              = 1000;
455:   thi->dirichlet_scale = 1;
456:   thi->verbose         = PETSC_FALSE;

458:   PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");
459:   {
460:     QuadratureType quad       = QUAD_GAUSS;
461:     char           homexp[]   = "A";
462:     char           mtype[256] = MATSBAIJ;
463:     PetscReal      L,m = 1.0;
464:     PetscBool      flg;
465:     L    = thi->Lx;
466:     PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);
467:     if (flg) thi->Lx = thi->Ly = L;
468:     PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);
469:     PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);
470:     PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);
471:     PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);
472:     switch (homexp[0] = toupper(homexp[0])) {
473:     case 'A':
474:       thi->initialize = THIInitialize_HOM_A;
475:       thi->no_slip    = PETSC_TRUE;
476:       thi->alpha      = 0.5;
477:       break;
478:     case 'C':
479:       thi->initialize = THIInitialize_HOM_C;
480:       thi->no_slip    = PETSC_FALSE;
481:       thi->alpha      = 0.1;
482:       break;
483:     case 'X':
484:       thi->initialize = THIInitialize_HOM_X;
485:       thi->no_slip    = PETSC_FALSE;
486:       thi->alpha      = 0.3;
487:       break;
488:     case 'Y':
489:       thi->initialize = THIInitialize_HOM_Y;
490:       thi->no_slip    = PETSC_FALSE;
491:       thi->alpha      = 0.5;
492:       break;
493:     case 'Z':
494:       thi->initialize = THIInitialize_HOM_Z;
495:       thi->no_slip    = PETSC_FALSE;
496:       thi->alpha      = 0.5;
497:       break;
498:     default:
499:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
500:     }
501:     PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);
502:     switch (quad) {
503:     case QUAD_GAUSS:
504:       HexQInterp = HexQInterp_Gauss;
505:       HexQDeriv  = HexQDeriv_Gauss;
506:       break;
507:     case QUAD_LOBATTO:
508:       HexQInterp = HexQInterp_Lobatto;
509:       HexQDeriv  = HexQDeriv_Lobatto;
510:       break;
511:     }
512:     PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);

514:     thi->friction.refvel = 100.;
515:     thi->friction.epsvel = 1.;

517:     PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL);
518:     PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL);
519:     PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);

521:     thi->friction.exponent = (m-1)/2;

523:     PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);
524:     PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);
525:     PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);
526:     PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);
527:     PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);
528:     PetscStrallocpy(mtype,(char**)&thi->mattype);
529:     PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);
530:   }
531:   PetscOptionsEnd();

533:   /* dimensionalize */
534:   thi->Lx    *= units->meter;
535:   thi->Ly    *= units->meter;
536:   thi->Lz    *= units->meter;
537:   thi->alpha *= PETSC_PI / 180;

539:   PRangeClear(&thi->eta);
540:   PRangeClear(&thi->beta2);

542:   {
543:     PetscReal u       = 1000*units->meter/(3e7*units->second),
544:               gradu   = u / (100*units->meter),eta,deta,
545:               rho     = 910 * units->kilogram/PetscPowReal(units->meter,3),
546:               grav    = 9.81 * units->meter/PetscSqr(units->second),
547:               driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
548:     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
549:     thi->rhog = rho * grav;
550:     if (thi->verbose) {
551:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal);
552:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving);
553:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving));
554:       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
555:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving));
556:     }
557:   }

559:   *inthi = thi;
560:   return(0);
561: }

565: static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm)
566: {
567:   PrmNode        **p;
568:   PetscInt       i,j,xs,xm,ys,ym,mx,my;

572:   DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);
573:   DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);
574:   DMDAVecGetArray(da2prm,prm,&p);
575:   for (i=xs; i<xs+xm; i++) {
576:     for (j=ys; j<ys+ym; j++) {
577:       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
578:       thi->initialize(thi,xx,yy,&p[i][j]);
579:     }
580:   }
581:   DMDAVecRestoreArray(da2prm,prm,&p);
582:   return(0);
583: }

587: static PetscErrorCode THISetUpDM(THI thi,DM dm)
588: {
589:   PetscErrorCode  ierr;
590:   PetscInt        refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s;
591:   DMDAStencilType st;
592:   DM              da2prm;
593:   Vec             X;

596:   DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);
597:   if (dim == 2) {
598:     DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st);
599:   }
600:   DMGetRefineLevel(dm,&refinelevel);
601:   DMGetCoarsenLevel(dm,&coarsenlevel);
602:   level = refinelevel - coarsenlevel;
603:   DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);
604:   DMSetUp(da2prm);
605:   DMCreateLocalVector(da2prm,&X);
606:   {
607:     PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
608:     if (dim == 2) {
609:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My));
610:     } else {
611:       PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)));
612:     }
613:   }
614:   THIInitializePrm(thi,da2prm,X);
615:   if (thi->tridiagonal) {       /* Reset coarse Jacobian evaluation */
616:     DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);
617:   }
618:   if (thi->coarse2d) {
619:     DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi);
620:   }
621:   PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm);
622:   PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X);
623:   DMDestroy(&da2prm);
624:   VecDestroy(&X);
625:   return(0);
626: }

630: static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx)
631: {
632:   THI            thi = (THI)ctx;
634:   PetscInt       rlevel,clevel;

637:   THISetUpDM(thi,dmc);
638:   DMGetRefineLevel(dmc,&rlevel);
639:   DMGetCoarsenLevel(dmc,&clevel);
640:   if (rlevel-clevel == 0) {DMSetMatType(dmc,MATAIJ);}
641:   DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi);
642:   return(0);
643: }

647: static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx)
648: {
649:   THI            thi = (THI)ctx;

653:   THISetUpDM(thi,dmf);
654:   DMSetMatType(dmf,thi->mattype);
655:   DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi);
656:   /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
657:   DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi);
658:   return(0);
659: }

663: static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm)
664: {
666:   DM             da2prm;
667:   Vec            X;

670:   PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
671:   if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
672:   PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
673:   if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
674:   DMDAVecGetArray(da2prm,X,prm);
675:   return(0);
676: }

680: static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm)
681: {
683:   DM             da2prm;
684:   Vec            X;

687:   PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
688:   if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
689:   PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
690:   if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
691:   DMDAVecRestoreArray(da2prm,X,prm);
692:   return(0);
693: }

697: static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx)
698: {
699:   THI            thi;
700:   PetscInt       i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
701:   PetscReal      hx,hy;
702:   PrmNode        **prm;
703:   Node           ***x;
705:   DM             da;

708:   SNESGetDM(snes,&da);
709:   DMGetApplicationContext(da,&thi);
710:   DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);
711:   DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
712:   DMDAVecGetArray(da,X,&x);
713:   THIDAGetPrm(da,&prm);
714:   hx   = thi->Lx / mx;
715:   hy   = thi->Ly / my;
716:   for (i=xs; i<xs+xm; i++) {
717:     for (j=ys; j<ys+ym; j++) {
718:       for (k=zs; k<zs+zm; k++) {
719:         const PetscScalar zm1      = zm-1,
720:                           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
721:                           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
722:         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
723:         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
724:       }
725:     }
726:   }
727:   DMDAVecRestoreArray(da,X,&x);
728:   THIDARestorePrm(da,&prm);
729:   return(0);
730: }

732: static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta)
733: {
734:   PetscInt    l,ll;
735:   PetscScalar gam;

737:   du[0] = du[1] = du[2] = 0;
738:   dv[0] = dv[1] = dv[2] = 0;
739:   *u    = 0;
740:   *v    = 0;
741:   for (l=0; l<8; l++) {
742:     *u += phi[l] * n[l].u;
743:     *v += phi[l] * n[l].v;
744:     for (ll=0; ll<3; ll++) {
745:       du[ll] += dphi[l][ll] * n[l].u;
746:       dv[ll] += dphi[l][ll] * n[l].v;
747:     }
748:   }
749:   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]);
750:   THIViscosity(thi,PetscRealPart(gam),eta,deta);
751: }

753: static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta)
754: {
755:   PetscInt    l,ll;
756:   PetscScalar gam;

758:   du[0] = du[1] = 0;
759:   dv[0] = dv[1] = 0;
760:   *u    = 0;
761:   *v    = 0;
762:   for (l=0; l<4; l++) {
763:     *u += phi[l] * n[l].u;
764:     *v += phi[l] * n[l].v;
765:     for (ll=0; ll<2; ll++) {
766:       du[ll] += dphi[l][ll] * n[l].u;
767:       dv[ll] += dphi[l][ll] * n[l].v;
768:     }
769:   }
770:   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]);
771:   THIViscosity(thi,PetscRealPart(gam),eta,deta);
772: }

776: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi)
777: {
778:   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
779:   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;
780:   PrmNode        **prm;

784:   xs = info->zs;
785:   ys = info->ys;
786:   xm = info->zm;
787:   ym = info->ym;
788:   zm = info->xm;
789:   hx = thi->Lx / info->mz;
790:   hy = thi->Ly / info->my;

792:   etamin   = 1e100;
793:   etamax   = 0;
794:   beta2min = 1e100;
795:   beta2max = 0;

797:   THIDAGetPrm(info->da,&prm);

799:   for (i=xs; i<xs+xm; i++) {
800:     for (j=ys; j<ys+ym; j++) {
801:       PrmNode pn[4];
802:       QuadExtract(prm,i,j,pn);
803:       for (k=0; k<zm-1; k++) {
804:         PetscInt  ls = 0;
805:         Node      n[8],*fn[8];
806:         PetscReal zn[8],etabase = 0;
807:         PrmHexGetZ(pn,k,zm,zn);
808:         HexExtract(x,i,j,k,n);
809:         HexExtractRef(f,i,j,k,fn);
810:         if (thi->no_slip && k == 0) {
811:           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
812:           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
813:           ls = 4;
814:         }
815:         for (q=0; q<8; q++) {
816:           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
817:           PetscScalar du[3],dv[3],u,v;
818:           HexGrad(HexQDeriv[q],zn,dz);
819:           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
820:           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
821:           jw /= thi->rhog;      /* scales residuals to be O(1) */
822:           if (q == 0) etabase = eta;
823:           RangeUpdate(&etamin,&etamax,eta);
824:           for (l=ls; l<8; l++) { /* test functions */
825:             const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0};
826:             const PetscReal pp    = phi[l],*dp = dphi[l];
827:             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
828:             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
829:           }
830:         }
831:         if (k == 0) { /* we are on a bottom face */
832:           if (thi->no_slip) {
833:             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
834:             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
835:             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
836:             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
837:             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
838:             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
839:             * assembled matrix (see the similar block in THIJacobianLocal).
840:             *
841:             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
842:             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
843:             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
844:             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
845:             */
846:             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1.);
847:             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
848:             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
849:             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
850:           } else {              /* Integrate over bottom face to apply boundary condition */
851:             for (q=0; q<4; q++) {
852:               const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
853:               PetscScalar     u  =0,v=0,rbeta2=0;
854:               PetscReal       beta2,dbeta2;
855:               for (l=0; l<4; l++) {
856:                 u      += phi[l]*n[l].u;
857:                 v      += phi[l]*n[l].v;
858:                 rbeta2 += phi[l]*pn[l].beta2;
859:               }
860:               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
861:               RangeUpdate(&beta2min,&beta2max,beta2);
862:               for (l=0; l<4; l++) {
863:                 const PetscReal pp = phi[l];
864:                 fn[ls+l]->u += pp*jw*beta2*u;
865:                 fn[ls+l]->v += pp*jw*beta2*v;
866:               }
867:             }
868:           }
869:         }
870:       }
871:     }
872:   }

874:   THIDARestorePrm(info->da,&prm);

876:   PRangeMinMax(&thi->eta,etamin,etamax);
877:   PRangeMinMax(&thi->beta2,beta2min,beta2max);
878:   return(0);
879: }

883: static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
884: {
886:   PetscReal      nrm;
887:   PetscInt       m;
888:   PetscMPIInt    rank;

891:   MatNorm(B,NORM_FROBENIUS,&nrm);
892:   MatGetSize(B,&m,0);
893:   MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);
894:   if (!rank) {
895:     PetscScalar val0,val2;
896:     MatGetValue(B,0,0,&val0);
897:     MatGetValue(B,2,2,&val2);
898:     PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e  (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax);
899:   }
900:   return(0);
901: }

905: static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
906: {
908:   Node           ***x;
909:   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
910:   PetscReal      umin = 1e100,umax=-1e100;
911:   PetscScalar    usum = 0.0,gusum;

914:   *min = *max = *mean = 0;
915:   DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
916:   DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
917:   if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_SELF,1,"Unexpected decomposition");
918:   DMDAVecGetArray(da,X,&x);
919:   for (i=xs; i<xs+xm; i++) {
920:     for (j=ys; j<ys+ym; j++) {
921:       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
922:       RangeUpdate(&umin,&umax,u);
923:       usum += u;
924:     }
925:   }
926:   DMDAVecRestoreArray(da,X,&x);
927:   MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da));
928:   MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da));
929:   MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da));
930:   *mean = PetscRealPart(gusum) / (mx*my);
931:   return(0);
932: }

936: static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[])
937: {
938:   MPI_Comm       comm;
939:   Vec            X;
940:   DM             dm;

944:   PetscObjectGetComm((PetscObject)thi,&comm);
945:   SNESGetSolution(snes,&X);
946:   SNESGetDM(snes,&dm);
947:   PetscPrintf(comm,"Solution statistics after solve: %s\n",name);
948:   {
949:     PetscInt            its,lits;
950:     SNESConvergedReason reason;
951:     SNESGetIterationNumber(snes,&its);
952:     SNESGetConvergedReason(snes,&reason);
953:     SNESGetLinearSolveIterations(snes,&lits);
954:     PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits);
955:   }
956:   {
957:     PetscReal         nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
958:     PetscInt          i,j,m;
959:     const PetscScalar *x;
960:     VecNorm(X,NORM_2,&nrm2);
961:     VecGetLocalSize(X,&m);
962:     VecGetArrayRead(X,&x);
963:     for (i=0; i<m; i+=2) {
964:       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
965:       tmin[0] = PetscMin(u,tmin[0]);
966:       tmin[1] = PetscMin(v,tmin[1]);
967:       tmin[2] = PetscMin(c,tmin[2]);
968:       tmax[0] = PetscMax(u,tmax[0]);
969:       tmax[1] = PetscMax(v,tmax[1]);
970:       tmax[2] = PetscMax(c,tmax[2]);
971:     }
972:     VecRestoreArrayRead(X,&x);
973:     MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));
974:     MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));
975:     /* Dimensionalize to meters/year */
976:     nrm2 *= thi->units->year / thi->units->meter;
977:     for (j=0; j<3; j++) {
978:       min[j] *= thi->units->year / thi->units->meter;
979:       max[j] *= thi->units->year / thi->units->meter;
980:     }
981:     if (min[0] == 0.0) min[0] = 0.0;
982:     PetscPrintf(comm,"|X|_2 %g   %g <= u <=  %g   %g <= v <=  %g   %g <= c <=  %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2]);
983:     {
984:       PetscReal umin,umax,umean;
985:       THISurfaceStatistics(dm,X,&umin,&umax,&umean);
986:       umin  *= thi->units->year / thi->units->meter;
987:       umax  *= thi->units->year / thi->units->meter;
988:       umean *= thi->units->year / thi->units->meter;
989:       PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean);
990:     }
991:     /* These values stay nondimensional */
992:     PetscPrintf(comm,"Global eta range   %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax);
993:     PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax);
994:   }
995:   return(0);
996: }

1000: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi)
1001: {
1002:   PetscInt       xs,ys,xm,ym,i,j,q,l,ll;
1003:   PetscReal      hx,hy;
1004:   PrmNode        **prm;

1008:   xs = info->ys;
1009:   ys = info->xs;
1010:   xm = info->ym;
1011:   ym = info->xm;
1012:   hx = thi->Lx / info->my;
1013:   hy = thi->Ly / info->mx;

1015:   MatZeroEntries(B);
1016:   THIDAGetPrm(info->da,&prm);

1018:   for (i=xs; i<xs+xm; i++) {
1019:     for (j=ys; j<ys+ym; j++) {
1020:       Node        n[4];
1021:       PrmNode     pn[4];
1022:       PetscScalar Ke[4*2][4*2];
1023:       QuadExtract(prm,i,j,pn);
1024:       QuadExtract(x,i,j,n);
1025:       PetscMemzero(Ke,sizeof(Ke));
1026:       for (q=0; q<4; q++) {
1027:         PetscReal   phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2;
1028:         PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0;
1029:         for (l=0; l<4; l++) {
1030:           phi[l]     = QuadQInterp[q][l];
1031:           dphi[l][0] = QuadQDeriv[q][l][0]*2./hx;
1032:           dphi[l][1] = QuadQDeriv[q][l][1]*2./hy;
1033:           h         += phi[l] * pn[l].h;
1034:           rbeta2    += phi[l] * pn[l].beta2;
1035:         }
1036:         jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */
1037:         PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1038:         THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1039:         for (l=0; l<4; l++) {
1040:           const PetscReal pp = phi[l],*dp = dphi[l];
1041:           for (ll=0; ll<4; ll++) {
1042:             const PetscReal ppl = phi[ll],*dpl = dphi[ll];
1043:             PetscScalar     dgdu,dgdv;
1044:             dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1];
1045:             dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0];
1046:             /* Picard part */
1047:             Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1048:             Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1049:             Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1050:             Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1051:             /* extra Newton terms */
1052:             Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale;
1053:             Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale;
1054:             Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale;
1055:             Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale;
1056:           }
1057:         }
1058:       }
1059:       {
1060:         const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}};
1061:         MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);
1062:       }
1063:     }
1064:   }
1065:   THIDARestorePrm(info->da,&prm);

1067:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1068:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1069:   MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1070:   if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
1071:   return(0);
1072: }

1076: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode)
1077: {
1078:   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1079:   PetscReal      hx,hy;
1080:   PrmNode        **prm;

1084:   xs = info->zs;
1085:   ys = info->ys;
1086:   xm = info->zm;
1087:   ym = info->ym;
1088:   zm = info->xm;
1089:   hx = thi->Lx / info->mz;
1090:   hy = thi->Ly / info->my;

1092:   MatZeroEntries(B);
1093:   MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE);
1094:   THIDAGetPrm(info->da,&prm);

1096:   for (i=xs; i<xs+xm; i++) {
1097:     for (j=ys; j<ys+ym; j++) {
1098:       PrmNode pn[4];
1099:       QuadExtract(prm,i,j,pn);
1100:       for (k=0; k<zm-1; k++) {
1101:         Node        n[8];
1102:         PetscReal   zn[8],etabase = 0;
1103:         PetscScalar Ke[8*2][8*2];
1104:         PetscInt    ls = 0;

1106:         PrmHexGetZ(pn,k,zm,zn);
1107:         HexExtract(x,i,j,k,n);
1108:         PetscMemzero(Ke,sizeof(Ke));
1109:         if (thi->no_slip && k == 0) {
1110:           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1111:           ls = 4;
1112:         }
1113:         for (q=0; q<8; q++) {
1114:           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
1115:           PetscScalar du[3],dv[3],u,v;
1116:           HexGrad(HexQDeriv[q],zn,dz);
1117:           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1118:           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1119:           jw /= thi->rhog;      /* residuals are scaled by this factor */
1120:           if (q == 0) etabase = eta;
1121:           for (l=ls; l<8; l++) { /* test functions */
1122:             const PetscReal *PETSC_RESTRICT dp = dphi[l];
1123: #if USE_SSE2_KERNELS
1124:             /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1125:             __m128d
1126:               p4         = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5),
1127:               p42        = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)),
1128:               du0        = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]),
1129:               dv0        = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]),
1130:               jweta      = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta),
1131:               dp0        = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]),
1132:               dp0jweta   = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta),
1133:               p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */
1134:               p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */
1135:               pdu2dv2    = _mm_unpacklo_pd(du2,dv2),                          /* [du2, dv2] */
1136:               du1pdv0    = _mm_add_pd(du1,dv0),                               /* du1 + dv0 */
1137:               t1         = _mm_mul_pd(dp0,p4du0p2dv1),                        /* dp0 (4 du0 + 2 dv1) */
1138:               t2         = _mm_mul_pd(dp1,p4dv1p2du0);                        /* dp1 (4 dv1 + 2 du0) */

1140: #endif
1141: #if defined COMPUTE_LOWER_TRIANGULAR  /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1142:             for (ll=ls; ll<8; ll++) { /* trial functions */
1143: #else
1144:             for (ll=l; ll<8; ll++) {
1145: #endif
1146:               const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1147:               if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */
1148: #if !USE_SSE2_KERNELS
1149:               /* The analytic Jacobian in nice, easy-to-read form */
1150:               {
1151:                 PetscScalar dgdu,dgdv;
1152:                 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1153:                 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1154:                 /* Picard part */
1155:                 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1156:                 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1157:                 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1158:                 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1159:                 /* extra Newton terms */
1160:                 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1161:                 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1162:                 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1163:                 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1164:               }
1165: #else
1166:               /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1167:               * benefit.  On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1168:               * by 25 to 30 percent. */
1169:               {
1170:                 __m128d
1171:                   keu   = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]),
1172:                   kev   = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]),
1173:                   dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]),
1174:                   t0,t3,pdgduv;
1175:                 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01),
1176:                                                 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10),
1177:                                                            _mm_mul_pd(dp2jweta,dpl2))));
1178:                 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01),
1179:                                                 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10),
1180:                                                            _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1))))));
1181:                 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)),
1182:                                                               _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))),
1183:                                                    _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10),
1184:                                                               _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1185:                 t0 = _mm_mul_pd(jwdeta,pdgduv);  /* jw deta [dgdu, dgdv] */
1186:                 t3 = _mm_mul_pd(t0,du1pdv0);     /* t0 (du1 + dv0) */
1187:                 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0),
1188:                                                                            _mm_add_pd(_mm_mul_pd(dp1,t3),
1189:                                                                                       _mm_mul_pd(t0,_mm_mul_pd(dp2,du2))))));
1190:                 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0),
1191:                                                                            _mm_add_pd(_mm_mul_pd(dp0,t3),
1192:                                                                                       _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2))))));
1193:               }
1194: #endif
1195:             }
1196:           }
1197:         }
1198:         if (k == 0) { /* on a bottom face */
1199:           if (thi->no_slip) {
1200:             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1);
1201:             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1202:             Ke[0][0] = thi->dirichlet_scale*diagu;
1203:             Ke[1][1] = thi->dirichlet_scale*diagv;
1204:           } else {
1205:             for (q=0; q<4; q++) {
1206:               const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
1207:               PetscScalar     u  =0,v=0,rbeta2=0;
1208:               PetscReal       beta2,dbeta2;
1209:               for (l=0; l<4; l++) {
1210:                 u      += phi[l]*n[l].u;
1211:                 v      += phi[l]*n[l].v;
1212:                 rbeta2 += phi[l]*pn[l].beta2;
1213:               }
1214:               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1215:               for (l=0; l<4; l++) {
1216:                 const PetscReal pp = phi[l];
1217:                 for (ll=0; ll<4; ll++) {
1218:                   const PetscReal ppl = phi[ll];
1219:                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1220:                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1221:                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1222:                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1223:                 }
1224:               }
1225:             }
1226:           }
1227:         }
1228:         {
1229:           const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}};
1230:           if (amode == THIASSEMBLY_TRIDIAGONAL) {
1231:             for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1232:               const PetscInt   l4     = l+4;
1233:               const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}};
1234: #if defined COMPUTE_LOWER_TRIANGULAR
1235:               const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1236:                                              {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1237:                                              {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1238:                                              {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1239: #else
1240:               /* Same as above except for the lower-left block */
1241:               const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1242:                                              {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1243:                                              {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1244:                                              {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1245: #endif
1246:               MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);
1247:             }
1248:           } else {
1249: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1250:             for (l=0; l<8; l++) {
1251:               for (ll=l+1; ll<8; ll++) {
1252:                 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1253:                 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1254:                 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1255:                 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1256:               }
1257:             }
1258: #endif
1259:             MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);
1260:           }
1261:         }
1262:       }
1263:     }
1264:   }
1265:   THIDARestorePrm(info->da,&prm);

1267:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1268:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1269:   MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1270:   if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
1271:   return(0);
1272: }

1276: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1277: {

1281:   THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);
1282:   return(0);
1283: }

1287: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1288: {

1292:   THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);
1293:   return(0);
1294: }

1298: static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[])
1299: {
1300:   PetscErrorCode  ierr;
1301:   THI             thi;
1302:   PetscInt        dim,M,N,m,n,s,dof;
1303:   DM              dac,daf;
1304:   DMDAStencilType st;
1305:   DM_DA           *ddf,*ddc;

1308:   PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);
1309:   if (!thi) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance");
1310:   if (nlevels > 1) {
1311:     DMRefineHierarchy(dac0,nlevels-1,hierarchy);
1312:     dac  = hierarchy[nlevels-2];
1313:   } else {
1314:     dac = dac0;
1315:   }
1316:   DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st);
1317:   if (dim != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs");

1319:   /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1320:   DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf);
1321:   DMSetUp(daf);

1323:   daf->ops->creatematrix        = dac->ops->creatematrix;
1324:   daf->ops->createinterpolation = dac->ops->createinterpolation;
1325:   daf->ops->getcoloring         = dac->ops->getcoloring;
1326:   ddf                           = (DM_DA*)daf->data;
1327:   ddc                           = (DM_DA*)dac->data;
1328:   ddf->interptype               = ddc->interptype;

1330:   DMDASetFieldName(daf,0,"x-velocity");
1331:   DMDASetFieldName(daf,1,"y-velocity");

1333:   hierarchy[nlevels-1] = daf;
1334:   return(0);
1335: }

1339: static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale)
1340: {
1342:   PetscInt       dim;

1349:   DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0);
1350:   if (dim  == 2) {
1351:     /* We are in the 2D problem and use normal DMDA interpolation */
1352:     DMCreateInterpolation(dac,daf,A,scale);
1353:   } else {
1354:     PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart;
1355:     Mat      B;

1357:     DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1358:     DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);
1359:     if (zs != 0) SETERRQ(PETSC_COMM_SELF,1,"unexpected");
1360:     MatCreate(PetscObjectComm((PetscObject)daf),&B);
1361:     MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);

1363:     MatSetType(B,MATAIJ);
1364:     MatSeqAIJSetPreallocation(B,1,NULL);
1365:     MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);
1366:     MatGetOwnershipRange(B,&rstart,NULL);
1367:     MatGetOwnershipRangeColumn(B,&cstart,NULL);
1368:     for (i=xs; i<xs+xm; i++) {
1369:       for (j=ys; j<ys+ym; j++) {
1370:         for (k=zs; k<zs+zm; k++) {
1371:           PetscInt    i2  = i*ym+j,i3 = i2*zm+k;
1372:           PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */
1373:           MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);
1374:         }
1375:       }
1376:     }
1377:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1378:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1379:     MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);
1380:     MatDestroy(&B);
1381:   }
1382:   return(0);
1383: }

1387: static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J)
1388: {
1389:   PetscErrorCode         ierr;
1390:   Mat                    A;
1391:   PetscInt               xm,ym,zm,dim,dof = 2,starts[3],dims[3];
1392:   ISLocalToGlobalMapping ltog;

1395:   DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0);
1396:   if (dim != 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D");
1397:   DMDAGetCorners(da,0,0,0,&zm,&ym,&xm);
1398:   DMGetLocalToGlobalMapping(da,&ltog);
1399:   MatCreate(PetscObjectComm((PetscObject)da),&A);
1400:   MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);
1401:   MatSetType(A,da->mattype);
1402:   MatSetFromOptions(A);
1403:   MatSeqAIJSetPreallocation(A,3*2,NULL);
1404:   MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL);
1405:   MatSeqBAIJSetPreallocation(A,2,3,NULL);
1406:   MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL);
1407:   MatSeqSBAIJSetPreallocation(A,2,2,NULL);
1408:   MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL);
1409:   MatSetLocalToGlobalMapping(A,ltog,ltog);
1410:   DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
1411:   MatSetStencil(A,dim,dims,starts,dof);
1412:   *J   = A;
1413:   return(0);
1414: }

1418: static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[])
1419: {
1420:   const PetscInt    dof   = 2;
1421:   Units             units = thi->units;
1422:   MPI_Comm          comm;
1423:   PetscErrorCode    ierr;
1424:   PetscViewer       viewer;
1425:   PetscMPIInt       rank,size,tag,nn,nmax;
1426:   PetscInt          mx,my,mz,r,range[6];
1427:   const PetscScalar *x;

1430:   PetscObjectGetComm((PetscObject)thi,&comm);
1431:   DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1432:   MPI_Comm_size(comm,&size);
1433:   MPI_Comm_rank(comm,&rank);
1434:   PetscViewerASCIIOpen(comm,filename,&viewer);
1435:   PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1436:   PetscViewerASCIIPrintf(viewer,"  <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1);

1438:   DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);
1439:   PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);
1440:   MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);
1441:   tag  = ((PetscObject) viewer)->tag;
1442:   VecGetArrayRead(X,&x);
1443:   if (!rank) {
1444:     PetscScalar *array;
1445:     PetscMalloc1(nmax,&array);
1446:     for (r=0; r<size; r++) {
1447:       PetscInt          i,j,k,xs,xm,ys,ym,zs,zm;
1448:       const PetscScalar *ptr;
1449:       MPI_Status        status;
1450:       if (r) {
1451:         MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);
1452:       }
1453:       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1454:       if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,1,"should not happen");
1455:       if (r) {
1456:         MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);
1457:         MPI_Get_count(&status,MPIU_SCALAR,&nn);
1458:         if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,1,"should not happen");
1459:         ptr = array;
1460:       } else ptr = x;
1461:       PetscViewerASCIIPrintf(viewer,"    <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);

1463:       PetscViewerASCIIPrintf(viewer,"      <Points>\n");
1464:       PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1465:       for (i=xs; i<xs+xm; i++) {
1466:         for (j=ys; j<ys+ym; j++) {
1467:           for (k=zs; k<zs+zm; k++) {
1468:             PrmNode   p;
1469:             PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz;
1470:             thi->initialize(thi,xx,yy,&p);
1471:             zz   = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1);
1472:             PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz);
1473:           }
1474:         }
1475:       }
1476:       PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");
1477:       PetscViewerASCIIPrintf(viewer,"      </Points>\n");

1479:       PetscViewerASCIIPrintf(viewer,"      <PointData>\n");
1480:       PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1481:       for (i=0; i<nn; i+=dof) {
1482:         PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0);
1483:       }
1484:       PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");

1486:       PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");
1487:       for (i=0; i<nn; i+=dof) {
1488:         PetscViewerASCIIPrintf(viewer,"%D\n",r);
1489:       }
1490:       PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");
1491:       PetscViewerASCIIPrintf(viewer,"      </PointData>\n");

1493:       PetscViewerASCIIPrintf(viewer,"    </Piece>\n");
1494:     }
1495:     PetscFree(array);
1496:   } else {
1497:     MPI_Send(range,6,MPIU_INT,0,tag,comm);
1498:     MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm);
1499:   }
1500:   VecRestoreArrayRead(X,&x);
1501:   PetscViewerASCIIPrintf(viewer,"  </StructuredGrid>\n");
1502:   PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");
1503:   PetscViewerDestroy(&viewer);
1504:   return(0);
1505: }

1509: int main(int argc,char *argv[])
1510: {
1511:   MPI_Comm       comm;
1512:   THI            thi;
1514:   DM             da;
1515:   SNES           snes;

1517:   PetscInitialize(&argc,&argv,0,help);
1518:   comm = PETSC_COMM_WORLD;

1520:   THICreate(comm,&thi);
1521:   {
1522:     PetscInt M = 3,N = 3,P = 2;
1523:     PetscOptionsBegin(comm,NULL,"Grid resolution options","");
1524:     {
1525:       PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);
1526:       N    = M;
1527:       PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);
1528:       if (thi->coarse2d) {
1529:         PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);
1530:       } else {
1531:         PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);
1532:       }
1533:     }
1534:     PetscOptionsEnd();
1535:     if (thi->coarse2d) {
1536:       DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);
1537:       DMSetFromOptions(da);
1538:       DMSetUp(da);
1539:       da->ops->refinehierarchy     = DMRefineHierarchy_THI;
1540:       da->ops->createinterpolation = DMCreateInterpolation_DA_THI;

1542:       PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);
1543:     } else {
1544:       DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);
1545:       DMSetFromOptions(da);
1546:       DMSetUp(da);
1547:     }
1548:     DMDASetFieldName(da,0,"x-velocity");
1549:     DMDASetFieldName(da,1,"y-velocity");
1550:   }
1551:   THISetUpDM(thi,da);
1552:   if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;

1554:   {                             /* Set the fine level matrix type if -da_refine */
1555:     PetscInt rlevel,clevel;
1556:     DMGetRefineLevel(da,&rlevel);
1557:     DMGetCoarsenLevel(da,&clevel);
1558:     if (rlevel - clevel > 0) {DMSetMatType(da,thi->mattype);}
1559:   }

1561:   DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi);
1562:   if (thi->tridiagonal) {
1563:     DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi);
1564:   } else {
1565:     DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);
1566:   }
1567:   DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi);
1568:   DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi);

1570:   DMSetApplicationContext(da,thi);

1572:   SNESCreate(comm,&snes);
1573:   SNESSetDM(snes,da);
1574:   DMDestroy(&da);
1575:   SNESSetComputeInitialGuess(snes,THIInitial,NULL);
1576:   SNESSetFromOptions(snes);

1578:   SNESSolve(snes,NULL,NULL);

1580:   THISolveStatistics(thi,snes,0,"Full");

1582:   {
1583:     PetscBool flg;
1584:     char      filename[PETSC_MAX_PATH_LEN] = "";
1585:     PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg);
1586:     if (flg) {
1587:       Vec X;
1588:       DM  dm;
1589:       SNESGetSolution(snes,&X);
1590:       SNESGetDM(snes,&dm);
1591:       THIDAVecView_VTK_XML(thi,dm,X,filename);
1592:     }
1593:   }

1595:   DMDestroy(&da);
1596:   SNESDestroy(&snes);
1597:   THIDestroy(&thi);
1598:   PetscFinalize();
1599:   return ierr;
1600: }