Actual source code: ex4.c

petsc-3.7.6 2017-04-24
Report Typos and Errors
  1: static char help[] = "Test MatSetValuesBatch: setting batches of elements using the GPU.\n\
2: This works with SeqAIJCUSP and MPIAIJCUSP matrices.\n\n";
3: #include <petscdm.h>
4: #include <petscdmda.h>
5: #include <petscksp.h>

7: /* We will use a structured mesh for this assembly test. Each square will be divided into two triangles:
8:   C       D
9:    _______
10:   |\      | The matrix for 0 and 1 is /   1  -0.5 -0.5 \
11:   | \   1 |                           | -0.5  0.5  0.0 |
12:   |  \    |                           \ -0.5  0.0  0.5 /
13:   |   \   |
14:   |    \  |
15:   |  0  \ |
16:   |      \|
17:   ---------
18:   A       B

21:   DONE 1) Build and run on baconost
22:     - Gather data for CPU/GPU up to da_grid_x 1300
23:       - Looks 6x faster than CPU
24:     - Make plot

26:   DONE 2) Solve the Neumann Poisson problem

28:   3) Multi-GPU Assembly
29:     - MPIAIJCUSP: Just have two SEQAIJCUSP matrices, nothing else special
30:     a) Filter rows to be sent to other procs (normally stashed)
31:     b) send/recv rows, might as well do with a VecScatter
32:     c) Potential to overlap this computation w/ GPU (talk to Nathan)
33:     c') Just shove these rows in after the local
34:     d) Have implicit rep of COO from repeated/tiled_range
35:     e) Do a filtered copy, decrementing rows and remapping columns, which splits into two sets
36:     f) Make two COO matrices and do separate aggregation on each one

38:   4) Solve the Neumann Poisson problem in parallel
39:     - Try it on GPU machine at Brown (They need another GNU install)

41:   5) GPU FEM integration
42:     - Move launch code to PETSc   or   - Try again now that assembly is in PETSc
43:     - Move build code to PETSc

45:   6) Try out CUSP PCs
46: */

50: PetscErrorCode IntegrateCells(DM dm, PetscInt *Ne, PetscInt *Nl, PetscInt **elemRows, PetscScalar **elemMats)
51: {
52:   DMDALocalInfo  info;
53:   PetscInt       *er;
54:   PetscScalar    *em;
55:   PetscInt       X, Y, dof;
56:   PetscInt       nl, nxe, nye, ne;
57:   PetscInt       k = 0, m  = 0;
58:   PetscInt       i, j;
60: #if defined(PETSC_USE_LOG)
61:   PetscLogEvent  integrationEvent;
62: #endif

65:   PetscLogEventRegister("ElemIntegration", DM_CLASSID, &integrationEvent);
66:   PetscLogEventBegin(integrationEvent,0,0,0,0);
67:   DMDAGetInfo(dm, 0, &X, &Y,0,0,0,0, &dof,0,0,0,0,0);
68:   DMDAGetLocalInfo(dm, &info);
69:   nl   = dof*3;
70:   nxe  = info.xm; if (info.xs+info.xm == X) nxe--;
71:   nye  = info.ym; if (info.ys+info.ym == Y) nye--;
72:   ne   = 2 * nxe * nye;
73:   *Ne  = ne;
74:   *Nl  = nl;
75:   PetscMalloc2(ne*nl, elemRows, ne*nl*nl, elemMats);
76:   er   = *elemRows;
77:   em   = *elemMats;
78:   /* Proc 0        Proc 1                                               */
79:   /* xs: 0  xm: 3  xs: 0 xm: 3                                          */
80:   /* ys: 0  ym: 2  ys: 2 ym: 1                                          */
81:   /* 8 elements x 3 vertices = 24 element matrix rows and 72 entries    */
82:   /*   6 offproc rows containing 18 element matrix entries              */
83:   /*  18  onproc rows containing 54 element matrix entries              */
84:   /*   3 offproc columns in 8 element matrix entries                    */
85:   /*   so we should have 46 diagonal matrix entries                     */
86:   for (j = info.ys; j < info.ys+nye; ++j) {
87:     for (i = info.xs; i < info.xs+nxe; ++i) {
88:       PetscInt rowA = j*X     + i, rowB = j*X     + i+1;
89:       PetscInt rowC = (j+1)*X + i, rowD = (j+1)*X + i+1;

91:       /* Lower triangle */
92:       er[k+0] = rowA; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
93:       er[k+1] = rowB; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
94:       er[k+2] = rowC; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
95:       k      += nl; m += nl*nl;
96:       /* Upper triangle */
97:       er[k+0] = rowD; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
98:       er[k+1] = rowC; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
99:       er[k+2] = rowB; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
100:       k      += nl; m += nl*nl;
101:     }
102:   }
103:   PetscLogEventEnd(integrationEvent,0,0,0,0);
104:   return(0);
105: }

109: int main(int argc, char **argv)
110: {
111:   KSP            ksp;
112:   MatNullSpace   nullsp;
113:   DM             dm;
114:   Mat            A;
115:   Vec            x, b;
116:   PetscViewer    viewer;
117:   PetscInt       Nl, Ne;
118:   PetscInt       *elemRows;
119:   PetscScalar    *elemMats;
120:   PetscBool      doGPU = PETSC_TRUE, doCPU = PETSC_TRUE, doSolve = PETSC_FALSE, doView = PETSC_TRUE;
122: #if defined(PETSC_USE_LOG)
123:   PetscLogStage  gpuStage, cpuStage;
124: #endif

126:   PetscInitialize(&argc, &argv, 0, help);
127:   DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_BOX, -3, -3, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &dm);
128:   IntegrateCells(dm, &Ne, &Nl, &elemRows, &elemMats);
129:   PetscOptionsGetBool(NULL,NULL, "-view", &doView, NULL);
130:   /* Construct matrix using GPU */
131:   PetscOptionsGetBool(NULL,NULL, "-gpu", &doGPU, NULL);
132:   if (doGPU) {
133:     PetscLogStageRegister("GPU Stage", &gpuStage);
134:     PetscLogStagePush(gpuStage);
135:     DMSetMatType(dm,MATAIJ);
136:     DMCreateMatrix(dm, &A);
137:     MatSetType(A, MATAIJCUSP);
138:     MatSeqAIJSetPreallocation(A, 0, NULL);
139:     MatMPIAIJSetPreallocation(A, 0, NULL, 0, NULL);
140:     MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);
141:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
142:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
143:     if (doView) {
144:       PetscViewerASCIIOpen(PETSC_COMM_WORLD, NULL, &viewer);
145:       if (Ne > 500) {PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);}
146:       MatView(A, viewer);
147:       if (Ne > 500) {PetscViewerPopFormat(viewer);}
148:       PetscViewerDestroy(&viewer);
149:     }
150:     PetscLogStagePop();
151:     MatDestroy(&A);
152:   }
153:   /* Construct matrix using CPU */
154:   PetscOptionsGetBool(NULL,NULL, "-cpu", &doCPU, NULL);
155:   if (doCPU) {
156:     PetscLogStageRegister("CPU Stage", &cpuStage);
157:     PetscLogStagePush(cpuStage);
158:     DMSetMatType(dm,MATAIJ);
159:     DMCreateMatrix(dm, &A);
160:     MatZeroEntries(A);
161:     MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);
162:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
163:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
164:     if (doView) {
165:       PetscViewerASCIIOpen(PETSC_COMM_WORLD, NULL, &viewer);
166:       if (Ne > 500) {PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);}
167:       MatView(A, viewer);
168:       if (Ne > 500) {PetscViewerPopFormat(viewer);}
169:       PetscViewerDestroy(&viewer);
170:     }
171:     PetscLogStagePop();
172:   }
173:   /* Solve simple system with random rhs */
174:   PetscOptionsGetBool(NULL,NULL, "-solve", &doSolve, NULL);
175:   if (doSolve) {
176:     MatCreateVecs(A, &x, &b);
177:     VecSetRandom(b, NULL);
178:     KSPCreate(PETSC_COMM_WORLD, &ksp);
179:     KSPSetOperators(ksp, A, A);
180:     MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, NULL, &nullsp);
181:     MatSetNullSpace(A, nullsp);
182:     MatNullSpaceDestroy(&nullsp);
183:     KSPSetFromOptions(ksp);
184:     KSPSolve(ksp, b, x);
185:     VecDestroy(&x);
186:     VecDestroy(&b);
187:     /* Solve physical system:

189:          -\Delta u = -6 (x + y - 1)

191:        where u = x^3 - 3/2 x^2 + y^3 - 3/2y^2 + 1/2,
192:        so \Delta u = 6 x - 3 + 6 y - 3,
193:        and \frac{\partial u}{\partial n} = {3x (x - 1), 3y (y - 1)} \cdot n
194:                                          = \pm 3x (x - 1) at x=0,1 = 0
195:                                          = \pm 3y (y - 1) at y=0,1 = 0
196:     */
197:   }
198:   /* Cleanup */
199:   MatDestroy(&A);
200:   PetscFree2(elemRows, elemMats);
201:   DMDestroy(&dm);
202:   PetscFinalize();
203:   return 0;
204: }