Actual source code: ex4.c
petsc-3.3-p7 2013-05-11
static char help[] = "Test MatSetValuesBatch: setting batches of elements using the GPU.\n\
This works with SeqAIJCUSP and MPIAIJCUSP matrices.\n\n";

#include <petscdmda.h>
#include <petscksp.h>
/* We will use a structured mesh for this assembly test. Each square will be divided into two triangles:
  C       D
   _______
  |\      | The matrix for 0 and 1 is  /  1   -0.5 -0.5 \
  | \   1 |                            | -0.5  0.5  0.0 |
  |  \    |                            \ -0.5  0.0  0.5 /
  |   \   |
  |    \  |
  | 0   \ |
  |      \|
   -------
  A       B
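
   Derivation sketch (not in the original source): this is the standard P1
   stiffness matrix for the Laplacian on a right triangle with legs of length h.
   For triangle 0 with vertices A = (0,0), B = (h,0), C = (0,h), the basis
   gradients are \grad\phi_A = (-1/h, -1/h), \grad\phi_B = (1/h, 0),
   \grad\phi_C = (0, 1/h), and the entries are
   K_{ij} = (\grad\phi_i \cdot \grad\phi_j) |T| with area |T| = h^2/2, e.g.
   K_{AA} = (2/h^2)(h^2/2) = 1 and K_{AB} = (-1/h^2)(h^2/2) = -0.5,
   independent of h. The same matrix serves triangle 1 with (D, C, B) in
   place of (A, B, C).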

 TO ADD:
   DONE 1) Build and run on baconost
     - Gather data for CPU/GPU up to da_grid_x 1300
     - Looks 6x faster than CPU
     - Make plot

   DONE 2) Solve the Neumann Poisson problem

   3) Multi-GPU Assembly
      - MPIAIJCUSP: Just have two SEQAIJCUSP matrices, nothing else special
        a) Filter rows to be sent to other procs (normally stashed); see the sketch after this comment
        b) Send/recv rows, might as well do with a VecScatter
        c) Potential to overlap this computation w/ GPU (talk to Nathan)
        c') Just shove these rows in after the local ones
        d) Have implicit rep of COO from repeated/tiled_range
        e) Do a filtered copy, decrementing rows and remapping columns, which splits into two sets
        f) Make two COO matrices and do separate aggregation on each one

   4) Solve the Neumann Poisson problem in parallel
      - Try it on the GPU machine at Brown (they need another GNU install)

   5) GPU FEM integration
      - Move launch code to PETSc, or try again now that assembly is in PETSc
      - Move build code to PETSc

   6) Try out CUSP PCs
*/
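
/* A minimal sketch (not in the original) of step 3a in the TODO list above:
   filtering batched element rows into on-process and off-process sets by row
   ownership. The function name ex4SplitRows and its count outputs are
   hypothetical; MatGetOwnershipRange() is real PETSc API. */
PetscErrorCode ex4SplitRows(Mat A, PetscInt Ne, PetscInt Nl, const PetscInt elemRows[], PetscInt *nLocal, PetscInt *nRemote)
{
  PetscInt       rstart, rend, i, nl = 0, nr = 0;
  PetscErrorCode ierr;

  ierr = MatGetOwnershipRange(A, &rstart, &rend);CHKERRQ(ierr);
  for (i = 0; i < Ne*Nl; ++i) {
    if (elemRows[i] >= rstart && elemRows[i] < rend) ++nl; /* stays local */
    else                                             ++nr; /* would be sent (normally stashed) */
  }
  *nLocal  = nl;
  *nRemote = nr;
  return 0;
}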

PetscErrorCode IntegrateCells(DM dm, PetscInt *Ne, PetscInt *Nl, PetscInt **elemRows, PetscScalar **elemMats)
{
  DMDALocalInfo info;
  PetscInt     *er;
  PetscScalar  *em;
  PetscInt      X, Y, dof;
  PetscInt      nl, nxe, nye, ne;
  PetscInt      k = 0, m = 0;
  PetscInt      i, j;
  PetscLogEvent integrationEvent;

  PetscLogEventRegister("ElemIntegration", DM_CLASSID, &integrationEvent);
  PetscLogEventBegin(integrationEvent,0,0,0,0);
  DMDAGetInfo(dm, 0, &X, &Y,0,0,0,0, &dof,0,0,0,0,0);
  DMDAGetLocalInfo(dm, &info);
  nl  = dof*3;
  nxe = info.xm; if (info.xs+info.xm == X) nxe--;
  nye = info.ym; if (info.ys+info.ym == Y) nye--;
  ne  = 2 * nxe * nye;
  *Ne = ne;
  *Nl = nl;
  PetscMalloc2(ne*nl, PetscInt, elemRows, ne*nl*nl, PetscScalar, elemMats);
  er = *elemRows;
  em = *elemMats;
  /* Proc 0            Proc 1
     xs: 0  xm: 3      xs: 0  xm: 3
     ys: 0  ym: 2      ys: 2  ym: 1
     8 elements x 3 vertices = 24 element matrix rows and 72 entries
        6 offproc rows containing 18 element matrix entries
       18  onproc rows containing 54 element matrix entries
        3 offproc columns in 8 element matrix entries
     so we should have 46 diagonal matrix entries */
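  /* Arithmetic check for the count above: of the 54 on-process row entries,
     8 fall in off-process columns, leaving 54 - 8 = 46 in the diagonal block. */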
  for (j = info.ys; j < info.ys+nye; ++j) {
    for (i = info.xs; i < info.xs+nxe; ++i) {
      PetscInt rowA = j*X + i,     rowB = j*X + i+1;
      PetscInt rowC = (j+1)*X + i, rowD = (j+1)*X + i+1;

      /* Lower triangle */
      er[k+0] = rowA; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
      er[k+1] = rowB; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
      er[k+2] = rowC; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
      k += nl; m += nl*nl;
      /* Upper triangle */
      er[k+0] = rowD; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
      er[k+1] = rowC; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
      er[k+2] = rowB; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
      k += nl; m += nl*nl;
    }
  }
  PetscLogEventEnd(integrationEvent,0,0,0,0);
  return 0;
}
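
/* Note on the layout produced above (this follows from the loops, and from
   the MatSetValuesBatch() manual page in petsc-3.3): elemRows holds Ne*Nl
   global row indices, three per element, and elemMats holds Ne*Nl*Nl scalars,
   one dense Nl x Nl element matrix after another in row-major order.
   MatSetValuesBatch() uses each element's indices for both its rows and its
   columns and adds the values into the matrix. */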

int main(int argc, char **argv)
{
  KSP            ksp;
  MatNullSpace   nullsp;
  DM             dm;
  Mat            A = PETSC_NULL; /* initialized so the final MatDestroy() is safe when both stages are disabled */
  Vec            x, b;
  PetscViewer    viewer;
  PetscInt       Nl, Ne;
  PetscInt      *elemRows;
  PetscScalar   *elemMats;
  PetscBool      doGPU = PETSC_TRUE, doCPU = PETSC_TRUE, doSolve = PETSC_FALSE, doView = PETSC_TRUE;
  PetscLogStage  gpuStage, cpuStage;

  PetscInitialize(&argc, &argv, 0, help);
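  /* The negative grid sizes below are standard DMDA behavior: 3 points per
     direction by default, overridable at run time via -da_grid_x / -da_grid_y. */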
  DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_NONE, DMDA_BOUNDARY_NONE, DMDA_STENCIL_BOX, -3, -3, PETSC_DECIDE, PETSC_DECIDE, 1, 1, PETSC_NULL, PETSC_NULL, &dm);
  IntegrateCells(dm, &Ne, &Nl, &elemRows, &elemMats);
  PetscOptionsGetBool(PETSC_NULL, "-view", &doView, PETSC_NULL);
  /* Construct matrix using GPU */
  PetscOptionsGetBool(PETSC_NULL, "-gpu", &doGPU, PETSC_NULL);
  if (doGPU) {
    PetscLogStageRegister("GPU Stage", &gpuStage);
    PetscLogStagePush(gpuStage);
    DMCreateMatrix(dm, MATAIJ, &A);
    MatSetType(A, MATAIJCUSP);
    MatSeqAIJSetPreallocation(A, 0, PETSC_NULL);
    MatMPIAIJSetPreallocation(A, 0, PETSC_NULL, 0, PETSC_NULL);
    MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    if (doView) {
      PetscViewerASCIIOpen(PETSC_COMM_WORLD, PETSC_NULL, &viewer);
      if (Ne > 500) {PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);}
      MatView(A, viewer);
      PetscViewerDestroy(&viewer);
    }
    PetscLogStagePop();
    MatDestroy(&A);
  }
  /* Construct matrix using CPU */
  PetscOptionsGetBool(PETSC_NULL, "-cpu", &doCPU, PETSC_NULL);
  if (doCPU) {
    PetscLogStageRegister("CPU Stage", &cpuStage);
    PetscLogStagePush(cpuStage);
    DMCreateMatrix(dm, MATAIJ, &A);
    MatZeroEntries(A);
    MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    if (doView) {
      PetscViewerASCIIOpen(PETSC_COMM_WORLD, PETSC_NULL, &viewer);
      if (Ne > 500) {PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);}
      MatView(A, viewer);
      PetscViewerDestroy(&viewer);
    }
    PetscLogStagePop();
  }
  /* Solve a simple system with a random rhs */
  PetscOptionsGetBool(PETSC_NULL, "-solve", &doSolve, PETSC_NULL);
  if (doSolve) {
    MatGetVecs(A, &x, &b);
    VecSetRandom(b, PETSC_NULL);
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, A, A, DIFFERENT_NONZERO_PATTERN);
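    /* The pure-Neumann Laplacian is singular: the constant vector spans its
       null space. PETSC_TRUE below attaches that constant null space so the
       Krylov solver can handle the singular, but consistent, system. */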
    MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, PETSC_NULL, &nullsp);
    KSPSetNullSpace(ksp, nullsp);
    MatNullSpaceDestroy(&nullsp);
    KSPSetFromOptions(ksp);
    KSPSolve(ksp, b, x);
    KSPDestroy(&ksp);
    VecDestroy(&x);
    VecDestroy(&b);
    /* Solve physical system:

         -\Delta u = -6 (x + y - 1)

       where u = x^3 - 3/2 x^2 + y^3 - 3/2 y^2 + 1/2,
       so \Delta u = 6 x - 3 + 6 y - 3,
       and \frac{\partial u}{\partial n} = {3x (x - 1), 3y (y - 1)} \cdot n
                                         = \pm 3x (x - 1) = 0 at x = 0,1
                                         = \pm 3y (y - 1) = 0 at y = 0,1
    */
  }
  /* Cleanup */
  MatDestroy(&A);
  PetscFree2(elemRows, elemMats);
  DMDestroy(&dm);
  PetscFinalize();
  return 0;
}
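
/* Example invocations (a sketch, assuming a CUSP-enabled petsc-3.3 build;
   -gpu, -cpu, -view, and -solve are the options registered above, the rest
   are standard PETSc options):

     ./ex4 -da_grid_x 100 -da_grid_y 100 -cpu 0 -view 0 -log_summary
     ./ex4 -solve -ksp_type cg -ksp_monitor
*/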