Actual source code: ex4.c

petsc-master 2018-02-18
static char help[] = "Test MatSetValuesBatch: setting batches of elements using the GPU.\n\
This works with SeqAIJCUSP and MPIAIJCUSP matrices.\n\n";

#include <petscdm.h>
#include <petscdmda.h>
#include <petscksp.h>

/* We will use a structured mesh for this assembly test. Each square will be divided into two triangles:
  C       D
   _______
  |\      | The matrix for 0 and 1 is /   1  -0.5 -0.5 \
  | \   1 |                           | -0.5  0.5  0.0 |
  |  \    |                           \ -0.5  0.0  0.5 /  (derived in the sketch after this comment)
  |   \   |
  |    \  |
  |  0  \ |
  |      \|
  ---------
  A       B

TO ADD:
  DONE 1) Build and run on baconost
    - Gather data for CPU/GPU up to da_grid_x 1300
      - Looks 6x faster than CPU
    - Make plot

  DONE 2) Solve the Neumann Poisson problem

  3) Multi-GPU Assembly
    - MPIAIJCUSP: Just have two SEQAIJCUSP matrices, nothing else special
    a) Filter rows to be sent to other procs (normally stashed)
    b) send/recv rows, might as well do with a VecScatter
    c) Potential to overlap this computation w/ GPU (talk to Nathan)
    c') Just shove these rows in after the local
    d) Have implicit rep of COO from repeated/tiled_range
    e) Do a filtered copy, decrementing rows and remapping columns, which splits into two sets
    f) Make two COO matrices and do separate aggregation on each one

  4) Solve the Neumann Poisson problem in parallel
    - Try it on GPU machine at Brown (They need another GNU install)

  5) GPU FEM integration
    - Move launch code to PETSc   or   - Try again now that assembly is in PETSc
    - Move build code to PETSc

  6) Try out CUSP PCs
*/
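
/* Side note (not part of the original example): the 3x3 element matrix quoted in the comment
   above is the standard P1 Laplacian stiffness matrix on a right triangle with unit legs.
   With vertices A=(0,0), B=(1,0), C=(0,1), the linear basis functions have constant gradients
   grad(phi_A) = (-1,-1), grad(phi_B) = (1,0), grad(phi_C) = (0,1), and
   K_ij = area * grad(phi_i) . grad(phi_j) with area = 1/2, which reproduces
   [ 1 -0.5 -0.5; -0.5 0.5 0; -0.5 0 0.5 ].  The helper below only evaluates that formula;
   its name is ours and it is not called by the example. */
static void P1LaplacianRightTriangle(PetscScalar K[3][3])
{
  const PetscReal grad[3][2] = {{-1.0, -1.0}, {1.0, 0.0}, {0.0, 1.0}};
  const PetscReal area       = 0.5;
  PetscInt        i, j;

  for (i = 0; i < 3; ++i) {
    for (j = 0; j < 3; ++j) K[i][j] = area*(grad[i][0]*grad[j][0] + grad[i][1]*grad[j][1]);
  }
}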

PetscErrorCode IntegrateCells(DM dm, PetscInt *Ne, PetscInt *Nl, PetscInt **elemRows, PetscScalar **elemMats)
{
  DMDALocalInfo  info;
  PetscInt       *er;
  PetscScalar    *em;
  PetscInt       X, Y, dof;
  PetscInt       nl, nxe, nye, ne;
  PetscInt       k = 0, m  = 0;
  PetscInt       i, j;
  PetscErrorCode ierr;
#if defined(PETSC_USE_LOG)
  PetscLogEvent  integrationEvent;
#endif

  ierr = PetscLogEventRegister("ElemIntegration", DM_CLASSID, &integrationEvent);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(integrationEvent,0,0,0,0);CHKERRQ(ierr);
  ierr = DMDAGetInfo(dm, 0, &X, &Y,0,0,0,0, &dof,0,0,0,0,0);CHKERRQ(ierr);
  ierr = DMDAGetLocalInfo(dm, &info);CHKERRQ(ierr);
  nl   = dof*3;
  nxe  = info.xm; if (info.xs+info.xm == X) nxe--;
  nye  = info.ym; if (info.ys+info.ym == Y) nye--;
  ne   = 2 * nxe * nye;
  *Ne  = ne;
  *Nl  = nl;
  ierr = PetscMalloc2(ne*nl, elemRows, ne*nl*nl, elemMats);CHKERRQ(ierr);
  er   = *elemRows;
  em   = *elemMats;
  /* Proc 0        Proc 1                                               */
  /* xs: 0  xm: 3  xs: 0 xm: 3                                          */
  /* ys: 0  ym: 2  ys: 2 ym: 1                                          */
  /* 8 elements x 3 vertices = 24 element matrix rows and 72 entries    */
  /*   6 offproc rows containing 18 element matrix entries              */
  /*  18  onproc rows containing 54 element matrix entries              */
  /*   3 offproc columns in 8 element matrix entries                    */
  /*   so we should have 46 diagonal matrix entries                     */
  for (j = info.ys; j < info.ys+nye; ++j) {
    for (i = info.xs; i < info.xs+nxe; ++i) {
      PetscInt rowA = j*X     + i, rowB = j*X     + i+1;
      PetscInt rowC = (j+1)*X + i, rowD = (j+1)*X + i+1;

      /* Lower triangle */
      er[k+0] = rowA; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
      er[k+1] = rowB; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
      er[k+2] = rowC; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
      k      += nl; m += nl*nl;
      /* Upper triangle */
      er[k+0] = rowD; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
      er[k+1] = rowC; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
      er[k+2] = rowB; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
      k      += nl; m += nl*nl;
    }
  }
  ierr = PetscLogEventEnd(integrationEvent,0,0,0,0);CHKERRQ(ierr);
  return 0;
}
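
/* For reference, a sketch of how the batched arrays built by IntegrateCells() map onto the
   standard assembly path, under our reading of the layout used above: element e owns rows
   elemRows[e*Nl .. e*Nl+Nl-1], and its dense Nl x Nl element matrix is stored row-major
   starting at elemMats[e*Nl*Nl].  MatSetValuesBatch() adds all Ne blocks in one call; the
   helper below (our name, not called by the example) adds them one element at a time with
   MatSetValues() and ADD_VALUES, which should produce the same assembled operator. */
static PetscErrorCode AssembleByElement(Mat A, PetscInt Ne, PetscInt Nl, const PetscInt elemRows[], const PetscScalar elemMats[])
{
  PetscErrorCode ierr;
  PetscInt       e;

  for (e = 0; e < Ne; ++e) {
    ierr = MatSetValues(A, Nl, &elemRows[e*Nl], Nl, &elemRows[e*Nl], &elemMats[e*Nl*Nl], ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}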

int main(int argc, char **argv)
{
  KSP            ksp;
  MatNullSpace   nullsp;
  DM             dm;
  Mat            A;
  Vec            x, b;
  PetscViewer    viewer;
  PetscInt       Nl, Ne;
  PetscInt       *elemRows;
  PetscScalar    *elemMats;
  PetscBool      doView = PETSC_TRUE;
  PetscErrorCode ierr;
#if defined(PETSC_USE_LOG)
  PetscLogStage  gpuStage, cpuStage;
#endif

  ierr = PetscInitialize(&argc, &argv, 0, help);if (ierr) return ierr;
  ierr = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_BOX, 3, 3, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &dm);CHKERRQ(ierr);
  ierr = DMSetFromOptions(dm);CHKERRQ(ierr);
  ierr = DMSetUp(dm);CHKERRQ(ierr);
  ierr = IntegrateCells(dm, &Ne, &Nl, &elemRows, &elemMats);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL, "-view", &doView, NULL);CHKERRQ(ierr);

  /* Construct matrix using GPU */
  ierr = PetscLogStageRegister("GPU Stage", &gpuStage);CHKERRQ(ierr);
  ierr = PetscLogStagePush(gpuStage);CHKERRQ(ierr);
  ierr = DMSetMatType(dm,MATAIJ);CHKERRQ(ierr);
  ierr = DMCreateMatrix(dm, &A);CHKERRQ(ierr);
  ierr = MatSetType(A, MATAIJCUSP);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A, 0, NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A, 0, NULL, 0, NULL);CHKERRQ(ierr);
  ierr = MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (doView) {
    ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD, NULL, &viewer);CHKERRQ(ierr);
    if (Ne > 500) {ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);}
    ierr = MatView(A, viewer);CHKERRQ(ierr);
    if (Ne > 500) {ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);}
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  }
  ierr = PetscLogStagePop();CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);

  /* Construct matrix using CPU */
  ierr = PetscLogStageRegister("CPU Stage", &cpuStage);CHKERRQ(ierr);
  ierr = PetscLogStagePush(cpuStage);CHKERRQ(ierr);
  ierr = DMSetMatType(dm,MATAIJ);CHKERRQ(ierr);
  ierr = DMCreateMatrix(dm, &A);CHKERRQ(ierr);
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (doView) {
    ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD, NULL, &viewer);CHKERRQ(ierr);
    if (Ne > 500) {ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);}
    ierr = MatView(A, viewer);CHKERRQ(ierr);
    if (Ne > 500) {ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);}
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  }
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  /* Solve simple system with random rhs */
  ierr = MatCreateVecs(A, &x, &b);CHKERRQ(ierr);
  ierr = VecSetRandom(b, NULL);CHKERRQ(ierr);
  ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp, A, A);CHKERRQ(ierr);
  ierr = MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, NULL, &nullsp);CHKERRQ(ierr);
  ierr = MatSetNullSpace(A, nullsp);CHKERRQ(ierr);
  ierr = MatNullSpaceDestroy(&nullsp);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
  ierr = KSPSolve(ksp, b, x);CHKERRQ(ierr);
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&b);CHKERRQ(ierr);
  /* Solve physical system:

         -\Delta u = -6 (x + y - 1)

       where u = x^3 - 3/2 x^2 + y^3 - 3/2 y^2 + 1/2,
       so \Delta u = 6x - 3 + 6y - 3,
       and \frac{\partial u}{\partial n} = {3x (x - 1), 3y (y - 1)} \cdot n
                                         = \pm 3x (x - 1) at x=0,1 = 0
                                         = \pm 3y (y - 1) at y=0,1 = 0
       (a sketch of one way to assemble this right-hand side appears after main)
    */

  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree2(elemRows, elemMats);CHKERRQ(ierr);
  ierr = DMDestroy(&dm);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
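
/* A possible sketch of forming the right-hand side for the physical Neumann problem described
   in the comment inside main() above: sample f(x,y) = -6(x + y - 1) at the DMDA vertices of
   the unit square and scale by the lumped vertex area hx*hy (the natural boundary term
   vanishes since du/dn = 0).  The helper name, the unit-square assumption, and the
   vertex-lumped quadrature are our illustration only; this function is not called by the
   example. */
static PetscErrorCode FormPhysicalRHS(DM dm, Vec b)
{
  DMDALocalInfo  info;
  PetscScalar    **rhs;
  PetscReal      hx, hy, x, y;
  PetscInt       i, j;
  PetscErrorCode ierr;

  ierr = DMDAGetLocalInfo(dm, &info);CHKERRQ(ierr);
  hx   = 1.0/(info.mx - 1);
  hy   = 1.0/(info.my - 1);
  ierr = DMDAVecGetArray(dm, b, &rhs);CHKERRQ(ierr);
  for (j = info.ys; j < info.ys + info.ym; ++j) {
    for (i = info.xs; i < info.xs + info.xm; ++i) {
      x         = i*hx;
      y         = j*hy;
      rhs[j][i] = -6.0*(x + y - 1.0)*hx*hy;
    }
  }
  ierr = DMDAVecRestoreArray(dm, b, &rhs);CHKERRQ(ierr);
  return 0;
}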


/*TEST

   test:
      args: -pc_type none
      requires: cusp
      TODO: Broken MatSetValuesBatch_SeqAIJCUSP()

TEST*/