Actual source code: ex46.c

static char help[] = "Solves a linear system in parallel with KSP and DM.\n\
Compare this to ex2 which solves the same problem without a DM.\n\n";

/*
  Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines       petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets                petscksp.h - Krylov subspace methods
     petscviewer.h - viewers                   petscpc.h  - preconditioners
*/
#include <petscdm.h>
#include <petscdmda.h>
#include <petscksp.h>
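/*
   Added build note (a sketch; the paths assume a standard PETSc installation
   with PETSC_DIR and PETSC_ARCH set): a minimal user makefile can reuse the
   PETSc build rules, e.g.

     include ${PETSC_DIR}/lib/petsc/conf/variables
     include ${PETSC_DIR}/lib/petsc/conf/rules

   after which "make ex46" compiles and links this file against PETSc.
*/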

int main(int argc, char **argv)
{
  DM            da;      /* distributed array */
  Vec           x, b, u; /* approx solution, RHS, exact solution */
  Mat           A;       /* linear system matrix */
  KSP           ksp;     /* linear solver context */
  PetscRandom   rctx;    /* random number generator context */
  PetscReal     norm;    /* norm of solution error */
  PetscInt      i, j, its;
  PetscBool     flg = PETSC_FALSE;
  PetscLogStage stage;
  DMDALocalInfo info;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
  /*
     Create a distributed array (DMDA) to handle the parallel data layout.
     The problem size defaults to 8 by 7, but this can be changed at run time
     with -da_grid_x M -da_grid_y N.
  */
  PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 8, 7, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
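  /*
     Added usage note (illustrative; the executable name and the option values
     are assumptions, not part of the original example): a typical parallel run
     on a finer grid might look like

       mpiexec -n 4 ./ex46 -da_grid_x 100 -da_grid_y 100 -ksp_monitor

     where -da_grid_x/-da_grid_y override the 8 by 7 default set above.
  */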

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /*
     Create parallel matrix preallocated according to the DMDA, format AIJ by default.
     To use symmetric storage, run with -dm_mat_type sbaij -mat_ignore_lower_triangular
  */
  PetscCall(DMCreateMatrix(da, &A));

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (any nonlocal elements will be sent to the
        appropriate processor during matrix assembly).
      - Rows and columns are specified via MatStencil, i.e., by logical
        grid coordinates (i,j) rather than global matrix indices.
      - Entries are normalized for the domain [0,1]x[0,1]; see the note below.
   */
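  /*
     Added note (a sketch, not part of the original example): with hx = 1/mx
     and hy = 1/my, the five-point finite-difference discretization of
     -Laplacian(u) at an interior node (i,j) is

       (2*u[i][j] - u[i-1][j] - u[i+1][j])/hx^2 + (2*u[i][j] - u[i][j-1] - u[i][j+1])/hy^2

     Multiplying through by the cell area hx*hy gives the weights assembled
     below: 2*(hx/hy + hy/hx) on the diagonal, -hy/hx for the west/east
     neighbors, and -hx/hy for the south/north neighbors.  Dropping the
     missing neighbors at the boundary, as the if-tests below do, amounts to
     a homogeneous Dirichlet condition that has been eliminated from the system.
  */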
  PetscCall(PetscLogStageRegister("Assembly", &stage));
  PetscCall(PetscLogStagePush(stage));
  PetscCall(DMDAGetLocalInfo(da, &info));
  for (j = info.ys; j < info.ys + info.ym; j++) {
    for (i = info.xs; i < info.xs + info.xm; i++) {
      PetscReal   hx = 1. / info.mx, hy = 1. / info.my;
      MatStencil  row = {0}, col[5] = {{0}};
      PetscScalar v[5];
      PetscInt    ncols = 0;
      row.j             = j;
      row.i             = i;
      col[ncols].j      = j;
      col[ncols].i      = i;
      v[ncols++]        = 2 * (hx / hy + hy / hx);
      /* off-diagonal couplings: add only the neighbors that exist;
         neighbors missing at the domain boundary are simply skipped */
      if (i > 0) {
        col[ncols].j = j;
        col[ncols].i = i - 1;
        v[ncols++]   = -hy / hx;
      }
      if (i < info.mx - 1) {
        col[ncols].j = j;
        col[ncols].i = i + 1;
        v[ncols++]   = -hy / hx;
      }
      if (j > 0) {
        col[ncols].j = j - 1;
        col[ncols].i = i;
        v[ncols++]   = -hx / hy;
      }
      if (j < info.my - 1) {
        col[ncols].j = j + 1;
        col[ncols].i = i;
        v[ncols++]   = -hx / hy;
      }
      PetscCall(MatSetValuesStencil(A, 1, &row, ncols, col, v, INSERT_VALUES));
    }
  }

  /*
     Assemble the matrix using the two-step process
       MatAssemblyBegin(), MatAssemblyEnd().
     Computations can be done while messages are in transit
     by placing code between these two statements (see the note below).
  */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogStagePop());
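  /*
     Added note (illustrative, not part of the original example): any work that
     does not touch A could overlap the assembly communication; for instance,
     the vector creation calls below could in principle be placed between
     MatAssemblyBegin() and MatAssemblyEnd().
  */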

  /*
     Create parallel vectors compatible with the DMDA.
  */
  PetscCall(DMCreateGlobalVector(da, &u));
  PetscCall(VecDuplicate(u, &b));
  PetscCall(VecDuplicate(u, &x));

  /*
     Set the exact solution, then compute the right-hand-side vector b = A*u.
     By default the exact solution is a vector with all elements equal to 1.0;
     alternatively, the runtime option -random_exact_sol fills the exact
     solution with random components.
  */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-random_exact_sol", &flg, NULL));
  if (flg) {
    PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
    PetscCall(PetscRandomSetFromOptions(rctx));
    PetscCall(VecSetRandom(u, rctx));
    PetscCall(PetscRandomDestroy(&rctx));
  } else {
    PetscCall(VecSet(u, 1.));
  }
  PetscCall(MatMult(A, u, b));

  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create linear solver context
  */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  PetscCall(KSPSetOperators(ksp, A, A));
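  /*
     Added note (an aside, not part of the original example): KSPSetOperators()
     also accepts a different second matrix, e.g. KSPSetOperators(ksp, A, P),
     when the preconditioner should be built from an approximation P of A.
  */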

  /*
    Set runtime options, e.g.,
        -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
    These options will override those specified above as long as
    KSPSetFromOptions() is called _after_ any other customization
    routines.
  */
  PetscCall(KSPSetFromOptions(ksp));
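  /*
     Added sketch (hedged; the particular solver choices here are assumptions,
     not part of the original example): defaults could also be set in code
     before the KSPSetFromOptions() call above, so that the command line can
     still override them, e.g.

       PetscCall(KSPSetType(ksp, KSPCG));
       PetscCall(KSPSetTolerances(ksp, 1.e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));

     or, equivalently, at run time with
       ./ex46 -ksp_type cg -ksp_rtol 1e-8
  */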

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  PetscCall(KSPSolve(ksp, b, x));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Check the error: compute x <- x - u (overwriting the computed solution)
     and take its 2-norm.
  */
  PetscCall(VecAXPY(x, -1., u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));

  /*
     Print convergence information.  PetscPrintf() produces a single line of
     output on behalf of all the processes that share a communicator (only the
     first process actually prints).  An alternative is PetscFPrintf(), which
     prints to a file.
  */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(DMDestroy(&da));

  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   testset:
      requires: cuda
      args: -dm_mat_type aijcusparse -dm_vec_type cuda -random_exact_sol
      output_file: output/ex46_aijcusparse.out

      test:
        suffix: aijcusparse
        args: -use_gpu_aware_mpi 0
      test:
        suffix: aijcusparse_mpi_gpu_aware

   testset:
      requires: cuda
      args: -dm_mat_type aijcusparse -dm_vec_type cuda -random_exact_sol -pc_type ilu -pc_factor_mat_solver_type cusparse
      output_file: output/ex46_aijcusparse_2.out
      test:
        suffix: aijcusparse_2
        args: -use_gpu_aware_mpi 0
      test:
        suffix: aijcusparse_2_mpi_gpu_aware

TEST*/
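/*
   Added note (hedged): with a CUDA-enabled PETSc build, these cases can
   typically be run through the PETSc test harness with something like

     make -f gmakefile test search='ksp_ksp_tutorials-ex46_aijcusparse*'

   The target name shown is an assumption based on the usual
   <package>_<dir>-<example>_<suffix> naming convention of the harness.
*/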