Actual source code: ex5.c

static char help[] = "Solves two linear systems in parallel with KSP.  The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure).  The code\n\
also uses multiple profiling stages.  Input arguments are\n\
  -m <size> : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";
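
/*
  For reference, a typical way to run this example in parallel (assuming a
  standard PETSc build and an mpiexec-style MPI launcher; the launcher name
  and build layout may differ on your system) is:

      mpiexec -n 2 ./ex5 -m 10 -pc_type jacobi -ksp_monitor_short -log_view

  All of the options shown above appear elsewhere in this file (in the help
  string, the comments, or the TEST block at the end).
*/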

/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines       petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets                petscksp.h - Krylov subspace methods
     petscviewer.h - viewers                   petscpc.h  - preconditioners
*/
#include <petscksp.h>

int main(int argc, char **args)
{
  KSP           ksp;         /* linear solver context */
  Mat           C;           /* matrix */
  Vec           x, u, b;     /* approx solution, exact solution, RHS */
  PetscReal     norm, bnorm; /* norm of solution error, norm of RHS */
  PetscScalar   v, none = -1.0;
  PetscInt      Ii, J, ldim, low, high, iglobal, Istart, Iend;
  PetscInt      i, j, m = 3, n = 2, its;
  PetscMPIInt   size, rank;
  PetscBool     mat_nonsymmetric = PETSC_FALSE;
  PetscBool     testnewC = PETSC_FALSE, testscaledMat = PETSC_FALSE, single = PETSC_FALSE;
  PetscLogStage stages[2];

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsHasName(NULL, NULL, "-pc_precision", &single));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  n = 2 * size;

  /*
     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mat_nonsym", &mat_nonsymmetric, NULL));

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_scaledMat", &testscaledMat, NULL));

  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscCall(PetscLogStageRegister("Original Solve", &stages[0]));
  PetscCall(PetscLogStageRegister("Second Solve", &stages[1]));

  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscCall(PetscLogStagePush(stages[0]));

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
  PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
  PetscCall(MatSetFromOptions(C));
  PetscCall(MatSetUp(C));

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(C, &Istart, &Iend));

  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
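
  /*
     The loop above fills each locally owned row Ii with a standard 5-point
     stencil on the m x n grid: 4.0 on the diagonal and -1.0 for each existing
     neighbor in the row direction (Ii - n, Ii + n) and the column direction
     (Ii - 1, Ii + 1), where i = Ii / n and j = Ii - i * n index the grid point.
  */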

  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
    }
  } else {
    PetscCall(MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE));
    PetscCall(MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
  PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
  PetscCall(VecSetFromOptions(u));
  PetscCall(VecDuplicate(u, &b));
  PetscCall(VecDuplicate(b, &x));

  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors.  Determine which
     range of entries is locally owned.
  */
  PetscCall(VecGetOwnershipRange(x, &low, &high));

  /*
     Set elements within the exact solution vector in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local entries will be sent to the
        appropriate processor during vector assembly).
      - Always specify global locations of vector entries.
  */
  PetscCall(VecGetLocalSize(x, &ldim));
  for (i = 0; i < ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100 * rank);
    PetscCall(VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES));
  }
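
  /*
     With the assignment above, the exact solution u has local entries
     i + 100*rank, so the block of values contributed by each MPI process
     is easy to identify.
  */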

  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  PetscCall(VecAssemblyBegin(u));
  PetscCall(VecAssemblyEnd(u));

  /*
     Compute right-hand-side vector
  */
  PetscCall(MatMult(C, u, b));

  /*
     Create linear solver context
  */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  PetscCall(KSPSetOperators(ksp, C, C));

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
  PetscCall(KSPSetFromOptions(ksp));

  /*
     Solve linear system.  Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU.  This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  PetscCall(KSPSetUp(ksp));
  PetscCall(KSPSolve(ksp, b, x));

  /*
     Check the residual
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(VecNorm(b, NORM_2, &bnorm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  if (!testscaledMat || (!single && norm / bnorm > 1.e-5)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));
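
  /*
     Note that the quantity printed above is ||x - u|| / ||b||: the error with
     respect to the known exact solution u, scaled by the norm of the
     right-hand side (x was overwritten by x - u just before the norm call).
  */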

  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method.  We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogStagePush(stages[1]));

  /*
     Initialize all matrix entries to zero.  MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  PetscCall(MatZeroEntries(C));

  /*
     Assemble matrix again.  Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
  for (i = 0; i < m; i++) {
    for (j = 2 * rank; j < 2 * rank + 2; j++) {
      v  = -1.0;
      Ii = j + n * i;
      if (i > 0) {
        J = Ii - n;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (i < m - 1) {
        J = Ii + n;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (j > 0) {
        J = Ii - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (j < n - 1) {
        J = Ii + 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      v = 6.0;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
    }
  }
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
    }
  }
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
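
  /*
     The reassembled matrix has exactly the same nonzero pattern as before;
     only the numerical values changed (the diagonal is now 6.0 instead of
     4.0).  Keeping the pattern fixed lets the existing matrix data structure
     be reused, and it typically allows factorization-based preconditioners to
     reuse symbolic information as well (see the SamePattern_SameRowPerm
     variant exercised in the TEST block below).
  */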

  if (testscaledMat) {
    PetscRandom rctx;

    /* Scale a(0,0) and a(M-1,M-1) */
    if (rank == 0) {
      v  = 6.0 * 0.00001;
      Ii = 0;
      J  = 0;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    } else if (rank == size - 1) {
      v  = 6.0 * 0.00001;
      Ii = m * n - 1;
      J  = m * n - 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

    /* Compute a new right-hand-side vector */
    PetscCall(VecDestroy(&u));
    PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
    PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
    PetscCall(VecSetFromOptions(u));

    PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
    PetscCall(PetscRandomSetFromOptions(rctx));
    PetscCall(VecSetRandom(u, rctx));
    PetscCall(PetscRandomDestroy(&rctx));
    PetscCall(VecAssemblyBegin(u));
    PetscCall(VecAssemblyEnd(u));
  }
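
  /*
     With -test_scaledMat, the diagonal entries a(0,0) and a(M-1,M-1) are
     replaced by 6.0e-5 (giving a badly scaled matrix) and the exact solution
     u is replaced by a random vector; the superlu_dist tests in the TEST
     block below use this option.
  */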

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_newMat", &testnewC, NULL));
  if (testnewC) {
    /*
      The user may supply a new matrix C with the same nonzero pattern, e.g.
       ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
    */
    Mat Ctmp;
    PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &Ctmp));
    PetscCall(MatDestroy(&C));
    PetscCall(MatDuplicate(Ctmp, MAT_COPY_VALUES, &C));
    PetscCall(MatDestroy(&Ctmp));
  }

  PetscCall(MatMult(C, u, b));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  PetscCall(KSPSetOperators(ksp, C, C));

  /*
     Solve linear system
  */
  PetscCall(KSPSetUp(ksp));
  PetscCall(KSPSolve(ksp, b, x));

  /*
     Check the residual
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  if (!testscaledMat || (!single && norm / bnorm > PETSC_SMALL)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&C));

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscCall(PetscLogStagePop());

  PetscCall(PetscFinalize());
  return 0;
}
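
/*
  The block below is metadata for the PETSc test harness; it lives inside a C
  comment and is not compiled.  Each "test:" entry lists the MPI process count
  (nsize), the command-line arguments, and (optionally) the expected output
  file used when the example is run as part of the PETSc test suite.
*/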

/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3s
      nsize: 15
      requires: superlu_dist defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT -pc_precision single
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/ex5_superlu_dist.out

TEST*/