Actual source code: ex3.c

static char help[] = "Test PC redistribute on a matrix with load imbalance.\n\
                      Modified from src/ksp/ksp/tutorials/ex2.c.\n\
Input parameters include:\n\
  -random_exact_sol  : use a random exact solution vector\n\
  -view_exact_sol    : write exact solution vector to stdout\n\
  -n <mesh_y>        : number of mesh points in the y-direction\n\
  -matdistribute <d> : select the row distribution scheme (1 = very imbalanced; otherwise the default mild imbalance)\n\n";
/*
Example:
  mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_view
  mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_view
*/
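/*
  Note: in the second run above, PCREDISTRIBUTE repartitions the rows of the
  linear system (matrix and vectors) evenly across the MPI processes, and also
  removes rows that contain only a diagonal entry, before handing the balanced
  system to the inner solver configured with the -redistribute_ prefix.
*/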

#include <petscksp.h>

int main(int argc, char **args)
{
  Vec           x, b, u; /* approx solution, RHS, exact solution */
  Mat           A;       /* linear system matrix */
  KSP           ksp;     /* linear solver context */
  PetscRandom   rctx;    /* random number generator context */
  PetscReal     norm;    /* norm of solution error */
  PetscInt      i, j, Ii, J, Istart, Iend, m, n = 7, its, nloc, matdistribute = 0;
  PetscBool     flg = PETSC_FALSE;
  PetscScalar   v;
  PetscMPIInt   rank, size;
  PetscLogStage stage;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This example requires at least 2 MPI processes!");

  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-matdistribute", &matdistribute, NULL));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  switch (matdistribute) {
  case 1: /* very imbalanced process load for matrix A */
    m    = (1 + size) * size;
    nloc = (rank + 1) * n;
    if (rank == size - 1) { /* proc[size-1] stores all remaining rows */
      nloc = m * n;
      for (i = 0; i < size - 1; i++) nloc -= (i + 1) * n;
    }
    break;
  default: /* proc[0] and proc[1] get much smaller row blocks; the remaining processes get equal, 10x larger loads */
    if (rank == 0 || rank == 1) {
      nloc = n;
    } else {
      nloc = 10 * n; /* 10x larger load */
    }
    m = 2 + (size - 2) * 10;
    break;
  }
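  /* With either distribution the global system has m*n rows; only the local share nloc varies across processes. */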
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, nloc, nloc, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetFromOptions(A));
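  /* The 5-point stencil assembled below yields at most 5 nonzeros per row, so preallocate 5 entries per row (for both the diagonal and off-diagonal blocks in the MPIAIJ case) to avoid mallocs during assembly. */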
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatSetUp(A));

  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "[%d] A Istart,Iend: %" PetscInt_FMT " %" PetscInt_FMT "; nloc %" PetscInt_FMT "\n", rank, Istart, Iend, nloc));
  PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

  PetscCall(PetscLogStageRegister("Assembly", &stage));
  PetscCall(PetscLogStagePush(stage));
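  /* Each locally owned row Ii corresponds to grid point (i,j) of an m x n mesh, with i = Ii/n and j = Ii%n; insert the standard 5-point Laplacian stencil: -1 for each existing neighbor, 4 on the diagonal. */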
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogStagePop());
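  /* Popping the stage closes the "Assembly" section reported by -log_view. */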

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));

  /* Create parallel vectors. */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
  PetscCall(VecSetSizes(u, nloc, PETSC_DECIDE));
  PetscCall(VecSetFromOptions(u));
  PetscCall(VecDuplicate(u, &b));
  PetscCall(VecDuplicate(b, &x));

  /* Set exact solution; then compute right-hand-side vector. */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-random_exact_sol", &flg, NULL));
  if (flg) {
    PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
    PetscCall(PetscRandomSetFromOptions(rctx));
    PetscCall(VecSetRandom(u, rctx));
    PetscCall(PetscRandomDestroy(&rctx));
  } else {
    PetscCall(VecSet(u, 1.0));
  }
  PetscCall(MatMult(A, u, b));

  /* View the exact solution vector if desired */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, A));
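  /* Tighten the default relative tolerance as the mesh is refined; KSPSetFromOptions() below lets runtime options such as -ksp_rtol override this choice. */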
  PetscCall(KSPSetTolerances(ksp, 1.e-2 / ((m + 1) * (n + 1)), PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
  PetscCall(KSPSetFromOptions(ksp));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPSolve(ksp, b, x));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(VecAXPY(x, -1.0, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));

  /* Free work space. */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      nsize: 8
      args: -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8

   test:
      suffix: 2
      nsize: 8
      args: -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8

TEST*/