Actual source code: pipelcg.c

  1: #include <petsc/private/kspimpl.h>
  2: #include <petsc/private/vecimpl.h>

  4: #define offset(j)       PetscMax(((j) - (2 * l)), 0)
  5: #define shift(i, j)     ((i) - offset(j))
  6: #define G(i, j)         (plcg->G[((j) * (2 * l + 1)) + (shift((i), (j)))])
  7: #define G_noshift(i, j) (plcg->G[((j) * (2 * l + 1)) + (i)])
  8: #define alpha(i)        (plcg->alpha[i])
  9: #define gamma(i)        (plcg->gamma[i])
 10: #define delta(i)        (plcg->delta[i])
 11: #define sigma(i)        (plcg->sigma[i])
 12: #define req(i)          (plcg->req[i])

 14: typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
 15: struct KSP_CG_PIPE_L_s {
 16:   PetscInt     l; /* pipeline depth */
 17:   Vec         *Z; /* Z vectors (shifted base) */
 18:   Vec         *U; /* U vectors (unpreconditioned shifted base) */
 19:   Vec         *V; /* V vectors (original base) */
 20:   Vec         *Q; /* Q vectors (auxiliary bases) */
 21:   Vec          p; /* work vector */
 22:   PetscScalar *G; /* such that Z = VG (band matrix)*/
 23:   PetscScalar *gamma, *delta, *alpha;
 24:   PetscReal    lmin, lmax; /* min and max eigen values estimates to compute base shifts */
 25:   PetscReal   *sigma;      /* base shifts */
 26:   MPI_Request *req;        /* request array for asynchronous global collective */
 27:   PetscBool    show_rstrt; /* flag to show restart information in output (default: not shown) */
 28: };

 30: /*
 31:   KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.

 33:   This is called once, usually automatically by KSPSolve() or KSPSetUp()
 34:   but can be called directly by KSPSetUp()
 35: */
 36: static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
 37: {
 38:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
 39:   PetscInt       l = plcg->l, max_it = ksp->max_it;
 40:   MPI_Comm       comm;

 42:   PetscFunctionBegin;
 43:   comm = PetscObjectComm((PetscObject)ksp);
 44:   PetscCheck(max_it >= 1, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: max_it argument must be positive.", ((PetscObject)ksp)->type_name);
 45:   PetscCheck(l >= 1, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: pipel argument must be positive.", ((PetscObject)ksp)->type_name);
 46:   PetscCheck(l <= max_it, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: pipel argument must be less than max_it.", ((PetscObject)ksp)->type_name);

 48:   PetscCall(KSPSetWorkVecs(ksp, 1)); /* get work vectors needed by PIPELCG */
 49:   plcg->p = ksp->work[0];

 51:   PetscCall(VecDuplicateVecs(plcg->p, PetscMax(3, l + 1), &plcg->Z));
 52:   PetscCall(VecDuplicateVecs(plcg->p, 3, &plcg->U));
 53:   PetscCall(VecDuplicateVecs(plcg->p, 3, &plcg->V));
 54:   PetscCall(VecDuplicateVecs(plcg->p, 3 * (l - 1) + 1, &plcg->Q));
 55:   PetscCall(PetscCalloc1(2, &plcg->alpha));
 56:   PetscCall(PetscCalloc1(l, &plcg->sigma));
 57:   PetscFunctionReturn(PETSC_SUCCESS);
 58: }

 60: static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
 61: {
 62:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
 63:   PetscInt       l    = plcg->l;

 65:   PetscFunctionBegin;
 66:   PetscCall(PetscFree(plcg->sigma));
 67:   PetscCall(PetscFree(plcg->alpha));
 68:   PetscCall(VecDestroyVecs(PetscMax(3, l + 1), &plcg->Z));
 69:   PetscCall(VecDestroyVecs(3, &plcg->U));
 70:   PetscCall(VecDestroyVecs(3, &plcg->V));
 71:   PetscCall(VecDestroyVecs(3 * (l - 1) + 1, &plcg->Q));
 72:   PetscFunctionReturn(PETSC_SUCCESS);
 73: }

 75: static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
 76: {
 77:   PetscFunctionBegin;
 78:   PetscCall(KSPReset_PIPELCG(ksp));
 79:   PetscCall(KSPDestroyDefault(ksp));
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }

 83: static PetscErrorCode KSPSetFromOptions_PIPELCG(KSP ksp, PetscOptionItems *PetscOptionsObject)
 84: {
 85:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
 86:   PetscBool      flag = PETSC_FALSE;

 88:   PetscFunctionBegin;
 89:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPELCG options");
 90:   PetscCall(PetscOptionsInt("-ksp_pipelcg_pipel", "Pipeline length", "", plcg->l, &plcg->l, &flag));
 91:   if (!flag) plcg->l = 1;
 92:   PetscCall(PetscOptionsReal("-ksp_pipelcg_lmin", "Estimate for smallest eigenvalue", "", plcg->lmin, &plcg->lmin, &flag));
 93:   if (!flag) plcg->lmin = 0.0;
 94:   PetscCall(PetscOptionsReal("-ksp_pipelcg_lmax", "Estimate for largest eigenvalue", "", plcg->lmax, &plcg->lmax, &flag));
 95:   if (!flag) plcg->lmax = 0.0;
 96:   PetscCall(PetscOptionsBool("-ksp_pipelcg_monitor", "Output information on restarts when they occur? (default: 0)", "", plcg->show_rstrt, &plcg->show_rstrt, &flag));
 97:   if (!flag) plcg->show_rstrt = PETSC_FALSE;
 98:   PetscOptionsHeadEnd();
 99:   PetscFunctionReturn(PETSC_SUCCESS);
100: }

102: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
103: {
104:   PetscFunctionBegin;
105: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
106:   PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
107: #else
108:   PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
109:   *request = MPI_REQUEST_NULL;
110: #endif
111:   PetscFunctionReturn(PETSC_SUCCESS);
112: }

114: static PetscErrorCode KSPView_PIPELCG(KSP ksp, PetscViewer viewer)
115: {
116:   KSP_CG_PIPE_L *plcg   = (KSP_CG_PIPE_L *)ksp->data;
117:   PetscBool      iascii = PETSC_FALSE, isstring = PETSC_FALSE;

119:   PetscFunctionBegin;
120:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
121:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
122:   if (iascii) {
123:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Pipeline depth: %" PetscInt_FMT "\n", plcg->l));
124:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Minimal eigenvalue estimate %g\n", (double)plcg->lmin));
125:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Maximal eigenvalue estimate %g\n", (double)plcg->lmax));
126:   } else if (isstring) {
127:     PetscCall(PetscViewerStringSPrintf(viewer, "  Pipeline depth: %" PetscInt_FMT "\n", plcg->l));
128:     PetscCall(PetscViewerStringSPrintf(viewer, "  Minimal eigenvalue estimate %g\n", (double)plcg->lmin));
129:     PetscCall(PetscViewerStringSPrintf(viewer, "  Maximal eigenvalue estimate %g\n", (double)plcg->lmax));
130:   }
131:   PetscFunctionReturn(PETSC_SUCCESS);
132: }

134: static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
135: {
136:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
137:   Mat            A = NULL, Pmat = NULL;
138:   PetscInt       it = 0, max_it = ksp->max_it, l = plcg->l, i = 0, j = 0, k = 0;
139:   PetscInt       start = 0, middle = 0, end = 0;
140:   Vec           *Z = plcg->Z, *U = plcg->U, *V = plcg->V, *Q = plcg->Q;
141:   Vec            x = NULL, p = NULL, temp = NULL;
142:   PetscScalar    sum_dummy = 0.0, eta = 0.0, zeta = 0.0, lambda = 0.0;
143:   PetscReal      dp = 0.0, tmp = 0.0, beta = 0.0, invbeta2 = 0.0;
144:   MPI_Comm       comm;

146:   PetscFunctionBegin;
147:   x = ksp->vec_sol;
148:   p = plcg->p;

150:   comm = PetscObjectComm((PetscObject)ksp);
151:   PetscCall(PCGetOperators(ksp->pc, &A, &Pmat));

153:   for (it = 0; it < max_it + l; ++it) {
154:     /* Multiplication  z_{it+1} =  Az_{it} */
155:     /* Shift the U vector pointers */
156:     temp = U[2];
157:     for (i = 2; i > 0; i--) U[i] = U[i - 1];
158:     U[0] = temp;
159:     if (it < l) {
160:       /* SpMV and Sigma-shift and Prec */
161:       PetscCall(MatMult(A, Z[l - it], U[0]));
162:       PetscCall(VecAXPY(U[0], -sigma(it), U[1]));
163:       PetscCall(KSP_PCApply(ksp, U[0], Z[l - it - 1]));
164:       if (it < l - 1) PetscCall(VecCopy(Z[l - it - 1], Q[3 * it]));
165:     } else {
166:       /* Shift the Z vector pointers */
167:       temp = Z[PetscMax(l, 2)];
168:       for (i = PetscMax(l, 2); i > 0; --i) Z[i] = Z[i - 1];
169:       Z[0] = temp;
170:       /* SpMV and Prec */
171:       PetscCall(MatMult(A, Z[1], U[0]));
172:       PetscCall(KSP_PCApply(ksp, U[0], Z[0]));
173:     }

175:     /* Adjust the G matrix */
176:     if (it >= l) {
177:       if (it == l) {
178:         /* MPI_Wait for G(0,0),scale V0 and Z and U and Q vectors with 1/beta */
179:         PetscCallMPI(MPI_Wait(&req(0), MPI_STATUS_IGNORE));
180:         beta    = PetscSqrtReal(PetscRealPart(G(0, 0)));
181:         G(0, 0) = 1.0;
182:         PetscCall(VecAXPY(V[0], 1.0 / beta, p)); /* this assumes V[0] to be zero initially */
183:         for (j = 0; j <= PetscMax(l, 2); ++j) PetscCall(VecScale(Z[j], 1.0 / beta));
184:         for (j = 0; j <= 2; ++j) PetscCall(VecScale(U[j], 1.0 / beta));
185:         for (j = 0; j < l - 1; ++j) PetscCall(VecScale(Q[3 * j], 1.0 / beta));
186:       }

188:       /* MPI_Wait until the dot products,started l iterations ago,are completed */
189:       PetscCallMPI(MPI_Wait(&req(it - l + 1), MPI_STATUS_IGNORE));
190:       if (it >= 2 * l) {
191:         for (j = PetscMax(0, it - 3 * l + 1); j <= it - 2 * l; j++) { G(j, it - l + 1) = G(it - 2 * l + 1, j + l); /* exploit symmetry in G matrix */ }
192:       }

194:       if (it <= 2 * l - 1) {
195:         invbeta2 = 1.0 / (beta * beta);
196:         /* Scale columns 1 up to l of G with 1/beta^2 */
197:         for (j = PetscMax(it - 3 * l + 1, 0); j <= it - l + 1; ++j) G(j, it - l + 1) *= invbeta2;
198:       }

200:       for (j = PetscMax(it - 2 * l + 2, 0); j <= it - l; ++j) {
201:         sum_dummy = 0.0;
202:         for (k = PetscMax(it - 3 * l + 1, 0); k <= j - 1; ++k) sum_dummy = sum_dummy + G(k, j) * G(k, it - l + 1);
203:         G(j, it - l + 1) = (G(j, it - l + 1) - sum_dummy) / G(j, j);
204:       }

206:       sum_dummy = 0.0;
207:       for (k = PetscMax(it - 3 * l + 1, 0); k <= it - l; ++k) sum_dummy = sum_dummy + G(k, it - l + 1) * G(k, it - l + 1);

209:       tmp = PetscRealPart(G(it - l + 1, it - l + 1) - sum_dummy);
210:       /* Breakdown check */
211:       if (tmp < 0) {
212:         if (plcg->show_rstrt) PetscCall(PetscPrintf(comm, "Sqrt breakdown in iteration %" PetscInt_FMT ": sqrt argument is %e. Iteration was restarted.\n", ksp->its + 1, (double)tmp));
213:         /* End hanging dot-products in the pipeline before exiting for-loop */
214:         start = it - l + 2;
215:         end   = PetscMin(it + 1, max_it + 1); /* !warning! 'it' can actually be greater than 'max_it' */
216:         for (i = start; i < end; ++i) PetscCallMPI(MPI_Wait(&req(i), MPI_STATUS_IGNORE));
217:         break;
218:       }
219:       G(it - l + 1, it - l + 1) = PetscSqrtReal(tmp);

221:       if (it < 2 * l) {
222:         if (it == l) {
223:           gamma(it - l) = (G(it - l, it - l + 1) + sigma(it - l) * G(it - l, it - l)) / G(it - l, it - l);
224:         } else {
225:           gamma(it - l) = (G(it - l, it - l + 1) + sigma(it - l) * G(it - l, it - l) - delta(it - l - 1) * G(it - l - 1, it - l)) / G(it - l, it - l);
226:         }
227:         delta(it - l) = G(it - l + 1, it - l + 1) / G(it - l, it - l);
228:       } else {
229:         gamma(it - l) = (G(it - l, it - l) * gamma(it - 2 * l) + G(it - l, it - l + 1) * delta(it - 2 * l) - G(it - l - 1, it - l) * delta(it - l - 1)) / G(it - l, it - l);
230:         delta(it - l) = (G(it - l + 1, it - l + 1) * delta(it - 2 * l)) / G(it - l, it - l);
231:       }

233:       /* Recursively compute the next V, Q, Z and U vectors */
234:       /* Shift the V vector pointers */
235:       temp = V[2];
236:       for (i = 2; i > 0; i--) V[i] = V[i - 1];
237:       V[0] = temp;

239:       /* Recurrence V vectors */
240:       if (l == 1) {
241:         PetscCall(VecCopy(Z[1], V[0]));
242:       } else {
243:         PetscCall(VecCopy(Q[0], V[0]));
244:       }
245:       if (it == l) {
246:         PetscCall(VecAXPY(V[0], sigma(0) - gamma(it - l), V[1]));
247:       } else {
248:         alpha(0) = sigma(0) - gamma(it - l);
249:         alpha(1) = -delta(it - l - 1);
250:         PetscCall(VecMAXPY(V[0], 2, &alpha(0), &V[1]));
251:       }
252:       PetscCall(VecScale(V[0], 1.0 / delta(it - l)));

254:       /* Recurrence Q vectors */
255:       for (j = 0; j < l - 1; ++j) {
256:         /* Shift the Q vector pointers */
257:         temp = Q[3 * j + 2];
258:         for (i = 2; i > 0; i--) Q[3 * j + i] = Q[3 * j + i - 1];
259:         Q[3 * j] = temp;

261:         if (j < l - 2) {
262:           PetscCall(VecCopy(Q[3 * (j + 1)], Q[3 * j]));
263:         } else {
264:           PetscCall(VecCopy(Z[1], Q[3 * j]));
265:         }
266:         if (it == l) {
267:           PetscCall(VecAXPY(Q[3 * j], sigma(j + 1) - gamma(it - l), Q[3 * j + 1]));
268:         } else {
269:           alpha(0) = sigma(j + 1) - gamma(it - l);
270:           alpha(1) = -delta(it - l - 1);
271:           PetscCall(VecMAXPY(Q[3 * j], 2, &alpha(0), &Q[3 * j + 1]));
272:         }
273:         PetscCall(VecScale(Q[3 * j], 1.0 / delta(it - l)));
274:       }

276:       /* Recurrence Z and U vectors */
277:       if (it == l) {
278:         PetscCall(VecAXPY(Z[0], -gamma(it - l), Z[1]));
279:         PetscCall(VecAXPY(U[0], -gamma(it - l), U[1]));
280:       } else {
281:         alpha(0) = -gamma(it - l);
282:         alpha(1) = -delta(it - l - 1);
283:         PetscCall(VecMAXPY(Z[0], 2, &alpha(0), &Z[1]));
284:         PetscCall(VecMAXPY(U[0], 2, &alpha(0), &U[1]));
285:       }
286:       PetscCall(VecScale(Z[0], 1.0 / delta(it - l)));
287:       PetscCall(VecScale(U[0], 1.0 / delta(it - l)));
288:     }

290:     /* Compute and communicate the dot products */
291:     if (it < l) {
292:       for (j = 0; j < it + 2; ++j) { PetscCall((*U[0]->ops->dot_local)(U[0], Z[l - j], &G(j, it + 1))); /* dot-products (U[0],Z[j]) */ }
293:       PetscCall(MPIPetsc_Iallreduce(MPI_IN_PLACE, &G(0, it + 1), it + 2, MPIU_SCALAR, MPIU_SUM, comm, &req(it + 1)));
294:     } else if ((it >= l) && (it < max_it)) {
295:       middle = it - l + 2;
296:       end    = it + 2;
297:       PetscCall((*U[0]->ops->dot_local)(U[0], V[0], &G(it - l + 1, it + 1))); /* dot-product (U[0],V[0]) */
298:       for (j = middle; j < end; ++j) { PetscCall((*U[0]->ops->dot_local)(U[0], plcg->Z[it + 1 - j], &G(j, it + 1))); /* dot-products (U[0],Z[j]) */ }
299:       PetscCall(MPIPetsc_Iallreduce(MPI_IN_PLACE, &G(it - l + 1, it + 1), l + 1, MPIU_SCALAR, MPIU_SUM, comm, &req(it + 1)));
300:     }

302:     /* Compute solution vector and residual norm */
303:     if (it >= l) {
304:       if (it == l) {
305:         if (ksp->its != 0) ++ksp->its;
306:         eta  = gamma(0);
307:         zeta = beta;
308:         PetscCall(VecCopy(V[1], p));
309:         PetscCall(VecScale(p, 1.0 / eta));
310:         PetscCall(VecAXPY(x, zeta, p));
311:         dp = beta;
312:       } else if (it > l) {
313:         k = it - l;
314:         ++ksp->its;
315:         lambda = delta(k - 1) / eta;
316:         eta    = gamma(k) - lambda * delta(k - 1);
317:         zeta   = -lambda * zeta;
318:         PetscCall(VecScale(p, -delta(k - 1) / eta));
319:         PetscCall(VecAXPY(p, 1.0 / eta, V[1]));
320:         PetscCall(VecAXPY(x, zeta, p));
321:         dp = PetscAbsScalar(zeta);
322:       }
323:       ksp->rnorm = dp;
324:       PetscCall(KSPLogResidualHistory(ksp, dp));
325:       PetscCall(KSPMonitor(ksp, ksp->its, dp));
326:       PetscCall((*ksp->converged)(ksp, ksp->its, dp, &ksp->reason, ksp->cnvP));

328:       if (ksp->its >= max_it && !ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
329:       if (ksp->reason) {
330:         /* End hanging dot-products in the pipeline before exiting for-loop */
331:         start = it - l + 2;
332:         end   = PetscMin(it + 2, max_it + 1); /* !warning! 'it' can actually be greater than 'max_it' */
333:         for (i = start; i < end; ++i) PetscCallMPI(MPI_Wait(&req(i), MPI_STATUS_IGNORE));
334:         break;
335:       }
336:     }
337:   } /* End inner for loop */
338:   PetscFunctionReturn(PETSC_SUCCESS);
339: }

341: static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
342: {
343:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
344:   PetscInt       i = 0, j = 0, l = plcg->l, max_it = ksp->max_it;

346:   PetscFunctionBegin;
347:   for (i = 0; i < PetscMax(3, l + 1); ++i) PetscCall(VecSet(plcg->Z[i], 0.0));
348:   for (i = 1; i < 3; ++i) PetscCall(VecSet(plcg->U[i], 0.0));
349:   for (i = 0; i < 3; ++i) PetscCall(VecSet(plcg->V[i], 0.0));
350:   for (i = 0; i < 3 * (l - 1) + 1; ++i) PetscCall(VecSet(plcg->Q[i], 0.0));
351:   for (j = 0; j < (max_it + 1); ++j) {
352:     gamma(j) = 0.0;
353:     delta(j) = 0.0;
354:     for (i = 0; i < (2 * l + 1); ++i) G_noshift(i, j) = 0.0;
355:   }
356:   PetscFunctionReturn(PETSC_SUCCESS);
357: }

359: /*
360:   KSPSolve_PIPELCG - This routine actually applies the pipelined(l) conjugate gradient method
361: */
362: static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
363: {
364:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
365:   Mat            A = NULL, Pmat = NULL;
366:   Vec            b = NULL, x = NULL, p = NULL;
367:   PetscInt       max_it = ksp->max_it, l = plcg->l;
368:   PetscInt       i = 0, outer_it = 0, curr_guess_zero = 0;
369:   PetscReal      lmin = plcg->lmin, lmax = plcg->lmax;
370:   PetscBool      diagonalscale = PETSC_FALSE;
371:   MPI_Comm       comm;

373:   PetscFunctionBegin;
374:   comm = PetscObjectComm((PetscObject)ksp);
375:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
376:   PetscCheck(!diagonalscale, comm, PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

378:   x = ksp->vec_sol;
379:   b = ksp->vec_rhs;
380:   p = plcg->p;

382:   PetscCall(PetscCalloc1((max_it + 1) * (2 * l + 1), &plcg->G));
383:   PetscCall(PetscCalloc1(max_it + 1, &plcg->gamma));
384:   PetscCall(PetscCalloc1(max_it + 1, &plcg->delta));
385:   PetscCall(PetscCalloc1(max_it + 1, &plcg->req));

387:   PetscCall(PCGetOperators(ksp->pc, &A, &Pmat));

389:   for (i = 0; i < l; ++i) sigma(i) = (0.5 * (lmin + lmax) + (0.5 * (lmax - lmin) * PetscCosReal(PETSC_PI * (2.0 * i + 1.0) / (2.0 * l))));

391:   ksp->its        = 0;
392:   outer_it        = 0;
393:   curr_guess_zero = !!ksp->guess_zero;

395:   while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
396:     /* RESTART LOOP */
397:     if (!curr_guess_zero) {
398:       PetscCall(KSP_MatMult(ksp, A, x, plcg->U[0])); /* u <- b - Ax */
399:       PetscCall(VecAYPX(plcg->U[0], -1.0, b));
400:     } else {
401:       PetscCall(VecCopy(b, plcg->U[0])); /* u <- b (x is 0) */
402:     }
403:     PetscCall(KSP_PCApply(ksp, plcg->U[0], p)); /* p <- Bu */

405:     if (outer_it > 0) {
406:       /* Re-initialize Z,U,V,Q,gamma,delta,G after restart occurred */
407:       PetscCall(KSPSolve_ReInitData_PIPELCG(ksp));
408:     }

410:     PetscCall((*plcg->U[0]->ops->dot_local)(plcg->U[0], p, &G(0, 0)));
411:     PetscCall(MPIPetsc_Iallreduce(MPI_IN_PLACE, &G(0, 0), 1, MPIU_SCALAR, MPIU_SUM, comm, &req(0)));
412:     PetscCall(VecCopy(p, plcg->Z[l]));

414:     PetscCall(KSPSolve_InnerLoop_PIPELCG(ksp));

416:     if (ksp->reason) break; /* convergence or divergence */
417:     ++outer_it;
418:     curr_guess_zero = 0;
419:   }

421:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
422:   PetscCall(PetscFree(plcg->G));
423:   PetscCall(PetscFree(plcg->gamma));
424:   PetscCall(PetscFree(plcg->delta));
425:   PetscCall(PetscFree(plcg->req));
426:   PetscFunctionReturn(PETSC_SUCCESS);
427: }

429: /*MC
430:     KSPPIPELCG - Deep pipelined (length l) Conjugate Gradient method {cite}`cornelis2018communication` and {cite}`cools2019numerically`.
431:     This method has only a single non-blocking global
432:     reduction per iteration, compared to 2 blocking reductions for standard `KSPCG`. The reduction is overlapped by the
433:     matrix-vector product and preconditioner application of the next l iterations. The pipeline length l is a parameter
434:     of the method. [](sec_pipelineksp)

436:     Options Database Keys:
437: +   -ksp_pipelcg_pipel - pipelined length
438: .   -ksp_pipelcg_lmin - approximation to the smallest eigenvalue of the preconditioned operator (default: 0.0)
439: .   -ksp_pipelcg_lmax - approximation to the largest eigenvalue of the preconditioned operator (default: 0.0)
440: -   -ksp_pipelcg_monitor - output where/why the method restarts when a sqrt breakdown occurs

442:     Example usage:
443: .vb
444:     KSP tutorials ex2, no preconditioner, pipel = 2, lmin = 0.0, lmax = 8.0 :
445:         $mpiexec -n 14 ./ex2 -m 1000 -n 1000 -ksp_type pipelcg -pc_type none -ksp_norm_type natural
446:            -ksp_rtol 1e-10 -ksp_max_it 1000 -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 8.0 -log_view
447:     SNES tutorials ex48, bjacobi preconditioner, pipel = 3, lmin = 0.0, lmax = 2.0, show restart information :
448:         $mpiexec -n 14 ./ex48 -M 150 -P 100 -ksp_type pipelcg -pc_type bjacobi -ksp_rtol 1e-10 -ksp_pipelcg_pipel 3
449:            -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 2.0 -ksp_pipelcg_monitor -log_view
450: .ve

452:     Level: advanced

454:     Notes:
455:     MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
456:     performance of pipelined methods. See [](doc_faq_pipelined)

458:     Contributed by:
459:     Siegfried Cools, University of Antwerp, Dept. Mathematics and Computer Science,
460:     funded by Flemish Research Foundation (FWO) grant number 12H4617N.

462: .seealso: [](ch_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSPCG`, `KSPPIPECG`, `KSPPIPECGRR`, `KSPPGMRES`,
463:           `KSPPIPEBCGS`, `KSPSetPCSide()`, `KSPGROPPCG`
464: M*/
465: PETSC_EXTERN PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
466: {
467:   KSP_CG_PIPE_L *plcg = NULL;

469:   PetscFunctionBegin;
470:   PetscCall(PetscNew(&plcg));
471:   ksp->data = (void *)plcg;

473:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
474:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));

476:   ksp->ops->setup          = KSPSetUp_PIPELCG;
477:   ksp->ops->solve          = KSPSolve_PIPELCG;
478:   ksp->ops->reset          = KSPReset_PIPELCG;
479:   ksp->ops->destroy        = KSPDestroy_PIPELCG;
480:   ksp->ops->view           = KSPView_PIPELCG;
481:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
482:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
483:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
484:   PetscFunctionReturn(PETSC_SUCCESS);
485: }