Actual source code: pipefcg.c

  1: #include <../src/ksp/ksp/impls/fcg/pipefcg/pipefcgimpl.h>

  3: static PetscBool  cited      = PETSC_FALSE;
  4: static const char citation[] = "@article{SSM2016,\n"
  5:                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
  6:                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
  7:                                "  journal = {SIAM Journal on Scientific Computing},\n"
  8:                                "  volume = {38},\n"
  9:                                "  number = {5},\n"
 10:                                "  pages = {C441-C470},\n"
 11:                                "  year = {2016},\n"
 12:                                "  doi = {10.1137/15M1049130},\n"
 13:                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
 14:                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
 15:                                "}\n";

 17: #define KSPPIPEFCG_DEFAULT_MMAX       15
 18: #define KSPPIPEFCG_DEFAULT_NPREALLOC  5
 19: #define KSPPIPEFCG_DEFAULT_VECB       5
 20: #define KSPPIPEFCG_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY

 22: static PetscErrorCode KSPAllocateVectors_PIPEFCG(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
 23: {
 24:   PetscInt     i;
 25:   KSP_PIPEFCG *pipefcg;
 26:   PetscInt     nnewvecs, nvecsprev;

 28:   PetscFunctionBegin;
 29:   pipefcg = (KSP_PIPEFCG *)ksp->data;

 31:   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
 32:   if (pipefcg->nvecs < PetscMin(pipefcg->mmax + 1, nvecsneeded)) {
 33:     nvecsprev = pipefcg->nvecs;
 34:     nnewvecs  = PetscMin(PetscMax(nvecsneeded - pipefcg->nvecs, chunksize), pipefcg->mmax + 1 - pipefcg->nvecs);
 35:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pQvecs[pipefcg->nchunks], 0, NULL));
 36:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pZETAvecs[pipefcg->nchunks], 0, NULL));
 37:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pPvecs[pipefcg->nchunks], 0, NULL));
 38:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pSvecs[pipefcg->nchunks], 0, NULL));
 39:     pipefcg->nvecs += nnewvecs;
 40:     for (i = 0; i < nnewvecs; ++i) {
 41:       pipefcg->Qvecs[nvecsprev + i]    = pipefcg->pQvecs[pipefcg->nchunks][i];
 42:       pipefcg->ZETAvecs[nvecsprev + i] = pipefcg->pZETAvecs[pipefcg->nchunks][i];
 43:       pipefcg->Pvecs[nvecsprev + i]    = pipefcg->pPvecs[pipefcg->nchunks][i];
 44:       pipefcg->Svecs[nvecsprev + i]    = pipefcg->pSvecs[pipefcg->nchunks][i];
 45:     }
 46:     pipefcg->chunksizes[pipefcg->nchunks] = nnewvecs;
 47:     ++pipefcg->nchunks;
 48:   }
 49:   PetscFunctionReturn(PETSC_SUCCESS);
 50: }

 52: static PetscErrorCode KSPSetUp_PIPEFCG(KSP ksp)
 53: {
 54:   KSP_PIPEFCG   *pipefcg;
 55:   const PetscInt nworkstd = 5;

 57:   PetscFunctionBegin;
 58:   pipefcg = (KSP_PIPEFCG *)ksp->data;

 60:   /* Allocate "standard" work vectors (not including the basis and transformed basis vectors) */
 61:   PetscCall(KSPSetWorkVecs(ksp, nworkstd));

 63:   /* Allocated space for pointers to additional work vectors
 64:    note that mmax is the number of previous directions, so we add 1 for the current direction,
 65:    and an extra 1 for the prealloc (which might be empty) */
 66:   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &pipefcg->Pvecs, pipefcg->mmax + 1, &pipefcg->pPvecs, pipefcg->mmax + 1, &pipefcg->Svecs, pipefcg->mmax + 1, &pipefcg->pSvecs));
 67:   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &pipefcg->Qvecs, pipefcg->mmax + 1, &pipefcg->pQvecs, pipefcg->mmax + 1, &pipefcg->ZETAvecs, pipefcg->mmax + 1, &pipefcg->pZETAvecs));
 68:   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &pipefcg->Pold, pipefcg->mmax + 1, &pipefcg->Sold, pipefcg->mmax + 1, &pipefcg->Qold, pipefcg->mmax + 1, &pipefcg->ZETAold));
 69:   PetscCall(PetscMalloc1(pipefcg->mmax + 1, &pipefcg->chunksizes));
 70:   PetscCall(PetscMalloc3(pipefcg->mmax + 2, &pipefcg->dots, pipefcg->mmax + 1, &pipefcg->etas, pipefcg->mmax + 2, &pipefcg->redux));

 72:   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
 73:   if (pipefcg->nprealloc > pipefcg->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipefcg->nprealloc, pipefcg->mmax + 1));

 75:   /* Preallocate additional work vectors */
 76:   PetscCall(KSPAllocateVectors_PIPEFCG(ksp, pipefcg->nprealloc, pipefcg->nprealloc));
 77:   PetscFunctionReturn(PETSC_SUCCESS);
 78: }

 80: static PetscErrorCode KSPSolve_PIPEFCG_cycle(KSP ksp)
 81: {
 82:   PetscInt     i, j, k, idx, kdx, mi;
 83:   KSP_PIPEFCG *pipefcg;
 84:   PetscScalar  alpha = 0.0, gamma, *betas, *dots;
 85:   PetscReal    dp    = 0.0, delta, *eta, *etas;
 86:   Vec          B, R, Z, X, Qcurr, W, ZETAcurr, M, N, Pcurr, Scurr, *redux;
 87:   Mat          Amat, Pmat;

 89:   PetscFunctionBegin;
 90:   /* We have not checked these routines for use with complex numbers. The inner products
 91:      are likely not defined correctly for that case */
 92:   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");

 94: #define VecXDot(x, y, a)          (pipefcg->type == KSP_CG_HERMITIAN ? VecDot(x, y, a) : VecTDot(x, y, a))
 95: #define VecXDotBegin(x, y, a)     (pipefcg->type == KSP_CG_HERMITIAN ? VecDotBegin(x, y, a) : VecTDotBegin(x, y, a))
 96: #define VecXDotEnd(x, y, a)       (pipefcg->type == KSP_CG_HERMITIAN ? VecDotEnd(x, y, a) : VecTDotEnd(x, y, a))
 97: #define VecMXDot(x, n, y, a)      (pipefcg->type == KSP_CG_HERMITIAN ? VecMDot(x, n, y, a) : VecMTDot(x, n, y, a))
 98: #define VecMXDotBegin(x, n, y, a) (pipefcg->type == KSP_CG_HERMITIAN ? VecMDotBegin(x, n, y, a) : VecMTDotBegin(x, n, y, a))
 99: #define VecMXDotEnd(x, n, y, a)   (pipefcg->type == KSP_CG_HERMITIAN ? VecMDotEnd(x, n, y, a) : VecMTDotEnd(x, n, y, a))

101:   pipefcg = (KSP_PIPEFCG *)ksp->data;
102:   X       = ksp->vec_sol;
103:   B       = ksp->vec_rhs;
104:   R       = ksp->work[0];
105:   Z       = ksp->work[1];
106:   W       = ksp->work[2];
107:   M       = ksp->work[3];
108:   N       = ksp->work[4];

110:   redux = pipefcg->redux;
111:   dots  = pipefcg->dots;
112:   etas  = pipefcg->etas;
113:   betas = dots; /* dots takes the result of all dot products of which the betas are a subset */

115:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

117:   /* Compute cycle initial residual */
118:   PetscCall(KSP_MatMult(ksp, Amat, X, R));
119:   PetscCall(VecAYPX(R, -1.0, B));    /* r <- b - Ax */
120:   PetscCall(KSP_PCApply(ksp, R, Z)); /* z <- Br     */

122:   Pcurr    = pipefcg->Pvecs[0];
123:   Scurr    = pipefcg->Svecs[0];
124:   Qcurr    = pipefcg->Qvecs[0];
125:   ZETAcurr = pipefcg->ZETAvecs[0];
126:   PetscCall(VecCopy(Z, Pcurr));
127:   PetscCall(KSP_MatMult(ksp, Amat, Pcurr, Scurr)); /* S = Ap     */
128:   PetscCall(VecCopy(Scurr, W));                    /* w = s = Az */

130:   /* Initial state of pipelining intermediates */
131:   redux[0] = R;
132:   redux[1] = W;
133:   PetscCall(VecMXDotBegin(Z, 2, redux, dots));
134:   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z))); /* perform asynchronous reduction */
135:   PetscCall(KSP_PCApply(ksp, W, M));                                        /* m = B(w) */
136:   PetscCall(KSP_MatMult(ksp, Amat, M, N));                                  /* n = Am   */
137:   PetscCall(VecCopy(M, Qcurr));                                             /* q = m    */
138:   PetscCall(VecCopy(N, ZETAcurr));                                          /* zeta = n */
139:   PetscCall(VecMXDotEnd(Z, 2, redux, dots));
140:   gamma   = dots[0];
141:   delta   = PetscRealPart(dots[1]);
142:   etas[0] = delta;
143:   alpha   = gamma / delta;

145:   i = 0;
146:   do {
147:     ksp->its++;

149:     /* Update X, R, Z, W */
150:     PetscCall(VecAXPY(X, +alpha, Pcurr));    /* x <- x + alpha * pi    */
151:     PetscCall(VecAXPY(R, -alpha, Scurr));    /* r <- r - alpha * si    */
152:     PetscCall(VecAXPY(Z, -alpha, Qcurr));    /* z <- z - alpha * qi    */
153:     PetscCall(VecAXPY(W, -alpha, ZETAcurr)); /* w <- w - alpha * zetai */

155:     /* Compute norm for convergence check */
156:     switch (ksp->normtype) {
157:     case KSP_NORM_PRECONDITIONED:
158:       PetscCall(VecNorm(Z, NORM_2, &dp)); /* dp <- sqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
159:       break;
160:     case KSP_NORM_UNPRECONDITIONED:
161:       PetscCall(VecNorm(R, NORM_2, &dp)); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
162:       break;
163:     case KSP_NORM_NATURAL:
164:       dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
165:       break;
166:     case KSP_NORM_NONE:
167:       dp = 0.0;
168:       break;
169:     default:
170:       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
171:     }

173:     /* Check for convergence */
174:     ksp->rnorm = dp;
175:     PetscCall(KSPLogResidualHistory(ksp, dp));
176:     PetscCall(KSPMonitor(ksp, ksp->its, dp));
177:     PetscCall((*ksp->converged)(ksp, ksp->its, dp, &ksp->reason, ksp->cnvP));
178:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

180:     /* Computations of current iteration done */
181:     ++i;

183:     /* If needbe, allocate a new chunk of vectors in P and C */
184:     PetscCall(KSPAllocateVectors_PIPEFCG(ksp, i + 1, pipefcg->vecb));

186:     /* Note that we wrap around and start clobbering old vectors */
187:     idx      = i % (pipefcg->mmax + 1);
188:     Pcurr    = pipefcg->Pvecs[idx];
189:     Scurr    = pipefcg->Svecs[idx];
190:     Qcurr    = pipefcg->Qvecs[idx];
191:     ZETAcurr = pipefcg->ZETAvecs[idx];
192:     eta      = pipefcg->etas + idx;

194:     /* number of old directions to orthogonalize against */
195:     switch (pipefcg->truncstrat) {
196:     case KSP_FCD_TRUNC_TYPE_STANDARD:
197:       mi = pipefcg->mmax;
198:       break;
199:     case KSP_FCD_TRUNC_TYPE_NOTAY:
200:       mi = ((i - 1) % pipefcg->mmax) + 1;
201:       break;
202:     default:
203:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
204:     }

206:     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
207:     PetscCall(VecCopy(Z, Pcurr));
208:     for (k = PetscMax(0, i - mi), j = 0; k < i; ++j, ++k) {
209:       kdx                 = k % (pipefcg->mmax + 1);
210:       pipefcg->Pold[j]    = pipefcg->Pvecs[kdx];
211:       pipefcg->Sold[j]    = pipefcg->Svecs[kdx];
212:       pipefcg->Qold[j]    = pipefcg->Qvecs[kdx];
213:       pipefcg->ZETAold[j] = pipefcg->ZETAvecs[kdx];
214:       redux[j]            = pipefcg->Svecs[kdx];
215:     }
216:     redux[j]     = R; /* If the above loop is not executed redux contains only R => all beta_k = 0, only gamma, delta != 0 */
217:     redux[j + 1] = W;

219:     PetscCall(VecMXDotBegin(Z, j + 2, redux, betas));                         /* Start split reductions for beta_k = (z,s_k), gamma = (z,r), delta = (z,w) */
220:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z))); /* perform asynchronous reduction */
221:     PetscCall(VecWAXPY(N, -1.0, R, W));                                       /* m = u + B(w-r): (a) ntmp = w-r              */
222:     PetscCall(KSP_PCApply(ksp, N, M));                                        /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
223:     PetscCall(VecAXPY(M, 1.0, Z));                                            /* m = u + B(w-r): (c) m = z + mtmp            */
224:     PetscCall(KSP_MatMult(ksp, Amat, M, N));                                  /* n = Am                                      */
225:     PetscCall(VecMXDotEnd(Z, j + 2, redux, betas));                           /* Finish split reductions */
226:     gamma = betas[j];
227:     delta = PetscRealPart(betas[j + 1]);

229:     *eta = 0.;
230:     for (k = PetscMax(0, i - mi), j = 0; k < i; ++j, ++k) {
231:       kdx = k % (pipefcg->mmax + 1);
232:       betas[j] /= -etas[kdx]; /* betak  /= etak */
233:       *eta -= ((PetscReal)(PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]))) * etas[kdx];
234:       /* etaitmp = -betaik^2 * etak */
235:     }
236:     *eta += delta; /* etai    = delta -betaik^2 * etak */
237:     if (*eta < 0.) {
238:       pipefcg->norm_breakdown = PETSC_TRUE;
239:       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
240:       break;
241:     } else {
242:       alpha = gamma / (*eta); /* alpha = gamma/etai */
243:     }

245:     /* project out stored search directions using classical G-S */
246:     PetscCall(VecCopy(Z, Pcurr));
247:     PetscCall(VecCopy(W, Scurr));
248:     PetscCall(VecCopy(M, Qcurr));
249:     PetscCall(VecCopy(N, ZETAcurr));
250:     PetscCall(VecMAXPY(Pcurr, j, betas, pipefcg->Pold));       /* pi    <- ui - sum_k beta_k p_k    */
251:     PetscCall(VecMAXPY(Scurr, j, betas, pipefcg->Sold));       /* si    <- wi - sum_k beta_k s_k    */
252:     PetscCall(VecMAXPY(Qcurr, j, betas, pipefcg->Qold));       /* qi    <- m  - sum_k beta_k q_k    */
253:     PetscCall(VecMAXPY(ZETAcurr, j, betas, pipefcg->ZETAold)); /* zetai <- n  - sum_k beta_k zeta_k */

255:   } while (ksp->its < ksp->max_it);
256:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
257:   PetscFunctionReturn(PETSC_SUCCESS);
258: }

260: static PetscErrorCode KSPSolve_PIPEFCG(KSP ksp)
261: {
262:   KSP_PIPEFCG *pipefcg;
263:   PetscScalar  gamma;
264:   PetscReal    dp = 0.0;
265:   Vec          B, R, Z, X;
266:   Mat          Amat, Pmat;

268: #define VecXDot(x, y, a) (pipefcg->type == KSP_CG_HERMITIAN ? VecDot(x, y, a) : VecTDot(x, y, a))

270:   PetscFunctionBegin;
271:   PetscCall(PetscCitationsRegister(citation, &cited));

273:   pipefcg = (KSP_PIPEFCG *)ksp->data;
274:   X       = ksp->vec_sol;
275:   B       = ksp->vec_rhs;
276:   R       = ksp->work[0];
277:   Z       = ksp->work[1];

279:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

281:   /* Compute initial residual needed for convergence check*/
282:   ksp->its = 0;
283:   if (!ksp->guess_zero) {
284:     PetscCall(KSP_MatMult(ksp, Amat, X, R));
285:     PetscCall(VecAYPX(R, -1.0, B)); /* r <- b - Ax                             */
286:   } else {
287:     PetscCall(VecCopy(B, R)); /* r <- b (x is 0)                         */
288:   }
289:   switch (ksp->normtype) {
290:   case KSP_NORM_PRECONDITIONED:
291:     PetscCall(KSP_PCApply(ksp, R, Z));  /* z <- Br                                 */
292:     PetscCall(VecNorm(Z, NORM_2, &dp)); /* dp <- dqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
293:     break;
294:   case KSP_NORM_UNPRECONDITIONED:
295:     PetscCall(VecNorm(R, NORM_2, &dp)); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
296:     break;
297:   case KSP_NORM_NATURAL:
298:     PetscCall(KSP_PCApply(ksp, R, Z)); /* z <- Br                                 */
299:     PetscCall(VecXDot(Z, R, &gamma));
300:     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
301:     break;
302:   case KSP_NORM_NONE:
303:     dp = 0.0;
304:     break;
305:   default:
306:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
307:   }

309:   /* Initial Convergence Check */
310:   PetscCall(KSPLogResidualHistory(ksp, dp));
311:   PetscCall(KSPMonitor(ksp, 0, dp));
312:   ksp->rnorm = dp;
313:   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP));
314:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

316:   do {
317:     /* A cycle is broken only if a norm breakdown occurs. If not the entire solve happens in a single cycle.
318:        This is coded this way to allow both truncation and truncation-restart strategy
319:        (see KSPFCDGetNumOldDirections()) */
320:     PetscCall(KSPSolve_PIPEFCG_cycle(ksp));
321:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
322:     if (pipefcg->norm_breakdown) {
323:       pipefcg->n_restarts++;
324:       pipefcg->norm_breakdown = PETSC_FALSE;
325:     }
326:   } while (ksp->its < ksp->max_it);

328:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
329:   PetscFunctionReturn(PETSC_SUCCESS);
330: }

332: static PetscErrorCode KSPDestroy_PIPEFCG(KSP ksp)
333: {
334:   PetscInt     i;
335:   KSP_PIPEFCG *pipefcg;

337:   PetscFunctionBegin;
338:   pipefcg = (KSP_PIPEFCG *)ksp->data;

340:   /* Destroy "standard" work vecs */
341:   PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));

343:   /* Destroy vectors of old directions and the arrays that manage pointers to them */
344:   if (pipefcg->nvecs) {
345:     for (i = 0; i < pipefcg->nchunks; ++i) {
346:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pPvecs[i]));
347:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pSvecs[i]));
348:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pQvecs[i]));
349:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pZETAvecs[i]));
350:     }
351:   }
352:   PetscCall(PetscFree4(pipefcg->Pvecs, pipefcg->Svecs, pipefcg->pPvecs, pipefcg->pSvecs));
353:   PetscCall(PetscFree4(pipefcg->Qvecs, pipefcg->ZETAvecs, pipefcg->pQvecs, pipefcg->pZETAvecs));
354:   PetscCall(PetscFree4(pipefcg->Pold, pipefcg->Sold, pipefcg->Qold, pipefcg->ZETAold));
355:   PetscCall(PetscFree(pipefcg->chunksizes));
356:   PetscCall(PetscFree3(pipefcg->dots, pipefcg->etas, pipefcg->redux));
357:   PetscCall(KSPDestroyDefault(ksp));
358:   PetscFunctionReturn(PETSC_SUCCESS);
359: }

361: static PetscErrorCode KSPView_PIPEFCG(KSP ksp, PetscViewer viewer)
362: {
363:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
364:   PetscBool    iascii, isstring;
365:   const char  *truncstr;

367:   PetscFunctionBegin;
368:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
369:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));

371:   if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
372:     truncstr = "Using standard truncation strategy";
373:   } else if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
374:     truncstr = "Using Notay's truncation strategy";
375:   } else {
376:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
377:   }

379:   if (iascii) {
380:     PetscCall(PetscViewerASCIIPrintf(viewer, "  max previous directions = %" PetscInt_FMT "\n", pipefcg->mmax));
381:     PetscCall(PetscViewerASCIIPrintf(viewer, "  preallocated %" PetscInt_FMT " directions\n", PetscMin(pipefcg->nprealloc, pipefcg->mmax + 1)));
382:     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", truncstr));
383:     PetscCall(PetscViewerASCIIPrintf(viewer, "  restarts performed = %" PetscInt_FMT " \n", pipefcg->n_restarts));
384:   } else if (isstring) {
385:     PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipefcg->mmax, pipefcg->nprealloc, truncstr));
386:   }
387:   PetscFunctionReturn(PETSC_SUCCESS);
388: }

390: /*@
391:   KSPPIPEFCGSetMmax - set the maximum number of previous directions `KSPPIPEFCG` will store for orthogonalization

393:   Logically Collective

395:   Input Parameters:
396: + ksp  - the Krylov space context
397: - mmax - the maximum number of previous directions to orthogonalize against

399:   Options Database Key:
400: . -ksp_pipefcg_mmax <N> - maximum number of previous directions

402:   Level: intermediate

404:   Note:
405:   `mmax` + 1 directions are stored (`mmax` previous ones along with the current one)
406:   and whether all are used in each iteration also depends on the truncation strategy, see `KSPPIPEFCGSetTruncationType()`

408: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGSetNprealloc()`, `KSPFCGSetMmax()`, `KSPFCGGetMmax()`
409: @*/
410: PetscErrorCode KSPPIPEFCGSetMmax(KSP ksp, PetscInt mmax)
411: {
412:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

414:   PetscFunctionBegin;
417:   pipefcg->mmax = mmax;
418:   PetscFunctionReturn(PETSC_SUCCESS);
419: }

421: /*@
422:   KSPPIPEFCGGetMmax - get the maximum number of previous directions `KSPPIPEFCG` will store

424:   Not Collective

426:   Input Parameter:
427: . ksp - the Krylov space context

429:   Output Parameter:
430: . mmax - the maximum number of previous directions allowed for orthogonalization

432:   Level: intermediate

434: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType()`, `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPFCGGetMmax()`, `KSPFCGSetMmax()`
435: @*/
436: PetscErrorCode KSPPIPEFCGGetMmax(KSP ksp, PetscInt *mmax)
437: {
438:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

440:   PetscFunctionBegin;
442:   *mmax = pipefcg->mmax;
443:   PetscFunctionReturn(PETSC_SUCCESS);
444: }

446: /*@
447:   KSPPIPEFCGSetNprealloc - set the number of directions to preallocate with `KSPPIPEFCG`

449:   Logically Collective

451:   Input Parameters:
452: + ksp       - the Krylov space context
453: - nprealloc - the number of vectors to preallocate

455:   Options Database Key:
456: . -ksp_pipefcg_nprealloc <N> - the number of vectors to preallocate

458:   Level: advanced

460: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`
461: @*/
462: PetscErrorCode KSPPIPEFCGSetNprealloc(KSP ksp, PetscInt nprealloc)
463: {
464:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

466:   PetscFunctionBegin;
469:   pipefcg->nprealloc = nprealloc;
470:   PetscFunctionReturn(PETSC_SUCCESS);
471: }

473: /*@
474:   KSPPIPEFCGGetNprealloc - get the number of directions to preallocate by `KSPPIPEFCG`

476:   Not Collective

478:   Input Parameter:
479: . ksp - the Krylov space context

481:   Output Parameter:
482: . nprealloc - the number of directions preallocated

484:   Level: advanced

486: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType()`, `KSPPIPEFCGSetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`
487: @*/
488: PetscErrorCode KSPPIPEFCGGetNprealloc(KSP ksp, PetscInt *nprealloc)
489: {
490:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

492:   PetscFunctionBegin;
494:   *nprealloc = pipefcg->nprealloc;
495:   PetscFunctionReturn(PETSC_SUCCESS);
496: }

498: /*@
499:   KSPPIPEFCGSetTruncationType - specify how many of its stored previous directions `KSPPIPEFCG` uses during orthoganalization

501:   Logically Collective

503:   Input Parameters:
504: + ksp        - the Krylov space context
505: - truncstrat - the choice of strategy
506: .vb
507:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
508:   KSP_FCD_TRUNC_TYPE_NOTAY uses max(1,mod(i,mmax)) stored directions at iteration i=0,1,..
509: .ve

511:   Options Database Key:
512: . -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against

514:   Level: intermediate

516: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
517: @*/
518: PetscErrorCode KSPPIPEFCGSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
519: {
520:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

522:   PetscFunctionBegin;
525:   pipefcg->truncstrat = truncstrat;
526:   PetscFunctionReturn(PETSC_SUCCESS);
527: }

529: /*@
530:   KSPPIPEFCGGetTruncationType - get the truncation strategy employed by `KSPPIPEFCG`

532:   Not Collective

534:   Input Parameter:
535: . ksp - the Krylov space context

537:   Output Parameter:
538: . truncstrat - the strategy type

540:   Level: intermediate

542: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
543: @*/
544: PetscErrorCode KSPPIPEFCGGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
545: {
546:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

548:   PetscFunctionBegin;
550:   *truncstrat = pipefcg->truncstrat;
551:   PetscFunctionReturn(PETSC_SUCCESS);
552: }

554: static PetscErrorCode KSPSetFromOptions_PIPEFCG(KSP ksp, PetscOptionItems *PetscOptionsObject)
555: {
556:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
557:   PetscInt     mmax, nprealloc;
558:   PetscBool    flg;

560:   PetscFunctionBegin;
561:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEFCG options");
562:   PetscCall(PetscOptionsInt("-ksp_pipefcg_mmax", "Number of search directions to storue", "KSPPIPEFCGSetMmax", pipefcg->mmax, &mmax, &flg));
563:   if (flg) PetscCall(KSPPIPEFCGSetMmax(ksp, mmax));
564:   PetscCall(PetscOptionsInt("-ksp_pipefcg_nprealloc", "Number of directions to preallocate", "KSPPIPEFCGSetNprealloc", pipefcg->nprealloc, &nprealloc, &flg));
565:   if (flg) PetscCall(KSPPIPEFCGSetNprealloc(ksp, nprealloc));
566:   PetscCall(PetscOptionsEnum("-ksp_pipefcg_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipefcg->truncstrat, (PetscEnum *)&pipefcg->truncstrat, NULL));
567:   PetscOptionsHeadEnd();
568:   PetscFunctionReturn(PETSC_SUCCESS);
569: }

571: /*MC
572:   KSPPIPEFCG - Implements a Pipelined, Flexible Conjugate Gradient method {cite}`sananschneppmay2016`. [](sec_pipelineksp). [](sec_flexibleksp)

574:   Options Database Keys:
575: +   -ksp_pipefcg_mmax <N> - The number of previous search directions to store
576: .   -ksp_pipefcg_nprealloc <N> - The number of previous search directions to preallocate
577: -   -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against

579:   Level: intermediate

581:   Notes:
582:   Compare to `KSPFCG`

584:   Supports left preconditioning only.

586:   The natural "norm" for this method is $(u,Au)$, where $u$ is the preconditioned residual. As with standard `KSPCG`, this norm is available at no additional computational cost.
587:   Choosing preconditioned or unpreconditioned norms involve an extra blocking global reduction, thus removing any benefit from pipelining.

589:   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
590:   See [](doc_faq_pipelined)

592:   Contributed by:
593:   Patrick Sanan and Sascha M. Schnepp

595: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPFCG`, `KSPPIPECG`, `KSPPIPECR`, `KSPGCR`, `KSPPIPEGCR`, `KSPFGMRES`,
596:           `KSPCG`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`, `KSPPIPEFCGSetNprealloc()`,
597:           `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGGetTruncationType()`
598: M*/
599: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFCG(KSP ksp)
600: {
601:   KSP_PIPEFCG *pipefcg;

603:   PetscFunctionBegin;
604:   PetscCall(PetscNew(&pipefcg));
605: #if !defined(PETSC_USE_COMPLEX)
606:   pipefcg->type = KSP_CG_SYMMETRIC;
607: #else
608:   pipefcg->type = KSP_CG_HERMITIAN;
609: #endif
610:   pipefcg->mmax       = KSPPIPEFCG_DEFAULT_MMAX;
611:   pipefcg->nprealloc  = KSPPIPEFCG_DEFAULT_NPREALLOC;
612:   pipefcg->nvecs      = 0;
613:   pipefcg->vecb       = KSPPIPEFCG_DEFAULT_VECB;
614:   pipefcg->nchunks    = 0;
615:   pipefcg->truncstrat = KSPPIPEFCG_DEFAULT_TRUNCSTRAT;
616:   pipefcg->n_restarts = 0;

618:   ksp->data = (void *)pipefcg;

620:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
621:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 1));
622:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
623:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

625:   ksp->ops->setup          = KSPSetUp_PIPEFCG;
626:   ksp->ops->solve          = KSPSolve_PIPEFCG;
627:   ksp->ops->destroy        = KSPDestroy_PIPEFCG;
628:   ksp->ops->view           = KSPView_PIPEFCG;
629:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFCG;
630:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
631:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
632:   PetscFunctionReturn(PETSC_SUCCESS);
633: }