Actual source code: pipefgmres.c

petsc-master 2020-07-01
Report Typos and Errors
  1: /*
  2:   Contributed by Patrick Sanan and Sascha M. Schnepp
  3: */

  5:  #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>

  7: static PetscBool  cited = PETSC_FALSE;
  8: static const char citation[] =
  9:   "@article{SSM2016,\n"
 10:   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
 11:   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
 12:   "  journal = {SIAM Journal on Scientific Computing},\n"
 13:   "  volume = {38},\n"
 14:   "  number = {5},\n"
 15:   "  pages = {C441-C470},\n"
 16:   "  year = {2016},\n"
 17:   "  doi = {10.1137/15M1049130},\n"
 18:   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
 19:   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
 20:   "}\n";

 22: #define PIPEFGMRES_DELTA_DIRECTIONS 10
 23: #define PIPEFGMRES_DEFAULT_MAXK     30

 25: static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
 26: static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
 27: static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
 28: extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);

 30: /*

 32:     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.

 34:     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
 35:     but can be called directly by KSPSetUp().

 37: */
 38: static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
 39: {
 41:   PetscInt       k;
 42:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
 43:   const PetscInt max_k = pipefgmres->max_k;

 46:   KSPSetUp_GMRES(ksp);

 48:   PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);
 49:   PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);
 50:   PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));

 52:   KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);
 53:   PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);
 54:   for (k=0; k < pipefgmres->vv_allocated; k++) {
 55:     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
 56:   }

 58:   PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);
 59:   PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);
 60:   PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));

 62:   PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);
 63:   PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));

 65:   KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);
 66:   PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);
 67:   for (k=0; k < pipefgmres->vv_allocated; k++) {
 68:     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
 69:   }

 71:   return(0);
 72: }

 74: /*

 76:     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
 77:                   history if requested.

 79:     input parameters:
 80: .        pipefgmres  - structure containing parameters and work areas

 82:     output parameters:
 83: .        itcount - number of iterations used.  If null, ignored.
 84: .        converged - 0 if not converged

 86:     Notes:
 87:     On entry, the value in vector VEC_VV(0) should be
 88:     the initial residual.


 91:  */

 93: static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
 94: {
 95:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
 96:   PetscReal      res_norm;
 97:   PetscReal      hapbnd,tt;
 98:   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
 99:   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
101:   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
102:   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
103:   PetscInt       i,j,k;
104:   Mat            Amat,Pmat;
105:   Vec            Q,W; /* Pipelining vectors */
106:   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */

109:   if (itcount) *itcount = 0;

111:   /* Assign simpler names to these vectors, allocated as pipelining workspace */
112:   Q = VEC_Q;
113:   W = VEC_W;

115:   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
116:   /* Note that we add an extra value here to allow for a single reduction */
117:   if (!pipefgmres->orthogwork) { PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);
118:   }
119:   lhh = pipefgmres->orthogwork;

121:   /* Number of pseudo iterations since last restart is the number
122:      of prestart directions */
123:   loc_it = 0;

125:   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
126:      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
127:      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
128:      (loc_it -1) is passed, so the two are equivalent */
129:   pipefgmres->it = (loc_it - 1);

131:   /* initial residual is in VEC_VV(0)  - compute its norm*/
132:   VecNorm(VEC_VV(0),NORM_2,&res_norm);

134:   /* first entry in right-hand-side of hessenberg system is just
135:      the initial residual norm */
136:   *RS(0) = res_norm;

138:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
139:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
140:   else ksp->rnorm = 0;
141:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
142:   KSPLogResidualHistory(ksp,ksp->rnorm);
143:   KSPMonitor(ksp,ksp->its,ksp->rnorm);

145:   /* check for the convergence - maybe the current guess is good enough */
146:   (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);
147:   if (ksp->reason) {
148:     if (itcount) *itcount = 0;
149:     return(0);
150:   }

152:   /* scale VEC_VV (the initial residual) */
153:   VecScale(VEC_VV(0),1.0/res_norm);

155:   /* Fill the pipeline */
156:   KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));
157:   PCGetOperators(ksp->pc,&Amat,&Pmat);
158:   KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));
159:   VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it)); /* Note shift */

161:   /* MAIN ITERATION LOOP BEGINNING*/
162:   /* keep iterating until we have converged OR generated the max number
163:      of directions OR reached the max number of iterations for the method */
164:   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
165:     if (loc_it) {
166:       KSPLogResidualHistory(ksp,res_norm);
167:       KSPMonitor(ksp,ksp->its,res_norm);
168:     }
169:     pipefgmres->it = (loc_it - 1);

171:     /* see if more space is needed for work vectors */
172:     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
173:       KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);
174:       /* (loc_it+1) is passed in as number of the first vector that should
175:          be allocated */
176:     }

178:     /* Note that these inner products are with "Z" now, so
179:        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
180:        not the value from the equivalent FGMRES run (even in exact arithmetic)
181:        That is, the H we need for the Arnoldi relation is different from the
182:        coefficients we use in the orthogonalization process,because of the shift */

184:     /* Do some local twiddling to allow for a single reduction */
185:     for(i=0;i<loc_it+1;i++){
186:       redux[i] = VEC_VV(i);
187:     }
188:     redux[loc_it+1] = ZVEC(loc_it);

190:     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
191:     VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);

193:     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
194:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));

196:     /* The work to be overlapped with the inner products follows.
197:        This is application of the preconditioner and the operator
198:        to compute intermediate quantites which will be combined (locally)
199:        with the results of the inner products.
200:        */
201:     KSP_PCApply(ksp,ZVEC(loc_it),Q);
202:     PCGetOperators(ksp->pc,&Amat,&Pmat);
203:     KSP_MatMult(ksp,Amat,Q,W);

205:     /* Compute inner products of the new direction with previous directions,
206:        and the norm of the to-be-orthogonalized direction "Z".
207:        This information is enough to build the required entries
208:        of H. The inner product with VEC_VV(it_loc) is
209:        *different* than in the standard FGMRES and need to be dealt with specially.
210:        That is, for standard FGMRES the orthogonalization coefficients are the same
211:        as the coefficients used in the Arnoldi relation to reconstruct, but here this
212:        is not true (albeit only for the one entry of H which we "unshift" below. */

214:     /* Finish the dot product, retrieving the extra entry */
215:     VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);
216:     tt = PetscRealPart(lhh[loc_it+1]);

218:     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
219:       Note that the Hessenberg entries require a shift, as these are for the
220:       relation AU = VH, which is wrt unshifted basis vectors */
221:     hh = HH(0,loc_it); hes=HES(0,loc_it);
222:     for (j=0; j<loc_it; j++) {
223:       hh[j]  = lhh[j];
224:       hes[j] = lhh[j];
225:     }
226:     hh[loc_it]  = lhh[loc_it] + shift;
227:     hes[loc_it] = lhh[loc_it] + shift;

229:     /* we delay applying the shift here */
230:     for (j=0; j<=loc_it; j++) {
231:       lhh[j]        = -lhh[j]; /* flip sign */
232:     }

234:     /* Compute the norm of the un-normalized new direction using the rearranged formula
235:        Note that these are shifted ("barred") quantities */
236:     for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
237:     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
238:     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ;
239:     if (tt < 0.0) {
240:       /* If we detect square root breakdown in the norm, we must restart the algorithm.
241:          Here this means we simply break the current loop and reconstruct the solution
242:          using the basis we have computed thus far. Note that by breaking immediately,
243:          we do not update the iteration count, so computation done in this iteration
244:          should be disregarded.
245:          */
246:       PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);
247:       break;
248:     } else {
249:       tt = PetscSqrtReal(tt);
250:     }

252:     /* new entry in hessenburg is the 2-norm of our new direction */
253:     hh[loc_it+1]  = tt;
254:     hes[loc_it+1] = tt;

256:     /* The recurred computation for the new direction
257:        The division by tt is delayed to the happy breakdown check later
258:        Note placement BEFORE the unshift
259:        */
260:     VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));
261:     VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));
262:     /* (VEC_VV(loc_it+1) is not normalized yet) */

264:     /* The recurred computation for the preconditioned vector (u) */
265:     VecCopy(Q,PREVEC(loc_it+1));
266:     VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));
267:     VecScale(PREVEC(loc_it+1),1.0/tt);

269:     /* Unshift an entry in the GS coefficients ("removing the bar") */
270:     lhh[loc_it]         -= shift;

272:     /* The recurred computation for z (Au)
273:        Note placement AFTER the "unshift" */
274:     VecCopy(W,ZVEC(loc_it+1));
275:     VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));
276:     VecScale(ZVEC(loc_it+1),1.0/tt);

278:     /* Happy Breakdown Check */
279:     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
280:     /* RS(loc_it) contains the res_norm from the last iteration  */
281:     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
282:     if (tt > hapbnd) {
283:       /* scale new direction by its norm  */
284:       VecScale(VEC_VV(loc_it+1),1.0/tt);
285:     } else {
286:       /* This happens when the solution is exactly reached. */
287:       /* So there is no new direction... */
288:       VecSet(VEC_TEMP,0.0);     /* set VEC_TEMP to 0 */
289:       hapend = PETSC_TRUE;
290:     }
291:     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
292:        current solution would not be exact if HES was singular.  Note that
293:        HH non-singular implies that HES is not singular, and HES is guaranteed
294:        to be nonsingular when PREVECS are linearly independent and A is
295:        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
296:        of HES). So we should really add a check to verify that HES is nonsingular.*/

298:     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
299:        here to check that the hessenberg matrix is indeed non-singular (since
300:        FGMRES does not guarantee this) */

302:     /* Now apply rotations to new col of hessenberg (and right side of system),
303:        calculate new rotation, and get new residual norm at the same time*/
304:     KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);
305:     if (ksp->reason) break;

307:     loc_it++;
308:     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */

310:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
311:     ksp->its++;
312:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
313:     else ksp->rnorm = 0;
314:     PetscObjectSAWsGrantAccess((PetscObject)ksp);

316:     (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);

318:     /* Catch error in happy breakdown and signal convergence and break from loop */
319:     if (hapend) {
320:       if (!ksp->reason) {
321:         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
322:         else {
323:           ksp->reason = KSP_DIVERGED_BREAKDOWN;
324:           break;
325:         }
326:       }
327:     }
328:   }
329:   /* END OF ITERATION LOOP */
330:   KSPLogResidualHistory(ksp,ksp->rnorm);

332:   /*
333:      Monitor if we know that we will not return for a restart */
334:   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
335:     KSPMonitor(ksp,ksp->its,ksp->rnorm);
336:   }

338:   if (itcount) *itcount = loc_it;

340:   /*
341:     Down here we have to solve for the "best" coefficients of the Krylov
342:     columns, add the solution values together, and possibly unwind the
343:     preconditioning from the solution
344:    */

346:   /* Form the solution (or the solution so far) */
347:   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
348:      properly navigates */

350:   KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);

352:   return(0);
353: }

355: /*
356:     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.


359:    Input Parameter:
360: .     ksp - the Krylov space object that was set to use pipefgmres

362:    Output Parameter:
363: .     outits - number of iterations used

365: */
366: static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
367: {
369:   PetscInt       its,itcount;
370:   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
371:   PetscBool      guess_zero = ksp->guess_zero;


375:   /* We have not checked these routines for use with complex numbers. The inner products
376:      are likely not defined correctly for that case */
377: #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
378:   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
379: #endif

381:   PetscCitationsRegister(citation,&cited);

383:   if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
384:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
385:   ksp->its = 0;
386:   PetscObjectSAWsGrantAccess((PetscObject)ksp);

388:   itcount     = 0;
389:   ksp->reason = KSP_CONVERGED_ITERATING;
390:   while (!ksp->reason) {
391:     KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
392:     KSPPIPEFGMRESCycle(&its,ksp);
393:     itcount += its;
394:     if (itcount >= ksp->max_it) {
395:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
396:       break;
397:     }
398:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
399:   }
400:   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
401:   return(0);
402: }

404: static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
405: {

409:   KSPReset_PIPEFGMRES(ksp);
410:   KSPDestroy_GMRES(ksp);
411:   return(0);
412: }

414: /*
415:     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
416:                       current iterates.

418:     Input parameters:
419:         nrs - work area of size it + 1.
420:         vguess  - index of initial guess
421:         vdest - index of result.  Note that vguess may == vdest (replace
422:                 guess with the solution).
423:         it - HH upper triangular part is a block of size (it+1) x (it+1)

425:      This is an internal routine that knows about the PIPEFGMRES internals.
426:  */
427: static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
428: {
429:   PetscScalar    tt;
431:   PetscInt       k,j;
432:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);

435:   /* Solve for solution vector that minimizes the residual */

437:   if (it < 0) {                                 /* no pipefgmres steps have been performed */
438:     VecCopy(vguess,vdest); /* VecCopy() is smart, exits immediately if vguess == vdest */
439:     return(0);
440:   }

442:   /* solve the upper triangular system - RS is the right side and HH is
443:      the upper triangular matrix  - put soln in nrs */
444:   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
445:   else nrs[it] = 0.0;

447:   for (k=it-1; k>=0; k--) {
448:     tt = *RS(k);
449:     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
450:     nrs[k] = tt / *HH(k,k);
451:   }

453:   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
454:   VecZeroEntries(VEC_TEMP);
455:   VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));

457:   /* add solution to previous solution */
458:   if (vdest == vguess) {
459:     VecAXPY(vdest,1.0,VEC_TEMP);
460:   } else {
461:     VecWAXPY(vdest,1.0,VEC_TEMP,vguess);
462:   }
463:   return(0);
464: }

466: /*

468:     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
469:                             Return new residual.

471:     input parameters:

473: .        ksp -    Krylov space object
474: .        it  -    plane rotations are applied to the (it+1)th column of the
475:                   modified hessenberg (i.e. HH(:,it))
476: .        hapend - PETSC_FALSE not happy breakdown ending.

478:     output parameters:
479: .        res - the new residual

481:  */
482: /*
483: .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
484:  */
485: static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
486: {
487:   PetscScalar    *hh,*cc,*ss,*rs;
488:   PetscInt       j;
489:   PetscReal      hapbnd;
490:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);

494:   hh = HH(0,it);   /* pointer to beginning of column to update */
495:   cc = CC(0);      /* beginning of cosine rotations */
496:   ss = SS(0);      /* beginning of sine rotations */
497:   rs = RS(0);      /* right hand side of least squares system */

499:   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
500:   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];

502:   /* check for the happy breakdown */
503:   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
504:   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
505:     PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));
506:     *hapend = PETSC_TRUE;
507:   }

509:   /* Apply all the previously computed plane rotations to the new column
510:      of the Hessenberg matrix */
511:   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
512:      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */

514:   for (j=0; j<it; j++) {
515:     PetscScalar hhj = hh[j];
516:     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
517:     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
518:   }

520:   /*
521:     compute the new plane rotation, and apply it to:
522:      1) the right-hand-side of the Hessenberg system (RS)
523:         note: it affects RS(it) and RS(it+1)
524:      2) the new column of the Hessenberg matrix
525:         note: it affects HH(it,it) which is currently pointed to
526:         by hh and HH(it+1, it) (*(hh+1))
527:     thus obtaining the updated value of the residual...
528:   */

530:   /* compute new plane rotation */

532:   if (!*hapend) {
533:     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
534:     if (delta == 0.0) {
535:       ksp->reason = KSP_DIVERGED_NULL;
536:       return(0);
537:     }

539:     cc[it] = hh[it] / delta;    /* new cosine value */
540:     ss[it] = hh[it+1] / delta;  /* new sine value */

542:     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
543:     rs[it+1] = -ss[it]*rs[it];
544:     rs[it]   = PetscConj(cc[it])*rs[it];
545:     *res     = PetscAbsScalar(rs[it+1]);
546:   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
547:             another rotation matrix (so RH doesn't change).  The new residual is
548:             always the new sine term times the residual from last time (RS(it)),
549:             but now the new sine rotation would be zero...so the residual should
550:             be zero...so we will multiply "zero" by the last residual.  This might
551:             not be exactly what we want to do here -could just return "zero". */

553:     *res = 0.0;
554:   }
555:   return(0);
556: }

558: /*
559:    KSPBuildSolution_PIPEFGMRES

561:      Input Parameter:
562: .     ksp - the Krylov space object
563: .     ptr-

565:    Output Parameter:
566: .     result - the solution

568:    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
569:    calls directly.

571: */
572: PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
573: {
574:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;

578:   if (!ptr) {
579:     if (!pipefgmres->sol_temp) {
580:       VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);
581:       PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);
582:     }
583:     ptr = pipefgmres->sol_temp;
584:   }
585:   if (!pipefgmres->nrs) {
586:     /* allocate the work area */
587:     PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);
588:     PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));
589:   }

591:   KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);
592:   if (result) *result = ptr;
593:   return(0);
594: }

596: PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
597: {
599:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
600:   PetscBool      flg;
601:   PetscScalar    shift;

604:   KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
605:   PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");
606:   PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);
607:   if (flg) { KSPPIPEFGMRESSetShift(ksp,shift); }
608:   PetscOptionsTail();
609:   return(0);
610: }

612: PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
613: {
614:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
616:   PetscBool      iascii,isstring;

619:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
620:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);

622:   if (iascii) {
623:     PetscViewerASCIIPrintf(viewer,"  restart=%D\n",pipefgmres->max_k);
624:     PetscViewerASCIIPrintf(viewer,"  happy breakdown tolerance %g\n",(double)pipefgmres->haptol);
625: #if defined(PETSC_USE_COMPLEX)
626:     PetscViewerASCIIPrintf(viewer,"  shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));
627: #else
628:     PetscViewerASCIIPrintf(viewer,"  shift=%g\n",pipefgmres->shift);
629: #endif
630:   } else if (isstring) {
631:     PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);
632: #if defined(PETSC_USE_COMPLEX)
633:     PetscViewerStringSPrintf(viewer,"   shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));
634: #else
635:     PetscViewerStringSPrintf(viewer,"   shift=%g\n",pipefgmres->shift);
636: #endif
637:   }
638:   return(0);
639: }

641: PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
642: {
643:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
644:   PetscErrorCode   ierr;
645:   PetscInt         i;

648:   PetscFree(pipefgmres->prevecs);
649:   PetscFree(pipefgmres->zvecs);
650:   for (i=0; i<pipefgmres->nwork_alloc; i++) {
651:     VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);
652:     VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);
653:   }
654:   PetscFree(pipefgmres->prevecs_user_work);
655:   PetscFree(pipefgmres->zvecs_user_work);
656:   PetscFree(pipefgmres->redux);
657:   KSPReset_GMRES(ksp);
658:   return(0);
659: }

661: /*MC
662:    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.

664:    A flexible, 1-stage pipelined variant of GMRES.

666:    Options Database Keys:
667: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
668: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
669: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
670: .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
671:                              vectors are allocated as needed)
672: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated


675:    Level: intermediate

677:    Notes:

679:    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.

681:    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.

683:    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
684:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
685:    See the FAQ on the PETSc website for details.

687:    Developer Notes:
688:     This class is subclassed off of KSPGMRES.

690:    Reference:
691:     P. Sanan, S.M. Schnepp, and D.A. May,
692:     "Pipelined, Flexible Krylov Subspace Methods,"
693:     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
694:     DOI: 10.1137/15M1049130

696: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES
697:            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift()
698: M*/

700: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
701: {
702:   KSP_PIPEFGMRES *pipefgmres;

706:   PetscNewLog(ksp,&pipefgmres);

708:   ksp->data                              = (void*)pipefgmres;
709:   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
710:   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
711:   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
712:   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
713:   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
714:   ksp->ops->view                         = KSPView_PIPEFGMRES;
715:   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
716:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
717:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

719:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);
720:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);

722:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
723:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
724:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);

726:   pipefgmres->nextra_vecs    = 1;
727:   pipefgmres->haptol         = 1.0e-30;
728:   pipefgmres->q_preallocate  = 0;
729:   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
730:   pipefgmres->orthog         = NULL;
731:   pipefgmres->nrs            = NULL;
732:   pipefgmres->sol_temp       = NULL;
733:   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
734:   pipefgmres->Rsvd           = NULL;
735:   pipefgmres->orthogwork     = NULL;
736:   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
737:   pipefgmres->shift          = 1.0;
738:   return(0);
739: }

741: static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
742: {
743:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
744:   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
745:   PetscInt       nalloc;                            /* number to allocate */
747:   PetscInt       k;

750:   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
751:                                       in a single chunk */

753:   /* Adjust the number to allocate to make sure that we don't exceed the
754:      number of available slots (pipefgmres->vecs_allocated)*/
755:   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
756:     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
757:   }
758:   if (!nalloc) return(0);

760:   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */

762:   /* work vectors */
763:   KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);
764:   PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);
765:   for (k=0; k < nalloc; k++) {
766:     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
767:   }
768:   /* specify size of chunk allocated */
769:   pipefgmres->mwork_alloc[nwork] = nalloc;

771:   /* preconditioned vectors (note we don't use VEC_OFFSET) */
772:   KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);
773:   PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);
774:   for (k=0; k < nalloc; k++) {
775:     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
776:   }

778:   KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);
779:   PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);
780:   for (k=0; k < nalloc; k++) {
781:     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
782:   }

784:   /* increment the number of work vector chunks */
785:   pipefgmres->nwork_alloc++;
786:   return(0);
787: }
788: /*@
789:   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.

791:   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).

793: Logically Collective on ksp

795: Input Parameters:
796: +  ksp - the Krylov space context
797: -  shift - the shift

799: Level: intermediate

801: Options Database:
802: . -ksp_pipefgmres_shift <shift>

804: .seealso: KSPComputeEigenvalues()
805: @*/
806: PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
807: {
808:   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;

813:   pipefgmres->shift = shift;
814:   return(0);
815: }