Actual source code: pipelcg.c

petsc-master 2019-08-15
Report Typos and Errors
  1:  #include <petsc/private/kspimpl.h>
  2:  #include <petsc/private/vecimpl.h>

  4: #define offset(j)      PetscMax(((j) - (2*l)), 0)
  5: #define shift(i,j)     ((i) - offset((j)))
  6: #define G(i,j)         (plcg->G[((j)*(2*l+1))+(shift((i),(j))) ])
  7: #define G_noshift(i,j) (plcg->G[((j)*(2*l+1))+(i)])
  8: #define alpha(i)       (plcg->alpha[(i)])
  9: #define gamma(i)       (plcg->gamma[(i)])
 10: #define delta(i)       (plcg->delta[(i)])
 11: #define sigma(i)       (plcg->sigma[(i)])
 12: #define req(i)         (plcg->req[(i)])

 14: typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
 15: struct KSP_CG_PIPE_L_s {
 16:   PetscInt    l;          /* pipeline depth */
 17:   Vec         *Z;         /* Z vectors (shifted base) */
 18:   Vec         *U;         /* U vectors (unpreconditioned shifted base) */
 19:   Vec         *V;         /* V vectors (original base) */
 20:   Vec         *Q;         /* Q vectors (auxiliary bases) */
 21:   Vec         p;          /* work vector */
 22:   PetscScalar *G;         /* such that Z = VG (band matrix)*/
 23:   PetscScalar *gamma,*delta,*alpha;
 24:   PetscReal   lmin,lmax;  /* min and max eigen values estimates to compute base shifts */
 25:   PetscReal   *sigma;     /* base shifts */
 26:   MPI_Request *req;       /* request array for asynchronous global collective */
 27:   PetscBool   show_rstrt; /* flag to show restart information in output (default: not shown) */
 28: };

 30: /**
 31:  * KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.
 32:  *
 33:  * This is called once, usually automatically by KSPSolve() or KSPSetUp()
 34:  * but can be called directly by KSPSetUp()
 35:  */
 36: static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
 37: {
 39:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
 40:   PetscInt       l=plcg->l,max_it=ksp->max_it;
 41:   MPI_Comm       comm;

 44:   comm = PetscObjectComm((PetscObject)ksp);
 45:   if (max_it < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: max_it argument must be positive.",((PetscObject)ksp)->type_name);
 46:   if (l < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be positive.",((PetscObject)ksp)->type_name);
 47:   if (l > max_it) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be less than max_it.",((PetscObject)ksp)->type_name);

 49:   KSPSetWorkVecs(ksp,1); /* get work vectors needed by PIPELCG */
 50:   plcg->p   = ksp->work[0];

 52:   VecDuplicateVecs(plcg->p,PetscMax(3,l+1),&plcg->Z);
 53:   VecDuplicateVecs(plcg->p,3,&plcg->U);
 54:   VecDuplicateVecs(plcg->p,3,&plcg->V);
 55:   VecDuplicateVecs(plcg->p,3*(l-1)+1,&plcg->Q);
 56:   PetscCalloc1(2,&plcg->alpha);
 57:   PetscCalloc1(l,&plcg->sigma);
 58: 
 59:   return(0);
 60: }

 62: static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
 63: {
 64:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
 65:   PetscInt      ierr=0,l=plcg->l;

 68:   PetscFree(plcg->sigma);
 69:   PetscFree(plcg->alpha);
 70:   VecDestroyVecs(PetscMax(3,l+1),&plcg->Z);
 71:   VecDestroyVecs(3,&plcg->U);
 72:   VecDestroyVecs(3,&plcg->V);
 73:   VecDestroyVecs(3*(l-1)+1,&plcg->Q);
 74:   return(0);
 75: }

 77: static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
 78: {
 79:   PetscInt ierr=0;

 82:   KSPReset_PIPELCG(ksp);
 83:   KSPDestroyDefault(ksp);
 84:   return(0);
 85: }

 87: static PetscErrorCode KSPSetFromOptions_PIPELCG(PetscOptionItems *PetscOptionsObject,KSP ksp)
 88: {
 89:   PetscInt      ierr=0;
 90:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
 91:   PetscBool     flag=PETSC_FALSE;

 94:   PetscOptionsHead(PetscOptionsObject,"KSP PIPELCG options");
 95:   PetscOptionsInt("-ksp_pipelcg_pipel","Pipeline length","",plcg->l,&plcg->l,&flag);
 96:   if (!flag) plcg->l = 1;
 97:   PetscOptionsReal("-ksp_pipelcg_lmin","Estimate for smallest eigenvalue","",plcg->lmin,&plcg->lmin,&flag);
 98:   if (!flag) plcg->lmin = 0.0;
 99:   PetscOptionsReal("-ksp_pipelcg_lmax","Estimate for largest eigenvalue","",plcg->lmax,&plcg->lmax,&flag);
100:   if (!flag) plcg->lmax = 0.0;
101:   PetscOptionsBool("-ksp_pipelcg_monitor","Output information on restarts when they occur? (default: 0)","",plcg->show_rstrt,&plcg->show_rstrt,&flag);
102:   if (!flag) plcg->show_rstrt = PETSC_FALSE;
103:   PetscOptionsTail();
104:   return(0);
105: }

107: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
108: {
109:   PetscErrorCode ierr=0;

112: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
113:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
114: #else
115:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
116:   *request = MPI_REQUEST_NULL;
117: #endif
118:   return(0);
119: }

121: static PetscErrorCode KSPView_PIPELCG(KSP ksp,PetscViewer viewer)
122: {
123:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
124:   PetscErrorCode ierr=0;
125:   PetscBool      iascii=PETSC_FALSE,isstring=PETSC_FALSE;

128:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
129:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
130:   if (iascii) {
131:     PetscViewerASCIIPrintf(viewer,"  Pipeline depth: %D\n", plcg->l);
132:     PetscViewerASCIIPrintf(viewer,"  Minimal eigenvalue estimate %g\n",plcg->lmin);
133:     PetscViewerASCIIPrintf(viewer,"  Maximal eigenvalue estimate %g\n",plcg->lmax);
134:   } else if (isstring) {
135:     PetscViewerStringSPrintf(viewer,"  Pipeline depth: %D\n", plcg->l);
136:     PetscViewerStringSPrintf(viewer,"  Minimal eigenvalue estimate %g\n",plcg->lmin);
137:     PetscViewerStringSPrintf(viewer,"  Maximal eigenvalue estimate %g\n",plcg->lmax);
138:   }
139:   return(0);
140: }

142: static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
143: {
144:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
145:   Mat           A=NULL,Pmat=NULL;
146:   PetscInt      it=0,max_it=ksp->max_it,ierr=0,l=plcg->l,i=0,j=0,k=0;
147:   PetscInt      start=0,middle=0,end=0;
148:   Vec           *Z=plcg->Z,*U=plcg->U,*V=plcg->V,*Q=plcg->Q;
149:   Vec           x=NULL,p=NULL,temp=NULL;
150:   PetscScalar   sum_dummy=0.0,eta=0.0,zeta=0.0,lambda=0.0;
151:   PetscReal     dp=0.0,tmp=0.0,beta=0.0,invbeta2=0.0;
152:   MPI_Comm      comm;

155:   x   = ksp->vec_sol;
156:   p   = plcg->p;

158:   comm = PetscObjectComm((PetscObject)ksp);
159:   PCGetOperators(ksp->pc,&A,&Pmat);

161:   for (it = 0; it < max_it+l; ++it) {
162:     /* ----------------------------------- */
163:     /* Multiplication  z_{it+1} =  Az_{it} */
164:     /* ----------------------------------- */
165:     /* Shift the U vector pointers */
166:     temp = U[2];
167:     for (i = 2; i>0; i--) {
168:       U[i] = U[i-1];
169:     }
170:     U[0] = temp;
171:     if (it < l) {
172:       /* SpMV and Sigma-shift and Prec */
173:       MatMult(A,Z[l-it],U[0]);
174:       VecAXPY(U[0],-sigma(it),U[1]);
175:       KSP_PCApply(ksp,U[0],Z[l-it-1]);
176:       if (it < l-1) {
177:         VecCopy(Z[l-it-1],Q[3*it]);
178:       }
179:     } else {
180:       /* Shift the Z vector pointers */
181:       temp = Z[PetscMax(l,2)];
182:       for (i = PetscMax(l,2); i > 0; --i) {
183:         Z[i] = Z[i-1];
184:       }
185:       Z[0] = temp;
186:       /* SpMV and Prec */
187:       MatMult(A,Z[1],U[0]);
188:       KSP_PCApply(ksp,U[0],Z[0]);
189:     }

191:     /* ----------------------------------- */
192:     /* Adjust the G matrix */
193:     /* ----------------------------------- */
194:     if (it >= l) {
195:       if (it == l) {
196:         /* MPI_Wait for G(0,0),scale V0 and Z and U and Q vectors with 1/beta */
197:         MPI_Wait(&req(0),MPI_STATUS_IGNORE);
198:         beta = PetscSqrtReal(PetscRealPart(G(0,0)));
199:         G(0,0) = 1.0;
200:         VecAXPY(V[0],1.0/beta,p); /* this assumes V[0] to be zero initially */
201:         for (j = 0; j <= PetscMax(l,2); ++j) {
202:           VecScale(Z[j],1.0/beta);
203:         }
204:         for (j = 0; j <= 2; ++j) {
205:           VecScale(U[j],1.0/beta);
206:         }
207:         for (j = 0; j < l-1; ++j) {
208:           VecScale(Q[3*j],1.0/beta);
209:         }
210:       }

212:       /* MPI_Wait until the dot products,started l iterations ago,are completed */
213:       MPI_Wait(&req(it-l+1),MPI_STATUS_IGNORE);
214:       if (it >= 2*l) {
215:         for (j = PetscMax(0,it-3*l+1); j <= it-2*l; j++) {
216:           G(j,it-l+1) = G(it-2*l+1,j+l); /* exploit symmetry in G matrix */
217:         }
218:       }

220:       if (it <= 2*l-1) {
221:         invbeta2 = 1.0 / (beta * beta);
222:         /* Scale columns 1 up to l of G with 1/beta^2 */
223:         for (j = PetscMax(it-3*l+1,0); j <= it-l+1; ++j) {
224:           G(j,it-l+1) *= invbeta2;
225:         }
226:       }

228:       for (j = PetscMax(it-2*l+2,0); j <= it-l; ++j) {
229:         sum_dummy = 0.0;
230:         for (k = PetscMax(it-3*l+1,0); k <= j-1; ++k) {
231:           sum_dummy = sum_dummy + G(k,j) * G(k,it-l+1);
232:         }
233:         G(j,it-l+1) = (G(j,it-l+1) - sum_dummy) / G(j,j);
234:       }

236:       sum_dummy = 0.0;
237:       for (k = PetscMax(it-3*l+1,0); k <= it-l; ++k) {
238:         sum_dummy = sum_dummy + G(k,it-l+1) * G(k,it-l+1);
239:       }

241:       tmp = PetscRealPart(G(it-l+1,it-l+1) - sum_dummy);
242:       /* Breakdown check */
243:       if (tmp < 0) {
244:         if (plcg->show_rstrt) {
245:           PetscPrintf(comm,"Sqrt breakdown in iteration %D: sqrt argument is %e. Iteration was restarted.\n",ksp->its+1,(double)tmp);
246:         }
247:         /* End hanging dot-products in the pipeline before exiting for-loop */
248:         start = it-l+2;
249:         end = PetscMin(it+1,max_it+1);  /* !warning! 'it' can actually be greater than 'max_it' */
250:         for (i = start; i < end; ++i) {
251:           MPI_Wait(&req(i),MPI_STATUS_IGNORE);
252:         }
253:         break;
254:       }
255:       G(it-l+1,it-l+1) = PetscSqrtReal(tmp);

257:       if (it < 2*l) {
258:         if (it == l) {
259:           gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)) / G(it-l,it-l);
260:         } else {
261:           gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)
262:                          - delta(it-l-1) * G(it-l-1,it-l)) / G(it-l,it-l);
263:         }
264:         delta(it-l) = G(it-l+1,it-l+1) / G(it-l,it-l);
265:       } else {
266:         gamma(it-l) = (G(it-l,it-l) * gamma(it-2*l)
267:                        + G(it-l,it-l+1) * delta(it-2*l)
268:                        - G(it-l-1,it-l) * delta(it-l-1)) / G(it-l,it-l);
269:         delta(it-l) = (G(it-l+1,it-l+1) * delta(it-2*l)) / G(it-l,it-l);
270:       }

272:       /* -------------------------------------------------- */
273:       /* Recursively compute the next V, Q, Z and U vectors */
274:       /* -------------------------------------------------- */
275:       /* Shift the V vector pointers */
276:       temp = V[2];
277:       for (i = 2; i>0; i--) {
278:         V[i] = V[i-1];
279:       }
280:       V[0] = temp;

282:       /* Recurrence V vectors */
283:       if (l == 1) {
284:         VecCopy(Z[1],V[0]);
285:       } else {
286:         VecCopy(Q[0],V[0]);
287:       }
288:       if (it == l) {
289:         VecAXPY(V[0],sigma(0)-gamma(it-l),V[1]);
290:       } else {
291:         alpha(0) = sigma(0)-gamma(it-l);
292:         alpha(1) = -delta(it-l-1);
293:         VecMAXPY(V[0],2,&alpha(0),&V[1]);
294:       }
295:       VecScale(V[0],1.0/delta(it-l));

297:       /* Recurrence Q vectors */
298:       for (j = 0; j < l-1; ++j) {
299:         /* Shift the Q vector pointers */
300:         temp = Q[3*j+2];
301:         for (i = 2; i>0; i--) {
302:           Q[3*j+i] = Q[3*j+i-1];
303:         }
304:         Q[3*j] = temp;

306:         if (j < l-2){
307:           VecCopy(Q[3*(j+1)],Q[3*j]);
308:         } else {
309:           VecCopy(Z[1],Q[3*j]);
310:         }
311:         if (it == l){
312:           VecAXPY(Q[3*j],sigma(j+1)-gamma(it-l),Q[3*j+1]);
313:         } else {
314:           alpha(0) = sigma(j+1)-gamma(it-l);
315:           alpha(1) = -delta(it-l-1);
316:           VecMAXPY(Q[3*j],2,&alpha(0),&Q[3*j+1]);
317:         }
318:         VecScale(Q[3*j],1.0/delta(it-l));
319:       }

321:       /* Recurrence Z and U vectors */
322:       if (it == l) {
323:         VecAXPY(Z[0],-gamma(it-l),Z[1]);
324:         VecAXPY(U[0],-gamma(it-l),U[1]);
325:       } else {
326:         alpha(0) = -gamma(it-l);
327:         alpha(1) = -delta(it-l-1);
328:         VecMAXPY(Z[0],2,&alpha(0),&Z[1]);
329:         VecMAXPY(U[0],2,&alpha(0),&U[1]);
330:       }
331:       VecScale(Z[0],1.0/delta(it-l));
332:       VecScale(U[0],1.0/delta(it-l));
333:     }

335:     /* ---------------------------------------- */
336:     /* Compute and communicate the dot products */
337:     /* ---------------------------------------- */
338:     if (it < l) {
339:       for (j = 0; j < it+2; ++j) {
340:         (*U[0]->ops->dot_local)(U[0],Z[l-j],&G(j,it+1)); /* dot-products (U[0],Z[j]) */
341:       }
342:       MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,it+1),it+2,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
343:     } else if ((it >= l) && (it < max_it)) {
344:       middle = it-l+2;
345:       end = it+2;
346:       (*U[0]->ops->dot_local)(U[0],V[0],&G(it-l+1,it+1)); /* dot-product (U[0],V[0]) */
347:       for (j = middle; j < end; ++j) {
348:         (*U[0]->ops->dot_local)(U[0],plcg->Z[it+1-j],&G(j,it+1)); /* dot-products (U[0],Z[j]) */
349:       }
350:       MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(it-l+1,it+1),l+1,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
351:     }

353:     /* ----------------------------------------- */
354:     /* Compute solution vector and residual norm */
355:     /* ----------------------------------------- */
356:     if (it >= l) {
357:       if (it == l) {
358:         if (ksp->its != 0) {
359:           ++ ksp->its;
360:         }
361:         eta  = gamma(0);
362:         zeta = beta;
363:         VecCopy(V[1],p);
364:         VecScale(p,1.0/eta);
365:         VecAXPY(x,zeta,p);
366:         dp   = beta;
367:       } else if (it > l) {
368:         k = it-l;
369:         ++ ksp->its;
370:         lambda = delta(k-1)/eta;
371:         eta  = gamma(k) - lambda * delta(k-1);
372:         zeta = -lambda * zeta;
373:         VecScale(p,-delta(k-1)/eta);
374:         VecAXPY(p,1.0/eta,V[1]);
375:         VecAXPY(x,zeta,p);
376:         dp   = PetscAbsScalar(zeta);
377:       }
378:       ksp->rnorm = dp;
379:       KSPLogResidualHistory(ksp,dp);
380:       KSPMonitor(ksp,ksp->its,dp);
381:       (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);

383:       if (ksp->its >= max_it-1) {
384:         ksp->reason = KSP_DIVERGED_ITS;
385:       }
386:       if (ksp->reason) {
387:         /* End hanging dot-products in the pipeline before exiting for-loop */
388:         start = it-l+2;
389:         end = PetscMin(it+2,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
390:         for (i = start; i < end; ++i) {
391:           MPI_Wait(&req(i),MPI_STATUS_IGNORE);
392:         }
393:         break;
394:       }
395:     }
396:   } /* End inner for loop */
397:   return(0);
398: }

400: static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
401: {
402:   KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
403:   PetscInt      ierr=0,i=0,j=0,l=plcg->l,max_it=ksp->max_it;

406:   for (i = 0; i < PetscMax(3,l+1); ++i) {
407:     VecSet(plcg->Z[i],0.0);
408:   }
409:   for (i = 1; i < 3; ++i) {
410:     VecSet(plcg->U[i],0.0);
411:   }
412:   for (i = 0; i < 3; ++i) {
413:     VecSet(plcg->V[i],0.0);
414:   }
415:   for (i = 0; i < 3*(l-1)+1; ++i) {
416:     VecSet(plcg->Q[i],0.0);
417:   }
418:   for (j = 0; j < (max_it+1); ++j) {
419:     gamma(j) = 0.0;
420:     delta(j) = 0.0;
421:     for (i = 0; i < (2*l+1); ++i) {
422:       G_noshift(i,j) = 0.0;
423:     }
424:   }
425:   return(0);
426: }

428: /**
429:  * KSPSolve_PIPELCG - This routine actually applies the pipelined(l) conjugate gradient method
430:  *
431:  * Input Parameter:
432:  *     ksp - the Krylov space object that was set to use conjugate gradient,by,for
433:  *     example,KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
434:  */
435: static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
436: {
437:   PetscErrorCode ierr=0;
438:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
439:   Mat            A=NULL,Pmat=NULL;
440:   Vec            b=NULL,x=NULL,p=NULL;
441:   PetscInt       max_it=ksp->max_it,l=plcg->l;
442:   PetscInt       i=0,outer_it=0,curr_guess_zero=0;
443:   PetscReal      lmin=plcg->lmin,lmax=plcg->lmax;
444:   PetscBool      diagonalscale=PETSC_FALSE;
445:   MPI_Comm       comm;

448:   comm = PetscObjectComm((PetscObject)ksp);
449:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
450:   if (diagonalscale) {
451:     SETERRQ1(comm,PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
452:   }

454:   x = ksp->vec_sol;
455:   b = ksp->vec_rhs;
456:   p = plcg->p;

458:   PetscCalloc1((max_it+1)*(2*l+1),&plcg->G);
459:   PetscCalloc1(max_it+1,&plcg->gamma);
460:   PetscCalloc1(max_it+1,&plcg->delta);
461:   PetscCalloc1(max_it+1,&plcg->req);

463:   PCGetOperators(ksp->pc,&A,&Pmat);

465:   for (i = 0; i < l; ++i) {
466:     sigma(i) = (0.5*(lmin+lmax) + (0.5*(lmax-lmin) * PetscCosReal(PETSC_PI*(2.0*i+1.0)/(2.0*l))));
467:   }

469:   ksp->its = 0;
470:   outer_it = 0;
471:   curr_guess_zero = !! ksp->guess_zero;

473:   while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
474:     /* RESTART LOOP */
475:     if (!curr_guess_zero) {
476:       KSP_MatMult(ksp,A,x,plcg->U[0]);  /* u <- b - Ax */
477:       VecAYPX(plcg->U[0],-1.0,b);
478:     } else {
479:       VecCopy(b,plcg->U[0]);            /* u <- b (x is 0) */
480:     }
481:     KSP_PCApply(ksp,plcg->U[0],p);      /* p <- Bu */

483:     if (outer_it > 0) {
484:       /* Re-initialize Z,U,V,Q,gamma,delta,G after restart occurred */
485:       KSPSolve_ReInitData_PIPELCG(ksp);
486:     }

488:     (*plcg->U[0]->ops->dot_local)(plcg->U[0],p,&G(0,0));
489:     MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,0),1,MPIU_SCALAR,MPIU_SUM,comm,&req(0));
490:     VecCopy(p,plcg->Z[l]);

492:     KSPSolve_InnerLoop_PIPELCG(ksp);

494:     if (ksp->reason) break; /* convergence or divergence */
495:     ++ outer_it;
496:     curr_guess_zero = 0;
497:   }

499:   if (ksp->its >= max_it-1) {
500:     ksp->reason = KSP_DIVERGED_ITS;
501:   }
502:   PetscFree(plcg->G);
503:   PetscFree(plcg->gamma);
504:   PetscFree(plcg->delta);
505:   PetscFree(plcg->req);
506:   return(0);
507: }

509: /*MC
510:     KSPPIPELCG - Deep pipelined (length l) Conjugate Gradient method. This method has only a single non-blocking global
511:     reduction per iteration, compared to 2 blocking reductions for standard CG. The reduction is overlapped by the
512:     matrix-vector product and preconditioner application of the next l iterations. The pipeline length l is a parameter
513:     of the method.

515:     Options Database Keys:
516: +   -ksp_pipelcg_pipel - pipelined length
517: .   -ksp_pipelcg_lmin - approximation to the smallest eigenvalue of the preconditioned operator (default: 0.0)
518: .   -ksp_pipelcg_lmax - approximation to the largest eigenvalue of the preconditioned operator (default: 0.0)
519: .   -ksp_pipelcg_monitor - output where/why the method restarts when a sqrt breakdown occurs
520: -   see KSPSolve() for additional options

522:     Level: advanced

524:     Notes:
525:     MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
526:     performance of pipelined methods. See the FAQ on the PETSc website for details.

528:     Contributed by:
529:     Siegfried Cools, University of Antwerp, Dept. Mathematics and Computer Science,
530:     funded by Flemish Research Foundation (FWO) grant number 12H4617N.

532:     Example usage:
533:     [*] KSP ex2, no preconditioner, pipel = 2, lmin = 0.0, lmax = 8.0 :
534:             $mpirun -ppn 14 ./ex2 -m 1000 -n 1000 -ksp_type pipelcg -pc_type none -ksp_norm_type UNPRECONDITIONED 
535:         -ksp_rtol 1e-10 -ksp_max_it 1000 -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 8.0 -log_summary
536:     [*] SNES ex48, bjacobi preconditioner, pipel = 3, lmin = 0.0, lmax = 2.0, show restart information :
537:         $mpirun -ppn 14 ./ex48 -M 150 -P 100 -ksp_type pipelcg -pc_type bjacobi -ksp_rtol 1e-10 -ksp_pipelcg_pipel 3 
538:         -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 2.0 -ksp_pipelcg_monitor -log_summary

540:     References:
541:     [*] J. Cornelis, S. Cools and W. Vanroose,
542:         "The Communication-Hiding Conjugate Gradient Method with Deep Pipelines" 
543:         Submitted to SIAM Journal on Scientific Computing (SISC), 2018.
544:     [*] S. Cools, J. Cornelis and W. Vanroose,
545:         "Numerically Stable Recurrence Relations for the Communication Hiding Pipelined Conjugate Gradient Method"
546:         Submitted to IEEE Transactions on Parallel and Distributed Systems, 2019.

548: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSPCG, KSPPIPECG, KSPPIPECGRR, KSPPGMRES,
549:     KSPPIPEBCGS, KSPSetPCSide()
550: M*/
551: PETSC_EXTERN
552: PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
553: {
555:   KSP_CG_PIPE_L  *plcg = NULL;

558:   PetscNewLog(ksp,&plcg);
559:   ksp->data = (void*)plcg;

561:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
562:   KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);

564:   ksp->ops->setup          = KSPSetUp_PIPELCG;
565:   ksp->ops->solve          = KSPSolve_PIPELCG;
566:   ksp->ops->reset          = KSPReset_PIPELCG;
567:   ksp->ops->destroy        = KSPDestroy_PIPELCG;
568:   ksp->ops->view           = KSPView_PIPELCG;
569:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
570:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
571:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
572:   return(0);
573: }