Actual source code: pipecgrr.c

petsc-master 2020-09-23
Report Typos and Errors

  2: #include <petsc/private/kspimpl.h>

  4: /*
  5:      KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.

  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
 11: {

 15:   /* get work vectors needed by PIPECGRR */
 16:   KSPSetWorkVecs(ksp,9);
 17:   return(0);
 18: }

 20: /*
 21:  KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
 22: */
 23: static PetscErrorCode  KSPSolve_PIPECGRR(KSP ksp)
 24: {
 26:   PetscInt       i = 0,replace = 0,totreplaces = 0,nsize;
 27:   PetscScalar    alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0,alphap = 0.0,betap = 0.0;
 28:   PetscReal      dp = 0.0,nsi = 0.0,sqn = 0.0,Anorm = 0.0,rnp = 0.0,pnp = 0.0,snp = 0.0,unp = 0.0,wnp = 0.0,xnp = 0.0,qnp = 0.0,znp = 0.0,mnz = 5.0,tol = PETSC_SQRT_MACHINE_EPSILON,eps = PETSC_MACHINE_EPSILON;
 29:   PetscReal      ds = 0.0,dz = 0.0,dx = 0.0,dpp = 0.0,dq = 0.0,dm = 0.0,du = 0.0,dw = 0.0,db = 0.0,errr = 0.0,errrprev = 0.0,errs = 0.0,errw = 0.0,errz = 0.0,errncr = 0.0,errncs = 0.0,errncw = 0.0,errncz = 0.0;
 30:   Vec            X,B,Z,P,W,Q,U,M,N,R,S;
 31:   Mat            Amat,Pmat;
 32:   PetscBool      diagonalscale;

 35:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
 36:   if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

 38:   X = ksp->vec_sol;
 39:   B = ksp->vec_rhs;
 40:   M = ksp->work[0];
 41:   Z = ksp->work[1];
 42:   P = ksp->work[2];
 43:   N = ksp->work[3];
 44:   W = ksp->work[4];
 45:   Q = ksp->work[5];
 46:   U = ksp->work[6];
 47:   R = ksp->work[7];
 48:   S = ksp->work[8];

 50:   PCGetOperators(ksp->pc,&Amat,&Pmat);

 52:   ksp->its = 0;
 53:   if (!ksp->guess_zero) {
 54:     KSP_MatMult(ksp,Amat,X,R);  /*  r <- b - Ax  */
 55:     VecAYPX(R,-1.0,B);
 56:   } else {
 57:     VecCopy(B,R);               /*  r <- b (x is 0)  */
 58:   }

 60:   KSP_PCApply(ksp,R,U);         /*  u <- Br  */

 62:   switch (ksp->normtype) {
 63:   case KSP_NORM_PRECONDITIONED:
 64:     VecNormBegin(U,NORM_2,&dp); /*  dp <- u'*u = e'*A'*B'*B*A'*e'  */
 65:     VecNormBegin(B,NORM_2,&db);
 66:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 67:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 68:     VecNormEnd(U,NORM_2,&dp);
 69:     VecNormEnd(B,NORM_2,&db);
 70:     break;
 71:   case KSP_NORM_UNPRECONDITIONED:
 72:     VecNormBegin(R,NORM_2,&dp); /*  dp <- r'*r = e'*A'*A*e  */
 73:     VecNormBegin(B,NORM_2,&db);
 74:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 75:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 76:     VecNormEnd(R,NORM_2,&dp);
 77:     VecNormEnd(B,NORM_2,&db);
 78:     break;
 79:   case KSP_NORM_NATURAL:
 80:     VecDotBegin(R,U,&gamma);    /*  gamma <- u'*r  */
 81:     VecNormBegin(B,NORM_2,&db);
 82:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 83:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 84:     VecDotEnd(R,U,&gamma);
 85:     VecNormEnd(B,NORM_2,&db);
 86:     KSPCheckDot(ksp,gamma);
 87:     dp = PetscSqrtReal(PetscAbsScalar(gamma));       /*  dp <- r'*u = r'*B*r = e'*A'*B*A*e  */
 88:     break;
 89:   case KSP_NORM_NONE:
 90:     KSP_MatMult(ksp,Amat,U,W);
 91:     dp   = 0.0;
 92:     break;
 93:   default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
 94:   }
 95:   KSPLogResidualHistory(ksp,dp);
 96:   KSPMonitor(ksp,0,dp);
 97:   ksp->rnorm = dp;
 98:   (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /*  test for convergence  */
 99:   if (ksp->reason) return(0);

101:   MatNorm(Amat,NORM_INFINITY,&Anorm);
102:   VecGetSize(B,&nsize);
103:   nsi = (PetscReal) nsize;
104:   sqn = PetscSqrtReal(nsi);

106:   do {
107:     if (i > 1) {
108:       pnp = dpp;
109:       snp = ds;
110:       qnp = dq;
111:       znp = dz;
112:     }
113:     if (i > 0) {
114:       rnp = dp;
115:       unp = du;
116:       wnp = dw;
117:       xnp = dx;
118:       alphap = alpha;
119:       betap = beta;
120:     }

122:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
123:       VecNormBegin(R,NORM_2,&dp);
124:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
125:       VecNormBegin(U,NORM_2,&dp);
126:     }
127:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
128:       VecDotBegin(R,U,&gamma);
129:     }
130:     VecDotBegin(W,U,&delta);

132:     if (i > 0) {
133:       VecNormBegin(S,NORM_2,&ds);
134:       VecNormBegin(Z,NORM_2,&dz);
135:       VecNormBegin(P,NORM_2,&dpp);
136:       VecNormBegin(Q,NORM_2,&dq);
137:       VecNormBegin(M,NORM_2,&dm);
138:     }
139:     VecNormBegin(X,NORM_2,&dx);
140:     VecNormBegin(U,NORM_2,&du);
141:     VecNormBegin(W,NORM_2,&dw);

143:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
144:     KSP_PCApply(ksp,W,M);           /*   m <- Bw       */
145:     KSP_MatMult(ksp,Amat,M,N);      /*   n <- Am       */

147:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
148:       VecNormEnd(R,NORM_2,&dp);
149:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
150:       VecNormEnd(U,NORM_2,&dp);
151:     }
152:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
153:       VecDotEnd(R,U,&gamma);
154:     }
155:     VecDotEnd(W,U,&delta);

157:     if (i > 0) {
158:       VecNormEnd(S,NORM_2,&ds);
159:       VecNormEnd(Z,NORM_2,&dz);
160:       VecNormEnd(P,NORM_2,&dpp);
161:       VecNormEnd(Q,NORM_2,&dq);
162:       VecNormEnd(M,NORM_2,&dm);
163:     }
164:     VecNormEnd(X,NORM_2,&dx);
165:     VecNormEnd(U,NORM_2,&du);
166:     VecNormEnd(W,NORM_2,&dw);

168:     if (i > 0) {
169:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
170:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

172:       ksp->rnorm = dp;
173:       KSPLogResidualHistory(ksp,dp);
174:       KSPMonitor(ksp,i,dp);
175:       (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
176:       if (ksp->reason) return(0);
177:     }

179:     if (i == 0) {
180:       alpha = gamma / delta;
181:       VecCopy(N,Z);          /*  z <- n  */
182:       VecCopy(M,Q);          /*  q <- m  */
183:       VecCopy(U,P);          /*  p <- u  */
184:       VecCopy(W,S);          /*  s <- w  */
185:     } else {
186:       beta = gamma / gammaold;
187:       alpha = gamma / (delta - beta / alpha * gamma);
188:       VecAYPX(Z,beta,N);     /*  z <- n + beta * z  */
189:       VecAYPX(Q,beta,M);     /*  q <- m + beta * q  */
190:       VecAYPX(P,beta,U);     /*  p <- u + beta * p  */
191:       VecAYPX(S,beta,W);     /*  s <- w + beta * s  */
192:     }
193:     VecAXPY(X, alpha,P);     /*  x <- x + alpha * p  */
194:     VecAXPY(U,-alpha,Q);     /*  u <- u - alpha * q  */
195:     VecAXPY(W,-alpha,Z);     /*  w <- w - alpha * z  */
196:     VecAXPY(R,-alpha,S);     /*  r <- r - alpha * s  */
197:     gammaold = gamma;

199:     if (i > 0) {
200:       errncr = PetscSqrtReal(Anorm*xnp+2.0*Anorm*PetscAbsScalar(alphap)*dpp+rnp+2.0*PetscAbsScalar(alphap)*ds)*eps;
201:       errncw = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(alphap)*dq+wnp+2.0*PetscAbsScalar(alphap)*dz)*eps;
202:     }
203:     if (i > 1) {
204:       errncs = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(betap)*pnp+wnp+2.0*PetscAbsScalar(betap)*snp)*eps;
205:       errncz = PetscSqrtReal((mnz*sqn+2)*Anorm*dm+2.0*Anorm*PetscAbsScalar(betap)*qnp+2.0*PetscAbsScalar(betap)*znp)*eps;
206:     }

208:     if (i > 0) {
209:       if (i == 1) {
210:         errr = PetscSqrtReal((mnz*sqn+1)*Anorm*xnp+db)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dpp)*eps+errncr;
211:         errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
212:         errw = PetscSqrtReal(mnz*sqn*Anorm*unp)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dq)*eps+errncw;
213:         errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
214:       } else if (replace == 1) {
215:         errrprev = errr;
216:         errr = PetscSqrtReal((mnz*sqn+1)*Anorm*dx+db)*eps;
217:         errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
218:         errw = PetscSqrtReal(mnz*sqn*Anorm*du)*eps;
219:         errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
220:         replace = 0;
221:       } else {
222:         errrprev = errr;
223:         errr = errr+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errs+PetscAbsScalar(alphap)*errw+errncr+PetscAbsScalar(alphap)*errncs;
224:         errs = errw+PetscAbsScalar(betap)*errs+errncs;
225:         errw = errw+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errz+errncw+PetscAbsScalar(alphap)*errncz;
226:         errz = PetscAbsScalar(betap)*errz+errncz;
227:       }
228:       if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
229:         KSP_MatMult(ksp,Amat,X,R);        /*  r <- Ax - b  */
230:         VecAYPX(R,-1.0,B);
231:         KSP_PCApply(ksp,R,U);             /*  u <- Br  */
232:         KSP_MatMult(ksp,Amat,U,W);        /*  w <- Au  */
233:         KSP_MatMult(ksp,Amat,P,S);        /*  s <- Ap  */
234:         KSP_PCApply(ksp,S,Q);             /*  q <- Bs  */
235:         KSP_MatMult(ksp,Amat,Q,Z);        /*  z <- Aq  */
236:         replace = 1;
237:         totreplaces++;
238:       }
239:     }

241:     i++;
242:     ksp->its = i;

244:   } while (i<=ksp->max_it);
245:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
246:   return(0);
247: }


250: /*MC
251:    KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements.

253:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG.  The
254:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

256:    KSPPIPECGRR improves the robustness of KSPPIPECG by adding an automated residual replacement strategy.
257:    True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
258:    iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.

260:    See also KSPPIPECG, which is identical to KSPPIPECGRR without residual replacements.
261:    See also KSPPIPECR, where the reduction is only overlapped with the matrix-vector product.

263:    Level: intermediate

265:    Notes:
266:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
267:    performance of pipelined methods. See the FAQ on the PETSc website for details.

269:    Contributed by:
270:    Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
271:    European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)

273:    Reference:
274:    S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
275:    propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
276:    SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.

278: .seealso: KSPCreate(), KSPSetType(), KSPPIPECR, KSPGROPPCG, KSPPIPECG, KSPPGMRES, KSPCG, KSPPIPEBCGS, KSPCGUseSingleReduction()
279: M*/
280: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
281: {

285:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
286:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
287:   KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
288:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);

290:   ksp->ops->setup          = KSPSetUp_PIPECGRR;
291:   ksp->ops->solve          = KSPSolve_PIPECGRR;
292:   ksp->ops->destroy        = KSPDestroyDefault;
293:   ksp->ops->view           = NULL;
294:   ksp->ops->setfromoptions = NULL;
295:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
296:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
297:   return(0);
298: }