Actual source code: pipebcgs.c

petsc-master 2019-06-19
Report Typos and Errors

  2: /*
  3:     This file implements pipelined BiCGStab (pipe-BiCGStab).
  4:     Only allow right preconditioning.
  5: */
  6:  #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>

  8: static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
  9: {

 13:   KSPSetWorkVecs(ksp,15);
 14:   return(0);
 15: }

 17: /* Only need a few hacks from KSPSolve_BCGS */
 18:  #include <petsc/private/pcimpl.h>
 19: static PetscErrorCode  KSPSolve_PIPEBCGS(KSP ksp)
 20: {
 22:   PetscInt       i;
 23:   PetscScalar    rho,rhoold,alpha,beta,omega=0.0,d1,d2,d3;
 24:   Vec            X,B,S,R,RP,Y,Q,P2,Q2,R2,S2,W,Z,W2,Z2,T,V;
 25:   PetscReal      dp    = 0.0;
 26:   KSP_BCGS       *bcgs = (KSP_BCGS*)ksp->data;
 27:   PC             pc;

 30:   X  = ksp->vec_sol;
 31:   B  = ksp->vec_rhs;
 32:   R  = ksp->work[0];
 33:   RP = ksp->work[1];
 34:   S  = ksp->work[2];
 35:   Y  = ksp->work[3];
 36:   Q  = ksp->work[4];
 37:   Q2 = ksp->work[5];
 38:   P2 = ksp->work[6];
 39:   R2 = ksp->work[7];
 40:   S2 = ksp->work[8];
 41:   W  = ksp->work[9];
 42:   Z  = ksp->work[10];
 43:   W2 = ksp->work[11];
 44:   Z2 = ksp->work[12];
 45:   T  = ksp->work[13];
 46:   V  = ksp->work[14];
 47: 
 48:   /* Only supports right preconditioning */
 49:   if (ksp->pc_side != PC_RIGHT) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"KSP pipebcgs does not support %s",PCSides[ksp->pc_side]);
 50:   if (!ksp->guess_zero) {
 51:     if (!bcgs->guess) {
 52:       VecDuplicate(X,&bcgs->guess);
 53:     }
 54:     VecCopy(X,bcgs->guess);
 55:   } else {
 56:     VecSet(X,0.0);
 57:   }

 59:   /* Compute initial residual */
 60:   KSPGetPC(ksp,&pc);
 61:   PCSetUp(pc);
 62:   if (!ksp->guess_zero) {
 63:     KSP_MatMult(ksp,pc->mat,X,Q2);
 64:     VecCopy(B,R);
 65:     VecAXPY(R,-1.0,Q2);
 66:   } else {
 67:     VecCopy(B,R);
 68:   }

 70:   /* Test for nothing to do */
 71:   if (ksp->normtype != KSP_NORM_NONE) {
 72:     VecNorm(R,NORM_2,&dp);
 73:   }
 74:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
 75:   ksp->its   = 0;
 76:   ksp->rnorm = dp;
 77:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
 78:   KSPLogResidualHistory(ksp,dp);
 79:   KSPMonitor(ksp,0,dp);
 80:   (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);
 81:   if (ksp->reason) return(0);

 83:   /* Initialize */
 84:   VecCopy(R,RP); /* rp <- r */
 85: 
 86:   VecDotBegin(R,RP,&rho); /* rho <- (r,rp) */
 87:   PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 88:   KSP_PCApply(ksp,R,R2); /* r2 <- K r */
 89:   KSP_MatMult(ksp,pc->mat,R2,W); /* w <- A r2 */
 90:   VecDotEnd(R,RP,&rho);
 91: 
 92:   VecDotBegin(W,RP,&d2); /* d2 <- (w,rp) */
 93:   PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W));
 94:   KSP_PCApply(ksp,W,W2); /* w2 <- K w */
 95:   KSP_MatMult(ksp,pc->mat,W2,T); /* t <- A w2 */
 96:   VecDotEnd(W,RP,&d2);

 98:   alpha = rho/d2;
 99:   beta = 0.0;
100: 
101:   /* Main loop */
102:   i=0;
103:   do {
104:     if (i == 0) {
105:       VecCopy(R2,P2); /* p2 <- r2 */
106:       VecCopy(W,S);   /* s  <- w  */
107:       VecCopy(W2,S2); /* s2 <- w2 */
108:       VecCopy(T,Z);   /* z  <- t  */
109:     } else {
110:       VecAXPBYPCZ(P2,1.0,-beta*omega,beta,R2,S2); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
111:       VecAXPBYPCZ(S,1.0,-beta*omega,beta,W,Z);    /* s  <- beta * s  + w  - beta * omega * z  */
112:       VecAXPBYPCZ(S2,1.0,-beta*omega,beta,W2,Z2); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
113:       VecAXPBYPCZ(Z,1.0,-beta*omega,beta,T,V);    /* z  <- beta * z  + t  - beta * omega * v  */
114:     }
115:     VecWAXPY(Q,-alpha,S,R);    /* q  <- r  - alpha s  */
116:     VecWAXPY(Q2,-alpha,S2,R2); /* q2 <- r2 - alpha s2 */
117:     VecWAXPY(Y,-alpha,Z,W);    /* y  <- w  - alpha z  */
118: 
119:     VecDotBegin(Q,Y,&d1); /* d1 <- (q,y) */
120:     VecDotBegin(Y,Y,&d2); /* d2 <- (y,y) */
121: 
122:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q));
123:     KSP_PCApply(ksp,Z,Z2); /* z2 <- K z */
124:     KSP_MatMult(ksp,pc->mat,Z2,V); /* v <- A z2 */
125: 
126:     VecDotEnd(Q,Y,&d1);
127:     VecDotEnd(Y,Y,&d2);
128: 
129:     if (d2 == 0.0) {
130:       /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
131:       VecDot(Q,Q,&d1);
132:       if (d1 != 0.0) {
133:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
134:         break;
135:       }
136:       VecAXPY(X,alpha,P2);   /* x <- x + alpha p2 */
137:       PetscObjectSAWsTakeAccess((PetscObject)ksp);
138:       ksp->its++;
139:       ksp->rnorm  = 0.0;
140:       ksp->reason = KSP_CONVERGED_RTOL;
141:       PetscObjectSAWsGrantAccess((PetscObject)ksp);
142:       KSPLogResidualHistory(ksp,dp);
143:       KSPMonitor(ksp,i+1,0.0);
144:       break;
145:     }
146:     omega = d1/d2; /* omega <- (y'q) / (y'y) */
147:     VecAXPBYPCZ(X,alpha,omega,1.0,P2,Q2); /* x <- alpha * p2 + omega * q2 + x */
148:     VecWAXPY(R,-omega,Y,Q);    /* r <- q - omega y */
149:     VecWAXPY(R2,-alpha,Z2,W2); /* r2 <- w2 - alpha z2 */
150:     VecAYPX(R2,-omega,Q2);     /* r2 <- q2 - omega r2 */
151:     VecWAXPY(W,-alpha,V,T);    /* w <- t - alpha v */
152:     VecAYPX(W,-omega,Y);       /* w <- y - omega w */
153:     rhoold = rho;
154: 
155:     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i+2) {
156:       VecNormBegin(R,NORM_2,&dp); /* dp <- norm(r) */
157:     }
158:     VecDotBegin(R,RP,&rho); /* rho <- (r,rp) */
159:     VecDotBegin(S,RP,&d1);  /* d1 <- (s,rp) */
160:     VecDotBegin(W,RP,&d2);  /* d2 <- (w,rp) */
161:     VecDotBegin(Z,RP,&d3);  /* d3 <- (z,rp) */
162: 
163:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
164:     KSP_PCApply(ksp,W,W2); /* w2 <- K w */
165:     KSP_MatMult(ksp,pc->mat,W2,T); /* t <- A w2 */
166: 
167:     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i+2) {
168:       VecNormEnd(R,NORM_2,&dp);
169:     }
170:     VecDotEnd(R,RP,&rho);
171:     VecDotEnd(S,RP,&d1);
172:     VecDotEnd(W,RP,&d2);
173:     VecDotEnd(Z,RP,&d3);
174: 
175:     if (d2 + beta * d1 - beta * omega * d3 == 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Divide by zero");
176: 
177:     beta = (rho/rhoold) * (alpha/omega);
178:     alpha = rho/(d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */
179: 
180:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
181:     ksp->its++;
182: 
183:     /* Residual replacement step  */
184:     if (i > 0 && i%100 == 0 && i < 1001) {
185:       KSP_MatMult(ksp,pc->mat,X,R);
186:       VecAYPX(R,-1.0,B);              /* r  <- b - Ax */
187:       KSP_PCApply(ksp,R,R2);          /* r2 <- K r */
188:       KSP_MatMult(ksp,pc->mat,R2,W);  /* w  <- A r2 */
189:       KSP_PCApply(ksp,W,W2);          /* w2 <- K w */
190:       KSP_MatMult(ksp,pc->mat,W2,T);  /* t  <- A w2 */
191:       KSP_MatMult(ksp,pc->mat,P2,S);  /* s  <- A p2 */
192:       KSP_PCApply(ksp,S,S2);          /* s2 <- K s */
193:       KSP_MatMult(ksp,pc->mat,S2,Z);  /* z  <- A s2 */
194:       KSP_PCApply(ksp,Z,Z2);          /* z2 <- K z */
195:       KSP_MatMult(ksp,pc->mat,Z2,V);  /* v  <- A z2 */
196:     }
197: 
198:     ksp->rnorm = dp;
199:     PetscObjectSAWsGrantAccess((PetscObject)ksp);
200:     KSPLogResidualHistory(ksp,dp);
201:     KSPMonitor(ksp,i+1,dp);
202:     (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);
203:     if (ksp->reason) break;
204:     if (rho == 0.0) {
205:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
206:       break;
207:     }
208:     i++;
209:   } while (i<ksp->max_it);

211:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
212:   return(0);
213: }

215: /*MC
216:     KSPPIPEBCGS - Implements the pipelined BiCGStab method.
217:     
218:     This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard FBCGS.  The
219:     non-blocking reductions are overlapped by matrix-vector products and preconditioner applications. 
220:     
221:     Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.

223:     Options Database Keys:
224: .see KSPSolve()

226:     Level: intermediate

228:     Notes:
229:     Like KSPFBCGS, the KSPPIPEBCGS implementation only allows for right preconditioning.
230:     MPI configuration may be necessary for reductions to make asynchronous progress, which is important for 
231:     performance of pipelined methods. See the FAQ on the PETSc website for details.
232:         
233:     Contributed by:
234:     Siegfried Cools, Universiteit Antwerpen, 
235:     EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.
236:     
237:     Reference:
238:     S. Cools and W. Vanroose,
239:     "The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems",
240:     Parallel Computing, 65:1-20, 2017.

242: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGS, KSPFBCGSL, KSPSetPCSide()
243: M*/
244: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
245: {
247:   KSP_BCGS       *bcgs;

250:   PetscNewLog(ksp,&bcgs);

252:   ksp->data                = bcgs;
253:   ksp->ops->setup          = KSPSetUp_PIPEBCGS;
254:   ksp->ops->solve          = KSPSolve_PIPEBCGS;
255:   ksp->ops->destroy        = KSPDestroy_BCGS;
256:   ksp->ops->reset          = KSPReset_BCGS;
257:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
258:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;

260:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
261:   return(0);
262: }