Actual source code: ifbcgs.c

petsc-3.3-p5 2012-12-01
  2: /*
  3:     This file implements improved flexible BiCGStab contributed by Jie Chen.
  4:     It can almost certainly supercede fbcgs.c.
  5:     Only right preconditioning is supported. 
  6: */
  7: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>       /*I  "petscksp.h"  I*/

 11: PetscErrorCode KSPSetUp_IFBCGS(KSP ksp)
 12: {
 14: 
 16:   KSPDefaultGetWork(ksp,8);
 17:   return(0);
 18: }

 20: #include <petsc-private/pcimpl.h>            /*I "petscksp.h" I*/
 23: PetscErrorCode  KSPSolve_IFBCGS(KSP ksp)
 24: {
 25:   PetscErrorCode    ierr;
 26:   PetscInt          i,j,N;
 27:   PetscScalar       tau,sigma,alpha,omega,beta;
 28:   PetscScalar       xi1,xi2,xi3,xi4;
 29:   PetscReal         rho;
 30:   Vec               X,B,P,P2,RP,R,V,S,T,S2;
 31:   PetscScalar       *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
 32:   PetscScalar       *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
 33:   PetscScalar       insums[4],outsums[4];
 34:   KSP_BCGS          *bcgs = (KSP_BCGS*)ksp->data;
 35:   PC                pc;

 38:   if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors");
 39:   VecGetLocalSize(ksp->vec_sol,&N);

 41:   X   = ksp->vec_sol;
 42:   B   = ksp->vec_rhs;
 43:   P2  = ksp->work[0];

 45:   /* The followings are involved in modified inner product calculations and vector updates */
 46:   RP  = ksp->work[1]; VecGetArray(RP,(PetscScalar**)&rp);
 47:   R   = ksp->work[2]; VecGetArray(R,(PetscScalar**)&r);
 48:   P   = ksp->work[3]; VecGetArray(P,(PetscScalar**)&p);
 49:   V   = ksp->work[4]; VecGetArray(V,(PetscScalar**)&v);
 50:   S   = ksp->work[5]; VecGetArray(S,(PetscScalar**)&s);
 51:   T   = ksp->work[6]; VecGetArray(T,(PetscScalar**)&t);
 52:   S2  = ksp->work[7]; VecGetArray(S2,(PetscScalar**)&s2);

 54:   /* Only supports right preconditioning */
 55:   if (ksp->pc_side != PC_RIGHT)
 56:     SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"KSP ifbcgs does not support %s",PCSides[ksp->pc_side]);
 57:   if (!ksp->guess_zero) {
 58:     if (!bcgs->guess) {
 59:       VecDuplicate(X,&bcgs->guess);
 60:     }
 61:     VecCopy(X,bcgs->guess);
 62:   } else {
 63:     VecSet(X,0.0);
 64:   }

 66:   /* Compute initial residual */
 67:   KSPGetPC(ksp,&pc);
 68:   if (pc->setupcalled < 2) { PCSetUp(pc); } /* really needed? */
 69:   if (!ksp->guess_zero) {
 70:     MatMult(pc->mat,X,P2); /* P2 is used as temporary storage */
 71:     VecCopy(B,R);
 72:     VecAXPY(R,-1.0,P2);
 73:   }
 74:   else {
 75:     VecCopy(B,R);
 76:   }

 78:   /* Test for nothing to do */
 79:   if (ksp->normtype != KSP_NORM_NONE) {
 80:     VecNorm(R,NORM_2,&rho);
 81:   }
 82:   PetscObjectTakeAccess(ksp);
 83:   ksp->its   = 0;
 84:   ksp->rnorm = rho;
 85:   PetscObjectGrantAccess(ksp);
 86:   KSPLogResidualHistory(ksp,rho);
 87:   KSPMonitor(ksp,0,rho);
 88:   (*ksp->converged)(ksp,0,rho,&ksp->reason,ksp->cnvP);
 89:   if (ksp->reason) return(0);

 91:   /* Initialize iterates */
 92:   VecCopy(R,RP); /* rp <- r */
 93:   VecCopy(R,P); /* p <- r */

 95:   /* Big loop */
 96:   for (i=0; i<ksp->max_it; i++) {

 98:     /* matmult and pc */
 99:     if (pc->setupcalled < 2) { PCSetUp(pc); } /* really needed? */
100:     PCApply(pc,P,P2); /* p2 <- K p */
101:     MatMult(pc->mat,P2,V); /* v <- A p2 */

103:     /* inner prodcuts */
104:     if (i==0) {
105:       tau = rho*rho;
106:       VecDot(V,RP,&sigma); /* sigma <- (v,rp) */
107:     }
108:     else {
109:       PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
110:       tau = sigma = 0.0;
111:       for (j=0; j<N; j++) {
112:         tau += r[j]*rp[j]; /* tau <- (r,rp) */
113:         sigma += v[j]*rp[j]; /* sigma <- (v,rp) */
114:       }
115:       PetscLogFlops(4.0*N);
116:       PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
117:       insums[0] = tau;
118:       insums[1] = sigma;
119:       PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
120:       MPI_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);
121:       PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
122:       tau = outsums[0];
123:       sigma = outsums[1];
124:     }

126:     /* scalar update */
127:     alpha = tau / sigma;

129:     /* vector update */
130:     VecWAXPY(S,-alpha,V,R);  /* s <- r - alpha v */

132:     /* matmult and pc */
133:     PCApply(pc,S,S2); /* s2 <- K s */
134:     MatMult(pc->mat,S2,T); /* t <- A s2 */

136:     /* inner prodcuts */
137:     PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
138:     xi1 = xi2 = xi3 = xi4 = 0.0;
139:     for (j=0; j<N; j++) {
140:       xi1 += s[j]*s[j]; /* xi1 <- (s,s) */
141:       xi2 += t[j]*s[j]; /* xi2 <- (t,s) */
142:       xi3 += t[j]*t[j]; /* xi3 <- (t,t) */
143:       xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */
144:     }
145:     PetscLogFlops(8.0*N);
146:     PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
147:     insums[0] = xi1;
148:     insums[1] = xi2;
149:     insums[2] = xi3;
150:     insums[3] = xi4;
151:     PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
152:     MPI_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);
153:     PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
154:     xi1 = outsums[0];
155:     xi2 = outsums[1];
156:     xi3 = outsums[2];
157:     xi4 = outsums[3];

159:     /* test denominator */
160:     if (xi3 == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero");
161:     if (sigma == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero");

163:     /* scalar updates */
164:     omega = xi2 / xi3;
165:     beta = - xi4 / sigma;
166:     rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */

168:     /* vector updates */
169:     VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2); /* x <- alpha * p2 + omega * s2 + x */

171:     /* convergence test */
172:     PetscObjectTakeAccess(ksp);
173:     ksp->its++;
174:     ksp->rnorm = rho;
175:     PetscObjectGrantAccess(ksp);
176:     KSPLogResidualHistory(ksp,rho);
177:     KSPMonitor(ksp,i+1,rho);
178:     (*ksp->converged)(ksp,i+1,rho,&ksp->reason,ksp->cnvP);
179:     if (ksp->reason) break;

181:     /* vector updates */
182:     PetscLogEventBegin(VEC_Ops,0,0,0,0);
183:     for (j=0; j<N; j++) {
184:       r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
185:       p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
186:     }
187:     PetscLogFlops(6.0*N);
188:     PetscLogEventEnd(VEC_Ops,0,0,0,0);

190:   }

192:   VecRestoreArray(RP,(PetscScalar**)&rp);
193:   VecRestoreArray(R,(PetscScalar**)&r);
194:   VecRestoreArray(P,(PetscScalar**)&p);
195:   VecRestoreArray(V,(PetscScalar**)&v);
196:   VecRestoreArray(S,(PetscScalar**)&s);
197:   VecRestoreArray(T,(PetscScalar**)&t);
198:   VecRestoreArray(S2,(PetscScalar**)&s2);

200:   if (i >= ksp->max_it) {
201:     ksp->reason = KSP_DIVERGED_ITS;
202:   }

204:   return(0);
205: }

207: /*MC
208:      KSPIFBCGS - Implements the improved flexible BiCGStab (Stabilized version of BiConjugate Gradient Squared) method.

210:    Options Database Keys:
211: .   see KSPSolve()

213:    Level: beginner

215:    Notes: Only supports right preconditioning 

217: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGSL, KSPSetPCSide()
218: M*/
219: EXTERN_C_BEGIN
222: PetscErrorCode  KSPCreate_IFBCGS(KSP ksp)
223: {
225:   KSP_BCGS       *bcgs;

228:   PetscNewLog(ksp,KSP_BCGS,&bcgs);
229:   ksp->data                 = bcgs;
230:   ksp->ops->setup           = KSPSetUp_IFBCGS;
231:   ksp->ops->solve           = KSPSolve_IFBCGS;
232:   ksp->ops->destroy         = KSPDestroy_BCGS;
233:   ksp->ops->reset           = KSPReset_BCGS;
234:   ksp->ops->buildsolution   = KSPBuildSolution_BCGS;
235:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
236:   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGS;
237:   ksp->ops->view            = KSPView_BCGS;
238:   ksp->pc_side              = PC_RIGHT; /* set default PC side */
239: 
240:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
241:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
242:   return(0);
243: }
244: EXTERN_C_END