Actual source code: ifbcgs.c
petsc-3.3-p5 2012-12-01
2: /*
3: This file implements improved flexible BiCGStab contributed by Jie Chen.
4: It can almost certainly supercede fbcgs.c.
5: Only right preconditioning is supported.
6: */
7: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/
11: PetscErrorCode KSPSetUp_IFBCGS(KSP ksp)
12: {
14:
16: KSPDefaultGetWork(ksp,8);
17: return(0);
18: }
20: #include <petsc-private/pcimpl.h> /*I "petscksp.h" I*/
23: PetscErrorCode KSPSolve_IFBCGS(KSP ksp)
24: {
25: PetscErrorCode ierr;
26: PetscInt i,j,N;
27: PetscScalar tau,sigma,alpha,omega,beta;
28: PetscScalar xi1,xi2,xi3,xi4;
29: PetscReal rho;
30: Vec X,B,P,P2,RP,R,V,S,T,S2;
31: PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
32: PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
33: PetscScalar insums[4],outsums[4];
34: KSP_BCGS *bcgs = (KSP_BCGS*)ksp->data;
35: PC pc;
38: if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors");
39: VecGetLocalSize(ksp->vec_sol,&N);
41: X = ksp->vec_sol;
42: B = ksp->vec_rhs;
43: P2 = ksp->work[0];
45: /* The followings are involved in modified inner product calculations and vector updates */
46: RP = ksp->work[1]; VecGetArray(RP,(PetscScalar**)&rp);
47: R = ksp->work[2]; VecGetArray(R,(PetscScalar**)&r);
48: P = ksp->work[3]; VecGetArray(P,(PetscScalar**)&p);
49: V = ksp->work[4]; VecGetArray(V,(PetscScalar**)&v);
50: S = ksp->work[5]; VecGetArray(S,(PetscScalar**)&s);
51: T = ksp->work[6]; VecGetArray(T,(PetscScalar**)&t);
52: S2 = ksp->work[7]; VecGetArray(S2,(PetscScalar**)&s2);
54: /* Only supports right preconditioning */
55: if (ksp->pc_side != PC_RIGHT)
56: SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"KSP ifbcgs does not support %s",PCSides[ksp->pc_side]);
57: if (!ksp->guess_zero) {
58: if (!bcgs->guess) {
59: VecDuplicate(X,&bcgs->guess);
60: }
61: VecCopy(X,bcgs->guess);
62: } else {
63: VecSet(X,0.0);
64: }
66: /* Compute initial residual */
67: KSPGetPC(ksp,&pc);
68: if (pc->setupcalled < 2) { PCSetUp(pc); } /* really needed? */
69: if (!ksp->guess_zero) {
70: MatMult(pc->mat,X,P2); /* P2 is used as temporary storage */
71: VecCopy(B,R);
72: VecAXPY(R,-1.0,P2);
73: }
74: else {
75: VecCopy(B,R);
76: }
78: /* Test for nothing to do */
79: if (ksp->normtype != KSP_NORM_NONE) {
80: VecNorm(R,NORM_2,&rho);
81: }
82: PetscObjectTakeAccess(ksp);
83: ksp->its = 0;
84: ksp->rnorm = rho;
85: PetscObjectGrantAccess(ksp);
86: KSPLogResidualHistory(ksp,rho);
87: KSPMonitor(ksp,0,rho);
88: (*ksp->converged)(ksp,0,rho,&ksp->reason,ksp->cnvP);
89: if (ksp->reason) return(0);
91: /* Initialize iterates */
92: VecCopy(R,RP); /* rp <- r */
93: VecCopy(R,P); /* p <- r */
95: /* Big loop */
96: for (i=0; i<ksp->max_it; i++) {
98: /* matmult and pc */
99: if (pc->setupcalled < 2) { PCSetUp(pc); } /* really needed? */
100: PCApply(pc,P,P2); /* p2 <- K p */
101: MatMult(pc->mat,P2,V); /* v <- A p2 */
103: /* inner prodcuts */
104: if (i==0) {
105: tau = rho*rho;
106: VecDot(V,RP,&sigma); /* sigma <- (v,rp) */
107: }
108: else {
109: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
110: tau = sigma = 0.0;
111: for (j=0; j<N; j++) {
112: tau += r[j]*rp[j]; /* tau <- (r,rp) */
113: sigma += v[j]*rp[j]; /* sigma <- (v,rp) */
114: }
115: PetscLogFlops(4.0*N);
116: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
117: insums[0] = tau;
118: insums[1] = sigma;
119: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
120: MPI_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);
121: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
122: tau = outsums[0];
123: sigma = outsums[1];
124: }
126: /* scalar update */
127: alpha = tau / sigma;
129: /* vector update */
130: VecWAXPY(S,-alpha,V,R); /* s <- r - alpha v */
132: /* matmult and pc */
133: PCApply(pc,S,S2); /* s2 <- K s */
134: MatMult(pc->mat,S2,T); /* t <- A s2 */
136: /* inner prodcuts */
137: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
138: xi1 = xi2 = xi3 = xi4 = 0.0;
139: for (j=0; j<N; j++) {
140: xi1 += s[j]*s[j]; /* xi1 <- (s,s) */
141: xi2 += t[j]*s[j]; /* xi2 <- (t,s) */
142: xi3 += t[j]*t[j]; /* xi3 <- (t,t) */
143: xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */
144: }
145: PetscLogFlops(8.0*N);
146: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
147: insums[0] = xi1;
148: insums[1] = xi2;
149: insums[2] = xi3;
150: insums[3] = xi4;
151: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
152: MPI_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);
153: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
154: xi1 = outsums[0];
155: xi2 = outsums[1];
156: xi3 = outsums[2];
157: xi4 = outsums[3];
159: /* test denominator */
160: if (xi3 == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero");
161: if (sigma == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero");
163: /* scalar updates */
164: omega = xi2 / xi3;
165: beta = - xi4 / sigma;
166: rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */
168: /* vector updates */
169: VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2); /* x <- alpha * p2 + omega * s2 + x */
171: /* convergence test */
172: PetscObjectTakeAccess(ksp);
173: ksp->its++;
174: ksp->rnorm = rho;
175: PetscObjectGrantAccess(ksp);
176: KSPLogResidualHistory(ksp,rho);
177: KSPMonitor(ksp,i+1,rho);
178: (*ksp->converged)(ksp,i+1,rho,&ksp->reason,ksp->cnvP);
179: if (ksp->reason) break;
181: /* vector updates */
182: PetscLogEventBegin(VEC_Ops,0,0,0,0);
183: for (j=0; j<N; j++) {
184: r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
185: p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
186: }
187: PetscLogFlops(6.0*N);
188: PetscLogEventEnd(VEC_Ops,0,0,0,0);
190: }
192: VecRestoreArray(RP,(PetscScalar**)&rp);
193: VecRestoreArray(R,(PetscScalar**)&r);
194: VecRestoreArray(P,(PetscScalar**)&p);
195: VecRestoreArray(V,(PetscScalar**)&v);
196: VecRestoreArray(S,(PetscScalar**)&s);
197: VecRestoreArray(T,(PetscScalar**)&t);
198: VecRestoreArray(S2,(PetscScalar**)&s2);
200: if (i >= ksp->max_it) {
201: ksp->reason = KSP_DIVERGED_ITS;
202: }
204: return(0);
205: }
207: /*MC
208: KSPIFBCGS - Implements the improved flexible BiCGStab (Stabilized version of BiConjugate Gradient Squared) method.
210: Options Database Keys:
211: . see KSPSolve()
213: Level: beginner
215: Notes: Only supports right preconditioning
217: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGSL, KSPSetPCSide()
218: M*/
219: EXTERN_C_BEGIN
222: PetscErrorCode KSPCreate_IFBCGS(KSP ksp)
223: {
225: KSP_BCGS *bcgs;
228: PetscNewLog(ksp,KSP_BCGS,&bcgs);
229: ksp->data = bcgs;
230: ksp->ops->setup = KSPSetUp_IFBCGS;
231: ksp->ops->solve = KSPSolve_IFBCGS;
232: ksp->ops->destroy = KSPDestroy_BCGS;
233: ksp->ops->reset = KSPReset_BCGS;
234: ksp->ops->buildsolution = KSPBuildSolution_BCGS;
235: ksp->ops->buildresidual = KSPDefaultBuildResidual;
236: ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
237: ksp->ops->view = KSPView_BCGS;
238: ksp->pc_side = PC_RIGHT; /* set default PC side */
239:
240: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
241: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
242: return(0);
243: }
244: EXTERN_C_END