Actual source code: fbcgsr.c

petsc-3.5.1 2014-07-24
Report Typos and Errors
  2: /*
  3:     This file implements FBiCGStab-R.
  4:     Only allow right preconditioning.
  5:     FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
  6:       (1) There are fewer MPI_Allreduce calls.
  7:       (2) The convergence occasionally is much faster than that of FBiCGStab.
  8: */
  9: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>       /*I  "petscksp.h"  I*/
 10: #include <petsc-private/vecimpl.h>

 14: PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
 15: {

 19:   KSPSetWorkVecs(ksp,8);
 20:   return(0);
 21: }

 23: #include <petsc-private/pcimpl.h>            /*I "petscksp.h" I*/
 26: PetscErrorCode  KSPSolve_FBCGSR(KSP ksp)
 27: {
 28:   PetscErrorCode    ierr;
 29:   PetscInt          i,j,N;
 30:   PetscScalar       tau,sigma,alpha,omega,beta;
 31:   PetscReal         rho;
 32:   PetscScalar       xi1,xi2,xi3,xi4;
 33:   Vec               X,B,P,P2,RP,R,V,S,T,S2;
 34:   PetscScalar       *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
 35:   PetscScalar       *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
 36:   PetscScalar       insums[4],outsums[4];
 37:   KSP_BCGS          *bcgs = (KSP_BCGS*)ksp->data;
 38:   PC                pc;

 41:   if (!ksp->vec_rhs->petscnative) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Only coded for PETSc vectors");
 42:   VecGetLocalSize(ksp->vec_sol,&N);

 44:   X  = ksp->vec_sol;
 45:   B  = ksp->vec_rhs;
 46:   P2 = ksp->work[0];

 48:   /* The followings are involved in modified inner product calculations and vector updates */
 49:   RP = ksp->work[1]; VecGetArray(RP,(PetscScalar**)&rp); VecRestoreArray(RP,NULL);
 50:   R  = ksp->work[2]; VecGetArray(R,(PetscScalar**)&r);   VecRestoreArray(R,NULL);
 51:   P  = ksp->work[3]; VecGetArray(P,(PetscScalar**)&p);   VecRestoreArray(P,NULL);
 52:   V  = ksp->work[4]; VecGetArray(V,(PetscScalar**)&v);   VecRestoreArray(V,NULL);
 53:   S  = ksp->work[5]; VecGetArray(S,(PetscScalar**)&s);   VecRestoreArray(S,NULL);
 54:   T  = ksp->work[6]; VecGetArray(T,(PetscScalar**)&t);   VecRestoreArray(T,NULL);
 55:   S2 = ksp->work[7]; VecGetArray(S2,(PetscScalar**)&s2); VecRestoreArray(S2,NULL);

 57:   /* Only supports right preconditioning */
 58:   if (ksp->pc_side != PC_RIGHT) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"KSP fbcgsr does not support %s",PCSides[ksp->pc_side]);
 59:   if (!ksp->guess_zero) {
 60:     if (!bcgs->guess) {
 61:       VecDuplicate(X,&bcgs->guess);
 62:     }
 63:     VecCopy(X,bcgs->guess);
 64:   } else {
 65:     VecSet(X,0.0);
 66:   }

 68:   /* Compute initial residual */
 69:   KSPGetPC(ksp,&pc);
 70:   PCSetUp(pc);
 71:   if (!ksp->guess_zero) {
 72:     MatMult(pc->mat,X,P2); /* P2 is used as temporary storage */
 73:     VecCopy(B,R);
 74:     VecAXPY(R,-1.0,P2);
 75:   } else {
 76:     VecCopy(B,R);
 77:   }

 79:   /* Test for nothing to do */
 80:   if (ksp->normtype != KSP_NORM_NONE) {
 81:     VecNorm(R,NORM_2,&rho);
 82:   }
 83:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
 84:   ksp->its   = 0;
 85:   ksp->rnorm = rho;
 86:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
 87:   KSPLogResidualHistory(ksp,rho);
 88:   KSPMonitor(ksp,0,rho);
 89:   (*ksp->converged)(ksp,0,rho,&ksp->reason,ksp->cnvP);
 90:   if (ksp->reason) return(0);

 92:   /* Initialize iterates */
 93:   VecCopy(R,RP); /* rp <- r */
 94:   VecCopy(R,P); /* p <- r */

 96:   /* Big loop */
 97:   for (i=0; i<ksp->max_it; i++) {

 99:     /* matmult and pc */
100:     PCApply(pc,P,P2); /* p2 <- K p */
101:     MatMult(pc->mat,P2,V); /* v <- A p2 */

103:     /* inner prodcuts */
104:     if (i==0) {
105:       tau  = rho*rho;
106:       VecDot(V,RP,&sigma); /* sigma <- (v,rp) */
107:     } else {
108:       PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
109:       tau  = sigma = 0.0;
110:       for (j=0; j<N; j++) {
111:         tau   += r[j]*rp[j]; /* tau <- (r,rp) */
112:         sigma += v[j]*rp[j]; /* sigma <- (v,rp) */
113:       }
114:       PetscLogFlops(4.0*N);
115:       PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
116:       insums[0] = tau;
117:       insums[1] = sigma;
118:       PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));
119:       MPI_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
120:       PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));
121:       tau       = outsums[0];
122:       sigma     = outsums[1];
123:     }

125:     /* scalar update */
126:     alpha = tau / sigma;

128:     /* vector update */
129:     VecWAXPY(S,-alpha,V,R);  /* s <- r - alpha v */

131:     /* matmult and pc */
132:     PCApply(pc,S,S2); /* s2 <- K s */
133:     MatMult(pc->mat,S2,T); /* t <- A s2 */

135:     /* inner prodcuts */
136:     PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
137:     xi1  = xi2 = xi3 = xi4 = 0.0;
138:     for (j=0; j<N; j++) {
139:       xi1 += s[j]*s[j]; /* xi1 <- (s,s) */
140:       xi2 += t[j]*s[j]; /* xi2 <- (t,s) */
141:       xi3 += t[j]*t[j]; /* xi3 <- (t,t) */
142:       xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */
143:     }
144:     PetscLogFlops(8.0*N);
145:     PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);

147:     insums[0] = xi1;
148:     insums[1] = xi2;
149:     insums[2] = xi3;
150:     insums[3] = xi4;

152:     PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));
153:     MPI_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
154:     PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));
155:     xi1  = outsums[0];
156:     xi2  = outsums[1];
157:     xi3  = outsums[2];
158:     xi4  = outsums[3];

160:     /* test denominator */
161:     if (xi3 == 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Divide by zero");
162:     if (sigma == 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Divide by zero");

164:     /* scalar updates */
165:     omega = xi2 / xi3;
166:     beta  = -xi4 / sigma;
167:     rho   = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */

169:     /* vector updates */
170:     VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2); /* x <- alpha * p2 + omega * s2 + x */

172:     /* convergence test */
173:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
174:     ksp->its++;
175:     ksp->rnorm = rho;
176:     PetscObjectSAWsGrantAccess((PetscObject)ksp);
177:     KSPLogResidualHistory(ksp,rho);
178:     KSPMonitor(ksp,i+1,rho);
179:     (*ksp->converged)(ksp,i+1,rho,&ksp->reason,ksp->cnvP);
180:     if (ksp->reason) break;

182:     /* vector updates */
183:     PetscLogEventBegin(VEC_Ops,0,0,0,0);
184:     for (j=0; j<N; j++) {
185:       r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
186:       p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
187:     }
188:     PetscLogFlops(6.0*N);
189:     PetscLogEventEnd(VEC_Ops,0,0,0,0);

191:   }

193:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
194:   return(0);
195: }

197: /*MC
198:      KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab.

200:    Options Database Keys:
201: .   see KSPSolve()

203:    Level: beginner

205:    Notes: Only allow right preconditioning

207: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGSL, KSPSetPCSide()
208: M*/
211: PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
212: {
214:   KSP_BCGS       *bcgs;

217:   PetscNewLog(ksp,&bcgs);

219:   ksp->data                = bcgs;
220:   ksp->ops->setup          = KSPSetUp_FBCGSR;
221:   ksp->ops->solve          = KSPSolve_FBCGSR;
222:   ksp->ops->destroy        = KSPDestroy_BCGS;
223:   ksp->ops->reset          = KSPReset_BCGS;
224:   ksp->ops->buildsolution  = KSPBuildSolution_BCGS;
225:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
226:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
227:   ksp->pc_side             = PC_RIGHT; /* set default PC side */

229:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
230:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
231:   return(0);
232: }