Actual source code: ibcgs.c

petsc-3.3-p5 2012-12-01
  2: #include <petsc-private/kspimpl.h>

  6: static PetscErrorCode KSPSetUp_IBCGS(KSP ksp)
  7: {
  9:   PetscBool      diagonalscale;

 12:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
 13:   if (diagonalscale) SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
 14:   KSPDefaultGetWork(ksp,9);
 15:   return(0);
 16: }

 18: /* 
 19:     The code below "cheats" from PETSc style
 20:        1) VecRestoreArray() is called immediately after VecGetArray() and the array values are still accessed; the reason for the immediate
 21:           restore is that Vec operations are done on some of the vectors during the solve and if we did not restore immediately it would
 22:           generate two VecGetArray() (the second one inside the Vec operation) calls without a restore between them.
 23:        2) The vector operations on done directly on the arrays instead of with VecXXXX() calls

 25:        For clarity in the code we name single VECTORS with two names, for example, Rn_1 and R, but they actually always
 26:      the exact same memory. We do this with macro defines so that compiler won't think they are 
 27:      two different variables.

 29: */
 30: #define Xn_1 Xn
 31: #define xn_1 xn
 32: #define Rn_1 Rn
 33: #define rn_1 rn
 34: #define Un_1 Un
 35: #define un_1 un
 36: #define Vn_1 Vn
 37: #define vn_1 vn
 38: #define Qn_1 Qn
 39: #define qn_1 qn
 40: #define Zn_1 Zn
 41: #define zn_1 zn
 44: static PetscErrorCode  KSPSolve_IBCGS(KSP ksp)
 45: {
 47:   PetscInt       i,N;
 48:   PetscReal      rnorm,rnormin = 0.0;
 49: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
 50:   /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem 
 51:      on the same number of processes  with different runs) we support computing the inner products using Intel's 80 bit arithematic
 52:      rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
 53:      and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE.

 55:      Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double
 56:      precision into a 16 byte space (the rest of the space is ignored)  */
 57:   long double    insums[7],outsums[7];
 58: #else
 59:   PetscScalar    insums[7],outsums[7];
 60: #endif
 61:   PetscScalar    sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2;
 62:   PetscScalar    taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
 63:   const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
 64:   PetscScalar    *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
 65:   /* the rest do not have to keep n_1 values */
 66:   PetscScalar    kappan, thetan, etan, gamman, betan, deltan;
 67:   const PetscScalar *PETSC_RESTRICT tn;
 68:   PetscScalar    *PETSC_RESTRICT sn;
 69:   Vec            R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un;
 70:   Mat            A;

 73:   if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors");

 75:   PCGetOperators(ksp->pc,&A,PETSC_NULL,PETSC_NULL);
 76:   VecGetLocalSize(ksp->vec_sol,&N);
 77:   Xn = ksp->vec_sol;VecGetArray(Xn_1,(PetscScalar**)&xn_1);VecRestoreArray(Xn_1,PETSC_NULL);
 78:   B  = ksp->vec_rhs;VecGetArrayRead(B,(const PetscScalar**)&b);VecRestoreArrayRead(B,PETSC_NULL);
 79:   R0 = ksp->work[0];VecGetArrayRead(R0,(const PetscScalar**)&r0);VecRestoreArrayRead(R0,PETSC_NULL);
 80:   Rn = ksp->work[1];VecGetArray(Rn_1,(PetscScalar**)&rn_1);VecRestoreArray(Rn_1,PETSC_NULL);
 81:   Un = ksp->work[2];VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);VecRestoreArrayRead(Un_1,PETSC_NULL);
 82:   F0 = ksp->work[3];VecGetArrayRead(F0,(const PetscScalar**)&f0);VecRestoreArrayRead(F0,PETSC_NULL);
 83:   Vn = ksp->work[4];VecGetArray(Vn_1,(PetscScalar**)&vn_1);VecRestoreArray(Vn_1,PETSC_NULL);
 84:   Zn = ksp->work[5];VecGetArray(Zn_1,(PetscScalar**)&zn_1);VecRestoreArray(Zn_1,PETSC_NULL);
 85:   Qn = ksp->work[6];VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);VecRestoreArrayRead(Qn_1,PETSC_NULL);
 86:   Tn = ksp->work[7];VecGetArrayRead(Tn,(const PetscScalar**)&tn);VecRestoreArrayRead(Tn,PETSC_NULL);
 87:   Sn = ksp->work[8];VecGetArrayRead(Sn,(const PetscScalar**)&sn);VecRestoreArrayRead(Sn,PETSC_NULL);

 89:   /* r0 = rn_1 = b - A*xn_1; */
 90:   /* KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);
 91:      VecAYPX(Rn_1,-1.0,B); */
 92:   KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);

 94:   VecNorm(Rn_1,NORM_2,&rnorm);
 95:   KSPMonitor(ksp,0,rnorm);
 96:   (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);
 97:   if (ksp->reason) return(0);

 99:   VecCopy(Rn_1,R0);

101:   /* un_1 = A*rn_1; */
102:   KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);
103: 
104:   /* f0   = A'*rn_1; */
105:   if (ksp->pc_side == PC_RIGHT) { /* B' A' */
106:     MatMultTranspose(A,R0,Tn);
107:     PCApplyTranspose(ksp->pc,Tn,F0);
108:   } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
109:     PCApplyTranspose(ksp->pc,R0,Tn);
110:     MatMultTranspose(A,Tn,F0);
111:   }

113:   /*qn_1 = vn_1 = zn_1 = 0.0; */
114:   VecSet(Qn_1,0.0);
115:   VecSet(Vn_1,0.0);
116:   VecSet(Zn_1,0.0);

118:   sigman_2 = pin_1 = taun_1 = 0.0;

120:   /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */
121:   VecDot(R0,R0,&phin_1);

123:   /* sigman_1 = rn_1'un_1  */
124:   VecDot(R0,Un_1,&sigman_1);

126:   alphan_1 = omegan_1 = 1.0;

128:   for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) {
129:     rhon   = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1;
130:     /*    if (rhon == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"rhon is zero, iteration %D",n); */
131:     if (ksp->its == 1) deltan = rhon;
132:     else deltan = rhon/taun_1;
133:     betan  = deltan/omegan_1;
134:     taun   = sigman_1 + betan*taun_1  - deltan*pin_1;
135:     if (taun == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"taun is zero, iteration %D",ksp->its);
136:     alphan = rhon/taun;
137:     PetscLogFlops(15.0);

139:     /*  
140:         zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1
141:         vn = un_1 + betan*vn_1 - deltan*qn_1
142:         sn = rn_1 - alphan*vn

144:        The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
145:     */
146:     PetscLogEventBegin(VEC_Ops,0,0,0,0);
147:     tmp1 = (alphan/alphan_1)*betan;
148:     tmp2 = alphan*deltan;
149:     for (i=0; i<N; i++) {
150:       zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i];
151:       vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i];
152:       sn[i] = rn_1[i] - alphan*vn[i];
153:     }
154:     PetscLogFlops(3.0+11.0*N);
155:     PetscLogEventEnd(VEC_Ops,0,0,0,0);

157:     /*
158:         qn = A*vn
159:     */
160:     KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);

162:     /*
163:         tn = un_1 - alphan*qn
164:     */
165:     VecWAXPY(Tn,-alphan,Qn,Un_1);
166: 

168:     /*
169:         phin = r0'sn
170:         pin  = r0'qn
171:         gamman = f0'sn
172:         etan   = f0'tn
173:         thetan = sn'tn
174:         kappan = tn'tn
175:     */
176:     PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
177:     phin = pin = gamman = etan = thetan = kappan = 0.0;
178:     for (i=0; i<N; i++) {
179:       phin += r0[i]*sn[i];
180:       pin  += r0[i]*qn[i];
181:       gamman += f0[i]*sn[i];
182:       etan   += f0[i]*tn[i];
183:       thetan += sn[i]*tn[i];
184:       kappan += tn[i]*tn[i];
185:     }
186:     PetscLogFlops(12.0*N);
187:     PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
188:     insums[0] = phin;
189:     insums[1] = pin;
190:     insums[2] = gamman;
191:     insums[3] = etan;
192:     insums[4] = thetan;
193:     insums[5] = kappan;
194:     insums[6] = rnormin;
195:     PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
196: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
197:     if (ksp->lagnorm && ksp->its > 1) {
198:       MPI_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);
199:     } else {
200:       MPI_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);
201:     }
202: #else
203:     if (ksp->lagnorm && ksp->its > 1) {
204:       MPI_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);
205:     } else {
206:       MPI_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);
207:     }
208: #endif
209:     PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
210:     phin     = outsums[0];
211:     pin      = outsums[1];
212:     gamman   = outsums[2];
213:     etan     = outsums[3];
214:     thetan   = outsums[4];
215:     kappan   = outsums[5];
216:     if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));

218:     if (kappan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"kappan is zero, iteration %D",ksp->its);
219:     if (thetan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"thetan is zero, iteration %D",ksp->its);
220:     omegan = thetan/kappan;
221:     sigman = gamman - omegan*etan;

223:     /*
224:         rn = sn - omegan*tn
225:         xn = xn_1 + zn + omegan*sn
226:     */
227:     PetscLogEventBegin(VEC_Ops,0,0,0,0);
228:     rnormin = 0.0;
229:     for (i=0; i<N; i++) {
230:       rn[i]    = sn[i] - omegan*tn[i];
231:       rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]);
232:       xn[i]   += zn[i] + omegan*sn[i];
233:     }
234:     PetscObjectStateIncrease((PetscObject)Xn);
235:     PetscLogFlops(7.0*N);
236:     PetscLogEventEnd(VEC_Ops,0,0,0,0);

238:     if (!ksp->lagnorm && ksp->chknorm < ksp->its) {
239:       PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
240:       MPI_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,((PetscObject)ksp)->comm);
241:       PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);
242:       rnorm = PetscSqrtReal(rnorm);
243:     }

245:     /* Test for convergence */
246:     KSPMonitor(ksp,ksp->its,rnorm);
247:     (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);
248:     if (ksp->reason) break;
249: 
250:     /* un = A*rn */
251:     KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);

253:     /* Update n-1 locations with n locations */
254:     sigman_2 = sigman_1;
255:     sigman_1 = sigman;
256:     pin_1    = pin;
257:     phin_1   = phin;
258:     alphan_1 = alphan;
259:     taun_1   = taun;
260:     omegan_1 = omegan;
261:   }
262:   if (ksp->its >= ksp->max_it) {
263:     ksp->reason = KSP_DIVERGED_ITS;
264:   }
265:   KSPUnwindPreconditioner(ksp,Xn,Tn);
266:   return(0);
267: }


270: /*MC
271:      KSPIBCGS - Implements the IBiCGStab (Improved Stabilized version of BiConjugate Gradient Squared) method
272:             in an alternative form to have only a single global reduction operation instead of the usual 3 (or 4)

274:    Options Database Keys:
275: .   see KSPSolve()

277:    Level: beginner

279:    Notes: Supports left and right preconditioning 

281:           See KSPBCGSL for additional stabilization

283:           Unlike the Bi-CG-stab algorithm, this requires one multiplication be the transpose of the operator
284:            before the iteration starts.

286:           The paper has two errors in the algorithm presented, they are fixed in the code in KSPSolve_IBCGS()

288:           For maximum reduction in the number of global reduction operations, this solver should be used with 
289:           KSPSetLagNorm().

291:           This is not supported for complex numbers.

293:    Reference: The Improved BiCGStab Method for Large and Sparse Unsymmetric Linear Systems on Parallel Distributed Memory
294:                      Architectures. L. T. Yang and R. Brent, Proceedings of the Fifth International Conference on Algorithms and
295:                      Architectures for Parallel Processing, 2002, IEEE.

297: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPBCGSL, KSPIBCGS, KSPSetLagNorm()
298: M*/
299: EXTERN_C_BEGIN
302: PetscErrorCode  KSPCreate_IBCGS(KSP ksp)
303: {

307:   ksp->data                 = (void*)0;
308:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
309:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
310:   ksp->ops->setup           = KSPSetUp_IBCGS;
311:   ksp->ops->solve           = KSPSolve_IBCGS;
312:   ksp->ops->destroy         = KSPDefaultDestroy;
313:   ksp->ops->buildsolution   = KSPDefaultBuildSolution;
314:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
315:   ksp->ops->setfromoptions  = 0;
316:   ksp->ops->view            = 0;
317: #if defined(PETSC_USE_COMPLEX)
318:   SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"This is not supported for complex numbers");
319: #endif
320:   return(0);
321: }
322: EXTERN_C_END