Actual source code: pipecr.c
petsc-3.4.5 2014-06-29
1: #include <petsc-private/kspimpl.h>
3: /*
4: KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.
6: This is called once, usually automatically by KSPSolve() or KSPSetUp()
7: but can be called directly by KSPSetUp()
8: */
11: PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
12: {
16: /* get work vectors needed by PIPECR */
17: KSPSetWorkVecs(ksp,7);
18: return(0);
19: }
21: /*
22: KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
24: Input Parameter:
25: . ksp - the Krylov space object that was set to use conjugate gradient, by, for
26: example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
27: */
30: PetscErrorCode KSPSolve_PIPECR(KSP ksp)
31: {
33: PetscInt i;
34: PetscScalar alpha=0.0,beta=0.0,gamma,gammaold=0.0,delta;
35: PetscReal dp = 0.0;
36: Vec X,B,Z,P,W,Q,U,M,N;
37: Mat Amat,Pmat;
38: MatStructure pflag;
39: PetscBool diagonalscale;
42: PCGetDiagonalScale(ksp->pc,&diagonalscale);
43: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
45: X = ksp->vec_sol;
46: B = ksp->vec_rhs;
47: M = ksp->work[0];
48: Z = ksp->work[1];
49: P = ksp->work[2];
50: N = ksp->work[3];
51: W = ksp->work[4];
52: Q = ksp->work[5];
53: U = ksp->work[6];
55: PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);
57: ksp->its = 0;
58: /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
59: if (!ksp->guess_zero) {
60: KSP_MatMult(ksp,Amat,X,W); /* w <- b - Ax */
61: VecAYPX(W,-1.0,B);
62: } else {
63: VecCopy(B,W); /* w <- b (x is 0) */
64: }
65: KSP_PCApply(ksp,W,U); /* u <- Bw */
67: switch (ksp->normtype) {
68: case KSP_NORM_PRECONDITIONED:
69: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
70: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
71: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
72: VecNormEnd(U,NORM_2,&dp);
73: break;
74: case KSP_NORM_NONE:
75: KSP_MatMult(ksp,Amat,U,W);
76: dp = 0.0;
77: break;
78: default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
79: }
80: KSPLogResidualHistory(ksp,dp);
81: KSPMonitor(ksp,0,dp);
82: ksp->rnorm = dp;
83: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
84: if (ksp->reason) return(0);
86: i = 0;
87: do {
88: KSP_PCApply(ksp,W,M); /* m <- Bw */
90: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
91: VecNormBegin(U,NORM_2,&dp);
92: }
93: VecDotBegin(W,U,&gamma);
94: VecDotBegin(M,W,&delta);
95: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
97: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
99: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
100: VecNormEnd(U,NORM_2,&dp);
101: }
102: VecDotEnd(W,U,&gamma);
103: VecDotEnd(M,W,&delta);
105: if (i > 0) {
106: if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
107: ksp->rnorm = dp;
108: KSPLogResidualHistory(ksp,dp);
109: KSPMonitor(ksp,i,dp);
110: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
111: if (ksp->reason) break;
112: }
114: if (i == 0) {
115: alpha = gamma / delta;
116: VecCopy(N,Z); /* z <- n */
117: VecCopy(M,Q); /* q <- m */
118: VecCopy(U,P); /* p <- u */
119: } else {
120: beta = gamma / gammaold;
121: alpha = gamma / (delta - beta / alpha * gamma);
122: VecAYPX(Z,beta,N); /* z <- n + beta * z */
123: VecAYPX(Q,beta,M); /* q <- m + beta * q */
124: VecAYPX(P,beta,U); /* p <- u + beta * p */
125: }
126: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
127: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
128: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
129: gammaold = gamma;
130: i++;
131: ksp->its = i;
133: /* if (i%50 == 0) { */
134: /* KSP_MatMult(ksp,Amat,X,W); /\* w <- b - Ax *\/ */
135: /* VecAYPX(W,-1.0,B); */
136: /* KSP_PCApply(ksp,W,U); */
137: /* KSP_MatMult(ksp,Amat,U,W); */
138: /* } */
140: } while (i<ksp->max_it);
141: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
142: return(0);
143: }
145: /*MC
146: KSPPIPECR - Pipelined conjugate residual method
148: There method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CR. The
149: non-blocking reduction is overlapped by the matrix-vector product, but not preconditioner application.
151: See also KSPPIPECG, where the reduction is only overlapped with the matrix-vector product.
153: Level:
154: beginner
156: Notes:
157: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
158: See the FAQ on the PETSc website for details.
160: Contributed by:
161: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
163: Reference:
164: P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
165: Submitted to Parallel Computing, 2012.
167: .seealso: KSPCreate(), KSPSetType(), KSPPIPECG, KSPGROPPCG, KSPPGMRES, KSPCG, KSPCGUseSingleReduction()
168: M*/
172: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
173: {
177: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);
178: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
180: ksp->ops->setup = KSPSetUp_PIPECR;
181: ksp->ops->solve = KSPSolve_PIPECR;
182: ksp->ops->destroy = KSPDestroyDefault;
183: ksp->ops->view = 0;
184: ksp->ops->setfromoptions = 0;
185: ksp->ops->buildsolution = KSPBuildSolutionDefault;
186: ksp->ops->buildresidual = KSPBuildResidualDefault;
187: return(0);
188: }