Actual source code: pipecg.c

petsc-3.4.4 2014-03-13
  2: #include <petsc-private/kspimpl.h>

  4: /*
  5:      KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.

  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 12: PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
 13: {

 17:   /* get work vectors needed by PIPECG */
 18:   KSPSetWorkVecs(ksp,9);
 19:   return(0);
 20: }

 22: /*
 23:  KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method

 25:  Input Parameter:
 26:  .     ksp - the Krylov space object that was set to use conjugate gradient, by, for
 27:              example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
 28: */
 31: PetscErrorCode  KSPSolve_PIPECG(KSP ksp)
 32: {
 34:   PetscInt       i;
 35:   PetscScalar    alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0;
 36:   PetscReal      dp    = 0.0;
 37:   Vec            X,B,Z,P,W,Q,U,M,N,R,S;
 38:   Mat            Amat,Pmat;
 39:   MatStructure   pflag;
 40:   PetscBool      diagonalscale;

 43:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
 44:   if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

 46:   X = ksp->vec_sol;
 47:   B = ksp->vec_rhs;
 48:   M = ksp->work[0];
 49:   Z = ksp->work[1];
 50:   P = ksp->work[2];
 51:   N = ksp->work[3];
 52:   W = ksp->work[4];
 53:   Q = ksp->work[5];
 54:   U = ksp->work[6];
 55:   R = ksp->work[7];
 56:   S = ksp->work[8];

 58:   PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);

 60:   ksp->its = 0;
 61:   if (!ksp->guess_zero) {
 62:     KSP_MatMult(ksp,Amat,X,R);            /*     r <- b - Ax     */
 63:     VecAYPX(R,-1.0,B);
 64:   } else {
 65:     VecCopy(B,R);                         /*     r <- b (x is 0) */
 66:   }

 68:   KSP_PCApply(ksp,R,U);                   /*     u <- Br   */

 70:   switch (ksp->normtype) {
 71:   case KSP_NORM_PRECONDITIONED:
 72:     VecNormBegin(U,NORM_2,&dp);                /*     dp <- u'*u = e'*A'*B'*B*A'*e'     */
 73:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 74:     KSP_MatMult(ksp,Amat,U,W);              /*     w <- Au   */
 75:     VecNormEnd(U,NORM_2,&dp);
 76:     break;
 77:   case KSP_NORM_UNPRECONDITIONED:
 78:     VecNormBegin(R,NORM_2,&dp);                /*     dp <- r'*r = e'*A'*A*e            */
 79:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 80:     KSP_MatMult(ksp,Amat,U,W);              /*     w <- Au   */
 81:     VecNormEnd(R,NORM_2,&dp);
 82:     break;
 83:   case KSP_NORM_NATURAL:
 84:     VecDotBegin(R,U,&gamma);                  /*     gamma <- u'*r       */
 85:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 86:     KSP_MatMult(ksp,Amat,U,W);              /*     w <- Au   */
 87:     VecDotEnd(R,U,&gamma);
 88:     if (PetscIsInfOrNanScalar(gamma)) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_FP,"Infinite or not-a-number generated in dot product");
 89:     dp = PetscSqrtReal(PetscAbsScalar(gamma));                  /*     dp <- r'*u = r'*B*r = e'*A'*B*A*e */
 90:     break;
 91:   case KSP_NORM_NONE:
 92:     KSP_MatMult(ksp,Amat,U,W);
 93:     dp   = 0.0;
 94:     break;
 95:   default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
 96:   }
 97:   KSPLogResidualHistory(ksp,dp);
 98:   KSPMonitor(ksp,0,dp);
 99:   ksp->rnorm = dp;
100:   (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
101:   if (ksp->reason) return(0);

103:   i = 0;
104:   do {
105:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
106:       VecNormBegin(R,NORM_2,&dp);
107:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
108:       VecNormBegin(U,NORM_2,&dp);
109:     }
110:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
111:       VecDotBegin(R,U,&gamma);
112:     }
113:     VecDotBegin(W,U,&delta);
114:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));

116:     KSP_PCApply(ksp,W,M);           /*   m <- Bw       */
117:     KSP_MatMult(ksp,Amat,M,N);      /*   n <- Am       */

119:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
120:       VecNormEnd(R,NORM_2,&dp);
121:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
122:       VecNormEnd(U,NORM_2,&dp);
123:     }
124:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
125:       VecDotEnd(R,U,&gamma);
126:     }
127:     VecDotEnd(W,U,&delta);

129:     if (i > 0) {
130:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
131:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

133:       ksp->rnorm = dp;
134:       KSPLogResidualHistory(ksp,dp);
135:       KSPMonitor(ksp,i,dp);
136:       (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
137:       if (ksp->reason) break;
138:     }

140:     if (i == 0) {
141:       alpha = gamma / delta;
142:       VecCopy(N,Z);        /*     z <- n          */
143:       VecCopy(M,Q);        /*     q <- m          */
144:       VecCopy(U,P);        /*     p <- u          */
145:       VecCopy(W,S);        /*     s <- w          */
146:     } else {
147:       beta  = gamma / gammaold;
148:       alpha = gamma / (delta - beta / alpha * gamma);
149:       VecAYPX(Z,beta,N);   /*     z <- n + beta * z   */
150:       VecAYPX(Q,beta,M);   /*     q <- m + beta * q   */
151:       VecAYPX(P,beta,U);   /*     p <- u + beta * p   */
152:       VecAYPX(S,beta,W);   /*     s <- w + beta * s   */
153:     }
154:     VecAXPY(X, alpha,P); /*     x <- x + alpha * p   */
155:     VecAXPY(U,-alpha,Q); /*     u <- u - alpha * q   */
156:     VecAXPY(W,-alpha,Z); /*     w <- w - alpha * z   */
157:     VecAXPY(R,-alpha,S); /*     r <- r - alpha * s   */
158:     gammaold = gamma;
159:     i++;
160:     ksp->its = i;

162:     /* if (i%50 == 0) { */
163:     /*   KSP_MatMult(ksp,Amat,X,R);            /\*     w <- b - Ax     *\/ */
164:     /*   VecAYPX(R,-1.0,B); */
165:     /*   KSP_PCApply(ksp,R,U); */
166:     /*   KSP_MatMult(ksp,Amat,U,W); */
167:     /* } */

169:   } while (i<ksp->max_it);
170:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
171:   return(0);
172: }


175: /*MC
176:    KSPPIPECG - Pipelined conjugate gradient method.

178:    There method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG.  The
179:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

181:    See also KSPPIPECR, where the reduction is only overlapped with the matrix-vector product.

183:    Level:
184:    beginner

186:    Notes:
187:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
188:    See the FAQ on the PETSc website for details.

190:    Contributed by:
191:    Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders

193:    Reference:
194:    P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
195:    Submitted to Parallel Computing, 2012.

197: .seealso: KSPCreate(), KSPSetType(), KSPPIPECR, KSPGROPPCG, KSPPGMRES, KSPCG, KSPCGUseSingleReduction()
198: M*/
201: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
202: {

206:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,1);
207:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);
208:   KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,1);
209:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);

211:   ksp->ops->setup          = KSPSetUp_PIPECG;
212:   ksp->ops->solve          = KSPSolve_PIPECG;
213:   ksp->ops->destroy        = KSPDestroyDefault;
214:   ksp->ops->view           = 0;
215:   ksp->ops->setfromoptions = 0;
216:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
217:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
218:   return(0);
219: }