Actual source code: agmres.c

petsc-master 2019-10-12
Report Typos and Errors

  2: #define PETSCKSP_DLL

  4:  #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>

  6: #define AGMRES_DEFAULT_MAXK 30
  7: #define AGMRES_DELTA_DIRECTIONS 10
  8: static PetscErrorCode KSPAGMRESBuildSoln(KSP,PetscInt);
  9: static PetscErrorCode KSPAGMRESBuildBasis(KSP);
 10: static PetscErrorCode KSPAGMRESBuildHessenberg(KSP);

 12: PetscLogEvent KSP_AGMRESComputeDeflationData, KSP_AGMRESBuildBasis, KSP_AGMRESComputeShifts, KSP_AGMRESRoddec;

 14: extern PetscErrorCode KSPSetUp_DGMRES(KSP);
 15: extern PetscErrorCode KSPBuildSolution_DGMRES(KSP,Vec,Vec*);
 16: extern PetscErrorCode KSPSolve_DGMRES(KSP);
 17: extern PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP,PetscInt*);
 18: extern PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP,PetscInt*);
 19: extern PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP,Vec,Vec);
 20: extern PetscErrorCode KSPDestroy_DGMRES(KSP);
 21: extern PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptionItems *,KSP);
 22: extern PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP,PetscInt);
 23: /*
 24:  * This function allocates  data for the Newton basis GMRES implementation.
 25:  * Note that most data are allocated in KSPSetUp_DGMRES and KSPSetUp_GMRES, including the space for the basis vectors, the various Hessenberg matrices and the Givens rotations coefficients
 26:  *
 27:  */
 28: static PetscErrorCode    KSPSetUp_AGMRES(KSP ksp)
 29: {
 30:   PetscErrorCode  ierr;
 31:   PetscInt        hes;
 32:   PetscInt        nloc;
 33:   KSP_AGMRES      *agmres = (KSP_AGMRES*)ksp->data;
 34:   PetscInt        neig    = agmres->neig;
 35:   const PetscInt  max_k   = agmres->max_k;
 36:   PetscInt        N       = MAXKSPSIZE;
 37:   PetscInt        lwork   = PetscMax(8 * N + 16, 4 * neig * (N - neig));

 40:   if (ksp->pc_side == PC_SYMMETRIC) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"no symmetric preconditioning for KSPAGMRES");
 41:   N     = MAXKSPSIZE;
 42:   /* Preallocate space during the call to KSPSetup_GMRES for the Krylov basis */
 43:   agmres->q_preallocate = PETSC_TRUE; /* No allocation on the fly */
 44:   /* Preallocate space to compute later the eigenvalues in GMRES */
 45:   ksp->calc_sings = PETSC_TRUE;
 46:   agmres->max_k   = N; /* Set the augmented size to be allocated in KSPSetup_GMRES */
 47:   KSPSetUp_DGMRES(ksp);
 48:   agmres->max_k   = max_k;
 49:   hes             = (N + 1) * (N + 1);

 51:   /* Data for the Newton basis GMRES */
 52:   PetscCalloc4(max_k,&agmres->Rshift,max_k,&agmres->Ishift,hes,&agmres->Rloc,(N+1)*4,&agmres->wbufptr);
 53:   PetscMalloc3(N+1,&agmres->tau,lwork,&agmres->work,N+1,&agmres->nrs);
 54:   PetscCalloc4(N+1,&agmres->Scale,N+1,&agmres->sgn,N+1,&agmres->tloc,N+1,&agmres->temp);

 56:   /* Allocate space for the vectors in the orthogonalized basis*/
 57:   VecGetLocalSize(agmres->vecs[0], &nloc);
 58:   PetscMalloc1(nloc*(N+1), &agmres->Qloc);

 60:   /* Init the ring of processors for the roddec orthogonalization */
 61:   KSPAGMRESRoddecInitNeighboor(ksp);

 63:   if (agmres->neig < 1) return(0);

 65:   /* Allocate space for the deflation */
 66:   PetscMalloc1(N, &agmres->select);
 67:   VecDuplicateVecs(VEC_V(0), N, &agmres->TmpU);
 68:   PetscMalloc2(N*N, &agmres->MatEigL, N*N, &agmres->MatEigR);
 69:   /*  PetscMalloc6(N*N, &agmres->Q, N*N, &agmres->Z, N, &agmres->wr, N, &agmres->wi, N, &agmres->beta, N, &agmres->modul); */
 70:   PetscMalloc3(N*N, &agmres->Q, N*N, &agmres->Z, N, &agmres->beta);
 71:   PetscMalloc2((N+1),&agmres->perm,(2*neig*N),&agmres->iwork);
 72:   return(0);
 73: }

 75: /* This function returns the current solution from the private data structure of AGMRES back to ptr.
 76:  * This function is provided to be compliant with the KSP GMRES  scheme.
 77:  *
 78:  */
 79: static PetscErrorCode KSPBuildSolution_AGMRES(KSP ksp,Vec ptr, Vec *result)
 80: {
 81:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;

 85:   if (!ptr) {
 86:     if (!agmres->sol_temp) {
 87:       VecDuplicate(ksp->vec_sol,&agmres->sol_temp);
 88:       VecCopy(ksp->vec_sol,agmres->sol_temp);
 89:       PetscLogObjectParent((PetscObject)ksp,(PetscObject)agmres->sol_temp);
 90:     }
 91:     ptr = agmres->sol_temp;
 92:   } else {
 93:     VecCopy(ksp->vec_sol, ptr);
 94:   }
 95:   if (result) *result = ptr;
 96:   return(0);
 97: }

 99: /* This function computes the shifts  needed to generate stable basis vectors (through the Newton polynomials)
100:  * At input, the operators (matrix and preconditioners) are used to create a new GMRES KSP.
101:  * One cycle of GMRES with the Arnoldi process is performed and the eigenvalues of the induced Hessenberg matrix (the Ritz values) are computed.
102:  * NOTE: This function is not currently used; the next function is rather used when  the eigenvectors are needed next to augment the basis
103:  */
104: PetscErrorCode KSPComputeShifts_GMRES(KSP ksp)
105: {
106:   PetscErrorCode  ierr;
107:   KSP_AGMRES      *agmres = (KSP_AGMRES*)(ksp->data);
108:   KSP             kspgmres;
109:   Mat             Amat, Pmat;
110:   const PetscInt  max_k = agmres->max_k;
111:   PC              pc;
112:   PetscInt        m;
113:   PetscScalar     *Rshift, *Ishift;
114:   PetscBool       flg;

117:   /* Perform one cycle of classical GMRES (with the Arnoldi process) to get the Hessenberg matrix
118:    We assume here that the ksp is AGMRES and that the operators for the
119:    linear system have been set in this ksp */
120:   KSPCreate(PetscObjectComm((PetscObject)ksp), &kspgmres);
121:   if (!ksp->pc) { KSPGetPC(ksp,&ksp->pc); }
122:   PCGetOperators(ksp->pc, &Amat, &Pmat);
123:   KSPSetOperators(kspgmres, Amat, Pmat);
124:   KSPSetFromOptions(kspgmres);
125:   PetscOptionsHasName(NULL,((PetscObject)ksp)->prefix, "-ksp_view", &flg);
126:   if (flg) { PetscOptionsClearValue(NULL,"-ksp_view"); }
127:   KSPSetType(kspgmres, KSPGMRES);
128:   KSPGMRESSetRestart(kspgmres, max_k);
129:   KSPGetPC(ksp, &pc);
130:   KSPSetPC(kspgmres, pc);
131:   /* Copy common options */
132:   kspgmres->pc_side = ksp->pc_side;
133:   /* Setup KSP context */
134:   KSPSetComputeEigenvalues(kspgmres, PETSC_TRUE);
135:   KSPSetUp(kspgmres);

137:   kspgmres->max_it = max_k; /* Restrict the maximum number of iterations to one cycle of GMRES */
138:   kspgmres->rtol   = ksp->rtol;

140:   KSPSolve(kspgmres, ksp->vec_rhs, ksp->vec_sol);

142:   ksp->guess_zero = PETSC_FALSE;
143:   ksp->rnorm      = kspgmres->rnorm;
144:   ksp->its        = kspgmres->its;
145:   if (kspgmres->reason == KSP_CONVERGED_RTOL) {
146:     ksp->reason = KSP_CONVERGED_RTOL;
147:     return(0);
148:   } else ksp->reason = KSP_CONVERGED_ITERATING;
149:   /* Now, compute the Shifts values */
150:   PetscMalloc2(max_k,&Rshift,max_k,&Ishift);
151:   KSPComputeEigenvalues(kspgmres, max_k, Rshift, Ishift, &m);
152:   if (m < max_k) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Unable to compute the Shifts for the Newton basis");
153:   else {
154:     KSPAGMRESLejaOrdering(Rshift, Ishift, agmres->Rshift, agmres->Ishift, max_k);

156:     agmres->HasShifts = PETSC_TRUE;
157:   }
158:   /* Restore KSP view options */
159:   if (flg) { PetscOptionsSetValue(NULL,"-ksp_view", ""); }
160:   return(0);
161: }

163: /* This function computes the shift values (Ritz values) needed to generate stable basis vectors
164:  * One cycle of DGMRES is performed to find the eigenvalues. The same data structures are used since AGMRES extends DGMRES
165:  * Note that when the basis is  to be augmented, then this function computes the harmonic Ritz vectors from this first cycle.
166:  * Input :
167:  *  - The operators (matrix, preconditioners and right hand side) are  normally required.
168:  *  - max_k : the size of the (non augmented) basis.
169:  *  - neig: The number of eigenvectors to augment, if deflation is needed
170:  * Output :
171:  *  - The shifts as complex pair of arrays in wr and wi (size max_k).
172:  *  - The harmonic Ritz vectors (agmres->U) if deflation is needed.
173:  */
174: static PetscErrorCode KSPComputeShifts_DGMRES(KSP ksp)
175: {
177:   KSP_AGMRES     *agmres = (KSP_AGMRES*)(ksp->data);
178:   PetscInt       max_k   = agmres->max_k; /* size of the (non augmented) Krylov subspace */
179:   PetscInt       Neig    = 0;
180:   const PetscInt max_it  = ksp->max_it;
181:   PetscBool      flg;

183:   /* Perform one cycle of dgmres to find the eigenvalues and compute the first approximations of the eigenvectors */

186:   PetscLogEventBegin(KSP_AGMRESComputeShifts, ksp, 0,0,0);
187:   /* Send the size of the augmented basis to DGMRES */
188:   ksp->max_it             = max_k; /* set this to have DGMRES performing only one cycle */
189:   ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
190:   KSPSolve_DGMRES(ksp);
191:   ksp->guess_zero         = PETSC_FALSE;
192:   if (ksp->reason == KSP_CONVERGED_RTOL) {
193:     PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0,0,0);
194:     return(0);
195:   } else ksp->reason = KSP_CONVERGED_ITERATING;

197:   if ((agmres->r == 0) && (agmres->neig > 0)) {  /* Compute the eigenvalues for the shifts and the eigenvectors (to augment the Newton basis) */
198:     agmres->HasSchur = PETSC_FALSE;
199:     KSPDGMRESComputeDeflationData_DGMRES (ksp, &Neig);CHKERRQ (ierr);
200:     Neig             = max_k;
201:   } else { /* From DGMRES, compute only the eigenvalues needed as Shifts for the Newton Basis */
202:      KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig);
203:   }

205:   /* It may happen that the Ritz values from one cycle of GMRES are not accurate enough to provide a good stability. In this case, another cycle of GMRES is performed.  The two sets of values thus generated are sorted and the most accurate are kept as shifts */
206:   PetscOptionsHasName(NULL,NULL, "-ksp_agmres_ImproveShifts", &flg);
207:   if (!flg) {
208:     KSPAGMRESLejaOrdering(agmres->wr, agmres->wi, agmres->Rshift, agmres->Ishift, max_k);
209:   } else { /* Perform another cycle of DGMRES to find another set of eigenvalues */
210:     PetscInt    i;
211:     PetscScalar *wr, *wi,*Rshift, *Ishift;
212:     PetscMalloc4(2*max_k, &wr, 2*max_k, &wi, 2*max_k, &Rshift, 2*max_k, &Ishift);
213:     for (i = 0; i < max_k; i++) {
214:       wr[i] = agmres->wr[i];
215:       wi[i] = agmres->wi[i];
216:     }

218:     KSPSolve_DGMRES(ksp);

220:     ksp->guess_zero = PETSC_FALSE;
221:     if (ksp->reason == KSP_CONVERGED_RTOL) return(0);
222:     else ksp->reason = KSP_CONVERGED_ITERATING;
223:     if (agmres->neig > 0) { /* Compute the eigenvalues for the shifts) and the eigenvectors (to augment the Newton basis */
224:       agmres->HasSchur = PETSC_FALSE;

226:       KSPDGMRESComputeDeflationData_DGMRES(ksp, &Neig);
227:       Neig = max_k;
228:     } else { /* From DGMRES, compute only the eigenvalues needed as Shifts for the Newton Basis */
229:        KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig);
230:     }
231:     for (i = 0; i < max_k; i++) {
232:       wr[max_k+i] = agmres->wr[i];
233:       wi[max_k+i] = agmres->wi[i];
234:     }
235:     KSPAGMRESLejaOrdering(wr, wi, Rshift, Ishift, 2*max_k);
236:     for (i = 0; i< max_k; i++) {
237:       agmres->Rshift[i] = Rshift[i];
238:       agmres->Ishift[i] = Ishift[i];
239:     }
240:     PetscFree(Rshift);
241:     PetscFree(wr);
242:     PetscFree(Ishift);
243:     PetscFree(wi);
244:   }

246:   agmres->HasShifts = PETSC_TRUE;
247:   ksp->max_it       = max_it;
248:   PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0,0,0);
249:   return(0);
250: }

252: /*
253:  * Generate the basis vectors from the Newton polynomials with shifts and scaling factors
254:  * The scaling factors are computed to obtain unit vectors. Note that this step can be avoided with the preprocessing option KSP_AGMRES_NONORM.
255:  * Inputs :
256:  *  - Operators (Matrix and preconditioners and the first basis vector in VEC_V(0)
257:  *  - Shifts values in agmres->Rshift and agmres->Ishift.
258:  * Output :
259:  *  - agmres->vecs or VEC_V : basis vectors
260:  *  - agmres->Scale : Scaling factors (equal to 1 if no scaling is done)
261:  */
262: static PetscErrorCode KSPAGMRESBuildBasis(KSP ksp)
263: {
265:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
266:   PetscReal      *Rshift = agmres->Rshift;
267:   PetscReal      *Ishift = agmres->Ishift;
268:   PetscReal      *Scale  = agmres->Scale;
269:   const PetscInt max_k   = agmres->max_k;
270:   PetscInt       KspSize = KSPSIZE;  /* if max_k == KspSizen then the basis should not be augmented */
271:   PetscInt       j       = 1;

274:   PetscLogEventBegin(KSP_AGMRESBuildBasis, ksp, 0,0,0);
275:   Scale[0] = 1.0;
276:   while (j <= max_k) {
277:     if (Ishift[j-1] == 0) {
278:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
279:         /* Apply the precond-matrix operators */
280:         KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);
281:         /* Then apply deflation as a preconditioner */
282:         KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));
283:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
284:         KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);
285:         KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);
286:       } else {
287:         KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);
288:       }
289:       VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));
290: #if defined(KSP_AGMRES_NONORM)
291:       Scale[j] = 1.0;
292: #else
293:       VecScale(VEC_V(j), Scale[j-1]); /* This step can be postponed until all vectors are built */
294:       VecNorm(VEC_V(j), NORM_2, &(Scale[j]));
295:       Scale[j] = 1.0/Scale[j];
296: #endif

298:       agmres->matvecs += 1;
299:       j++;
300:     } else {
301:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
302:         /* Apply the precond-matrix operators */
303:         KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);
304:         /* Then apply deflation as a preconditioner */
305:         KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));
306:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
307:         KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);
308:         KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);
309:       } else {
310:         KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);
311:       }
312:       VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));
313: #if defined(KSP_AGMRES_NONORM)
314:       Scale[j] = 1.0;
315: #else
316:       VecScale(VEC_V(j), Scale[j-1]);
317:       VecNorm(VEC_V(j), NORM_2, &(Scale[j]));
318:       Scale[j] = 1.0/Scale[j];
319: #endif
320:       agmres->matvecs += 1;
321:       j++;
322:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
323:         /* Apply the precond-matrix operators */
324:         KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);
325:         /* Then apply deflation as a preconditioner */
326:         KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));
327:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
328:         KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);
329:         KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);
330:       } else {
331:         KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);
332:       }
333:       VecAXPY(VEC_V(j), -Rshift[j-2], VEC_V(j-1));
334:       VecAXPY(VEC_V(j), Scale[j-2]*Ishift[j-2]*Ishift[j-2], VEC_V(j-2));
335: #if defined(KSP_AGMRES_NONORM)
336:       Scale[j] = 1.0;
337: #else
338:       VecNorm(VEC_V(j), NORM_2, &(Scale[j]));
339:       Scale[j] = 1.0/Scale[j];
340: #endif
341:       agmres->matvecs += 1;
342:       j++;
343:     }
344:   }
345:   /* Augment the subspace with the eigenvectors*/
346:   while (j <= KspSize) {
347:     KSP_PCApplyBAorAB(ksp, agmres->U[j - max_k - 1], VEC_V(j), VEC_TMP_MATOP);
348: #if defined(KSP_AGMRES_NONORM)
349:     Scale[j] = 1.0;
350: #else
351:     VecScale(VEC_V(j), Scale[j-1]);
352:     VecNorm(VEC_V(j), NORM_2, &(Scale[j]));
353:     Scale[j] = 1.0/Scale[j];
354: #endif
355:     agmres->matvecs += 1;
356:     j++;
357:   }
358:   PetscLogEventEnd(KSP_AGMRESBuildBasis, ksp, 0,0,0);
359:   return(0);
360: }

362: /* Form the Hessenberg matrix for the Arnoldi-like relation.
363:  * Inputs :
364:  * - Shifts values in agmres->Rshift and agmres->Ishift
365:  * - RLoc : Triangular matrix from the RODDEC orthogonalization
366:  * Outputs :
367:  * - H = agmres->hh_origin : The Hessenberg matrix.
368:  *
369:  * NOTE: Note that the computed Hessenberg matrix is not mathematically equivalent to that in the real Arnoldi process (in KSP GMRES). If it is needed, it can be explicitly  formed as H <-- H * RLoc^-1.
370:  *
371:  */
372: static PetscErrorCode KSPAGMRESBuildHessenberg(KSP ksp)
373: {
374:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
375:   PetscScalar    *Rshift = agmres->Rshift;
376:   PetscScalar    *Ishift = agmres->Ishift;
377:   PetscScalar    *Scale  = agmres->Scale;
379:   PetscInt       i       = 0, j = 0;
380:   const PetscInt max_k   = agmres->max_k;
381:   PetscInt       KspSize = KSPSIZE;
382:   PetscInt       N       = MAXKSPSIZE+1;

385:   PetscArrayzero(agmres->hh_origin, (N+1)*N);
386:   while (j < max_k) {
387:     /* Real shifts */
388:     if (Ishift[j] == 0) {
389:       for (i = 0; i <= j; i++) {
390:         *H(i,j) = *RLOC(i,j+1)/Scale[j]  + (Rshift[j] * *RLOC(i,j));
391:       }
392:       *H(j+1,j) = *RLOC(j+1,j+1)/Scale[j];
393:       j++;
394:     } else if (Ishift[j] > 0) {
395:       for (i = 0; i <= j; i++) {
396:         *H(i,j) = *RLOC(i,j+1)/Scale[j] +  Rshift[j] * *RLOC(i, j);
397:       }
398:       *H(j+1,j) = *RLOC(j+1, j+1)/Scale[j];
399:       j++;
400:       for (i = 0; i <= j; i++) {
401:         *H(i,j) = (*RLOC(i,j+1) + Rshift[j-1] * *RLOC(i,j) - Scale[j-1] * Ishift[j-1]*Ishift[j-1]* *RLOC(i,j-1));
402:       }
403:       *H(j,j) = (*RLOC(j,j+1) + Rshift[j-1] * *RLOC(j,j));
404:       *H(j+1,j) = *RLOC(j+1,j+1);
405:       j++;
406:     } else SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "BAD ORDERING OF THE SHIFTS VALUES IN THE NEWTON BASIS");
407:   }
408:   for (j = max_k; j< KspSize; j++) { /* take into account the norm of the augmented vectors */
409:     for (i = 0; i <= j+1; i++) *H(i,j) = *RLOC(i, j+1)/Scale[j];
410:   }
411:   return(0);
412: }

414: /*
415:  * Form the new approximate solution from the least-square problem
416:  *
417:  */

419: static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp,PetscInt it)
420: {
421:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
423:   const PetscInt max_k = agmres->max_k;       /* Size of the non-augmented Krylov basis */
424:   PetscInt       i, j;
425:   PetscInt       r = agmres->r;           /* current number of augmented eigenvectors */
426:   PetscBLASInt   KspSize;
427:   PetscBLASInt   lC;
428:   PetscBLASInt   N;
429:   PetscBLASInt   ldH = it + 1;
430:   PetscBLASInt   lwork;
431:   PetscBLASInt   info, nrhs = 1;

434:   PetscBLASIntCast(KSPSIZE,&KspSize);
435:   PetscBLASIntCast(4 * (KspSize+1),&lwork);
436:   PetscBLASIntCast(KspSize+1,&lC);
437:   PetscBLASIntCast(MAXKSPSIZE + 1,&N);
438:   PetscBLASIntCast(N + 1,&ldH);
439:   /* Save a copy of the Hessenberg matrix */
440:   for (j = 0; j < N-1; j++) {
441:     for (i = 0; i < N; i++) {
442:       *HS(i,j) = *H(i,j);
443:     }
444:   }
445:   /* QR factorize the Hessenberg matrix */
446: #if defined(PETSC_MISSING_LAPACK_GEQRF)
447:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable.");
448: #else
449:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info));
450:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGEQRF INFO=%d", info);
451: #endif
452:   /* Update the right hand side of the least square problem */
453:   PetscArrayzero(agmres->nrs, N);

455:   agmres->nrs[0] = ksp->rnorm;
456: #if defined(PETSC_MISSING_LAPACK_ORMQR)
457:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable.");
458: #else
459:   PetscStackCallBLAS("LAPACKormqr",LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info));
460:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XORMQR INFO=%d",info);
461: #endif
462:   ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]);
463:   /* solve the least-square problem */
464: #if defined(PETSC_MISSING_LAPACK_TRTRS)
465:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRTRS - Lapack routine is unavailable.");
466: #else
467:   PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info));
468:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XTRTRS INFO=%d",info);
469: #endif
470:   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */
471:   VecZeroEntries(VEC_TMP);
472:   VecMAXPY(VEC_TMP, max_k, agmres->nrs, &VEC_V(0));
473:   if (!agmres->DeflPrecond) { VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U); }

475:   if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
476:     KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP);
477:     VecCopy(VEC_TMP_MATOP, VEC_TMP);
478:   }
479:   KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP);
480:   /* add the solution to the previous one */
481:   VecAXPY(ksp->vec_sol, 1.0, VEC_TMP);
482:   return(0);
483: }

485: /*
486:  * Run  one cycle of the Newton-basis gmres, possibly augmented with eigenvectors.
487:  *
488:  * Return residual history if requested.
489:  * Input :
490:  * - The vector VEC_V(0) is the initia residual
491:  * Output :
492:  *  - the solution vector is in agmres->vec_sol
493:  * - itcount : number of inner iterations
494:  *  - res : the new residual norm
495:  .
496:  NOTE: Unlike GMRES where the residual norm is available at each (inner) iteration,  here it is available at the end of the cycle.
497:  */
498: static PetscErrorCode KSPAGMRESCycle(PetscInt *itcount,KSP ksp)
499: {
500:   KSP_AGMRES     *agmres = (KSP_AGMRES*)(ksp->data);
501:   PetscReal      res;
503:   PetscInt       KspSize = KSPSIZE;

506:   /* check for the convergence */
507:   res = ksp->rnorm; /* Norm of the initial residual vector */
508:   if (!res) {
509:     if (itcount) *itcount = 0;
510:     ksp->reason = KSP_CONVERGED_ATOL;
511:     PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
512:     return(0);
513:   }
514:   (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
515:   /* Build the Krylov basis with Newton polynomials */
516:   KSPAGMRESBuildBasis(ksp);
517:   /* QR Factorize the basis with RODDEC */
518:   KSPAGMRESRoddec(ksp, KspSize+1);

520:   /* Recover a (partial) Hessenberg matrix for the Arnoldi-like relation */
521:   KSPAGMRESBuildHessenberg(ksp);
522:   /* Solve the least square problem and unwind the preconditioner */
523:   KSPAGMRESBuildSoln(ksp, KspSize);

525:   res        = ksp->rnorm;
526:   ksp->its  += KspSize;
527:   agmres->it = KspSize-1;
528:   /*  Test for the convergence */
529:   (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
530:   KSPLogResidualHistory(ksp,res);
531:   KSPMonitor(ksp,ksp->its,res);


534:   *itcount = KspSize;
535:   return(0);
536: }

538: static PetscErrorCode KSPSolve_AGMRES(KSP ksp)
539: {
541:   PetscInt       its;
542:   KSP_AGMRES     *agmres    = (KSP_AGMRES*)ksp->data;
543:   PetscBool      guess_zero = ksp->guess_zero;
544:   PetscReal      res_old, res;
545:   PetscInt       test;

548:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
549:   ksp->its = 0;
550:   PetscObjectSAWsGrantAccess((PetscObject)ksp);

552:   ksp->reason = KSP_CONVERGED_ITERATING;
553:   if (!agmres->HasShifts) { /* Compute Shifts for the Newton basis */
554:     KSPComputeShifts_DGMRES(ksp);
555:   }
556:   /* NOTE: At this step, the initial guess is not equal to zero since one cycle of the classical GMRES is performed to compute the shifts */
557:   (*ksp->converged)(ksp,0,ksp->rnorm,&ksp->reason,ksp->cnvP);
558:   while (!ksp->reason) {
559:     KSPInitialResidual(ksp,ksp->vec_sol,VEC_TMP,VEC_TMP_MATOP,VEC_V(0),ksp->vec_rhs);
560:     if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
561:       KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(0), VEC_TMP);
562:       VecCopy(VEC_TMP, VEC_V(0));

564:       agmres->matvecs += 1;
565:     }
566:     VecNormalize(VEC_V(0),&(ksp->rnorm));
567:     KSPCheckNorm(ksp,ksp->rnorm);
568:     res_old = ksp->rnorm; /* Record the residual norm to test if deflation is needed */

570:     ksp->ops->buildsolution = KSPBuildSolution_AGMRES;

572:     KSPAGMRESCycle(&its,ksp);
573:     if (ksp->its >= ksp->max_it) {
574:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
575:       break;
576:     }
577:     /* compute the eigenvectors to augment the subspace : use an adaptive strategy */
578:     res = ksp->rnorm;
579:     if (!ksp->reason && agmres->neig > 0) {
580:       test = agmres->max_k * PetscLogReal(ksp->rtol/res) / PetscLogReal(res/res_old); /* estimate the remaining number of steps */
581:       if ((test > agmres->smv*(ksp->max_it-ksp->its)) || agmres->force) {
582:         if (!agmres->force && ((test > agmres->bgv*(ksp->max_it-ksp->its)) && ((agmres->r + 1) < agmres->max_neig))) {
583:           agmres->neig += 1; /* Augment the number of eigenvalues to deflate if the convergence is too slow */
584:         }
585:         KSPDGMRESComputeDeflationData_DGMRES(ksp,&agmres->neig);
586:       }
587:     }
588:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
589:   }
590:   ksp->guess_zero = guess_zero; /* restore if user has provided nonzero initial guess */
591:   return(0);
592: }

594: static PetscErrorCode KSPDestroy_AGMRES(KSP ksp)
595: {
597:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;

600:   PetscFree(agmres->hh_origin);

602:   PetscFree(agmres->Qloc);
603:   PetscFree4(agmres->Rshift,agmres->Ishift,agmres->Rloc,agmres->wbufptr);
604:   PetscFree3(agmres->tau,agmres->work,agmres->nrs);
605:   PetscFree4(agmres->Scale,agmres->sgn,agmres->tloc,agmres->temp);

607:   PetscFree(agmres->select);
608:   PetscFree(agmres->wr);
609:   PetscFree(agmres->wi);
610:   if (agmres->neig) {
611:     VecDestroyVecs(MAXKSPSIZE,&agmres->TmpU);
612:     PetscFree(agmres->perm);
613:     PetscFree(agmres->MatEigL);
614:     PetscFree(agmres->MatEigR);
615:     PetscFree(agmres->Q);
616:     PetscFree(agmres->Z);
617:     PetscFree(agmres->beta);
618:   }
619:   KSPDestroy_DGMRES(ksp);
620:   return(0);
621: }


624: static PetscErrorCode KSPView_AGMRES(KSP ksp,PetscViewer viewer)
625: {
626:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
627:   const char     *cstr   = "RODDEC ORTHOGONOLIZATION";
628:   char           ritzvec[25];
630:   PetscBool      iascii,isstring;
631: #if defined(KSP_AGMRES_NONORM)
632:   const char *Nstr = "SCALING FACTORS : NO";
633: #else
634:   const char *Nstr = "SCALING FACTORS : YES";
635: #endif

638:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
639:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);

641:   if (iascii) {
642:     PetscViewerASCIIPrintf(viewer, " restart=%d using %s\n", agmres->max_k, cstr);
643:     PetscViewerASCIIPrintf(viewer, " %s\n", Nstr);
644:     PetscViewerASCIIPrintf(viewer, " Number of matvecs : %D\n", agmres->matvecs);
645:     if (agmres->force) {PetscViewerASCIIPrintf (viewer, " Adaptive strategy is used: FALSE\n");}
646:     else PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: TRUE\n");
647:     if (agmres->DeflPrecond) {
648:       ierr=PetscViewerASCIIPrintf(viewer, " STRATEGY OF DEFLATION: PRECONDITIONER \n");
649:       ierr=PetscViewerASCIIPrintf(viewer, "  Frequency of extracted eigenvalues = %D\n", agmres->neig);
650:       ierr=PetscViewerASCIIPrintf(viewer, "  Total number of extracted eigenvalues = %D\n", agmres->r);
651:       ierr=PetscViewerASCIIPrintf(viewer, "  Maximum number of eigenvalues set to be extracted = %D\n", agmres->max_neig);
652:     } else {
653:       if (agmres->ritz) sprintf(ritzvec, "Ritz vectors");
654:       else sprintf(ritzvec, "Harmonic Ritz vectors");
655:       PetscViewerASCIIPrintf(viewer, " STRATEGY OF DEFLATION: AUGMENT\n");
656:       PetscViewerASCIIPrintf(viewer," augmented vectors  %d at frequency %d with %s\n", agmres->r, agmres->neig, ritzvec);
657:     }
658:     ierr=PetscViewerASCIIPrintf(viewer, " Minimum relaxation parameter for the adaptive strategy(smv)  = %g\n", agmres->smv);
659:     ierr=PetscViewerASCIIPrintf(viewer, " Maximum relaxation parameter for the adaptive strategy(bgv)  = %g\n", agmres->bgv);
660:   } else if (isstring) {
661:     PetscViewerStringSPrintf(viewer,"%s restart %D",cstr,agmres->max_k);
662:   }
663:   return(0);
664: }

666: static PetscErrorCode KSPSetFromOptions_AGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
667: {
669:   PetscInt       neig;
670:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
671:   PetscBool      flg;

674:   KSPSetFromOptions_DGMRES(PetscOptionsObject,ksp);  /* Set common options from DGMRES and GMRES */
675:   PetscOptionsHead(PetscOptionsObject,"KSP AGMRES Options");
676:   PetscOptionsInt("-ksp_agmres_eigen", "Number of eigenvalues to deflate", "KSPDGMRESSetEigen", agmres->neig, &neig, &flg);
677:   if (flg) {
678:     KSPDGMRESSetEigen_DGMRES(ksp, neig);
679:     agmres->r = 0;
680:   } else agmres->neig = 0;
681:   PetscOptionsInt("-ksp_agmres_maxeigen", "Maximum number of eigenvalues to deflate", "KSPDGMRESSetMaxEigen", agmres->max_neig, &neig, &flg);
682:   if (flg) agmres->max_neig = neig+EIG_OFFSET;
683:   else agmres->max_neig = agmres->neig+EIG_OFFSET;
684:   PetscOptionsBool("-ksp_agmres_DeflPrecond", "Determine if the deflation should be applied as a preconditioner -- similar to KSP DGMRES", "KSPGMRESDeflPrecond",agmres->DeflPrecond,&agmres->DeflPrecond,NULL);
685:   PetscOptionsBool("-ksp_agmres_ritz", "Compute the Ritz vectors instead of the Harmonic Ritz vectors ", "KSPGMRESHarmonic",agmres->ritz,&agmres->ritz ,&flg);
686:   PetscOptionsReal("-ksp_agmres_MinRatio", "Relaxation parameter in the adaptive strategy; smallest multiple of the remaining number of steps allowed", "KSPGMRESSetMinRatio", agmres->smv, &agmres->smv, NULL);
687:   PetscOptionsReal("-ksp_agmres_MaxRatio", "Relaxation parameter in the adaptive strategy; Largest multiple of the remaining number of steps allowed", "KSPGMRESSetMaxRatio",agmres->bgv,&agmres->bgv, &flg);
688:   PetscOptionsTail();
689:   return(0);
690: }


693: /*MC
694:  KSPAGMRES - Newton basis GMRES implementation with adaptive augmented eigenvectors

696: The techniques used are best described in [1]. The contribution of this work is that it combines many of the previous work to reduce the amount of MPI messages and improve the robustness of the global approach by using deflation techniques. It has been successfully applied to a class of real and industrial problems. Please see [1] for numerical experiments and [2] for a description of these problems.
697: There are  many ongoing work that aim at avoiding (or minimizing) the communication in Krylov subspace methods. This code can be used as an experimental framework to combine several techniques in the particular case of GMRES. For instance, the computation of the shifts can be improved with techniques described in [3]. The orthogonalization technique can be replaced by TSQR [4]. The generation of the basis can be done using s-steps approaches[5].


700:  Options Database Keys:
701:  +   -ksp_gmres_restart <restart> -  the number of Krylov directions
702:  .   -ksp_gmres_krylov_monitor - plot the Krylov space generated
703:  .   -ksp_agmres_eigen <neig> - Number of eigenvalues to deflate (Number of vectors to augment)
704:  .   -ksp_agmres_maxeigen <max_neig> - Maximum number of eigenvalues to deflate
705:  .   -ksp_agmres_MinRatio <1> - Relaxation parameter in the adaptive strategy; smallest multiple of the remaining number of steps allowed
706:  .   -ksp_agmres_MaxRatio <1> - Relaxation parameter in the adaptive strategy; Largest multiple of the remaining number of steps allowed
707:  .   --ksp_agmres_DeflPrecond  Apply deflation as a preconditioner, this is similar to DGMRES but it rather builds a Newton basis.  This is an experimental option.
708:  .   -ksp_dgmres_force <0, 1> - Force the deflation at each restart.
709:  .   - There are many experimental parameters. Run with -help option to see the whole list

711:  Level: beginner

713:  Notes:
714:     Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported

716:  Developer Notes:
717:     This object is subclassed off of KSPDGMRES

719:  Contributed by Desire NUENTSA WAKAM, INRIA <desire.nuentsa_wakam@inria.fr>
720:  Inputs from Guy Atenekeng <atenekeng@yahoo.com> and R.B. Sidje <roger.b.sidje@ua.edu>

722:  References :
723:  +   [1] D. Nuentsa Wakam and J. Erhel, Parallelism and robustness in GMRES with the Newton basis and the deflated restarting. Research report INRIA RR-7787, November 2011,https://hal.inria.fr/inria-00638247/en,  in revision for ETNA.
724:  .  [2] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
725:  .  [3] B. Philippe and L. Reichel, On the generation of Krylov subspace bases, Applied Numerical
726: Mathematics, 62(9), pp. 1171-1186, 2012
727:  .  [4] J. Demmel, L. Grigori, M. F. Hoemmen, and J. Langou, Communication-optimal parallel and sequential QR and LU factorizations, SIAM journal on Scientific Computing, 34(1), A206-A239, 2012
728:  .  [5] M. Mohiyuddin, M. Hoemmen, J. Demmel, and K. Yelick, Minimizing communication in sparse matrix solvers, in SC '09: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis, New York, NY, USA, 2009, ACM, pp. 1154-1171.
729:  .    Sidje, Roger B. Alternatives for parallel Krylov subspace basis computation. Numer. Linear Algebra Appl. 4 (1997), no. 4, 305-331

731:  .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPDGMRES, KSPPGMRES,
732:  KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
733:  KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
734:  KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
735:  M*/

737: PETSC_EXTERN PetscErrorCode KSPCreate_AGMRES(KSP ksp)
738: {
739:   KSP_AGMRES     *agmres;

743:   PetscNewLog(ksp,&agmres);
744:   ksp->data = (void*)agmres;

746:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
747:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
748:   ksp->ops->buildsolution                = KSPBuildSolution_AGMRES;
749:   ksp->ops->setup                        = KSPSetUp_AGMRES;
750:   ksp->ops->solve                        = KSPSolve_AGMRES;
751:   ksp->ops->destroy                      = KSPDestroy_AGMRES;
752:   ksp->ops->view                         = KSPView_AGMRES;
753:   ksp->ops->setfromoptions               = KSPSetFromOptions_AGMRES;
754:   ksp->guess_zero                        = PETSC_TRUE;
755:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
756:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;


759:   PetscObjectComposeFunction((PetscObject) ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
760:   PetscObjectComposeFunction((PetscObject) ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
761:   PetscObjectComposeFunction((PetscObject) ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
762:   PetscObjectComposeFunction((PetscObject) ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
763:   PetscObjectComposeFunction((PetscObject) ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
764:   /* -- New functions defined in DGMRES -- */
765:   ierr=PetscObjectComposeFunction((PetscObject) ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
766:   ierr=PetscObjectComposeFunction((PetscObject) ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
767:   ierr=PetscObjectComposeFunction((PetscObject) ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
768:   ierr=PetscObjectComposeFunction((PetscObject) ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);

770:   PetscLogEventRegister("AGMRESCompDefl",   KSP_CLASSID, &KSP_AGMRESComputeDeflationData);
771:   PetscLogEventRegister("AGMRESBuildBasis", KSP_CLASSID, &KSP_AGMRESBuildBasis);
772:   PetscLogEventRegister("AGMRESCompShifts", KSP_CLASSID, &KSP_AGMRESComputeShifts);
773:   PetscLogEventRegister("AGMRESOrthog",     KSP_CLASSID, &KSP_AGMRESRoddec);

775:   agmres->haptol         = 1.0e-30;
776:   agmres->q_preallocate  = 0;
777:   agmres->delta_allocate = AGMRES_DELTA_DIRECTIONS;
778:   agmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
779:   agmres->nrs            = 0;
780:   agmres->sol_temp       = 0;
781:   agmres->max_k          = AGMRES_DEFAULT_MAXK;
782:   agmres->Rsvd           = 0;
783:   agmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
784:   agmres->orthogwork     = 0;

786:   /* Default values for the deflation */
787:   agmres->r           = 0;
788:   agmres->neig        = 0;
789:   agmres->max_neig    = 0;
790:   agmres->lambdaN     = 0.0;
791:   agmres->smv         = SMV;
792:   agmres->bgv         = 1;
793:   agmres->force       = PETSC_FALSE;
794:   agmres->matvecs     = 0;
795:   agmres->improve     = PETSC_FALSE;
796:   agmres->HasShifts   = PETSC_FALSE;
797:   agmres->r           = 0;
798:   agmres->HasSchur    = PETSC_FALSE;
799:   agmres->DeflPrecond = PETSC_FALSE;
800:   PetscObjectGetNewTag((PetscObject)ksp,&agmres->tag);
801:   return(0);
802: }

804: /*  LocalWords:  iascii
805:  */