Actual source code: dgmres.c

petsc-master 2020-09-18
Report Typos and Errors
  1: /*
  2:  This file implements the deflated GMRES.

  4:  */

  6: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>

  8: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;

 10: #define GMRES_DELTA_DIRECTIONS 10
 11: #define GMRES_DEFAULT_MAXK     30
 12: static PetscErrorCode    KSPDGMRESGetNewVectors(KSP,PetscInt);
 13: static PetscErrorCode    KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
 14: static PetscErrorCode    KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);

 16: PetscErrorCode  KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
 17: {

 21:   PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
 22:   return(0);
 23: }
 24: PetscErrorCode  KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
 25: {

 29:   PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
 30:   return(0);
 31: }
 32: PetscErrorCode  KSPDGMRESForce(KSP ksp,PetscBool force)
 33: {

 37:   PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscBool),(ksp,force));
 38:   return(0);
 39: }
 40: PetscErrorCode  KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
 41: {

 45:   PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
 46:   return(0);
 47: }
 48: PetscErrorCode  KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
 49: {

 53:   PetscUseMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
 54:   return(0);
 55: }
 56: PetscErrorCode  KSPDGMRESComputeDeflationData(KSP ksp,PetscInt *curneigh)
 57: {

 61:   PetscUseMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP,PetscInt*),(ksp,curneigh));
 62:   return(0);
 63: }
 64: PetscErrorCode  KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
 65: {

 69:   PetscUseMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
 70:   return(0);
 71: }

 73: PetscErrorCode  KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
 74: {

 78:   PetscUseMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
 79:   return(0);
 80: }

 82: PetscErrorCode  KSPSetUp_DGMRES(KSP ksp)
 83: {
 85:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
 86:   PetscInt       neig    = dgmres->neig+EIG_OFFSET;
 87:   PetscInt       max_k   = dgmres->max_k+1;

 90:   KSPSetUp_GMRES(ksp);
 91:   if (!dgmres->neig) return(0);

 93:   /* Allocate workspace for the Schur vectors*/
 94:   PetscMalloc1(neig*max_k, &SR);
 95:   dgmres->wr    = NULL;
 96:   dgmres->wi    = NULL;
 97:   dgmres->perm  = NULL;
 98:   dgmres->modul = NULL;
 99:   dgmres->Q     = NULL;
100:   dgmres->Z     = NULL;

102:   UU   = NULL;
103:   XX   = NULL;
104:   MX   = NULL;
105:   AUU  = NULL;
106:   XMX  = NULL;
107:   XMU  = NULL;
108:   UMX  = NULL;
109:   AUAU = NULL;
110:   TT   = NULL;
111:   TTF  = NULL;
112:   INVP = NULL;
113:   X1   = NULL;
114:   X2   = NULL;
115:   MU   = NULL;
116:   return(0);
117: }

119: /*
120:  Run GMRES, possibly with restart.  Return residual history if requested.
121:  input parameters:

123:  .       gmres  - structure containing parameters and work areas

125:  output parameters:
126:  .        nres    - residuals (from preconditioned system) at each step.
127:  If restarting, consider passing nres+it.  If null,
128:  ignored
129:  .        itcount - number of iterations used.  nres[0] to nres[itcount]
130:  are defined.  If null, ignored.

132:  Notes:
133:  On entry, the value in vector VEC_VV(0) should be the initial residual
134:  (this allows shortcuts where the initial preconditioned residual is 0).
135:  */
136: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
137: {
138:   KSP_DGMRES     *dgmres = (KSP_DGMRES*)(ksp->data);
139:   PetscReal      res_norm,res,hapbnd,tt;
141:   PetscInt       it     = 0;
142:   PetscInt       max_k  = dgmres->max_k;
143:   PetscBool      hapend = PETSC_FALSE;
144:   PetscReal      res_old;
145:   PetscInt       test = 0;

148:   VecNormalize(VEC_VV(0),&res_norm);
149:   KSPCheckNorm(ksp,res_norm);
150:   res     = res_norm;
151:   *GRS(0) = res_norm;

153:   /* check for the convergence */
154:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
155:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
156:   else ksp->rnorm = 0.0;
157:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
158:   dgmres->it = (it - 1);
159:   KSPLogResidualHistory(ksp,ksp->rnorm);
160:   KSPMonitor(ksp,ksp->its,ksp->rnorm);
161:   if (!res) {
162:     if (itcount) *itcount = 0;
163:     ksp->reason = KSP_CONVERGED_ATOL;
164:     PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
165:     return(0);
166:   }
167:   /* record the residual norm to test if deflation is needed */
168:   res_old = res;

170:   (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);
171:   while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
172:     if (it) {
173:       KSPLogResidualHistory(ksp,ksp->rnorm);
174:       KSPMonitor(ksp,ksp->its,ksp->rnorm);
175:     }
176:     dgmres->it = (it - 1);
177:     if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
178:       KSPDGMRESGetNewVectors(ksp,it+1);
179:     }
180:     if (dgmres->r > 0) {
181:       if (ksp->pc_side == PC_LEFT) {
182:         /* Apply the first preconditioner */
183:         KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
184:         /* Then apply Deflation as a preconditioner */
185:         KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
186:       } else if (ksp->pc_side == PC_RIGHT) {
187:         KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
188:         KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
189:       }
190:     } else {
191:       KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
192:     }
193:     dgmres->matvecs += 1;
194:     /* update hessenberg matrix and do Gram-Schmidt */
195:     (*dgmres->orthog)(ksp,it);

197:     /* vv(i+1) . vv(i+1) */
198:     VecNormalize(VEC_VV(it+1),&tt);
199:     /* save the magnitude */
200:     *HH(it+1,it)  = tt;
201:     *HES(it+1,it) = tt;

203:     /* check for the happy breakdown */
204:     hapbnd = PetscAbsScalar(tt / *GRS(it));
205:     if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
206:     if (tt < hapbnd) {
207:       PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %g tt = %g\n",(double)hapbnd,(double)tt);
208:       hapend = PETSC_TRUE;
209:     }
210:     KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);

212:     it++;
213:     dgmres->it = (it-1);     /* For converged */
214:     ksp->its++;
215:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
216:     else ksp->rnorm = 0.0;
217:     if (ksp->reason) break;

219:     (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);

221:     /* Catch error in happy breakdown and signal convergence and break from loop */
222:     if (hapend) {
223:       if (!ksp->reason) {
224:         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res);
225:         else {
226:           ksp->reason = KSP_DIVERGED_BREAKDOWN;
227:           break;
228:         }
229:       }
230:     }
231:   }

233:   /* Monitor if we know that we will not return for a restart */
234:   if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
235:     KSPLogResidualHistory(ksp,ksp->rnorm);
236:     KSPMonitor(ksp,ksp->its,ksp->rnorm);
237:   }
238:   if (itcount) *itcount = it;

240:   /*
241:    Down here we have to solve for the "best" coefficients of the Krylov
242:    columns, add the solution values together, and possibly unwind the
243:    preconditioning from the solution
244:    */
245:   /* Form the solution (or the solution so far) */
246:   KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);

248:   /* Compute data for the deflation to be used during the next restart */
249:   if (!ksp->reason && ksp->its < ksp->max_it) {
250:     test = max_k *PetscLogReal(ksp->rtol/res) /PetscLogReal(res/res_old);
251:     /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
252:     if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
253:        KSPDGMRESComputeDeflationData(ksp,NULL);
254:     }
255:   }
256:   return(0);
257: }

259: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
260: {
262:   PetscInt       i,its,itcount;
263:   KSP_DGMRES     *dgmres    = (KSP_DGMRES*) ksp->data;
264:   PetscBool      guess_zero = ksp->guess_zero;

267:   if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");

269:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
270:   ksp->its        = 0;
271:   dgmres->matvecs = 0;
272:   PetscObjectSAWsGrantAccess((PetscObject)ksp);

274:   itcount     = 0;
275:   ksp->reason = KSP_CONVERGED_ITERATING;
276:   while (!ksp->reason) {
277:     KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
278:     if (ksp->pc_side == PC_LEFT) {
279:       dgmres->matvecs += 1;
280:       if (dgmres->r > 0) {
281:         KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
282:         VecCopy(VEC_TEMP, VEC_VV(0));
283:       }
284:     }

286:     KSPDGMRESCycle(&its,ksp);
287:     itcount += its;
288:     if (itcount >= ksp->max_it) {
289:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
290:       break;
291:     }
292:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
293:   }
294:   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */

296:   for (i = 0; i < dgmres->r; i++) {
297:     VecViewFromOptions(UU[i],(PetscObject)ksp,"-ksp_dgmres_view_deflation_vecs");
298:   }
299:   return(0);
300: }

302: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
303: {
305:   KSP_DGMRES     *dgmres  = (KSP_DGMRES*) ksp->data;
306:   PetscInt       neig1    = dgmres->neig+EIG_OFFSET;
307:   PetscInt       max_neig = dgmres->max_neig;

310:   if (dgmres->r) {
311:     VecDestroyVecs(max_neig, &UU);
312:     VecDestroyVecs(max_neig, &MU);
313:     if (XX) {
314:       VecDestroyVecs(neig1, &XX);
315:       VecDestroyVecs(neig1, &MX);
316:     }

318:     PetscFree(TT);
319:     PetscFree(TTF);
320:     PetscFree(INVP);

322:     PetscFree(XMX);
323:     PetscFree(UMX);
324:     PetscFree(XMU);
325:     PetscFree(X1);
326:     PetscFree(X2);
327:     PetscFree(dgmres->work);
328:     PetscFree(dgmres->iwork);
329:     PetscFree(dgmres->wr);
330:     PetscFree(dgmres->wi);
331:     PetscFree(dgmres->modul);
332:     PetscFree(dgmres->Q);
333:     PetscFree(ORTH);
334:     PetscFree(AUAU);
335:     PetscFree(AUU);
336:     PetscFree(SR2);
337:   }
338:   PetscFree(SR);
339:   KSPDestroy_GMRES(ksp);
340:   return(0);
341: }
342: /*
343:  KSPDGMRESBuildSoln - create the solution from the starting vector and the
344:  current iterates.

346:  Input parameters:
347:  nrs - work area of size it + 1.
348:  vs  - index of initial guess
349:  vdest - index of result.  Note that vs may == vdest (replace
350:  guess with the solution).

352:  This is an internal routine that knows about the GMRES internals.
353:  */
354: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
355: {
356:   PetscScalar    tt;
358:   PetscInt       ii,k,j;
359:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) (ksp->data);

361:   /* Solve for solution vector that minimizes the residual */

364:   /* If it is < 0, no gmres steps have been performed */
365:   if (it < 0) {
366:     VecCopy(vs,vdest);     /* VecCopy() is smart, exists immediately if vguess == vdest */
367:     return(0);
368:   }
369:   if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %g",it,(double)PetscAbsScalar(*GRS(it)));
370:   if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
371:   else nrs[it] = 0.0;

373:   for (ii=1; ii<=it; ii++) {
374:     k  = it - ii;
375:     tt = *GRS(k);
376:     for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
377:     if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
378:     nrs[k] = tt / *HH(k,k);
379:   }

381:   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
382:   VecSet(VEC_TEMP,0.0);
383:   VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));

385:   /* Apply deflation */
386:   if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
387:     KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
388:     VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
389:   }
390:   KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);

392:   /* add solution to previous solution */
393:   if (vdest != vs) {
394:     VecCopy(vs,vdest);
395:   }
396:   VecAXPY(vdest,1.0,VEC_TEMP);
397:   return(0);
398: }
399: /*
400:  Do the scalar work for the orthogonalization.  Return new residual norm.
401:  */
402: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
403: {
404:   PetscScalar *hh,*cc,*ss,tt;
405:   PetscInt    j;
406:   KSP_DGMRES  *dgmres = (KSP_DGMRES*) (ksp->data);

409:   hh = HH(0,it);
410:   cc = CC(0);
411:   ss = SS(0);

413:   /* Apply all the previously computed plane rotations to the new column
414:    of the Hessenberg matrix */
415:   for (j=1; j<=it; j++) {
416:     tt  = *hh;
417:     *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
418:     hh++;
419:     *hh = *cc++ * *hh -(*ss++ * tt);
420:   }

422:   /*
423:    compute the new plane rotation, and apply it to:
424:    1) the right-hand-side of the Hessenberg system
425:    2) the new column of the Hessenberg matrix
426:    thus obtaining the updated value of the residual
427:    */
428:   if (!hapend) {
429:     tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
430:     if (tt == 0.0) {
431:       ksp->reason = KSP_DIVERGED_NULL;
432:       return(0);
433:     }
434:     *cc        = *hh / tt;
435:     *ss        = *(hh+1) / tt;
436:     *GRS(it+1) = -(*ss * *GRS(it));
437:     *GRS(it)   = PetscConj(*cc) * *GRS(it);
438:     *hh        = PetscConj(*cc) * *hh + *ss * *(hh+1);
439:     *res       = PetscAbsScalar(*GRS(it+1));
440:   } else {
441:     /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
442:      another rotation matrix (so RH doesn't change).  The new residual is
443:      always the new sine term times the residual from last time (GRS(it)),
444:      but now the new sine rotation would be zero...so the residual should
445:      be zero...so we will multiply "zero" by the last residual.  This might
446:      not be exactly what we want to do here -could just return "zero". */

448:     *res = 0.0;
449:   }
450:   return(0);
451: }
452: /*
453:  This routine allocates more work vectors, starting from VEC_VV(it).
454:  */
455: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
456: {
457:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
459:   PetscInt       nwork = dgmres->nwork_alloc,k,nalloc;

462:   nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
463:   /* Adjust the number to allocate to make sure that we don't exceed the
464:    number of available slots */
465:   if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
466:     nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
467:   }
468:   if (!nalloc) return(0);

470:   dgmres->vv_allocated += nalloc;

472:   KSPCreateVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
473:   PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);

475:   dgmres->mwork_alloc[nwork] = nalloc;
476:   for (k=0; k<nalloc; k++) {
477:     dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
478:   }
479:   dgmres->nwork_alloc++;
480:   return(0);
481: }

483: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
484: {
485:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;

489:   if (!ptr) {
490:     if (!dgmres->sol_temp) {
491:       VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
492:       PetscLogObjectParent((PetscObject)ksp,(PetscObject)dgmres->sol_temp);
493:     }
494:     ptr = dgmres->sol_temp;
495:   }
496:   if (!dgmres->nrs) {
497:     /* allocate the work area */
498:     PetscMalloc1(dgmres->max_k,&dgmres->nrs);
499:     PetscLogObjectMemory((PetscObject)ksp,dgmres->max_k*sizeof(PetscScalar));
500:   }

502:   KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
503:   if (result) *result = ptr;
504:   return(0);
505: }

507: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
508: {
509:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
511:   PetscBool      iascii,isharmonic;

514:   KSPView_GMRES(ksp,viewer);
515:   PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
516:   if (iascii) {
517:     if (dgmres->force) PetscViewerASCIIPrintf(viewer, "    Adaptive strategy is used: FALSE\n");
518:     else PetscViewerASCIIPrintf(viewer, "    Adaptive strategy is used: TRUE\n");
519:     PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic);
520:     if (isharmonic) {
521:       PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
522:     } else {
523:       PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
524:     }
525:     PetscViewerASCIIPrintf(viewer, "   Total number of extracted eigenvalues = %D\n", dgmres->r);
526:     PetscViewerASCIIPrintf(viewer, "   Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
527:     PetscViewerASCIIPrintf(viewer, "   relaxation parameter for the adaptive strategy(smv)  = %g\n", dgmres->smv);
528:     PetscViewerASCIIPrintf(viewer, "   Number of matvecs : %D\n", dgmres->matvecs);
529:   }
530:   return(0);
531: }

533: /* New DGMRES functions */

535: PetscErrorCode  KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
536: {
537:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

540:   if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
541:   dgmres->neig=neig;
542:   return(0);
543: }

545: static PetscErrorCode  KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
546: {
547:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

550:   if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
551:   dgmres->max_neig=max_neig;
552:   return(0);
553: }

555: static PetscErrorCode  KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
556: {
557:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

560:   if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
561:   dgmres->smv=ratio;
562:   return(0);
563: }

565: static PetscErrorCode  KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)
566: {
567:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

570:   dgmres->force = force;
571:   return(0);
572: }

574: PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
575: {
577:   PetscInt       neig;
578:   PetscInt       max_neig;
579:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
580:   PetscBool      flg;

583:   KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
584:   PetscOptionsHead(PetscOptionsObject,"KSP DGMRES Options");
585:   PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
586:   if (flg) {
587:     KSPDGMRESSetEigen(ksp, neig);
588:   }
589:   PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
590:   if (flg) {
591:     KSPDGMRESSetMaxEigen(ksp, max_neig);
592:   }
593:   PetscOptionsReal("-ksp_dgmres_ratio","Relaxation parameter for the smaller number of matrix-vectors product allowed","KSPDGMRESSetRatio",dgmres->smv,&dgmres->smv,NULL);
594:   PetscOptionsBool("-ksp_dgmres_improve","Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)",NULL,dgmres->improve,&dgmres->improve,NULL);
595:   PetscOptionsBool("-ksp_dgmres_force","Sets DGMRES always at restart active, i.e do not use the adaptive strategy","KSPDGMRESForce",dgmres->force,&dgmres->force,NULL);
596:   PetscOptionsTail();
597:   return(0);
598: }

600: PetscErrorCode  KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
601: {
602:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
604:   PetscInt       i,j, k;
605:   PetscBLASInt   nr, bmax;
606:   PetscInt       r = dgmres->r;
607:   PetscInt       neig;          /* number of eigenvalues to extract at each restart */
608:   PetscInt       neig1    = dgmres->neig + EIG_OFFSET;  /* max number of eig that can be extracted at each restart */
609:   PetscInt       max_neig = dgmres->max_neig;  /* Max number of eigenvalues to extract during the iterative process */
610:   PetscInt       N        = dgmres->max_k+1;
611:   PetscInt       n        = dgmres->it+1;
612:   PetscReal      alpha;

615:   PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
616:   if (dgmres->neig == 0 || (max_neig < (r+neig1) && !dgmres->improve)) {
617:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
618:     return(0);
619:   }

621:    KSPDGMRESComputeSchurForm(ksp, &neig);
622:   /* Form the extended Schur vectors X=VV*Sr */
623:   if (!XX) {
624:     VecDuplicateVecs(VEC_VV(0), neig1, &XX);
625:   }
626:   for (j = 0; j<neig; j++) {
627:     VecZeroEntries(XX[j]);
628:     VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
629:   }

631:   /* Orthogonalize X against U */
632:   if (!ORTH) {
633:     PetscMalloc1(max_neig, &ORTH);
634:   }
635:   if (r > 0) {
636:     /* modified Gram-Schmidt */
637:     for (j = 0; j<neig; j++) {
638:       for (i=0; i<r; i++) {
639:         /* First, compute U'*X[j] */
640:         VecDot(XX[j], UU[i], &alpha);
641:         /* Then, compute X(j)=X(j)-U*U'*X(j) */
642:         VecAXPY(XX[j], -alpha, UU[i]);
643:       }
644:     }
645:   }
646:   /* Compute MX = M^{-1}*A*X */
647:   if (!MX) {
648:     VecDuplicateVecs(VEC_VV(0), neig1, &MX);
649:   }
650:   for (j = 0; j<neig; j++) {
651:     KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
652:   }
653:   dgmres->matvecs += neig;

655:   if ((r+neig1) > max_neig && dgmres->improve) {    /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
656:     KSPDGMRESImproveEig(ksp, neig);
657:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
658:     return(0);   /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
659:   }

661:   /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
662:   if (!XMX) {
663:     PetscMalloc1(neig1*neig1, &XMX);
664:   }
665:   for (j = 0; j < neig; j++) {
666:     VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
667:   }

669:   if (r > 0) {
670:     /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
671:     if (!UMX) {
672:       PetscMalloc1(max_neig*neig1, &UMX);
673:     }
674:     for (j = 0; j < neig; j++) {
675:       VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
676:     }
677:     /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
678:     if (!XMU) {
679:       PetscMalloc1(max_neig*neig1, &XMU);
680:     }
681:     for (j = 0; j<r; j++) {
682:       VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
683:     }
684:   }

686:   /* Form the new matrix T = [T UMX; XMU XMX]; */
687:   if (!TT) {
688:     PetscMalloc1(max_neig*max_neig, &TT);
689:   }
690:   if (r > 0) {
691:     /* Add XMU to T */
692:     for (j = 0; j < r; j++) {
693:       PetscArraycpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig);
694:     }
695:     /* Add [UMX; XMX] to T */
696:     for (j = 0; j < neig; j++) {
697:       k = r+j;
698:       PetscArraycpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r);
699:       PetscArraycpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig);
700:     }
701:   } else { /* Add XMX to T */
702:     for (j = 0; j < neig; j++) {
703:       PetscArraycpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig);
704:     }
705:   }

707:   dgmres->r += neig;
708:   r          = dgmres->r;
709:   PetscBLASIntCast(r,&nr);
710:   /*LU Factorize T with Lapack xgetrf routine */

712:   PetscBLASIntCast(max_neig,&bmax);
713:   if (!TTF) {
714:     PetscMalloc1(bmax*bmax, &TTF);
715:   }
716:   PetscArraycpy(TTF, TT, bmax*r);
717:   if (!INVP) {
718:     PetscMalloc1(bmax, &INVP);
719:   }
720:   {
721:     PetscBLASInt info;
722:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
723:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
724:   }

726:   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
727:   if (!UU) {
728:     VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
729:     VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
730:   }
731:   for (j=0; j<neig; j++) {
732:     VecCopy(XX[j], UU[r-neig+j]);
733:     VecCopy(MX[j], MU[r-neig+j]);
734:   }
735:   PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
736:   return(0);
737: }

739: PetscErrorCode  KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
740: {
741:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
743:   PetscInt       N = dgmres->max_k + 1, n=dgmres->it+1;
744:   PetscBLASInt   bn, bN;
745:   PetscReal      *A;
746:   PetscBLASInt   ihi;
747:   PetscBLASInt   ldA;          /* leading dimension of A */
748:   PetscBLASInt   ldQ;          /* leading dimension of Q */
749:   PetscReal      *Q;           /*  orthogonal matrix of  (left) schur vectors */
750:   PetscReal      *work;        /* working vector */
751:   PetscBLASInt   lwork;        /* size of the working vector */
752:   PetscInt       *perm;        /* Permutation vector to sort eigenvalues */
753:   PetscInt       i, j;
754:   PetscBLASInt   NbrEig;       /* Number of eigenvalues really extracted */
755:   PetscReal      *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
756:   PetscBLASInt   *select;
757:   PetscBLASInt   *iwork;
758:   PetscBLASInt   liwork;
759:   PetscScalar    *Ht;           /* Transpose of the Hessenberg matrix */
760:   PetscScalar    *t;            /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
761:   PetscBLASInt   *ipiv;         /* Permutation vector to be used in LAPACK */
762:   PetscBool      flag;            /* determine whether to use Ritz vectors or harmonic Ritz vectors */

765:   PetscBLASIntCast(n,&bn);
766:   PetscBLASIntCast(N,&bN);
767:   ihi  = ldQ = bn;
768:   ldA  = bN;
769:   PetscBLASIntCast(5*N,&lwork);

771: #if defined(PETSC_USE_COMPLEX)
772:   SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
773: #endif

775:   PetscMalloc1(ldA*ldA, &A);
776:   PetscMalloc1(ldQ*n, &Q);
777:   PetscMalloc1(lwork, &work);
778:   if (!dgmres->wr) {
779:     PetscMalloc1(n, &dgmres->wr);
780:     PetscMalloc1(n, &dgmres->wi);
781:   }
782:   wr   = dgmres->wr;
783:   wi   = dgmres->wi;
784:   PetscMalloc1(n,&modul);
785:   PetscMalloc1(n,&perm);
786:   /* copy the Hessenberg matrix to work space */
787:   PetscArraycpy(A, dgmres->hes_origin, ldA*ldA);
788:   PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag);
789:   if (flag) {
790:     /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
791:     /* Transpose the Hessenberg matrix */
792:     PetscMalloc1(bn*bn, &Ht);
793:     for (i = 0; i < bn; i++) {
794:       for (j = 0; j < bn; j++) {
795:         Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
796:       }
797:     }

799:     /* Solve the system H^T*t = h_{m+1,m}e_m */
800:     PetscCalloc1(bn, &t);
801:     t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
802:     PetscMalloc1(bn, &ipiv);
803:     /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
804:     {
805:       PetscBLASInt info;
806:       PetscBLASInt nrhs = 1;
807:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
808:       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
809:     }
810:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
811:     for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
812:     PetscFree(t);
813:     PetscFree(Ht);
814:   }
815:   /* Compute eigenvalues with the Schur form */
816:   {
817:     PetscBLASInt info=0;
818:     PetscBLASInt ilo = 1;
819:     PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
820:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
821:   }
822:   PetscFree(work);

824:   /* sort the eigenvalues */
825:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
826:   for (i=0; i<n; i++) perm[i] = i;

828:   PetscSortRealWithPermutation(n, modul, perm);
829:   /* save the complex modulus of the largest eigenvalue in magnitude */
830:   if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
831:   /* count the number of extracted eigenvalues (with complex conjugates) */
832:   NbrEig = 0;
833:   while (NbrEig < dgmres->neig) {
834:     if (wi[perm[NbrEig]] != 0) NbrEig += 2;
835:     else NbrEig += 1;
836:   }
837:   /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

839:   PetscCalloc1(n, &select);

841:   if (!dgmres->GreatestEig) {
842:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
843:   } else {
844:     for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
845:   }
846:   /* call Lapack dtrsen */
847:   lwork  =  PetscMax(1, 4 * NbrEig *(bn-NbrEig));
848:   liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
849:   PetscMalloc1(lwork, &work);
850:   PetscMalloc1(liwork, &iwork);
851:   {
852:     PetscBLASInt info=0;
853:     PetscReal    CondEig;         /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
854:     PetscReal    CondSub;         /* estimated reciprocal condition number of the specified invariant subspace. */
855:     PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
856:     if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
857:   }
858:   PetscFree(select);

860:   /* Extract the Schur vectors */
861:   for (j = 0; j < NbrEig; j++) {
862:     PetscArraycpy(&SR[j*N], &(Q[j*ldQ]), n);
863:   }
864:   *neig = NbrEig;
865:   PetscFree(A);
866:   PetscFree(work);
867:   PetscFree(perm);
868:   PetscFree(work);
869:   PetscFree(iwork);
870:   PetscFree(modul);
871:   PetscFree(Q);
872:   return(0);
873: }

875: PetscErrorCode  KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
876: {
877:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
878:   PetscInt       i, r     = dgmres->r;
880:   PetscReal      alpha    = 1.0;
881:   PetscInt       max_neig = dgmres->max_neig;
882:   PetscBLASInt   br,bmax;
883:   PetscReal      lambda = dgmres->lambdaN;

886:   PetscBLASIntCast(r,&br);
887:   PetscBLASIntCast(max_neig,&bmax);
888:   PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
889:   if (!r) {
890:     VecCopy(x,y);
891:     return(0);
892:   }
893:   /* Compute U'*x */
894:   if (!X1) {
895:     PetscMalloc1(bmax, &X1);
896:     PetscMalloc1(bmax, &X2);
897:   }
898:   VecMDot(x, r, UU, X1);

900:   /* Solve T*X1=X2 for X1*/
901:   PetscArraycpy(X2, X1, br);
902:   {
903:     PetscBLASInt info;
904:     PetscBLASInt nrhs = 1;
905:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
906:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
907:   }
908:   /* Iterative refinement -- is it really necessary ?? */
909:   if (!WORK) {
910:     PetscMalloc1(3*bmax, &WORK);
911:     PetscMalloc1(bmax, &IWORK);
912:   }
913:   {
914:     PetscBLASInt info;
915:     PetscReal    berr, ferr;
916:     PetscBLASInt nrhs = 1;
917:     PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
918:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
919:   }

921:   for (i = 0; i < r; i++) X2[i] =  X1[i]/lambda - X2[i];

923:   /* Compute X2=U*X2 */
924:   VecZeroEntries(y);
925:   VecMAXPY(y, r, X2, UU);
926:   VecAXPY(y, alpha, x);

928:   PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
929:   return(0);
930: }

932: static PetscErrorCode  KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
933: {
934:   KSP_DGMRES   *dgmres = (KSP_DGMRES*) ksp->data;
935:   PetscInt     j,r_old, r = dgmres->r;
936:   PetscBLASInt i     = 0;
937:   PetscInt     neig1 = dgmres->neig + EIG_OFFSET;
938:   PetscInt     bmax  = dgmres->max_neig;
939:   PetscInt     aug   = r + neig;         /* actual size of the augmented invariant basis */
940:   PetscInt     aug1  = bmax+neig1;       /* maximum size of the augmented invariant basis */
941:   PetscBLASInt ldA;            /* leading dimension of AUAU and AUU*/
942:   PetscBLASInt N;              /* size of AUAU */
943:   PetscReal    *Q;             /*  orthogonal matrix of  (left) schur vectors */
944:   PetscReal    *Z;             /*  orthogonal matrix of  (right) schur vectors */
945:   PetscReal    *work;          /* working vector */
946:   PetscBLASInt lwork;          /* size of the working vector */
947:   PetscInt     *perm;          /* Permutation vector to sort eigenvalues */
948:   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
949:   PetscInt     ierr;
950:   PetscBLASInt NbrEig = 0,nr,bm;
951:   PetscBLASInt *select;
952:   PetscBLASInt liwork, *iwork;

955:   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
956:   if (!AUU) {
957:     PetscMalloc1(aug1*aug1, &AUU);
958:     PetscMalloc1(aug1*aug1, &AUAU);
959:   }
960:   /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
961:    * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
962:   /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
963:   for (j=0; j < r; j++) {
964:     VecMDot(UU[j], r, MU, &AUU[j*aug1]);
965:   }
966:   /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
967:   for (j = 0; j < neig; j++) {
968:     VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
969:   }
970:   /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
971:   for (j = 0; j < r; j++) {
972:     VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
973:   }
974:   /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
975:   for (j = 0; j < neig; j++) {
976:     VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
977:   }

979:   /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
980:   /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
981:   for (j=0; j < r; j++) {
982:     VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
983:   }
984:   /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
985:   for (j = 0; j < neig; j++) {
986:     VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
987:   }
988:   /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
989:   for (j = 0; j < r; j++) {
990:     VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
991:   }
992:   /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
993:   for (j = 0; j < neig; j++) {
994:     VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
995:   }

997:   /* Computation of the eigenvectors */
998:   PetscBLASIntCast(aug1,&ldA);
999:   PetscBLASIntCast(aug,&N);
1000:   lwork = 8 * N + 20; /* sizeof the working space */
1001:   PetscMalloc1(N, &wr);
1002:   PetscMalloc1(N, &wi);
1003:   PetscMalloc1(N, &beta);
1004:   PetscMalloc1(N, &modul);
1005:   PetscMalloc1(N, &perm);
1006:   PetscMalloc1(N*N, &Q);
1007:   PetscMalloc1(N*N, &Z);
1008:   PetscMalloc1(lwork, &work);
1009:   {
1010:     PetscBLASInt info=0;
1011:     PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1012:     if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1013:   }
1014:   for (i=0; i<N; i++) {
1015:     if (beta[i] !=0.0) {
1016:       wr[i] /=beta[i];
1017:       wi[i] /=beta[i];
1018:     }
1019:   }
1020:   /* sort the eigenvalues */
1021:   for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1022:   for (i=0; i<N; i++) perm[i] = i;
1023:   PetscSortRealWithPermutation(N, modul, perm);
1024:   /* Save the norm of the largest eigenvalue */
1025:   if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1026:   /* Allocate space to extract the first r schur vectors   */
1027:   if (!SR2) {
1028:     PetscMalloc1(aug1*bmax, &SR2);
1029:   }
1030:   /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1031:   while (NbrEig < bmax) {
1032:     if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1033:     else NbrEig += 2;
1034:   }
1035:   if (NbrEig > bmax) NbrEig = bmax - 1;
1036:   r_old     = r; /* previous size of r */
1037:   dgmres->r = r = NbrEig;

1039:   /* Select the eigenvalues to reorder */
1040:   PetscCalloc1(N, &select);
1041:   if (!dgmres->GreatestEig) {
1042:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1043:   } else {
1044:     for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1045:   }
1046:   /* Reorder and extract the new <r> schur vectors */
1047:   lwork  = PetscMax(4 * N + 16,  2 * NbrEig *(N - NbrEig));
1048:   liwork = PetscMax(N + 6,  2 * NbrEig *(N - NbrEig));
1049:   PetscFree(work);
1050:   PetscMalloc1(lwork, &work);
1051:   PetscMalloc1(liwork, &iwork);
1052:   {
1053:     PetscBLASInt info=0;
1054:     PetscReal    Dif[2];
1055:     PetscBLASInt ijob  = 2;
1056:     PetscBLASInt wantQ = 1, wantZ = 1;
1057:     PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1058:     if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1059:   }
1060:   PetscFree(select);

1062:   for (j=0; j<r; j++) {
1063:     PetscArraycpy(&SR2[j*aug1], &(Z[j*N]), N);
1064:   }

1066:   /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
1067:    -- save it temporarily in MU */
1068:   for (j = 0; j < r; j++) {
1069:     VecZeroEntries(MU[j]);
1070:     VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1071:     VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1072:   }
1073:   /* Form T = U'*MU*U */
1074:   for (j = 0; j < r; j++) {
1075:     VecCopy(MU[j], UU[j]);
1076:     KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1077:   }
1078:   dgmres->matvecs += r;
1079:   for (j = 0; j < r; j++) {
1080:     VecMDot(MU[j], r, UU, &TT[j*bmax]);
1081:   }
1082:   /* Factorize T */
1083:   PetscArraycpy(TTF, TT, bmax*r);
1084:   PetscBLASIntCast(r,&nr);
1085:   PetscBLASIntCast(bmax,&bm);
1086:   {
1087:     PetscBLASInt info;
1088:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1089:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1090:   }
1091:   /* Free Memory */
1092:   PetscFree(wr);
1093:   PetscFree(wi);
1094:   PetscFree(beta);
1095:   PetscFree(modul);
1096:   PetscFree(perm);
1097:   PetscFree(Q);
1098:   PetscFree(Z);
1099:   PetscFree(work);
1100:   PetscFree(iwork);
1101:   return(0);
1102: }

1104: /* end new DGMRES functions */

1106: /*MC
1107:      KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1108:                  In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1109:                  stagnation occurs.

1111:    Options Database Keys:
1112:    GMRES Options (inherited):
1113: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1114: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1115: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1116:                              vectors are allocated as needed)
1117: .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1118: .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1119: .   -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
1120:                                    stability of the classical Gram-Schmidt  orthogonalization.
1121: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated

1123:    DGMRES Options Database Keys:
1124: +   -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1125: .   -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1126:                                        process
1127: .   -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1128: -   -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1129:                                                    parsed by PetscOptionsGetViewer().  If neig > 1, viewerspec should
1130:                                                    end with ":append".  No vectors will be viewed if the adaptive
1131:                                                    strategy chooses not to deflate, so -ksp_dgmres_force should also
1132:                                                    be given.
1133:                                                    The deflation vectors span a subspace that may be a good
1134:                                                    approximation of the subspace of smallest eigenvectors of the
1135:                                                    preconditioned operator, so this option can aid in understanding
1136:                                                    the performance of a preconditioner.

1138:  Level: beginner

1140:  Notes:
1141:     Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported

1143:  References:
1144: +  1. - J. Erhel, K. Burrage and B. Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1145: -  2. - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1146:    In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

1148:  Contributed by: Desire NUENTSA WAKAM,INRIA

1150:  .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1151:  KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1152:  KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1153:  KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()

1155:  M*/

1157: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1158: {
1159:   KSP_DGMRES     *dgmres;

1163:   PetscNewLog(ksp,&dgmres);
1164:   ksp->data = (void*) dgmres;

1166:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
1167:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
1168:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);

1170:   ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1171:   ksp->ops->setup                        = KSPSetUp_DGMRES;
1172:   ksp->ops->solve                        = KSPSolve_DGMRES;
1173:   ksp->ops->destroy                      = KSPDestroy_DGMRES;
1174:   ksp->ops->view                         = KSPView_DGMRES;
1175:   ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1176:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1177:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1179:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1180:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1181:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1182:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1183:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1184:   /* -- New functions defined in DGMRES -- */
1185:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1186:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1187:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1188:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1189:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1190:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1191:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1192:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);

1194:   PetscLogEventRegister("DGMRESCompDefl",  KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1195:   PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);

1197:   dgmres->haptol         = 1.0e-30;
1198:   dgmres->q_preallocate  = 0;
1199:   dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1200:   dgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
1201:   dgmres->nrs            = NULL;
1202:   dgmres->sol_temp       = NULL;
1203:   dgmres->max_k          = GMRES_DEFAULT_MAXK;
1204:   dgmres->Rsvd           = NULL;
1205:   dgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
1206:   dgmres->orthogwork     = NULL;

1208:   /* Default values for the deflation */
1209:   dgmres->r           = 0;
1210:   dgmres->neig        = DGMRES_DEFAULT_EIG;
1211:   dgmres->max_neig    = DGMRES_DEFAULT_MAXEIG-1;
1212:   dgmres->lambdaN     = 0.0;
1213:   dgmres->smv         = SMV;
1214:   dgmres->matvecs     = 0;
1215:   dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1216:   dgmres->HasSchur    = PETSC_FALSE;
1217:   return(0);
1218: }