Actual source code: dgmres.c

petsc-3.4.4 2014-03-13
  2: /*
  3:  This file implements the deflated GMRES.
  4:  References:
  5:  [1]J. Erhel, K. Burrage and B. Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
  6:  [2] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

  8:  */

 10:  #include ../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h

 12: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;

 14: #define GMRES_DELTA_DIRECTIONS 10
 15: #define GMRES_DEFAULT_MAXK     30
 16: static PetscErrorCode    KSPDGMRESGetNewVectors(KSP,PetscInt);
 17: static PetscErrorCode    KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
 18: static PetscErrorCode    KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);

 22: PetscErrorCode  KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
 23: {

 27:   PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
 28:   return(0);
 29: }
 32: PetscErrorCode  KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
 33: {

 37:   PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
 38:   return(0);
 39: }
 42: PetscErrorCode  KSPDGMRESForce(KSP ksp,PetscInt force)
 43: {

 47:   PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscInt),(ksp,force));
 48:   return(0);
 49: }
 52: PetscErrorCode  KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
 53: {

 57:   PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
 58:   return(0);
 59: }
 62: PetscErrorCode  KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
 63: {

 67:   PetscTryMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
 68:   return(0);
 69: }
 72: PetscErrorCode  KSPDGMRESComputeDeflationData(KSP ksp)
 73: {

 77:   PetscTryMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP),(ksp));
 78:   return(0);
 79: }
 82: PetscErrorCode  KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
 83: {

 87:   PetscTryMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
 88:   return(0);
 89: }

 93: PetscErrorCode  KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
 94: {

 98:   PetscTryMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
 99:   return(0);
100: }

104: PetscErrorCode  KSPSetUp_DGMRES(KSP ksp)
105: {
107:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
108:   PetscInt       neig    = dgmres->neig+EIG_OFFSET;
109:   PetscInt       max_k   = dgmres->max_k+1;

112:   KSPSetUp_GMRES(ksp);
113:   if (!dgmres->neig) return(0);

115:   /* Allocate workspace for the Schur vectors*/
116:   PetscMalloc((neig) *max_k*sizeof(PetscReal), &SR);
117:   dgmres->wr    = NULL;
118:   dgmres->wi    = NULL;
119:   dgmres->perm  = NULL;
120:   dgmres->modul = NULL;
121:   dgmres->Q     = NULL;
122:   dgmres->Z     = NULL;

124:   UU   = NULL;
125:   XX   = NULL;
126:   MX   = NULL;
127:   AUU  = NULL;
128:   XMX  = NULL;
129:   XMU  = NULL;
130:   UMX  = NULL;
131:   AUAU = NULL;
132:   TT   = NULL;
133:   TTF  = NULL;
134:   INVP = NULL;
135:   X1   = NULL;
136:   X2   = NULL;
137:   MU   = NULL;
138:   return(0);
139: }

141: /*
142:  Run GMRES, possibly with restart.  Return residual history if requested.
143:  input parameters:

145:  .       gmres  - structure containing parameters and work areas

147:  output parameters:
148:  .        nres    - residuals (from preconditioned system) at each step.
149:  If restarting, consider passing nres+it.  If null,
150:  ignored
151:  .        itcount - number of iterations used.  nres[0] to nres[itcount]
152:  are defined.  If null, ignored.

154:  Notes:
155:  On entry, the value in vector VEC_VV(0) should be the initial residual
156:  (this allows shortcuts where the initial preconditioned residual is 0).
157:  */
160: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
161: {
162:   KSP_DGMRES     *dgmres = (KSP_DGMRES*)(ksp->data);
163:   PetscReal      res_norm,res,hapbnd,tt;
165:   PetscInt       it     = 0;
166:   PetscInt       max_k  = dgmres->max_k;
167:   PetscBool      hapend = PETSC_FALSE;
168:   PetscReal      res_old;
169:   PetscInt       test = 0;

172:   VecNormalize(VEC_VV(0),&res_norm);
173:   res     = res_norm;
174:   *GRS(0) = res_norm;

176:   /* check for the convergence */
177:   PetscObjectAMSTakeAccess((PetscObject)ksp);
178:   ksp->rnorm = res;
179:   PetscObjectAMSGrantAccess((PetscObject)ksp);
180:   dgmres->it = (it - 1);
181:   KSPLogResidualHistory(ksp,res);
182:   KSPMonitor(ksp,ksp->its,res);
183:   if (!res) {
184:     if (itcount) *itcount = 0;
185:     ksp->reason = KSP_CONVERGED_ATOL;
186:     PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
187:     return(0);
188:   }
189:   /* record the residual norm to test if deflation is needed */
190:   res_old = res;

192:   (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
193:   while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
194:     if (it) {
195:       KSPLogResidualHistory(ksp,res);
196:       KSPMonitor(ksp,ksp->its,res);
197:     }
198:     dgmres->it = (it - 1);
199:     if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
200:       KSPDGMRESGetNewVectors(ksp,it+1);
201:     }
202:     if (dgmres->r > 0) {
203:       if (ksp->pc_side == PC_LEFT) {
204:         /* Apply the first preconditioner */
205:         KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
206:         /* Then apply Deflation as a preconditioner */
207:         KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
208:       } else if (ksp->pc_side == PC_RIGHT) {
209:         KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
210:         KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
211:       }
212:     } else {
213:       KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
214:     }
215:     dgmres->matvecs += 1;
216:     /* update hessenberg matrix and do Gram-Schmidt */
217:     (*dgmres->orthog)(ksp,it);

219:     /* vv(i+1) . vv(i+1) */
220:     VecNormalize(VEC_VV(it+1),&tt);
221:     /* save the magnitude */
222:     *HH(it+1,it)  = tt;
223:     *HES(it+1,it) = tt;

225:     /* check for the happy breakdown */
226:     hapbnd = PetscAbsScalar(tt / *GRS(it));
227:     if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
228:     if (tt < hapbnd) {
229:       PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);
230:       hapend = PETSC_TRUE;
231:     }
232:     KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);

234:     it++;
235:     dgmres->it = (it-1);     /* For converged */
236:     ksp->its++;
237:     ksp->rnorm = res;
238:     if (ksp->reason) break;

240:     (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);

242:     /* Catch error in happy breakdown and signal convergence and break from loop */
243:     if (hapend) {
244:       if (!ksp->reason) {
245:         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
246:         else {
247:           ksp->reason = KSP_DIVERGED_BREAKDOWN;
248:           break;
249:         }
250:       }
251:     }
252:   }

254:   /* Monitor if we know that we will not return for a restart */
255:   if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
256:     KSPLogResidualHistory(ksp,res);
257:     KSPMonitor(ksp,ksp->its,res);
258:   }
259:   if (itcount) *itcount = it;

261:   /*
262:    Down here we have to solve for the "best" coefficients of the Krylov
263:    columns, add the solution values together, and possibly unwind the
264:    preconditioning from the solution
265:    */
266:   /* Form the solution (or the solution so far) */
267:   KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);

269:   /* Compute data for the deflation to be used during the next restart */
270:   if (!ksp->reason && ksp->its < ksp->max_it) {
271:     test = max_k *log(ksp->rtol/res) /log(res/res_old);
272:     /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
273:     if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
274:        KSPDGMRESComputeDeflationData(ksp);
275:     }
276:   }
277:   return(0);
278: }

282: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
283: {
285:   PetscInt       its,itcount;
286:   KSP_DGMRES     *dgmres    = (KSP_DGMRES*) ksp->data;
287:   PetscBool      guess_zero = ksp->guess_zero;
288:   PetscBool      flag;

291:   if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");

293:   PetscObjectAMSTakeAccess((PetscObject)ksp);
294:   ksp->its        = 0;
295:   dgmres->matvecs = 0;
296:   PetscObjectAMSGrantAccess((PetscObject)ksp);

298:   itcount     = 0;
299:   ksp->reason = KSP_CONVERGED_ITERATING;
300:   while (!ksp->reason) {
301:     KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
302:     if (ksp->pc_side == PC_LEFT) {
303:       dgmres->matvecs += 1;
304:       if (dgmres->r > 0) {
305:         KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
306:         VecCopy(VEC_TEMP, VEC_VV(0));
307:       }
308:     }

310:     KSPDGMRESCycle(&its,ksp);
311:     itcount += its;
312:     if (itcount >= ksp->max_it) {
313:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
314:       break;
315:     }
316:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
317:   }
318:   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */

320:   PetscOptionsHasName(((PetscObject)ksp)->prefix,"-ksp_dgmres_view_deflation_vecs",&flag);
321:   if (flag) {
322:     PetscInt i;

324:     for (i = 0; i < dgmres->r; i++) {
325:       VecViewFromOptions(UU[i],((PetscObject)ksp)->prefix,"-ksp_dgmres_view_deflation_vecs");
326:     }
327:   }
328:   return(0);
329: }


334: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
335: {
337:   KSP_DGMRES     *dgmres  = (KSP_DGMRES*) ksp->data;
338:   PetscInt       neig1    = dgmres->neig+EIG_OFFSET;
339:   PetscInt       max_neig = dgmres->max_neig;

342:   if (dgmres->r) {
343:     VecDestroyVecs(max_neig, &UU);
344:     VecDestroyVecs(max_neig, &MU);
345:     if (XX) {
346:       VecDestroyVecs(neig1, &XX);
347:       VecDestroyVecs(neig1, &MX);
348:     }

350:     PetscFree(TT);
351:     PetscFree(TTF);
352:     PetscFree(INVP);

354:     PetscFree(XMX);
355:     PetscFree(UMX);
356:     PetscFree(XMU);
357:     PetscFree(X1);
358:     PetscFree(X2);
359:     PetscFree(dgmres->work);
360:     PetscFree(dgmres->iwork);
361:     PetscFree(dgmres->wr);
362:     PetscFree(dgmres->wi);
363:     PetscFree(dgmres->modul);
364:     PetscFree(dgmres->Q);
365:     PetscFree(ORTH);
366:     PetscFree(AUAU);
367:     PetscFree(AUU);
368:     PetscFree(SR2);
369:   }
370:   PetscFree(SR);
371:   KSPDestroy_GMRES(ksp);
372:   return(0);
373: }
374: /*
375:  KSPDGMRESBuildSoln - create the solution from the starting vector and the
376:  current iterates.

378:  Input parameters:
379:  nrs - work area of size it + 1.
380:  vs  - index of initial guess
381:  vdest - index of result.  Note that vs may == vdest (replace
382:  guess with the solution).

384:  This is an internal routine that knows about the GMRES internals.
385:  */
388: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
389: {
390:   PetscScalar    tt;
392:   PetscInt       ii,k,j;
393:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) (ksp->data);

395:   /* Solve for solution vector that minimizes the residual */

398:   /* If it is < 0, no gmres steps have been performed */
399:   if (it < 0) {
400:     VecCopy(vs,vdest);     /* VecCopy() is smart, exists immediately if vguess == vdest */
401:     return(0);
402:   }
403:   if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar(*GRS(it)));
404:   if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
405:   else nrs[it] = 0.0;

407:   for (ii=1; ii<=it; ii++) {
408:     k  = it - ii;
409:     tt = *GRS(k);
410:     for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
411:     if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
412:     nrs[k] = tt / *HH(k,k);
413:   }

415:   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
416:   VecSet(VEC_TEMP,0.0);
417:   VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));

419:   /* Apply deflation */
420:   if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
421:     KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
422:     VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
423:   }
424:   KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);

426:   /* add solution to previous solution */
427:   if (vdest != vs) {
428:     VecCopy(vs,vdest);
429:   }
430:   VecAXPY(vdest,1.0,VEC_TEMP);
431:   return(0);
432: }
433: /*
434:  Do the scalar work for the orthogonalization.  Return new residual norm.
435:  */
438: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
439: {
440:   PetscScalar *hh,*cc,*ss,tt;
441:   PetscInt    j;
442:   KSP_DGMRES  *dgmres = (KSP_DGMRES*) (ksp->data);

445:   hh = HH(0,it);
446:   cc = CC(0);
447:   ss = SS(0);

449:   /* Apply all the previously computed plane rotations to the new column
450:    of the Hessenberg matrix */
451:   for (j=1; j<=it; j++) {
452:     tt  = *hh;
453:     *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
454:     hh++;
455:     *hh = *cc++ * *hh -(*ss++ * tt);
456:   }

458:   /*
459:    compute the new plane rotation, and apply it to:
460:    1) the right-hand-side of the Hessenberg system
461:    2) the new column of the Hessenberg matrix
462:    thus obtaining the updated value of the residual
463:    */
464:   if (!hapend) {
465:     tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
466:     if (tt == 0.0) {
467:       ksp->reason = KSP_DIVERGED_NULL;
468:       return(0);
469:     }
470:     *cc        = *hh / tt;
471:     *ss        = *(hh+1) / tt;
472:     *GRS(it+1) = -(*ss * *GRS(it));
473:     *GRS(it)   = PetscConj(*cc) * *GRS(it);
474:     *hh        = PetscConj(*cc) * *hh + *ss * *(hh+1);
475:     *res       = PetscAbsScalar(*GRS(it+1));
476:   } else {
477:     /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
478:      another rotation matrix (so RH doesn't change).  The new residual is
479:      always the new sine term times the residual from last time (GRS(it)),
480:      but now the new sine rotation would be zero...so the residual should
481:      be zero...so we will multiply "zero" by the last residual.  This might
482:      not be exactly what we want to do here -could just return "zero". */

484:     *res = 0.0;
485:   }
486:   return(0);
487: }
488: /*
489:  This routine allocates more work vectors, starting from VEC_VV(it).
490:  */
493: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
494: {
495:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
497:   PetscInt       nwork = dgmres->nwork_alloc,k,nalloc;

500:   nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
501:   /* Adjust the number to allocate to make sure that we don't exceed the
502:    number of available slots */
503:   if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
504:     nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
505:   }
506:   if (!nalloc) return(0);

508:   dgmres->vv_allocated += nalloc;

510:   KSPGetVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
511:   PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);

513:   dgmres->mwork_alloc[nwork] = nalloc;
514:   for (k=0; k<nalloc; k++) {
515:     dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
516:   }
517:   dgmres->nwork_alloc++;
518:   return(0);
519: }

523: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
524: {
525:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;

529:   if (!ptr) {
530:     if (!dgmres->sol_temp) {
531:       VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
532:       PetscLogObjectParent(ksp,dgmres->sol_temp);
533:     }
534:     ptr = dgmres->sol_temp;
535:   }
536:   if (!dgmres->nrs) {
537:     /* allocate the work area */
538:     PetscMalloc(dgmres->max_k*sizeof(PetscScalar),&dgmres->nrs);
539:     PetscLogObjectMemory(ksp,dgmres->max_k*sizeof(PetscScalar));
540:   }

542:   KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
543:   if (result) *result = ptr;
544:   return(0);
545: }

549: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
550: {
551:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
553:   PetscBool      iascii,isharmonic;

556:   KSPView_GMRES(ksp,viewer);
557:   PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
558:   if (iascii) {
559:     if (dgmres->force) PetscViewerASCIIPrintf(viewer, "   DGMRES: Adaptive strategy is used: FALSE\n");
560:     else PetscViewerASCIIPrintf(viewer, "   DGMRES: Adaptive strategy is used: TRUE\n");
561:     PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &isharmonic);
562:     if (isharmonic) {
563:       PetscViewerASCIIPrintf(viewer, "  DGMRES: Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
564:     } else {
565:       PetscViewerASCIIPrintf(viewer, "  DGMRES: Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
566:     }
567:     PetscViewerASCIIPrintf(viewer, "  DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
568:     PetscViewerASCIIPrintf(viewer, "  DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
569:     PetscViewerASCIIPrintf(viewer, "  DGMRES: relaxation parameter for the adaptive strategy(smv)  = %g\n", dgmres->smv);
570:     PetscViewerASCIIPrintf(viewer, "  DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
571:   }
572:   return(0);
573: }

575: /* New DGMRES functions */

579: static PetscErrorCode  KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
580: {
581:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

584:   if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
585:   dgmres->neig=neig;
586:   return(0);
587: }

591: static PetscErrorCode  KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
592: {
593:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

596:   if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
597:   dgmres->max_neig=max_neig;
598:   return(0);
599: }

603: static PetscErrorCode  KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
604: {
605:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

608:   if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
609:   dgmres->smv=ratio;
610:   return(0);
611: }

615: static PetscErrorCode  KSPDGMRESForce_DGMRES(KSP ksp,PetscInt force)
616: {
617:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

620:   if (force != 0 && force != 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"Value must be 0 or 1");
621:   dgmres->force = 1;
622:   return(0);
623: }

625: extern PetscErrorCode KSPSetFromOptions_GMRES(KSP);

629: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp)
630: {
632:   PetscInt       neig;
633:   PetscInt       max_neig;
634:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
635:   PetscBool      flg;
636:   PetscReal      smv;
637:   PetscInt       input;

640:   KSPSetFromOptions_GMRES(ksp);
641:   PetscOptionsHead("KSP DGMRES Options");
642:   PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
643:   if (flg) {
644:     KSPDGMRESSetEigen(ksp, neig);
645:   }
646:   PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
647:   if (flg) {
648:     KSPDGMRESSetMaxEigen(ksp, max_neig);
649:   }
650:   PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &smv, &flg);
651:   if (flg) dgmres->smv = smv;
652:   PetscOptionsInt("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &input, &flg);
653:   if (flg) dgmres->improve = input;
654:   PetscOptionsInt("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &input, &flg);
655:   if (flg) dgmres->force = input;
656:   PetscOptionsTail();
657:   return(0);
658: }

662: static PetscErrorCode  KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
663: {
664:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
666:   PetscInt       i,j, k;
667:   PetscBLASInt   nr, bmax;
668:   PetscInt       r = dgmres->r;
669:   PetscInt       neig;          /* number of eigenvalues to extract at each restart */
670:   PetscInt       neig1    = dgmres->neig + EIG_OFFSET;  /* max number of eig that can be extracted at each restart */
671:   PetscInt       max_neig = dgmres->max_neig;  /* Max number of eigenvalues to extract during the iterative process */
672:   PetscInt       N        = dgmres->max_k+1;
673:   PetscInt       n        = dgmres->it+1;
674:   PetscReal      alpha;

677:   PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
678:   if (dgmres->neig == 0) return(0);
679:   if (max_neig < (r+neig1) && !dgmres->improve) {
680:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
681:     return(0);
682:   }

684:    KSPDGMRESComputeSchurForm(ksp, &neig);
685:   /* Form the extended Schur vectors X=VV*Sr */
686:   if (!XX) {
687:     VecDuplicateVecs(VEC_VV(0), neig1, &XX);
688:   }
689:   for (j = 0; j<neig; j++) {
690:     VecZeroEntries(XX[j]);
691:     VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
692:   }

694:   /* Orthogonalize X against U */
695:   if (!ORTH) {
696:     PetscMalloc(max_neig*sizeof(PetscReal), &ORTH);
697:   }
698:   if (r > 0) {
699:     /* modified Gram-Schmidt */
700:     for (j = 0; j<neig; j++) {
701:       for (i=0; i<r; i++) {
702:         /* First, compute U'*X[j] */
703:         VecDot(XX[j], UU[i], &alpha);
704:         /* Then, compute X(j)=X(j)-U*U'*X(j) */
705:         VecAXPY(XX[j], -alpha, UU[i]);
706:       }
707:     }
708:   }
709:   /* Compute MX = M^{-1}*A*X */
710:   if (!MX) {
711:     VecDuplicateVecs(VEC_VV(0), neig1, &MX);
712:   }
713:   for (j = 0; j<neig; j++) {
714:     KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
715:   }
716:   dgmres->matvecs += neig;

718:   if ((r+neig1) > max_neig && dgmres->improve) {    /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
719:     KSPDGMRESImproveEig(ksp, neig);
720:     return(0);   /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
721:   }

723:   /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
724:   if (!XMX) {
725:     PetscMalloc(neig1*neig1*sizeof(PetscReal), &XMX);
726:   }
727:   for (j = 0; j < neig; j++) {
728:     VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
729:   }

731:   if (r > 0) {
732:     /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
733:     if (!UMX) {
734:       PetscMalloc(max_neig*neig1*sizeof(PetscReal), &UMX);
735:     }
736:     for (j = 0; j < neig; j++) {
737:       VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
738:     }
739:     /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
740:     if (!XMU) {
741:       PetscMalloc(max_neig*neig1*sizeof(PetscReal), &XMU);
742:     }
743:     for (j = 0; j<r; j++) {
744:       VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
745:     }
746:   }

748:   /* Form the new matrix T = [T UMX; XMU XMX]; */
749:   if (!TT) {
750:     PetscMalloc(max_neig*max_neig*sizeof(PetscReal), &TT);
751:   }
752:   if (r > 0) {
753:     /* Add XMU to T */
754:     for (j = 0; j < r; j++) {
755:       PetscMemcpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig*sizeof(PetscReal));
756:     }
757:     /* Add [UMX; XMX] to T */
758:     for (j = 0; j < neig; j++) {
759:       k = r+j;
760:       PetscMemcpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r*sizeof(PetscReal));
761:       PetscMemcpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
762:     }
763:   } else { /* Add XMX to T */
764:     for (j = 0; j < neig; j++) {
765:       PetscMemcpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
766:     }
767:   }

769:   dgmres->r += neig;
770:   r          = dgmres->r;
771:   PetscBLASIntCast(r,&nr);
772:   /*LU Factorize T with Lapack xgetrf routine */

774:   PetscBLASIntCast(max_neig,&bmax);
775:   if (!TTF) {
776:     PetscMalloc(bmax*bmax*sizeof(PetscReal), &TTF);
777:   }
778:   PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
779:   if (!INVP) {
780:     PetscMalloc(bmax*sizeof(PetscBLASInt), &INVP);
781:   }
782: #if defined(PETSC_MISSING_LAPACK_GETRF)
783:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
784: #else
785:   {
786:     PetscBLASInt info;
787:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
788:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
789:   }
790: #endif

792:   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
793:   if (!UU) {
794:     VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
795:     VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
796:   }
797:   for (j=0; j<neig; j++) {
798:     VecCopy(XX[j], UU[r-neig+j]);
799:     VecCopy(MX[j], MU[r-neig+j]);
800:   }
801:   PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
802:   return(0);
803: }

807: static PetscErrorCode  KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
808: {
809:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
811:   PetscInt       N = dgmres->max_k + 1, n=dgmres->it+1;
812:   PetscBLASInt   bn, bN;
813:   PetscReal      *A;
814:   PetscBLASInt   ihi;
815:   PetscBLASInt   ldA;          /* leading dimension of A */
816:   PetscBLASInt   ldQ;          /* leading dimension of Q */
817:   PetscReal      *Q;           /*  orthogonal matrix of  (left) schur vectors */
818:   PetscReal      *work;        /* working vector */
819:   PetscBLASInt   lwork;        /* size of the working vector */
820:   PetscInt       *perm;        /* Permutation vector to sort eigenvalues */
821:   PetscInt       i, j;
822:   PetscBLASInt   NbrEig;       /* Number of eigenvalues really extracted */
823:   PetscReal      *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
824:   PetscBLASInt   *select;
825:   PetscBLASInt   *iwork;
826:   PetscBLASInt   liwork;
827:   PetscScalar    *Ht;           /* Transpose of the Hessenberg matrix */
828:   PetscScalar    *t;            /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
829:   PetscBLASInt   *ipiv;         /* Permutation vector to be used in LAPACK */
830:   PetscBool      flag;            /* determine whether to use Ritz vectors or harmonic Ritz vectors */

833:   PetscBLASIntCast(n,&bn);
834:   PetscBLASIntCast(N,&bN);
835:   ihi  = ldQ = bn;
836:   ldA  = bN;
837:   PetscBLASIntCast(5*N,&lwork);

839: #if defined(PETSC_USE_COMPLEX)
840:   SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
841: #endif

843:   PetscMalloc(ldA*ldA*sizeof(PetscReal), &A);
844:   PetscMalloc(ldQ*n*sizeof(PetscReal), &Q);
845:   PetscMalloc(lwork*sizeof(PetscReal), &work);
846:   if (!dgmres->wr) {
847:     PetscMalloc(n*sizeof(PetscReal), &dgmres->wr);
848:     PetscMalloc(n*sizeof(PetscReal), &dgmres->wi);
849:   }
850:   wr   = dgmres->wr;
851:   wi   = dgmres->wi;
852:   PetscMalloc(n*sizeof(PetscReal),&modul);
853:   PetscMalloc(n*sizeof(PetscInt),&perm);
854:   /* copy the Hessenberg matrix to work space */
855:   PetscMemcpy(A, dgmres->hes_origin, ldA*ldA*sizeof(PetscReal));
856:   PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &flag);
857:   if (flag) {
858:     /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
859:     /* Transpose the Hessenberg matrix */
860:     PetscMalloc(bn*bn*sizeof(PetscScalar), &Ht);
861:     for (i = 0; i < bn; i++) {
862:       for (j = 0; j < bn; j++) {
863:         Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
864:       }
865:     }

867:     /* Solve the system H^T*t = h_{m+1,m}e_m */
868:     PetscMalloc(bn*sizeof(PetscScalar), &t);
869:     PetscMemzero(t, bn*sizeof(PetscScalar));
870:     t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
871:     PetscMalloc(bn*sizeof(PetscBLASInt), &ipiv);
872:     /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
873: #if   defined(PETSC_MISSING_LAPACK_GESV)
874:     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
875: #else
876:     {
877:       PetscBLASInt info;
878:       PetscBLASInt nrhs = 1;
879:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
880:       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
881:     }
882: #endif
883:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
884:     for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
885:     PetscFree(t);
886:     PetscFree(Ht);
887:   }
888:   /* Compute eigenvalues with the Schur form */
889: #if defined(PETSC_MISSING_LAPACK_HSEQR)
890:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
891: #else
892:   {
893:     PetscBLASInt info;
894:     PetscBLASInt ilo = 1;
895:     PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
896:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
897:   }
898: #endif
899:   PetscFree(work);

901:   /* sort the eigenvalues */
902:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
903:   for (i=0; i<n; i++) perm[i] = i;

905:   PetscSortRealWithPermutation(n, modul, perm);
906:   /* save the complex modulus of the largest eigenvalue in magnitude */
907:   if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
908:   /* count the number of extracted eigenvalues (with complex conjugates) */
909:   NbrEig = 0;
910:   while (NbrEig < dgmres->neig) {
911:     if (wi[perm[NbrEig]] != 0) NbrEig += 2;
912:     else NbrEig += 1;
913:   }
914:   /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

916:   PetscMalloc(n * sizeof(PetscBLASInt), &select);
917:   PetscMemzero(select, n * sizeof(PetscBLASInt));

919:   if (!dgmres->GreatestEig) {
920:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
921:   } else {
922:     for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
923:   }
924:   /* call Lapack dtrsen */
925:   lwork  =  PetscMax(1, 4 * NbrEig *(bn-NbrEig));
926:   liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
927:   PetscMalloc(lwork * sizeof(PetscScalar), &work);
928:   PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
929: #if defined(PETSC_MISSING_LAPACK_TRSEN)
930:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
931: #else
932:   {
933:     PetscBLASInt info;
934:     PetscReal    CondEig;         /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
935:     PetscReal    CondSub;         /* estimated reciprocal condition number of the specified invariant subspace. */
936:     PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
937:     if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
938:   }
939: #endif
940:   PetscFree(select);

942:   /* Extract the Schur vectors */
943:   for (j = 0; j < NbrEig; j++) {
944:     PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
945:   }
946:   *neig = NbrEig;
947:   PetscFree(A);
948:   PetscFree(work);
949:   PetscFree(perm);
950:   PetscFree(work);
951:   PetscFree(iwork);
952:   PetscFree(modul);
953:   PetscFree(Q);
954:   return(0);
955: }

959: static PetscErrorCode  KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
960: {
961:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
962:   PetscInt       i, r     = dgmres->r;
964:   PetscReal      alpha    = 1.0;
965:   PetscInt       max_neig = dgmres->max_neig;
966:   PetscBLASInt   br,bmax;
967:   PetscReal      lambda = dgmres->lambdaN;

970:   PetscBLASIntCast(r,&br);
971:   PetscBLASIntCast(max_neig,&bmax);
972:   PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
973:   if (!r) {
974:     VecCopy(x,y);
975:     return(0);
976:   }
977:   /* Compute U'*x */
978:   if (!X1) {
979:     PetscMalloc(bmax*sizeof(PetscReal), &X1);
980:     PetscMalloc(bmax*sizeof(PetscReal), &X2);
981:   }
982:   VecMDot(x, r, UU, X1);

984:   /* Solve T*X1=X2 for X1*/
985:   PetscMemcpy(X2, X1, br*sizeof(PetscReal));
986: #if defined(PETSC_MISSING_LAPACK_GETRS)
987:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
988: #else
989:   {
990:     PetscBLASInt info;
991:     PetscBLASInt nrhs = 1;
992:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
993:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
994:   }
995: #endif
996:   /* Iterative refinement -- is it really necessary ?? */
997:   if (!WORK) {
998:     PetscMalloc(3*bmax*sizeof(PetscReal), &WORK);
999:     PetscMalloc(bmax*sizeof(PetscBLASInt), &IWORK);
1000:   }
1001: #if defined(PETSC_MISSING_LAPACK_GERFS)
1002:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
1003: #else
1004:   {
1005:     PetscBLASInt info;
1006:     PetscReal    berr, ferr;
1007:     PetscBLASInt nrhs = 1;
1008:     PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
1009:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
1010:   }
1011: #endif

1013:   for (i = 0; i < r; i++) X2[i] =  X1[i]/lambda - X2[i];

1015:   /* Compute X2=U*X2 */
1016:   VecZeroEntries(y);
1017:   VecMAXPY(y, r, X2, UU);
1018:   VecAXPY(y, alpha, x);

1020:   PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1021:   return(0);
1022: }

1026: static PetscErrorCode  KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
1027: {
1028:   KSP_DGMRES   *dgmres = (KSP_DGMRES*) ksp->data;
1029:   PetscInt     j,r_old, r = dgmres->r;
1030:   PetscBLASInt i     = 0;
1031:   PetscInt     neig1 = dgmres->neig + EIG_OFFSET;
1032:   PetscInt     bmax  = dgmres->max_neig;
1033:   PetscInt     aug   = r + neig;         /* actual size of the augmented invariant basis */
1034:   PetscInt     aug1  = bmax+neig1;       /* maximum size of the augmented invariant basis */
1035:   PetscBLASInt ldA;            /* leading dimension of AUAU and AUU*/
1036:   PetscBLASInt N;              /* size of AUAU */
1037:   PetscReal    *Q;             /*  orthogonal matrix of  (left) schur vectors */
1038:   PetscReal    *Z;             /*  orthogonal matrix of  (right) schur vectors */
1039:   PetscReal    *work;          /* working vector */
1040:   PetscBLASInt lwork;          /* size of the working vector */
1041:   PetscInt     *perm;          /* Permutation vector to sort eigenvalues */
1042:   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
1043:   PetscInt     ierr;
1044:   PetscBLASInt NbrEig = 0,nr,bm;
1045:   PetscBLASInt *select;
1046:   PetscBLASInt liwork, *iwork;

1049:   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1050:   if (!AUU) {
1051:     PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUU);
1052:     PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUAU);
1053:   }
1054:   /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1055:    * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1056:   /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1057:   for (j=0; j < r; j++) {
1058:     VecMDot(UU[j], r, MU, &AUU[j*aug1]);
1059:   }
1060:   /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1061:   for (j = 0; j < neig; j++) {
1062:     VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
1063:   }
1064:   /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1065:   for (j = 0; j < r; j++) {
1066:     VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
1067:   }
1068:   /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
1069:   for (j = 0; j < neig; j++) {
1070:     VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
1071:   }

1073:   /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1074:   /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1075:   for (j=0; j < r; j++) {
1076:     VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
1077:   }
1078:   /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1079:   for (j = 0; j < neig; j++) {
1080:     VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
1081:   }
1082:   /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1083:   for (j = 0; j < r; j++) {
1084:     VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
1085:   }
1086:   /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
1087:   for (j = 0; j < neig; j++) {
1088:     VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
1089:   }

1091:   /* Computation of the eigenvectors */
1092:   PetscBLASIntCast(aug1,&ldA);
1093:   PetscBLASIntCast(aug,&N);
1094:   lwork = 8 * N + 20; /* sizeof the working space */
1095:   PetscMalloc(N*sizeof(PetscReal), &wr);
1096:   PetscMalloc(N*sizeof(PetscReal), &wi);
1097:   PetscMalloc(N*sizeof(PetscReal), &beta);
1098:   PetscMalloc(N*sizeof(PetscReal), &modul);
1099:   PetscMalloc(N*sizeof(PetscInt), &perm);
1100:   PetscMalloc(N*N*sizeof(PetscReal), &Q);
1101:   PetscMalloc(N*N*sizeof(PetscReal), &Z);
1102:   PetscMalloc(lwork*sizeof(PetscReal), &work);
1103: #if defined(PETSC_MISSING_LAPACK_GGES)
1104:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1105: #else
1106:   {
1107:     PetscBLASInt info;
1108:     PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1109:     if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1110:   }
1111: #endif
1112:   for (i=0; i<N; i++) {
1113:     if (beta[i] !=0.0) {
1114:       wr[i] /=beta[i];
1115:       wi[i] /=beta[i];
1116:     }
1117:   }
1118:   /* sort the eigenvalues */
1119:   for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1120:   for (i=0; i<N; i++) perm[i] = i;
1121:   PetscSortRealWithPermutation(N, modul, perm);
1122:   /* Save the norm of the largest eigenvalue */
1123:   if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1124:   /* Allocate space to extract the first r schur vectors   */
1125:   if (!SR2) {
1126:     PetscMalloc(aug1*bmax*sizeof(PetscReal), &SR2);
1127:   }
1128:   /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1129:   while (NbrEig < bmax) {
1130:     if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1131:     else NbrEig += 2;
1132:   }
1133:   if (NbrEig > bmax) NbrEig = bmax - 1;
1134:   r_old     = r; /* previous size of r */
1135:   dgmres->r = r = NbrEig;

1137:   /* Select the eigenvalues to reorder */
1138:   PetscMalloc(N * sizeof(PetscBLASInt), &select);
1139:   PetscMemzero(select, N * sizeof(PetscBLASInt));
1140:   if (!dgmres->GreatestEig) {
1141:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1142:   } else {
1143:     for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1144:   }
1145:   /* Reorder and extract the new <r> schur vectors */
1146:   lwork  = PetscMax(4 * N + 16,  2 * NbrEig *(N - NbrEig));
1147:   liwork = PetscMax(N + 6,  2 * NbrEig *(N - NbrEig));
1148:   PetscFree(work);
1149:   PetscMalloc(lwork * sizeof(PetscReal), &work);
1150:   PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1151: #if defined(PETSC_MISSING_LAPACK_TGSEN)
1152:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1153: #else
1154:   {
1155:     PetscBLASInt info;
1156:     PetscReal    Dif[2];
1157:     PetscBLASInt ijob  = 2;
1158:     PetscBLASInt wantQ = 1, wantZ = 1;
1159:     PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1160:     if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1161:   }
1162: #endif
1163:   PetscFree(select);

1165:   for (j=0; j<r; j++) {
1166:     PetscMemcpy(&SR2[j*aug1], &(Z[j*N]), N*sizeof(PetscReal));
1167:   }

1169:   /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
1170:    -- save it temporarily in MU */
1171:   for (j = 0; j < r; j++) {
1172:     VecZeroEntries(MU[j]);
1173:     VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1174:     VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1175:   }
1176:   /* Form T = U'*MU*U */
1177:   for (j = 0; j < r; j++) {
1178:     VecCopy(MU[j], UU[j]);
1179:     KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1180:   }
1181:   dgmres->matvecs += r;
1182:   for (j = 0; j < r; j++) {
1183:     VecMDot(MU[j], r, UU, &TT[j*bmax]);
1184:   }
1185:   /* Factorize T */
1186:   PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
1187:   PetscBLASIntCast(r,&nr);
1188:   PetscBLASIntCast(bmax,&bm);
1189: #if defined(PETSC_MISSING_LAPACK_GETRF)
1190:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1191: #else
1192:   {
1193:     PetscBLASInt info;
1194:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1195:     if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1196:   }
1197: #endif
1198:   /* Free Memory */
1199:   PetscFree(wr);
1200:   PetscFree(wi);
1201:   PetscFree(beta);
1202:   PetscFree(modul);
1203:   PetscFree(perm);
1204:   PetscFree(Q);
1205:   PetscFree(Z);
1206:   PetscFree(work);
1207:   PetscFree(iwork);
1208:   return(0);
1209: }

1211: /* end new DGMRES functions */

1213: /*MC
1214:      KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1215:                  In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1216:                  stagnation occurs.

1218:    Options Database Keys:
1219:    GMRES Options (inherited):
1220: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1221: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1222: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1223:                              vectors are allocated as needed)
1224: .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1225: .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1226: .   -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1227:                                    stability of the classical Gram-Schmidt  orthogonalization.
1228: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated

1230:    DGMRES Options Database Keys:
1231: +   -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1232: .   -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1233:                                        process
1234: .   -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1235: -   -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1236:                                                    parsed by PetscOptionsGetViewer().  If neig > 1, viewerspec should
1237:                                                    end with ":append".  No vectors will be viewed if the adaptive
1238:                                                    strategy chooses not to deflate, so -ksp_dgmres_force should also
1239:                                                    be given.
1240:                                                    The deflation vectors span a subspace that may be a good
1241:                                                    approximation of the subspace of smallest eigenvectors of the
1242:                                                    preconditioned operator, so this option can aid in understanding
1243:                                                    the performance of a preconditioner.

1245:  Level: beginner

1247:  Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported

1249:  References:

1251:  [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1252:  [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.
1253:  [3] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

1255:  Contributed by: Desire NUENTSA WAKAM,INRIA

1257:  .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1258:  KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1259:  KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1260:  KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()

1262:  M*/

1266: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1267: {
1268:   KSP_DGMRES     *dgmres;

1272:   PetscNewLog(ksp,KSP_DGMRES,&dgmres);
1273:   ksp->data = (void*) dgmres;

1275:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
1276:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);

1278:   ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1279:   ksp->ops->setup                        = KSPSetUp_DGMRES;
1280:   ksp->ops->solve                        = KSPSolve_DGMRES;
1281:   ksp->ops->destroy                      = KSPDestroy_DGMRES;
1282:   ksp->ops->view                         = KSPView_DGMRES;
1283:   ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1284:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1285:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1287:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1288:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1289:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1290:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1291:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1292:   /* -- New functions defined in DGMRES -- */
1293:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1294:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1295:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1296:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1297:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1298:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1299:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1300:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);

1302:   PetscLogEventRegister("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1303:   PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);

1305:   dgmres->haptol         = 1.0e-30;
1306:   dgmres->q_preallocate  = 0;
1307:   dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1308:   dgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
1309:   dgmres->nrs            = 0;
1310:   dgmres->sol_temp       = 0;
1311:   dgmres->max_k          = GMRES_DEFAULT_MAXK;
1312:   dgmres->Rsvd           = 0;
1313:   dgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
1314:   dgmres->orthogwork     = 0;

1316:   /* Default values for the deflation */
1317:   dgmres->r           = 0;
1318:   dgmres->neig        = DGMRES_DEFAULT_EIG;
1319:   dgmres->max_neig    = DGMRES_DEFAULT_MAXEIG-1;
1320:   dgmres->lambdaN     = 0.0;
1321:   dgmres->smv         = SMV;
1322:   dgmres->force       = 0;
1323:   dgmres->matvecs     = 0;
1324:   dgmres->improve     = 0;
1325:   dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1326:   dgmres->HasSchur    = PETSC_FALSE;
1327:   return(0);
1328: }