Actual source code: dgmres.c

petsc-3.3-p5 2012-12-01
  2: /*
  3:     This file implements the deflated GMRES
  4:     References:  Erhel, Burrage and Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.


  7:  */

 9:  #include dgmresimpl.h
 10: #define GMRES_DELTA_DIRECTIONS 10
 11: #define GMRES_DEFAULT_MAXK     30
 12: #define PetscTruth PetscBool
 13: static PetscErrorCode    KSPDGMRESGetNewVectors (KSP,PetscInt);
 14: static PetscErrorCode    KSPDGMRESUpdateHessenberg (KSP,PetscInt,PetscTruth,PetscReal*);
 15: static PetscErrorCode    KSPDGMRESBuildSoln (PetscScalar*,Vec,Vec,KSP,PetscInt);

 19: PetscErrorCode  KSPDGMRESSetEigen (KSP ksp,PetscInt nb_eig)
 20: {

 24:     PetscTryMethod ( (ksp),"KSPDGMRESSetEigen_C", (KSP,PetscInt), (ksp,nb_eig));
 25:     PetscFunctionReturn (0);
 26: }
 29: PetscErrorCode  KSPDGMRESSetMaxEigen (KSP ksp,PetscInt max_neig)
 30: {

 34:     PetscTryMethod ( (ksp),"KSPDGMRESSetMaxEigen_C", (KSP,PetscInt), (ksp,max_neig));
 35:     PetscFunctionReturn (0);
 36: }
 39: PetscErrorCode  KSPDGMRESForce (KSP ksp,PetscInt force)
 40: {

 44:         PetscTryMethod ( (ksp),"KSPDGMRESForce_C", (KSP,PetscInt), (ksp,force));
 45:         PetscFunctionReturn (0);
 46: }
 49: PetscErrorCode  KSPDGMRESSetRatio (KSP ksp,PetscReal ratio)
 50: {

 54:         PetscTryMethod ( (ksp),"KSPDGMRESSetRatio_C", (KSP,PetscReal), (ksp,ratio));
 55:         PetscFunctionReturn (0);
 56: }
 59: PetscErrorCode  KSPDGMRESComputeSchurForm (KSP ksp,PetscInt *neig)
 60:  {

 64:     PetscTryMethod ( (ksp),"KSPDGMRESComputeSchurForm_C", (KSP, PetscInt*), (ksp, neig));
 65:     PetscFunctionReturn (0);
 66: }
 69: PetscErrorCode  KSPDGMRESComputeDeflationData (KSP ksp)
 70: {

 74:     PetscTryMethod ( (ksp),"KSPDGMRESComputeDeflationData_C", (KSP), (ksp));
 75:     PetscFunctionReturn (0);
 76: }
 79: PetscErrorCode  KSPDGMRESApplyDeflation (KSP ksp, Vec x, Vec y)
 80: {

 84:     PetscTryMethod ( (ksp),"KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
 85:     PetscFunctionReturn (0);
 86: }

 90: PetscErrorCode  KSPDGMRESImproveEig (KSP ksp, PetscInt neig)
 91: {

 95:     PetscTryMethod ( (ksp), "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
 96:     PetscFunctionReturn (0);
 97: }
100: PetscErrorCode    KSPSetUp_DGMRES (KSP ksp)
101: {
102:     PetscErrorCode    ierr;
103:     KSP_DGMRES        *dgmres   = (KSP_DGMRES *) ksp->data;
104:     PetscInt          neig            = dgmres->neig+EIG_OFFSET;
105:     PetscInt          max_k           = dgmres->max_k+1;


109:     KSPSetUp_GMRES (ksp);
110: 
111:     if (dgmres->neig==0)
112:         PetscFunctionReturn (0);

114:     /* Allocate workspace for the Schur vectors*/
115:     ierr=PetscMalloc ( (neig) *max_k*sizeof (PetscReal), &SR);
116: 
117:     UU = PETSC_NULL;
118:     XX = PETSC_NULL;
119:     MX = PETSC_NULL;
120:     AUU = PETSC_NULL;
121:     XMX = PETSC_NULL;
122:     XMU = PETSC_NULL;
123:     UMX = PETSC_NULL;
124:     AUAU = PETSC_NULL;
125:     TT = PETSC_NULL;
126:     TTF = PETSC_NULL;
127:     INVP = PETSC_NULL;
128:     X1 = PETSC_NULL;
129:     X2 = PETSC_NULL;
130:     MU = PETSC_NULL;
131:     PetscFunctionReturn (0);
132: }

134: /*
135:     Run GMRES, possibly with restart.  Return residual history if requested.
136:     input parameters:

138: .       gmres  - structure containing parameters and work areas

140:     output parameters:
141: .        nres    - residuals (from preconditioned system) at each step.
142:                   If restarting, consider passing nres+it.  If null,
143:                   ignored
144: .        itcount - number of iterations used.  nres[0] to nres[itcount]
145:                   are defined.  If null, ignored.

147:     Notes:
148:     On entry, the value in vector VEC_VV(0) should be the initial residual
149:     (this allows shortcuts where the initial preconditioned residual is 0).
150:  */
153: PetscErrorCode KSPDGMRESCycle (PetscInt *itcount,KSP ksp)
154: {
155:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) (ksp->data);
156:     PetscReal       res_norm,res,hapbnd,tt;
157:     PetscErrorCode  ierr;
158:     PetscInt        it = 0, max_k = dgmres->max_k;
159:     PetscTruth      hapend = PETSC_FALSE;
160:     PetscReal       res_old;
161:     PetscInt        test;

164:     VecNormalize (VEC_VV (0),&res_norm);
165:     res     = res_norm;
166:     *GRS (0) = res_norm;

168:     /* check for the convergence */
169:     PetscObjectTakeAccess (ksp);
170:     ksp->rnorm = res;
171:     PetscObjectGrantAccess (ksp);
172:     dgmres->it = (it - 1);
173:     KSPLogResidualHistory (ksp,res);
174:     KSPMonitor (ksp,ksp->its,res);
175:     if (!res) {
176:         if (itcount) *itcount = 0;
177:         ksp->reason = KSP_CONVERGED_ATOL;
178:         PetscInfo (ksp,"Converged due to zero residual norm on entry\n");
179:         PetscFunctionReturn (0);
180:     }
181:     /* record the residual norm to test if deflation is needed */
182:     res_old = res;

184:     (*ksp->converged) (ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
185:     while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
186:         if (it) {
187:             KSPLogResidualHistory (ksp,res);
188:             KSPMonitor (ksp,ksp->its,res);
189:         }
190:         dgmres->it = (it - 1);
191:         if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
192:             KSPDGMRESGetNewVectors (ksp,it+1);
193:         }
194:         if (dgmres->r > 0) {
195:             if (ksp->pc_side == PC_LEFT) {
196:                 /* Apply the first preconditioner */
197:                 KSP_PCApplyBAorAB (ksp,VEC_VV (it), VEC_TEMP,VEC_TEMP_MATOP);
198:                 /* Then apply Deflation as a preconditioner */
199:                 ierr=KSPDGMRESApplyDeflation (ksp, VEC_TEMP, VEC_VV (1+it));
200:             } else if (ksp->pc_side == PC_RIGHT) {
201:                 ierr=KSPDGMRESApplyDeflation (ksp, VEC_VV (it), VEC_TEMP);
202:                 ierr=KSP_PCApplyBAorAB (ksp, VEC_TEMP, VEC_VV (1+it), VEC_TEMP_MATOP);
203:             }
204:         } else {
205:             KSP_PCApplyBAorAB (ksp,VEC_VV (it),VEC_VV (1+it),VEC_TEMP_MATOP);
206: 
207:         }
208:         dgmres->matvecs += 1;
209:         /* update hessenberg matrix and do Gram-Schmidt */
210:         (*dgmres->orthog) (ksp,it);
211: 

213:         /* vv(i+1) . vv(i+1) */
214:         VecNormalize (VEC_VV (it+1),&tt);
215: 
216:         /* save the magnitude */
217:         *HH (it+1,it)    = tt;
218:         *HES (it+1,it)   = tt;

220:         /* check for the happy breakdown */
221:         hapbnd  = PetscAbsScalar (tt / *GRS (it));
222:         if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
223:         if (tt < hapbnd) {
224:             PetscInfo2 (ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);
225: 
226:             hapend = PETSC_TRUE;
227:         }
228:         KSPDGMRESUpdateHessenberg (ksp,it,hapend,&res);
229: 

231:         it++;
232:         dgmres->it  = (it-1);    /* For converged */
233:         ksp->its++;
234:         ksp->rnorm = res;
235:         if (ksp->reason) break;

237:         (*ksp->converged) (ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
238: 

240:         /* Catch error in happy breakdown and signal convergence and break from loop */
241:         if (hapend) {
242:             if (!ksp->reason) {
243:               SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_PLIB,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
244:             }
245:             break;
246:         }
247:     }

249:     /* Monitor if we know that we will not return for a restart */
250:     if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
251:         KSPLogResidualHistory (ksp,res);
252:         KSPMonitor (ksp,ksp->its,res);
253:     }

255:     if (itcount) *itcount    = it;


258:     /*
259:       Down here we have to solve for the "best" coefficients of the Krylov
260:       columns, add the solution values together, and possibly unwind the
261:       preconditioning from the solution
262:      */
263:     /* Form the solution (or the solution so far) */
264:     KSPDGMRESBuildSoln (GRS (0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
265: 

267:     /* Compute data for the deflation to be used during the next restart */
268:     if (!ksp->reason && ksp->its < ksp->max_it) {
269:         test = max_k *log (ksp->rtol/res) /log (res/res_old);
270:         /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
271:         if ( (test > dgmres->smv* (ksp->max_it-ksp->its)) || dgmres->force) {
272:             ierr= KSPDGMRESComputeDeflationData (ksp);
273: 
274:         }
275:     }

277:     PetscFunctionReturn (0);
278: }

282: PetscErrorCode KSPSolve_DGMRES (KSP ksp) {
284:     PetscInt       its,itcount;
285:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
286:     PetscTruth     guess_zero = ksp->guess_zero;

289:     if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");

291:     PetscObjectTakeAccess (ksp);
292: 
293:     ksp->its = 0;
294:     dgmres->matvecs = 0;
295:     PetscObjectGrantAccess (ksp);
296: 

298:     itcount     = 0;
299:     ksp->reason = KSP_CONVERGED_ITERATING;
300:     while (!ksp->reason) {
301:         KSPInitialResidual (ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV (0),ksp->vec_rhs);
302: 
303:         if (ksp->pc_side == PC_LEFT) {
304:             dgmres->matvecs += 1;
305:             if (dgmres->r > 0) {
306:                 KSPDGMRESApplyDeflation (ksp, VEC_VV (0), VEC_TEMP);
307: 
308:                 ierr=VecCopy (VEC_TEMP, VEC_VV (0));
309: 
310:             }
311:         }

313:         KSPDGMRESCycle (&its,ksp);
314: 
315:         itcount += its;
316:         if (itcount >= ksp->max_it) {
317:             if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
318:             break;
319:         }
320:         ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
321:     }
322:     ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
323:     PetscFunctionReturn (0);
324: }


329: PetscErrorCode KSPDestroy_DGMRES (KSP ksp) {
331:     KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
332:     PetscInt            neig1 = dgmres->neig+EIG_OFFSET;
333:     PetscInt            max_neig = dgmres->max_neig;

336:     if (dgmres->r) {
337:         ierr=VecDestroyVecs (max_neig, &UU);
338: 
339:         ierr=VecDestroyVecs (max_neig, &MU);
340: 
341:         ierr=VecDestroyVecs (neig1, &XX);
342: 
343:         ierr=VecDestroyVecs (neig1, &MX);
344: 

346:         ierr=PetscFree (TT);
347:         ierr=PetscFree (TTF);
348:         ierr=PetscFree (INVP);

350:         ierr=PetscFree (XMX);
351:         ierr=PetscFree (UMX);
352:         ierr=PetscFree (XMU);
353:         ierr=PetscFree (X1);
354:         ierr=PetscFree (X2);
355:         ierr=PetscFree (dgmres->work);
356:         ierr=PetscFree (dgmres->iwork);
357:         ierr=PetscFree (ORTH);

359:         PetscFree (AUAU);
360:         PetscFree (AUU);
361:         PetscFree (SR2);
362:     }
363:     ierr=PetscFree (SR);
364:     KSPDestroy_GMRES (ksp);
365: 
366:     PetscFunctionReturn (0);
367: }
368: /*
369:     KSPDGMRESBuildSoln - create the solution from the starting vector and the
370:     current iterates.

372:     Input parameters:
373:         nrs - work area of size it + 1.
374:         vs  - index of initial guess
375:         vdest - index of result.  Note that vs may == vdest (replace
376:                 guess with the solution).

378:      This is an internal routine that knows about the GMRES internals.
379:  */
382: static PetscErrorCode KSPDGMRESBuildSoln (PetscScalar* nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it) {
383:     PetscScalar    tt;
385:     PetscInt       ii,k,j;
386:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) (ksp->data);

389:     /* Solve for solution vector that minimizes the residual */

391:     /* If it is < 0, no gmres steps have been performed */
392:     if (it < 0) {
393:         VecCopy (vs,vdest);
394:              /* VecCopy() is smart, exists immediately if vguess == vdest */
395:         PetscFunctionReturn (0);
396:     }
397:     if (*HH (it,it) == 0.0) SETERRQ2 (((PetscObject)ksp)->comm, PETSC_ERR_CONV_FAILED,"Likley your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar (*GRS (it)));
398:     if (*HH (it,it) != 0.0) {
399:         nrs[it] = *GRS (it) / *HH (it,it);
400:     } else {
401:         nrs[it] = 0.0;
402:     }
403:     for (ii=1; ii<=it; ii++) {
404:         k   = it - ii;
405:         tt  = *GRS (k);
406:         for (j=k+1; j<=it; j++) tt  = tt - *HH (k,j) * nrs[j];
407:         if (*HH (k,k) == 0.0) SETERRQ2 (((PetscObject)ksp)->comm, PETSC_ERR_CONV_FAILED,"Likley your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
408:         nrs[k]   = tt / *HH (k,k);
409:     }

411:     /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
412:     VecSet (VEC_TEMP,0.0);
413: 
414:     VecMAXPY (VEC_TEMP,it+1,nrs,&VEC_VV (0));
415: 

417:     /* Apply deflation */
418:     if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
419:         ierr=KSPDGMRESApplyDeflation (ksp, VEC_TEMP, VEC_TEMP_MATOP);
420: 
421:         ierr=VecCopy (VEC_TEMP_MATOP, VEC_TEMP);
422: 
423:     }
424:     KSPUnwindPreconditioner (ksp,VEC_TEMP,VEC_TEMP_MATOP);
425: 


428:     /* add solution to previous solution */
429:     if (vdest != vs) {
430:         VecCopy (vs,vdest);
431: 
432:     }
433:     VecAXPY (vdest,1.0,VEC_TEMP);
434: 
435:     PetscFunctionReturn (0);
436: }
437: /*
438:    Do the scalar work for the orthogonalization.  Return new residual norm.
439:  */
442: static PetscErrorCode KSPDGMRESUpdateHessenberg (KSP ksp,PetscInt it,PetscTruth hapend,PetscReal *res) {
443:     PetscScalar *hh,*cc,*ss,tt;
444:     PetscInt    j;
445:     KSP_DGMRES   *dgmres = (KSP_DGMRES *) (ksp->data);

448:     hh  = HH (0,it);
449:     cc  = CC (0);
450:     ss  = SS (0);

452:     /* Apply all the previously computed plane rotations to the new column
453:       of the Hessenberg matrix */
454:     for (j=1; j<=it; j++) {
455:         tt  = *hh;
456: #if defined(PETSC_USE_COMPLEX)
457:         *hh = PetscConj (*cc) * tt + *ss * * (hh+1);
458: #else
459:         *hh = *cc * tt + *ss * * (hh+1);
460: #endif
461:         hh++;
462:         *hh = *cc++ * *hh - (*ss++ * tt);
463:     }

465:     /*
466:       compute the new plane rotation, and apply it to:
467:       1) the right-hand-side of the Hessenberg system
468:       2) the new column of the Hessenberg matrix
469:       thus obtaining the updated value of the residual
470:     */
471:     if (!hapend) {
472: #if defined(PETSC_USE_COMPLEX)
473:         tt        = PetscSqrtScalar (PetscConj (*hh) * *hh + PetscConj (* (hh+1)) * * (hh+1));
474: #else
475:         tt        = PetscSqrtScalar (*hh * *hh + * (hh+1) * * (hh+1));
476: #endif
477:         if (tt == 0.0) {
478:             ksp->reason = KSP_DIVERGED_NULL;
479:             PetscFunctionReturn (0);
480:         }
481:         *cc       = *hh / tt;
482:         *ss       = * (hh+1) / tt;
483:         *GRS (it+1) = - (*ss * *GRS (it));
484: #if defined(PETSC_USE_COMPLEX)
485:         *GRS (it)   = PetscConj (*cc) * *GRS (it);
486:         *hh       = PetscConj (*cc) * *hh + *ss * * (hh+1);
487: #else
488:         *GRS (it)   = *cc * *GRS (it);
489:         *hh       = *cc * *hh + *ss * * (hh+1);
490: #endif
491:         *res      = PetscAbsScalar (*GRS (it+1));
492:     } else {
493:         /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
494:                 another rotation matrix (so RH doesn't change).  The new residual is
495:                 always the new sine term times the residual from last time (GRS(it)),
496:                 but now the new sine rotation would be zero...so the residual should
497:                 be zero...so we will multiply "zero" by the last residual.  This might
498:                 not be exactly what we want to do here -could just return "zero". */

500:         *res = 0.0;
501:     }
502:     PetscFunctionReturn (0);
503: }
504: /*
505:    This routine allocates more work vectors, starting from VEC_VV(it).
506:  */
509: static PetscErrorCode KSPDGMRESGetNewVectors (KSP ksp,PetscInt it) {
510:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
512:     PetscInt       nwork = dgmres->nwork_alloc,k,nalloc;

515:     nalloc = PetscMin (ksp->max_it,dgmres->delta_allocate);
516:     /* Adjust the number to allocate to make sure that we don't exceed the
517:       number of available slots */
518:     if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
519:         nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
520:     }
521:     if (!nalloc) PetscFunctionReturn (0);

523:     dgmres->vv_allocated += nalloc;
524:     KSPGetVecs (ksp,nalloc,&dgmres->user_work[nwork],0,PETSC_NULL);
525: 
526:     PetscLogObjectParents (ksp,nalloc,dgmres->user_work[nwork]);
527: 
528:     dgmres->mwork_alloc[nwork] = nalloc;
529:     for (k=0; k<nalloc; k++) {
530:         dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
531:     }
532:     dgmres->nwork_alloc++;
533:     PetscFunctionReturn (0);
534: }

538: PetscErrorCode KSPBuildSolution_DGMRES (KSP ksp,Vec  ptr,Vec *result) {
539:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;

543:     if (!ptr) {
544:         if (!dgmres->sol_temp) {
545:             VecDuplicate (ksp->vec_sol,&dgmres->sol_temp);
546: 
547:             PetscLogObjectParent (ksp,dgmres->sol_temp);
548: 
549:         }
550:         ptr = dgmres->sol_temp;
551:     }
552:     if (!dgmres->nrs) {
553:         /* allocate the work area */
554:         PetscMalloc (dgmres->max_k*sizeof (PetscScalar),&dgmres->nrs);
555: 
556:         PetscLogObjectMemory (ksp,dgmres->max_k*sizeof (PetscScalar));
557: 
558:     }

560:     KSPDGMRESBuildSoln (dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
561: 
562:     if (result) *result = ptr;
563:     PetscFunctionReturn (0);
564: }


569: PetscErrorCode KSPView_DGMRES (KSP ksp,PetscViewer viewer) {
570:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
572:     PetscTruth     iascii;

575:     KSPView_GMRES (ksp,viewer);
576: 
577:     PetscObjectTypeCompare ( (PetscObject) viewer,PETSCVIEWERASCII,&iascii);
578: 
579:     if (iascii) {
580:         if (dgmres->force)
581:             PetscViewerASCIIPrintf (viewer, "   DGMRES: Adaptive strategy is used: FALSE\n");
582:         else
583:             PetscViewerASCIIPrintf (viewer, "   DGMRES: Adaptive strategy is used: TRUE\n");
584:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Frequency of extracted eigenvalues = %D\n", dgmres->neig);
585: 
586:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
587: 
588:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
589: 
590:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: relaxation parameter for the adaptive strategy(smv)  = %g\n", dgmres->smv);
591: 
592:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
593: 
594:     } else {
595:       SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_SUP,"Viewer type %s not supported for KSP DGMRES", ( (PetscObject) viewer)->type_name);
596:     }

598:     PetscFunctionReturn (0);
599: }

601: /* New DGMRES functions */

603: EXTERN_C_BEGIN
606: PetscErrorCode  KSPDGMRESSetEigen_DGMRES (KSP ksp,PetscInt neig) {
607:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
609:     if (neig< 0 && neig >dgmres->max_k)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
610:     dgmres->neig=neig;
611:     PetscFunctionReturn (0);
612: }
613: EXTERN_C_END

615: EXTERN_C_BEGIN
618: PetscErrorCode  KSPDGMRESSetMaxEigen_DGMRES (KSP ksp,PetscInt max_neig) {
619:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
621:     if (max_neig < 0 && max_neig >dgmres->max_k)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
622:     dgmres->max_neig=max_neig;
623:     PetscFunctionReturn (0);
624: }
625: EXTERN_C_END


628: EXTERN_C_BEGIN
631: PetscErrorCode  KSPDGMRESSetRatio_DGMRES (KSP ksp,PetscReal ratio) {
632:         KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
634:         if (ratio <= 0)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
635:         dgmres->smv=ratio;
636:         PetscFunctionReturn (0);
637: }
638: EXTERN_C_END

640: EXTERN_C_BEGIN
643: PetscErrorCode  KSPDGMRESForce_DGMRES (KSP ksp,PetscInt force) {
644:         KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
646:         if (force != 0 && force != 1)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"Value must be 0 or 1");
647:         dgmres->force = 1;
648:         PetscFunctionReturn (0);
649: }
650: EXTERN_C_END


653: extern PetscErrorCode KSPSetFromOptions_DGMRES (KSP);

657: PetscErrorCode KSPSetFromOptions_DGMRES (KSP ksp) {
659:     PetscInt       neig;
660:     PetscInt         max_neig;
661:     KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
662:     PetscTruth     flg;
663:     PetscReal           smv;
664:     PetscInt            input;

667:     KSPSetFromOptions_GMRES (ksp);
668: 
669:     PetscOptionsHead ("KSP DGMRES Options");
670: 
671:     PetscOptionsInt ("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
672: 
673:     if (flg) {
674:         KSPDGMRESSetEigen (ksp, neig);
675: 
676:     }
677:     PetscOptionsInt ("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
678: 
679:     if (flg) {
680:         KSPDGMRESSetMaxEigen (ksp, max_neig);
681: 
682:     }
683:     ierr=PetscOptionsReal ("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &smv, &flg);
684: 
685:     if (flg) dgmres->smv = smv;
686:     PetscOptionsInt ("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &input, &flg);
687: 
688:     if (flg) dgmres->improve = input;
689:     PetscOptionsInt ("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &input, &flg);
690: 
691:     if (flg) dgmres->force = input;
692:     PetscOptionsTail();
693: 
694:     PetscFunctionReturn (0);
695: }

697: EXTERN_C_BEGIN
700: PetscErrorCode  KSPDGMRESComputeDeflationData_DGMRES (KSP ksp)
701: {
702:     KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
704:     PetscInt       i,j, k;
705:     PetscInt       r=dgmres->r;
706:     PetscBLASInt   nr, bmax;
707:     PetscInt       neig; /* number of eigenvalues to extract at each restart */
708:     PetscInt       neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
709:     PetscInt       max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
710:     PetscInt           N = dgmres->max_k+1;
711:     PetscInt       n = dgmres->it+1;
712:     PetscReal           alpha;
713: #if !defined(PETSC_MISSING_LAPACK_GETRF) 
714:     PetscBLASInt info;
715: #endif

718:     ierr=PetscLogEventBegin (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
719: 
720:     if (dgmres->neig == 0)
721:         PetscFunctionReturn (0);
722:     if (max_neig < (r+neig1) && !dgmres->improve) {
723:         ierr=PetscLogEventEnd (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
724: 
725:         PetscFunctionReturn (0);
726:     }

728:     ierr= KSPDGMRESComputeSchurForm (ksp, &neig);
729: 

731:     /* Form the extended Schur vectors X=VV*Sr */
732:     if (!XX) {
733:         ierr=VecDuplicateVecs (VEC_VV (0), neig1, &XX);
734: 
735:     }
736:     for (j = 0; j<neig; j++) {
737:         VecZeroEntries (XX[j]);
738: 
739:         VecMAXPY (XX[j], n, &SR[j*N], &VEC_VV (0));
740: 
741:     }

743:     /* Orthogonalize X against U */
744:     if (!ORTH) {
745:         ierr=PetscMalloc (max_neig*sizeof (PetscReal), &ORTH);
746: 
747:     }
748:     if (r > 0) {
749:         /* modified Gram-Schmidt */
750:         for (j = 0; j<neig; j++) {
751:             for (i=0; i<r; i++) {
752:                 /* First, compute U'*X[j] */
753:                 VecDot (XX[j], UU[i], &alpha);
754: 
755:                 /* Then, compute X(j)=X(j)-U*U'*X(j) */
756:                 VecAXPY (XX[j], -alpha, UU[i]);
757: 
758:             }
759:         }
760:     }
761:     /* Compute MX = M^{-1}*A*X */
762:     if (!MX) {
763:         ierr=VecDuplicateVecs (VEC_VV (0), neig1, &MX);
764: 
765:     }
766:     for (j = 0; j<neig; j++) {
767:         KSP_PCApplyBAorAB (ksp, XX[j], MX[j], VEC_TEMP_MATOP);
768: 
769:     }
770:     dgmres->matvecs += neig;

772:     if ( (r+neig1) > max_neig && dgmres->improve) {    /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
773:         KSPDGMRESImproveEig (ksp, neig);
774: 
775:         PetscFunctionReturn (0);   /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
776:     }

778:     /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
779:     if (!XMX) {
780:         ierr=PetscMalloc (neig1*neig1*sizeof (PetscReal), &XMX);
781: 
782:     }
783:     for (j = 0; j < neig; j++) {
784:         ierr=VecMDot (MX[j], neig, XX, & (XMX[j*neig1]));
785: 
786:     }

788:     if (r > 0) {
789:         /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
790:         if (!UMX) {
791:             PetscMalloc (max_neig*neig1*sizeof (PetscReal), &UMX);
792: 
793:         }
794:         for (j = 0; j < neig; j++) {
795:             VecMDot (MX[j], r, UU, & (UMX[j*max_neig]));
796: 
797:         }
798:         /* Compute XMU = X'*M^{-1}*A*U -- size (neig, r) */
799:         if (!XMU) {
800:             PetscMalloc (max_neig*neig1*sizeof (PetscReal), &XMU);
801: 
802:         }
803:         for (j = 0; j<r; j++) {
804:             ierr=VecMDot (MU[j], neig, XX, & (XMU[j*neig1]));
805: 
806:         }

808:     }

810:     /* Form the new matrix T = [T UMX; XMU XMX]; */
811:     if (!TT) {
812:         PetscMalloc (max_neig*max_neig*sizeof (PetscReal), &TT);
813: 
814:     }
815:     if (r > 0) {
816:         /* Add XMU to T */
817:         for (j = 0; j < r; j++) {
818:             PetscMemcpy (& (TT[max_neig*j+r]), & (XMU[neig1*j]), neig*sizeof (PetscReal));
819: 
820:         }
821:         /* Add [UMX; XMX] to T */
822:         for (j = 0; j < neig; j++) {
823:             k = r+j;
824:             PetscMemcpy (& (TT[max_neig*k]), & (UMX[max_neig*j]), r*sizeof (PetscReal));
825: 
826:             ierr=PetscMemcpy (& (TT[max_neig*k + r]), & (XMX[neig1*j]), neig*sizeof (PetscReal));
827: 
828:         }
829:     } else { /* Add XMX to T */
830:         for (j = 0; j < neig; j++) {
831:             ierr=PetscMemcpy (& (TT[max_neig*j]), & (XMX[neig1*j]), neig*sizeof (PetscReal));
832: 
833:         }
834:     }

836:     dgmres->r += neig;
837:     r=dgmres->r;
838:     nr = PetscBLASIntCast (r);
839:     /*LU Factorize T with Lapack xgetrf routine */

841:     bmax = PetscBLASIntCast (max_neig);
842:     if (!TTF) {
843:         PetscMalloc (bmax*bmax*sizeof (PetscReal), &TTF);
844: 
845:     }
846:     ierr=PetscMemcpy (TTF, TT, bmax*r*sizeof (PetscReal));
847: 
848:     if (!INVP) {
849:         ierr=PetscMalloc (bmax*sizeof (PetscBLASInt), &INVP);
850: 
851:     }
852: #if defined(PETSC_MISSING_LAPACK_GETRF) 
853:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
854: #else
855:     LAPACKgetrf_ (&nr, &nr, TTF, &bmax, INVP, &info);
856:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d", (int) info);
857: #endif

859:     /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
860:     if (!UU) {
861:         ierr=VecDuplicateVecs (VEC_VV (0), max_neig, &UU);
862: 
863:         ierr=VecDuplicateVecs (VEC_VV (0), max_neig, &MU);
864: 
865:     }
866:     for (j=0; j<neig; j++) {
867:         ierr=VecCopy (XX[j], UU[r-neig+j]);
868: 
869:         ierr=VecCopy (MX[j], MU[r-neig+j]);
870: 
871:     }
872:     ierr=PetscLogEventEnd (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
873: 
874:     PetscFunctionReturn (0);
875: }
876: EXTERN_C_END

878: EXTERN_C_BEGIN
881: PetscErrorCode  KSPDGMRESComputeSchurForm_DGMRES (KSP ksp, PetscInt *neig) {
882:     KSP_DGMRES             *dgmres = (KSP_DGMRES*) ksp->data;
883:     PetscErrorCode         ierr;
884:     PetscInt               N = dgmres->max_k + 1, n=dgmres->it+1;
885:     PetscBLASInt        bn, bN;
886:     PetscReal            *A;
887:     PetscBLASInt         ihi;
888:     PetscBLASInt         ldA;                 /* leading dimension of A */
889:     PetscBLASInt        ldQ;                /* leading dimension of Q */
890:     PetscReal                *Q;                 /*  orthogonal matrix of  (left) schur vectors */
891:     PetscReal                 *work;                /* working vector */
892:     PetscBLASInt         lwork;                /* size of the working vector */
893:     PetscInt                 *perm;                /* Permutation vector to sort eigenvalues */
894:     PetscInt                i, j;
895:     PetscBLASInt        NbrEig;         /* Number of eigenvalues really extracted */
896:     PetscReal                *wr, *wi, *modul;         /* Real and imaginary part and modul of the eigenvalues of A*/
897:     PetscBLASInt *select;
898:     PetscBLASInt *iwork;
899:     PetscBLASInt liwork;
900: #if !defined(PETSC_MISSING_LAPACK_HSEQR) 
901:     PetscBLASInt         ilo=1;
902:     PetscBLASInt        info;
903:     PetscReal                 CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
904:     PetscReal                 CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
905: #endif

908:     bn=PetscBLASIntCast (n);
909:     bN=PetscBLASIntCast (N);
910:     ihi = ldQ = bn;
911:     ldA=bN;
912:     lwork = PetscBLASIntCast (5*N);

914: #if defined(PETSC_USE_COMPLEX)
915:     SETERRQ (((PetscObject)ksp)->comm, -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
916:     PetscFunctionReturn (-1);
917: #endif

919:     PetscMalloc (ldA*ldA*sizeof (PetscReal), &A);
920:     PetscMalloc (ldQ*n*sizeof (PetscReal), &Q);
921:     PetscMalloc (lwork*sizeof (PetscReal), &work);
922:     PetscMalloc (n*sizeof (PetscReal), &wr);
923:     PetscMalloc (n*sizeof (PetscReal), &wi);
924:     PetscMalloc (n*sizeof (PetscReal),&modul);
925:     PetscMalloc (n*sizeof (PetscInt),&perm);
926:     /* copy the Hessenberg matrix to work space */
927:     PetscMemcpy (A, dgmres->hes_origin, ldA*ldA*sizeof (PetscReal));
928:     /* Compute eigenvalues with the Schur form */
929: #if defined(PETSC_MISSING_LAPACK_HSEQR) 
930:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
931: #else
932:     LAPACKhseqr_ ("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info);
933:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d", (int) info);
934: #endif
935:     PetscFree(work);

937:     /* sort the eigenvalues */
938:     for (i=0; i<n; i++)  {
939:         modul[i]=sqrt (wr[i]*wr[i]+wi[i]*wi[i]);
940:     }
941:     for (i=0; i<n; i++) {
942:         perm[i] = i;
943:     }
944:     PetscSortRealWithPermutation (n, modul, perm);
945:     /* save the complex modulus of the largest eigenvalue in magnitude */
946:     if (dgmres->lambdaN < modul[perm[n-1]]) {
947:         dgmres->lambdaN=modul[perm[n-1]];
948:     }
949:         /* count the number of extracted eigenvalues (with complex conjugates) */
950:         NbrEig = 0;
951:         while (NbrEig < dgmres->neig)
952:         {
953:           if (wi[perm[NbrEig]] != 0)
954:                 NbrEig += 2;
955:           else
956:                 NbrEig += 1;
957:         }
958:         /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

960:         PetscMalloc(n * sizeof(PetscBLASInt), &select);
961:         PetscMemzero(select, n * sizeof(PetscBLASInt));

963:         if (dgmres->GreatestEig == PETSC_FALSE)
964:         {
965:                 for (j = 0; j < NbrEig; j++)
966:                         select[perm[j]] = 1;
967:         }
968:         else
969:         {
970:                 for (j = 0; j < NbrEig; j++)
971:                         select[perm[n-j-1]] = 1;
972:         }
973:         /* call Lapack dtrsen */
974:         lwork  =  PetscMax(1, 4 * NbrEig * (bn-NbrEig));
975:         liwork = PetscMax(1, 2 * NbrEig * (bn-NbrEig));
976:         PetscMalloc(lwork * sizeof(PetscScalar), &work);
977:         PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
978: #if defined(PETSC_MISSING_LAPACK_TRSEN) 
979:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
980: #else
981:         LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info);
982:         if (info == 1) SETERRQ(((PetscObject)ksp)->comm, PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
983: #endif

985:         /* Extract the Schur vectors */
986:         for (j = 0; j < NbrEig; j++)
987:         {
988:                 PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
989:         }
990:     *neig=NbrEig;
991:         ierr=PetscFree (A);
992:         ierr=PetscFree (Q);
993:         ierr=PetscFree (wr);
994:         ierr=PetscFree (wi);
995:         ierr=PetscFree (work);
996:     ierr=PetscFree (modul);
997:     ierr=PetscFree (perm);
998:         PetscFree(work);
999:         PetscFree(iwork);
1000:     PetscFunctionReturn (0);
1001: }
1002: EXTERN_C_END


1005: EXTERN_C_BEGIN
1008: PetscErrorCode  KSPDGMRESApplyDeflation_DGMRES (KSP ksp, Vec x, Vec y) {
1009:     KSP_DGMRES             *dgmres = (KSP_DGMRES*) ksp->data;
1010:     PetscInt        i, r = dgmres->r;
1011:     PetscErrorCode         ierr;
1012:     PetscBLASInt        nrhs = 1;
1013:     PetscReal        alpha = 1.0;
1014:     PetscInt        max_neig = dgmres->max_neig;
1015:     PetscBLASInt        br,bmax;
1016:     PetscInt        lambda = dgmres->lambdaN;
1017: #if !defined(PETSC_MISSING_LAPACK_GETRS) 
1018:     PetscReal        berr, ferr;
1019:     PetscBLASInt info;
1020: #endif

1023:     br = PetscBLASIntCast (r);
1024:     bmax = PetscBLASIntCast (max_neig);
1025:     ierr=PetscLogEventBegin (KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1026:     if (!r) {
1027:         ierr=VecCopy (x,y);
1028:         PetscFunctionReturn (0);
1029:     }
1030:     /* Compute U'*x */
1031:     if (!X1) {
1032:         PetscMalloc (bmax*sizeof (PetscReal), &X1);
1033:         PetscMalloc (bmax*sizeof (PetscReal), &X2);
1034:     }
1035:     VecMDot (x, r, UU, X1);

1037:     /* Solve T*X1=X2 for X1*/
1038:     PetscMemcpy (X2, X1, br*sizeof (PetscReal));
1039: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1040:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
1041: #else
1042:     LAPACKgetrs_ ("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info);
1043:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
1044: #endif
1045:     /* Iterative refinement -- is it really necessary ?? */
1046:     if (!WORK) {
1047:         ierr=PetscMalloc (3*bmax*sizeof (PetscReal), &WORK);
1048:         ierr=PetscMalloc (bmax*sizeof (PetscInt), &IWORK);
1049:     }
1050: #if defined(PETSC_MISSING_LAPACK_GERFS) 
1051:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
1052: #else
1053:     LAPACKgerfs_ ("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,
1054:                   X1, &bmax, &ferr, &berr, WORK, IWORK, &info);
1055:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
1056: #endif

1058:     for (i = 0; i < r; i++) {
1059:         X2[i] =  X1[i]/lambda - X2[i];
1060:     }

1062:     /* Compute X2=U*X2 */
1063:     ierr=VecZeroEntries (y);
1064: 
1065:     ierr=VecMAXPY (y, r, X2, UU);
1066: 

1068:     ierr=VecAXPY (y, alpha, x);
1069: 

1071:     ierr=PetscLogEventEnd (KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1072: 
1073:     PetscFunctionReturn (0);
1074: }
1075: EXTERN_C_END

1077: EXTERN_C_BEGIN
1080: PetscErrorCode  KSPDGMRESImproveEig_DGMRES (KSP ksp, PetscInt neig)
1081: {
1082:         KSP_DGMRES                *dgmres = (KSP_DGMRES*) ksp->data;
1083:         PetscInt                j,r_old, r = dgmres->r;
1084:         PetscBLASInt                i = 0;
1085:         PetscInt                neig1 = dgmres->neig + EIG_OFFSET;
1086:         PetscInt                bmax = dgmres->max_neig;
1087:         PetscInt                aug = r + neig; /* actual size of the augmented invariant basis */
1088:         PetscInt                aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
1089:         PetscBLASInt         ldA;                 /* leading dimension of AUAU and AUU*/
1090:         PetscBLASInt        N;                         /* size of AUAU */
1091:         PetscReal                *Q;                 /*  orthogonal matrix of  (left) schur vectors */
1092:         PetscReal                *Z;                 /*  orthogonal matrix of  (right) schur vectors */
1093:         PetscReal                 *work;                /* working vector */
1094:         PetscBLASInt         lwork;                /* size of the working vector */
1095:         PetscBLASInt         info;                /* Output info from Lapack routines */
1096:         PetscInt         *perm;                /* Permutation vector to sort eigenvalues */
1097:         PetscReal                *wr, *wi, *beta, *modul;         /* Real and imaginary part and modul of the eigenvalues of A*/
1098:         PetscInt                ierr;
1099:         PetscBLASInt NbrEig = 0,nr,bm;
1100:         PetscBLASInt *select;
1101:         PetscBLASInt liwork, *iwork;
1102: #if !defined(PETSC_MISSING_LAPACK_TGSEN)
1103:         PetscReal Dif[2];
1104:         PetscBLASInt ijob = 2;
1105:         PetscBLASInt wantQ = 1, wantZ = 1;
1106: #endif

1109:         /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1110:         if (!AUU) {
1111:                 ierr=PetscMalloc (aug1*aug1*sizeof (PetscReal), &AUU);
1112: 
1113:                 ierr=PetscMalloc (aug1*aug1*sizeof (PetscReal), &AUAU);
1114: 
1115:         }
1116:     /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1117:         * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1118:         /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1119:         for (j=0; j < r; j++) {
1120:                 VecMDot (UU[j], r, MU, &AUU[j*aug1]);
1121:         }
1122:         /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1123:         for (j = 0; j < neig; j++) {
1124:                 VecMDot (XX[j], r, MU, &AUU[ (r+j) *aug1]);
1125:         }
1126:         /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1127:         for (j = 0; j < r; j++) {
1128:                 VecMDot (UU[j], neig, MX, &AUU[j*aug1+r]);
1129:         }
1130:         /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
1131:         for (j = 0; j < neig; j++) {
1132:                 VecMDot (XX[j], neig, MX, &AUU[ (r+j) *aug1 + r]);
1133:         }

1135:         /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1136:         /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1137:         for (j=0; j < r; j++) {
1138:                 VecMDot (MU[j], r, MU, &AUAU[j*aug1]);
1139:         }
1140:         /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1141:         for (j = 0; j < neig; j++) {
1142:                 VecMDot (MX[j], r, MU, &AUAU[ (r+j) *aug1]);
1143:         }
1144:         /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1145:         for (j = 0; j < r; j++) {
1146:                 VecMDot (MU[j], neig, MX, &AUAU[j*aug1+r]);
1147:         }
1148:         /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
1149:         for (j = 0; j < neig; j++) {
1150:                 VecMDot (MX[j], neig, MX, &AUAU[ (r+j) *aug1 + r]);
1151:         }

1153:         /* Computation of the eigenvectors */
1154:         ldA = PetscBLASIntCast (aug1);
1155:         N = PetscBLASIntCast (aug);
1156:         lwork = 8 * N + 20; /* sizeof the working space */
1157:         PetscMalloc (N*sizeof (PetscReal), &wr);
1158:         PetscMalloc (N*sizeof (PetscReal), &wi);
1159:         PetscMalloc (N*sizeof (PetscReal), &beta);
1160:         PetscMalloc (N*sizeof (PetscReal), &modul);
1161:         PetscMalloc (N*sizeof (PetscInt), &perm);
1162:         PetscMalloc (N*N*sizeof (PetscReal), &Q);
1163:         PetscMalloc (N*N*sizeof (PetscReal), &Z);
1164:         PetscMalloc (lwork*sizeof (PetscReal), &work);
1165: #if defined(PETSC_MISSING_LAPACK_GGES) 
1166:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1167: #else
1168:         LAPACKgges_ ("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info);
1169:         if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1170: #endif
1171:         for (i=0; i<N; i++) {
1172:                 if (beta[i] !=0.0) {
1173:                         wr[i] /=beta[i];
1174:                         wi[i] /=beta[i];
1175:                 }
1176:         }
1177:         /* sort the eigenvalues */
1178:         for (i=0; i<N; i++)  {
1179:                 modul[i]=sqrt (wr[i]*wr[i]+wi[i]*wi[i]);
1180:         }
1181:         for (i=0; i<N; i++) {
1182:                 perm[i] = i;
1183:         }
1184:         PetscSortRealWithPermutation (N, modul, perm);
1185:         /* Save the norm of the largest eigenvalue */
1186:         if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1187:         /* Allocate space to extract the first r schur vectors   */
1188:         if (!SR2) {
1189:                 ierr=PetscMalloc (aug1*bmax*sizeof (PetscReal), &SR2);
1190:         }
1191:         /* count the number of extracted eigenvalues ( complex conjugates count as 2) */
1192:         while (NbrEig < bmax) {
1193:                 if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1194:                 else NbrEig += 2;
1195:         }
1196:         if (NbrEig > bmax) NbrEig = bmax - 1;
1197:         r_old=r; /* previous size of r */
1198:         dgmres->r = r = NbrEig;
1199: 
1200:         /* Select the eigenvalues to reorder */
1201:         PetscMalloc(N * sizeof(PetscBLASInt), &select);
1202:         PetscMemzero(select, N * sizeof(PetscBLASInt));
1203:         if (dgmres->GreatestEig == PETSC_FALSE)
1204:         {
1205:                 for (j = 0; j < NbrEig; j++)
1206:                         select[perm[j]] = 1;
1207:         }
1208:         else
1209:         {
1210:                 for (j = 0; j < NbrEig; j++)
1211:                         select[perm[N-j-1]] = 1;
1212:         }
1213:         /* Reorder and extract the new <r> schur vectors */
1214:         lwork  = PetscMax(4 * N + 16,  2 * NbrEig * (N - NbrEig) );
1215:         liwork = PetscMax(N + 6,  2 * NbrEig * (N - NbrEig) );
1216:         PetscFree(work);
1217:         PetscMalloc(lwork * sizeof(PetscReal), &work);
1218:         PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1219: #if defined(PETSC_MISSING_LAPACK_TGSEN )
1220:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1221: #else
1222:         LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, & (Dif[0]), work, &lwork, iwork, &liwork, &info);
1223:         if (info == 1) SETERRQ(((PetscObject)ksp)->comm, -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1224: #endif
1225: 
1226: 
1227:         for (j=0; j<r; j++) {
1228:                 PetscMemcpy (&SR2[j*aug1], & (Z[j*N]), N*sizeof (PetscReal));
1229:         }

1231:     /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
1232:         -- save it temporarily in MU */
1233:         for (j = 0; j < r; j++) {
1234:                 VecZeroEntries (MU[j]);
1235:                 VecMAXPY (MU[j], r_old, &SR2[j*aug1], UU);
1236:                 VecMAXPY (MU[j], neig, &SR2[j*aug1+r_old], XX);
1237:         }
1238:         /* Form T = U'*MU*U */
1239:         for (j = 0; j < r; j++) {
1240:                 ierr=VecCopy (MU[j], UU[j]);
1241:                 KSP_PCApplyBAorAB (ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1242:         }
1243:         dgmres->matvecs += r;
1244:         for (j = 0; j < r; j++) {
1245:                 VecMDot (MU[j], r, UU, &TT[j*bmax]);
1246:         }
1247:         /* Factorize T */
1248:         ierr=PetscMemcpy (TTF, TT, bmax*r*sizeof (PetscReal));
1249:         nr = PetscBLASIntCast (r);
1250:         bm = PetscBLASIntCast (bmax);
1251: #if defined(PETSC_MISSING_LAPACK_GETRF) 
1252:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1253: #else
1254:         LAPACKgetrf_ (&nr, &nr, TTF, &bm, INVP, &info);
1255:         if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d", (int) info);
1256: #endif
1257:         /* Free Memory */
1258:         PetscFree (wr);    PetscFree (wi);    PetscFree (beta);
1259:         PetscFree (modul);    PetscFree (perm);
1260:         PetscFree (Q);    PetscFree (Z);
1261:         PetscFree (work); PetscFree(iwork);

1263:         PetscFunctionReturn (0);
1264: }
1265: EXTERN_C_END

1267: /* end new DGMRES functions */

1269: /*MC
1270:      KSPDGMRES - Implements the deflated GMRES as defined in [1,2]; In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the stagnation occurs.
1271:         GMRES Options Database Keys:
1272: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1273: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1274: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1275:                              vectors are allocated as needed)
1276: .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1277: .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1278: .   -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1279:                                    stability of the classical Gram-Schmidt  orthogonalization.
1280: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
1281:         DGMRES Options Database Keys:
1282:         -ksp_dgmres_eigen <neig> - Number of smallest eigenvalues to extract at each restart
1283:         -ksp_dgmres_max_eigen <max_neig> - Maximum number of eigenvalues that can be extracted during the iterative process
1284:         -ksp_dgmres_force <0, 1> - Use the deflation at each restart; switch off the adaptive strategy.

1286:    Level: beginner

1288:    Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported

1290:    References:
1291:    [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1292:    [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.

1294:  Contributed by: Desire NUENTSA WAKAM,INRIA

1296: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1297:            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1298:            KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1299:            KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()

1301: M*/

1303: EXTERN_C_BEGIN
1306: PetscErrorCode  KSPCreate_DGMRES (KSP ksp) {
1307:     KSP_DGMRES      *dgmres;

1311:     PetscNewLog (ksp,KSP_DGMRES,&dgmres);
1312:     ksp->data                              = (void*) dgmres;

1314:     KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
1315:     KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);

1317:     ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1318:     ksp->ops->setup                        = KSPSetUp_DGMRES;
1319:     ksp->ops->solve                        = KSPSolve_DGMRES;
1320:     ksp->ops->destroy                      = KSPDestroy_DGMRES;
1321:     ksp->ops->view                         = KSPView_DGMRES;
1322:     ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1323:     ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1324:     ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1326:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetPreAllocateVectors_C",
1327:             "KSPGMRESSetPreAllocateVectors_GMRES",
1328:             KSPGMRESSetPreAllocateVectors_GMRES);
1329:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetOrthogonalization_C",
1330:             "KSPGMRESSetOrthogonalization_GMRES",
1331:             KSPGMRESSetOrthogonalization_GMRES);
1332:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetRestart_C",
1333:             "KSPGMRESSetRestart_GMRES",
1334:             KSPGMRESSetRestart_GMRES);
1335:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetHapTol_C",
1336:             "KSPGMRESSetHapTol_GMRES",
1337:             KSPGMRESSetHapTol_GMRES);
1338:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetCGSRefinementType_C",
1339:             "KSPGMRESSetCGSRefinementType_GMRES",
1340:             KSPGMRESSetCGSRefinementType_GMRES);
1341:     /* -- New functions defined in DGMRES -- */
1342:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetEigen_C",
1343:             "KSPDGMRESSetEigen_DGMRES",
1344:             KSPDGMRESSetEigen_DGMRES);
1345:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetMaxEigen_C",
1346:             "KSPDGMRESSetMaxEigen_DGMRES",
1347:             KSPDGMRESSetMaxEigen_DGMRES);
1348:         ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetRatio_C",
1349:                         "KSPDGMRESSetRatio_DGMRES",
1350:                          KSPDGMRESSetRatio_DGMRES);
1351:         ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESForce_C",
1352:                          "KSPDGMRESForce_DGMRES",
1353:                          KSPDGMRESForce_DGMRES);
1354:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESComputeSchurForm_C",
1355:             "KSPDGMRESComputeSchurForm_DGMRES",
1356:             KSPDGMRESComputeSchurForm_DGMRES);
1357:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESComputeDeflationData_C",
1358:             "KSPDGMRESComputeDeflationData_DGMRES",
1359:             KSPDGMRESComputeDeflationData_DGMRES);
1360:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESApplyDeflation_C",
1361:             "KSPDGMRESApplyDeflation_DGMRES",
1362:             KSPDGMRESApplyDeflation_DGMRES);
1363:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESImproveEig_C", "KSPDGMRESImproveEig_DGMRES", KSPDGMRESImproveEig_DGMRES);

1365:     ierr=PetscLogEventRegister ("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1366:     ierr=PetscLogEventRegister ("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);

1368:     dgmres->haptol              = 1.0e-30;
1369:     dgmres->q_preallocate       = 0;
1370:     dgmres->delta_allocate      = GMRES_DELTA_DIRECTIONS;
1371:     dgmres->orthog              = KSPGMRESClassicalGramSchmidtOrthogonalization;
1372:     dgmres->nrs                 = 0;
1373:     dgmres->sol_temp            = 0;
1374:     dgmres->max_k               = GMRES_DEFAULT_MAXK;
1375:     dgmres->Rsvd                = 0;
1376:     dgmres->cgstype             = KSP_GMRES_CGS_REFINE_NEVER;
1377:     dgmres->orthogwork          = 0;

1379:     /* Default values for the deflation */
1380:     dgmres->r                                        = 0;
1381:     dgmres->neig                                = DGMRES_DEFAULT_EIG;
1382:     dgmres->max_neig                        = DGMRES_DEFAULT_MAXEIG-1;
1383:     dgmres->lambdaN                                = 0.0;
1384:     dgmres->smv                                        = SMV;
1385:     dgmres->force                                = 0;
1386:     dgmres->matvecs                         = 0;
1387:     dgmres->improve                                = 0;
1388:         dgmres->GreatestEig                        = PETSC_FALSE; /* experimental */
1389:     PetscFunctionReturn (0);
1390: }
1391: EXTERN_C_END