/*
   Actual source code: bncg.c
   petsc-master 2019-12-13
   Report Typos and Errors
*/
  1:  #include <petsctaolinesearch.h>
  2:  #include <../src/tao/bound/impls/bncg/bncg.h>
  3:  #include <petscksp.h>

  5: #define CG_GradientDescent      0
  6: #define CG_HestenesStiefel      1
  7: #define CG_FletcherReeves       2
  8: #define CG_PolakRibierePolyak   3
  9: #define CG_PolakRibierePlus     4
 10: #define CG_DaiYuan              5
 11: #define CG_HagerZhang           6
 12: #define CG_DaiKou               7
 13: #define CG_KouDai               8
 14: #define CG_SSML_BFGS            9
 15: #define CG_SSML_DFP             10
 16: #define CG_SSML_BROYDEN         11
 17: #define CG_PCGradientDescent    12
 18: #define CGTypes                 13

 20: static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};

 22: #define CG_AS_NONE       0
 23: #define CG_AS_BERTSEKAS  1
 24: #define CG_AS_SIZE       2

 26: static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};

 28: PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle)
 29: {
 30:   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;

 33:   cg->recycle = recycle;
 34:   return(0);
 35: }

 37: PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
 38: {
 39:   PetscErrorCode               ierr;
 40:   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;

 43:   ISDestroy(&cg->inactive_old);
 44:   if (cg->inactive_idx) {
 45:     ISDuplicate(cg->inactive_idx, &cg->inactive_old);
 46:     ISCopy(cg->inactive_idx, cg->inactive_old);
 47:   }
 48:   switch (asType) {
 49:   case CG_AS_NONE:
 50:     ISDestroy(&cg->inactive_idx);
 51:     VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);
 52:     ISDestroy(&cg->active_idx);
 53:     ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);
 54:     break;

 56:   case CG_AS_BERTSEKAS:
 57:     /* Use gradient descent to estimate the active set */
 58:     VecCopy(cg->unprojected_gradient, cg->W);
 59:     VecScale(cg->W, -1.0);
 60:     TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
 61:                                    &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);
 62:     break;

 64:   default:
 65:     break;
 66:   }
 67:   return(0);
 68: }

 70: PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
 71: {
 72:   PetscErrorCode               ierr;
 73:   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;

 76:   switch (asType) {
 77:   case CG_AS_NONE:
 78:     VecISSet(step, cg->active_idx, 0.0);
 79:     break;

 81:   case CG_AS_BERTSEKAS:
 82:     TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);
 83:     break;

 85:   default:
 86:     break;
 87:   }
 88:   return(0);
 89: }

 91: static PetscErrorCode TaoSolve_BNCG(Tao tao)
 92: {
 93:   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
 94:   PetscErrorCode               ierr;
 95:   PetscReal                    step=1.0,gnorm,gnorm2, resnorm;
 96:   PetscInt                     nDiff;

 99:   /*   Project the current point onto the feasible set */
100:   TaoComputeVariableBounds(tao);
101:   TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);

103:   /* Project the initial point onto the feasible region */
104:   TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);

106:   if (nDiff > 0 || !cg->recycle){
107:     TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);
108:   }
109:   VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);
110:   if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");

112:   /* Estimate the active set and compute the projected gradient */
113:   TaoBNCGEstimateActiveSet(tao, cg->as_type);

115:   /* Project the gradient and calculate the norm */
116:   VecCopy(cg->unprojected_gradient, tao->gradient);
117:   VecISSet(tao->gradient, cg->active_idx, 0.0);
118:   VecNorm(tao->gradient,NORM_2,&gnorm);
119:   gnorm2 = gnorm*gnorm;

121:   /* Initialize counters */
122:   tao->niter = 0;
123:   cg->ls_fails = cg->descent_error = 0;
124:   cg->resets = -1;
125:   cg->skipped_updates = cg->pure_gd_steps = 0;
126:   cg->iter_quad = 0;

128:   /* Convergence test at the starting point. */
129:   tao->reason = TAO_CONTINUE_ITERATING;

131:   VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);
132:   VecNorm(cg->W, NORM_2, &resnorm);
133:   if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
134:   TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);
135:   TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);
136:   (*tao->ops->convergencetest)(tao,tao->cnvP);
137:   if (tao->reason != TAO_CONTINUE_ITERATING) return(0);
138:   /* Calculate initial direction. */
139:   if (!cg->recycle) {
140:     /* We are not recycling a solution/history from a past TaoSolve */
141:     TaoBNCGResetUpdate(tao, gnorm2);
142:   }
143:   /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
144:   while(1) {
145:     /* Call general purpose update function */
146:     if (tao->ops->update) {
147:       (*tao->ops->update)(tao, tao->niter, tao->user_update);
148:     }
149:     TaoBNCGConductIteration(tao, gnorm);
150:     if (tao->reason != TAO_CONTINUE_ITERATING) return(0);
151:   }
152:   return(0);
153: }

/*
  TaoSetUp_BNCG - Allocates the work vectors and LMVM matrix storage needed by
  the BNCG solver. Vectors are duplicated lazily (only when still NULL) so
  repeated setup calls do not reallocate them.
*/
static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG         *cg = (TAO_BNCG*)tao->data;

  if (!tao->gradient) {
    VecDuplicate(tao->solution,&tao->gradient);
  }
  if (!tao->stepdirection) {
    VecDuplicate(tao->solution,&tao->stepdirection);
  }
  if (!cg->W) {
    VecDuplicate(tao->solution,&cg->W);
  }
  if (!cg->work) {
    VecDuplicate(tao->solution,&cg->work);
  }
  if (!cg->sk) {
    VecDuplicate(tao->solution,&cg->sk);
  }
  /* yk is duplicated from the gradient, which is guaranteed non-NULL above */
  if (!cg->yk) {
    VecDuplicate(tao->gradient,&cg->yk);
  }
  if (!cg->X_old) {
    VecDuplicate(tao->solution,&cg->X_old);
  }
  if (!cg->G_old) {
    VecDuplicate(tao->gradient,&cg->G_old);
  }
  /* NOTE(review): unlike the vectors above, these three are duplicated
     unconditionally whenever diag_scaling is set — confirm a second setup
     call cannot reach this point, otherwise the old vectors would leak. */
  if (cg->diag_scaling){
    VecDuplicate(tao->solution,&cg->d_work);
    VecDuplicate(tao->solution,&cg->y_work);
    VecDuplicate(tao->solution,&cg->g_work);
  }
  if (!cg->unprojected_gradient) {
    VecDuplicate(tao->gradient,&cg->unprojected_gradient);
  }
  if (!cg->unprojected_gradient_old) {
    VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);
  }
  /* Allocate the LMVM matrix storage based on the (s,y) vector pair */
  MatLMVMAllocate(cg->B, cg->sk, cg->yk);
  /* Install the optional user-provided preconditioner as the LMVM J0 */
  if (cg->pc){
    MatLMVMSetJ0(cg->B, cg->pc);
  }
  return(0);
}

203: static PetscErrorCode TaoDestroy_BNCG(Tao tao)
204: {
205:   TAO_BNCG       *cg = (TAO_BNCG*) tao->data;

209:   if (tao->setupcalled) {
210:     VecDestroy(&cg->W);
211:     VecDestroy(&cg->work);
212:     VecDestroy(&cg->X_old);
213:     VecDestroy(&cg->G_old);
214:     VecDestroy(&cg->unprojected_gradient);
215:     VecDestroy(&cg->unprojected_gradient_old);
216:     VecDestroy(&cg->g_work);
217:     VecDestroy(&cg->d_work);
218:     VecDestroy(&cg->y_work);
219:     VecDestroy(&cg->sk);
220:     VecDestroy(&cg->yk);
221:   }
222:   ISDestroy(&cg->active_lower);
223:   ISDestroy(&cg->active_upper);
224:   ISDestroy(&cg->active_fixed);
225:   ISDestroy(&cg->active_idx);
226:   ISDestroy(&cg->inactive_idx);
227:   ISDestroy(&cg->inactive_old);
228:   ISDestroy(&cg->new_inactives);
229:   MatDestroy(&cg->B);
230:   if (cg->pc) {
231:     MatDestroy(&cg->pc);
232:   }
233:   PetscFree(tao->data);
234:   return(0);
235: }

237: static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao)
238: {
239:     TAO_BNCG       *cg = (TAO_BNCG*)tao->data;

243:     TaoLineSearchSetFromOptions(tao->linesearch);
244:     PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");
245:     PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL);
246:     if (cg->cg_type != CG_SSML_BFGS){
247:       cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
248:     }
249:     if (CG_GradientDescent == cg->cg_type){
250:       cg->cg_type = CG_PCGradientDescent;
251:       /* Set scaling equal to none or, at best, scalar scaling. */
252:       cg->unscaled_restart = PETSC_TRUE;
253:       cg->diag_scaling = PETSC_FALSE;
254:     }
255:     PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);

257:     PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);
258:     PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);
259:     PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);
260:     PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);
261:     PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);
262:     PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);
263:     PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);
264:     PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);
265:     PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);
266:     PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);
267:     PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);
268:     PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);
269:     PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);
270:     PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);
271:     PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);
272:     PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution, gradient, and diagonal scaling vector at the start of a new TaoSolve() call","",cg->recycle,&cg->recycle,NULL);
273:     PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);
274:     PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL);
275:     if (cg->no_scaling){
276:       cg->diag_scaling = PETSC_FALSE;
277:       cg->alpha = -1.0;
278:     }
279:     if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling){ /* Some more default options that appear to be good. */
280:       cg->neg_xi = PETSC_TRUE;
281:     }
282:     PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);
283:     PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);
284:     PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);
285:     PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL);
286:     PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL);

288:    PetscOptionsTail();
289:    MatSetFromOptions(cg->B);
290:    return(0);
291: }

293: static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
294: {
295:   PetscBool      isascii;
296:   TAO_BNCG       *cg = (TAO_BNCG*)tao->data;

300:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
301:   if (isascii) {
302:     PetscViewerASCIIPushTab(viewer);
303:     PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);
304:     PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates);
305:     PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets);
306:     PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps);
307:     PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error);
308:     PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);
309:     if (cg->diag_scaling){
310:       PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
311:       if (isascii) {
312:         PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);
313:         MatView(cg->B, viewer);
314:         PetscViewerPopFormat(viewer);
315:       }
316:     }
317:     PetscViewerASCIIPopTab(viewer);
318:   }
319:   return(0);
320: }

322: PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
323: {
324:   PetscReal            a, b, c, sig1, sig2;

327:   *scale = 0.0;

329:   if (1.0 == alpha){
330:     *scale = yts/yty;
331:   } else if (0.0 == alpha){
332:     *scale = sts/yts;
333:   }
334:   else if (-1.0 == alpha) *scale = 1.0;
335:   else {
336:     a = yty;
337:     b = yts;
338:     c = sts;
339:     a *= alpha;
340:     b *= -(2.0*alpha - 1.0);
341:     c *= alpha - 1.0;
342:     sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
343:     sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
344:     /* accept the positive root as the scalar */
345:     if (sig1 > 0.0) {
346:       *scale = sig1;
347:     } else if (sig2 > 0.0) {
348:       *scale = sig2;
349:     } else {
350:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
351:     }
352:   }
353:   return(0);
354: }

356: /*MC
357:   TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.

359:   Options Database Keys:
360: + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
361: . -tao_bncg_eta <r> - restart tolerance
362: . -tao_bncg_type <taocg_type> - cg formula
363: . -tao_bncg_as_type <none,bertsekas> - active set estimation method
364: . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
365: . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
366: . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
367: . -tao_bncg_theta <r> - convex combination parameter for the Broyden method
368: . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
369: . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
370: . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
371: . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
372: . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
373: . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
374: . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
375: . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
376: . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
377: . -tao_bncg_zeta <r> - Scaling parameter in the KD method
378: . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
379: . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
380: . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
381: . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
382: . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
383: . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
384: - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions

386:   Notes:
387:     CG formulas are:
388: + "gd" - Gradient Descent
389: . "fr" - Fletcher-Reeves
390: . "pr" - Polak-Ribiere-Polyak
391: . "prp" - Polak-Ribiere-Plus
. "hs" - Hestenes-Stiefel
393: . "dy" - Dai-Yuan
394: . "ssml_bfgs" - Self-Scaling Memoryless BFGS
395: . "ssml_dfp"  - Self-Scaling Memoryless DFP
396: . "ssml_brdn" - Self-Scaling Memoryless Broyden
397: . "hz" - Hager-Zhang (CG_DESCENT 5.3)
398: . "dk" - Dai-Kou (2013)
399: - "kd" - Kou-Dai (2015)

401:   Level: beginner
402: M*/

/*
  TaoCreate_BNCG - Creates the BNCG solver context: registers the solver
  callbacks, builds the More-Thuente line search and the LMVM diagonal
  Broyden matrix used for diagonal scaling, and installs default parameters.
*/
PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG       *cg;
  const char     *morethuente_type = TAOLINESEARCHMT;

  tao->ops->setup = TaoSetUp_BNCG;
  tao->ops->solve = TaoSolve_BNCG;
  tao->ops->view = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  if (!tao->max_it_changed) tao->max_it = 2000;
  if (!tao->max_funcs_changed) tao->max_funcs = 4000;

  /*  Note: nondefault values should be used for nonlinear conjugate gradient  */
  /*  method.  In particular, gtol should be less than 0.5; the value used in  */
  /*  Nocedal and Wright is 0.10.  We use the default values for the  */
  /*  linesearch because it seems to work better. */
  TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);
  PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);
  TaoLineSearchSetType(tao->linesearch, morethuente_type);
  TaoLineSearchUseTaoRoutines(tao->linesearch, tao);
  TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);

  PetscNewLog(tao,&cg);
  tao->data = (void*)cg;
  KSPInitializePackage();
  /* LMVM diagonal Broyden matrix used for diagonal scaling/preconditioning */
  MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);
  PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);
  MatSetOptionsPrefix(cg->B, "tao_bncg_");
  MatSetType(cg->B, MATLMVMDIAGBRDN);

  /* No user preconditioner by default; one may be installed as the LMVM J0 */
  cg->pc = NULL;

  /* Default parameters; see TaoSetFromOptions_BNCG for the matching options */
  cg->dk_eta = 0.5;
  cg->hz_eta = 0.4;
  cg->dynamic_restart = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling = PETSC_FALSE;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;
  cg->theta = 1.0;
  cg->hz_theta = 1.0;
  cg->dfp_scale = 1.0;
  cg->bfgs_scale = 1.0;
  cg->zeta = 0.1;
  cg->min_quad = 6;
  cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/
  cg->xi = 1.0;
  cg->neg_xi = PETSC_TRUE;
  cg->spaced_restart = PETSC_FALSE;
  cg->tol_quad = 1e-8;
  cg->as_step = 0.001;
  cg->as_tol = 0.001;
  cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/
  cg->as_type = CG_AS_BERTSEKAS;
  cg->cg_type = CG_SSML_BFGS;
  cg->recycle = PETSC_FALSE;
  cg->alpha = 1.0;
  cg->diag_scaling = PETSC_TRUE;
  return(0);
}


471: PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
472: {
473:    TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
474:    PetscErrorCode    ierr;
475:    PetscReal         scaling;

478:    ++cg->resets;
479:    scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
480:    scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
481:    if (cg->unscaled_restart) {
482:      scaling = 1.0;
483:      ++cg->pure_gd_steps;
484:    }
485:    VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);
486:    /* Also want to reset our diagonal scaling with each restart */
487:    if (cg->diag_scaling) {
488:      MatLMVMReset(cg->B, PETSC_FALSE);
489:    }
490:    return(0);
491:  }

/*
  TaoBNCGCheckDynamicRestart - Detects approximately quadratic behavior of the
  objective along recent steps; after cg->min_quad consecutive quadratic-like
  iterations, sets *dynrestart to trigger a restart. quadinterp compares the
  actual decrease cg->f - fold against a quadratic-interpolation prediction
  built from the step size and the old/new directional derivatives gd_old, gd.
*/
PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
   PetscReal         quadinterp;

   /* NOTE(review): min_quad is a PetscInt, so min_quad/10 is integer division
      (default 6/10 == 0), making this effectively a test for cg->f < 0 —
      confirm the intended threshold. */
   if (cg->f < cg->min_quad/10) {
     *dynrestart = PETSC_FALSE;
     return(0);
   } /* just skip this since this strategy doesn't work well for functions near zero */
   /* Ratio of actual to quadratically-predicted decrease; 1.0 means the
      objective is behaving like a quadratic along this step */
   quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old));
   if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
   else {
     cg->iter_quad = 0;
     *dynrestart = PETSC_FALSE;
   }
   /* Enough consecutive quadratic-like iterations: request a restart */
   if (cg->iter_quad >= cg->min_quad) {
     cg->iter_quad = 0;
     *dynrestart = PETSC_TRUE;
   }
   return(0);
 }

516: PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
517: {
518:   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
519:   PetscErrorCode    ierr;
520:   PetscReal         gamma = 1.0, tau_k, beta;
521:   PetscReal         tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd;
522:   PetscReal         gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
523:   PetscInt          dim;
524:   PetscBool         cg_restart = PETSC_FALSE;

527:   /* Local curvature check to see if we need to restart */
528:   if (tao->niter >= 1 || cg->recycle){
529:     VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);
530:     VecNorm(cg->yk, NORM_2, &ynorm);
531:     ynorm2 = ynorm*ynorm;
532:     VecDot(cg->yk, tao->stepdirection, &dk_yk);
533:     if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON){
534:       cg_restart = PETSC_TRUE;
535:       ++cg->skipped_updates;
536:     }
537:     if (cg->spaced_restart) {
538:       VecGetSize(tao->gradient, &dim);
539:       if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE;
540:     }
541:   }
542:   /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */
543:   if (cg->spaced_restart){
544:     VecGetSize(tao->gradient, &dim);
545:     if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE;
546:   }
547:   /* Compute the diagonal scaling vector if applicable */
548:   if (cg->diag_scaling) {
549:     MatLMVMUpdate(cg->B, tao->solution, tao->gradient);
550:   }

552:   /* A note on diagonal scaling (to be added to paper):
553:    For the FR, PR, PRP, and DY methods, the diagonally scaled versions
554:    must be derived as a preconditioned CG method rather than as
555:    a Hessian initialization like in the Broyden methods. */

557:   /* In that case, one writes the objective function as 
558:    f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 
559:    Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 
560:    HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 
561:    same under preconditioning. Note that A is diagonal, such that A^T = A. */

563:   /* This yields questions like what the dot product d_k^T y_k 
564:    should look like. HZ mistakenly treats that as the same under 
565:    preconditioning, but that is not necessarily true. */

567:   /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 
568:    we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 
569:    yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 
570:    NOT the same if our preconditioning matrix is updated between iterations. 
571:    This same issue is found when considering dot products of the form g_{k+1}^T y_k. */

573:   /* Compute CG step direction */
574:   if (cg_restart) {
575:     TaoBNCGResetUpdate(tao, gnorm2);
576:   } else if (pcgd_fallback) {
577:     /* Just like preconditioned CG */
578:     MatSolve(cg->B, tao->gradient, cg->g_work);
579:     VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);
580:   } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
581:     switch (cg->cg_type) {
582:     case CG_PCGradientDescent:
583:       if (!cg->diag_scaling){
584:         if (!cg->no_scaling){
585:         cg->sts = step*step*dnorm*dnorm;
586:         TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);
587:         } else {
588:           tau_k = 1.0;
589:           ++cg->pure_gd_steps;
590:         }
591:         VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);
592:       } else {
593:         MatSolve(cg->B, tao->gradient, cg->g_work);
594:         VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);
595:       }
596:       break;

598:     case CG_HestenesStiefel:
599:       /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
600:       if (!cg->diag_scaling){
601:         cg->sts = step*step*dnorm*dnorm;
602:         VecDot(cg->yk, tao->gradient, &gkp1_yk);
603:         TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);
604:         beta = tau_k*gkp1_yk/dk_yk;
605:         VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);
606:       } else {
607:         MatSolve(cg->B, tao->gradient, cg->g_work);
608:         VecDot(cg->yk, cg->g_work, &gkp1_yk);
609:         beta = gkp1_yk/dk_yk;
610:         VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);
611:       }
612:       break;

614:     case CG_FletcherReeves:
615:       VecDot(cg->G_old, cg->G_old, &gnorm2_old);
616:       VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);
617:       VecNorm(cg->yk, NORM_2, &ynorm);
618:       ynorm2 = ynorm*ynorm;
619:       VecDot(cg->yk, tao->stepdirection, &dk_yk);
620:       if (!cg->diag_scaling){
621:         TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);
622:         beta = tau_k*gnorm2/gnorm2_old;
623:         VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);
624:       } else {
625:         VecDot(cg->G_old, cg->g_work, &gnorm2_old); /* Before it's updated */
626:         MatSolve(cg->B, tao->gradient, cg->g_work);
627:         VecDot(tao->gradient, cg->g_work, &tmp);
628:         beta = tmp/gnorm2_old;
629:         VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);
630:       }
631:       break;

633:     case CG_PolakRibierePolyak:
634:       snorm = step*dnorm;
635:       if (!cg->diag_scaling){
636:         VecDot(cg->G_old, cg->G_old, &gnorm2_old);
637:         VecDot(cg->yk, tao->gradient, &gkp1_yk);
638:         TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);
639:         beta = tau_k*gkp1_yk/gnorm2_old;
640:         VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);
641:       } else {
642:         VecDot(cg->G_old, cg->g_work, &gnorm2_old);
643:         MatSolve(cg->B, tao->gradient, cg->g_work);
644:         VecDot(cg->g_work, cg->yk, &gkp1_yk);
645:         beta = gkp1_yk/gnorm2_old;
646:         VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);
647:       }
648:       break;

650:     case CG_PolakRibierePlus:
651:       VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);
652:       VecNorm(cg->yk, NORM_2, &ynorm);
653:       ynorm2 = ynorm*ynorm;
654:       if (!cg->diag_scaling){
655:         VecDot(cg->G_old, cg->G_old, &gnorm2_old);
656:         VecDot(cg->yk, tao->gradient, &gkp1_yk);
657:         TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);
658:         beta = tau_k*gkp1_yk/gnorm2_old;
659:         beta = PetscMax(beta, 0.0);
660:         VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);
661:       } else {
662:         VecDot(cg->G_old, cg->g_work, &gnorm2_old); /* Old gtDg */
663:         MatSolve(cg->B, tao->gradient, cg->g_work);
664:         VecDot(cg->g_work, cg->yk, &gkp1_yk);
665:         beta = gkp1_yk/gnorm2_old;
666:         beta = PetscMax(beta, 0.0);
667:         VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);
668:       }
669:       break;

671:     case CG_DaiYuan:
672:       /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
673:          SIAM Journal on optimization 10, no. 1 (1999): 177-182. */
674:       if (!cg->diag_scaling){
675:         VecDot(tao->stepdirection, tao->gradient, &gd);
676:         VecDot(cg->G_old, tao->stepdirection, &gd_old);
677:         TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);
678:         beta = tau_k*gnorm2/(gd - gd_old);
679:         VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);
680:       } else {
681:         MatMult(cg->B, tao->stepdirection, cg->d_work);
682:         MatSolve(cg->B, tao->gradient, cg->g_work);
683:         VecDot(cg->g_work, tao->gradient, &gtDg);
684:         VecDot(tao->stepdirection, cg->G_old, &gd_old);
685:         VecDot(cg->d_work, cg->g_work, &dk_yk);
686:         dk_yk = dk_yk - gd_old;
687:         beta = gtDg/dk_yk;
688:         VecScale(cg->d_work, beta);
689:         VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);
690:       }
691:       break;

693:     case CG_HagerZhang:
694:       /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
695:          ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
696:       VecDot(tao->gradient, tao->stepdirection, &gd);
697:       VecDot(cg->G_old, tao->stepdirection, &gd_old);
698:       VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);
699:       snorm = dnorm*step;
700:       cg->yts = step*dk_yk;
701:       if (cg->use_dynamic_restart){
702:         TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);
703:       }
704:       if (cg->dynamic_restart){
705:         TaoBNCGResetUpdate(tao, gnorm2);
706:       } else {
707:         if (!cg->diag_scaling){
708:           VecDot(cg->yk, tao->gradient, &gkp1_yk);
709:           TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);
710:           /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
711:           tmp = gd/dk_yk;
712:           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk));
713:           /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
714:           beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
715:           /* d <- -t*g + beta*t*d */
716:           VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);
717:         } else {
718:           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
719:           cg->yty = ynorm2;
720:           cg->sts = snorm*snorm;
721:           /* Apply the diagonal scaling to all my vectors */
722:           MatSolve(cg->B, tao->gradient, cg->g_work);
723:           MatSolve(cg->B, cg->yk, cg->y_work);
724:           MatSolve(cg->B, tao->stepdirection, cg->d_work);
725:           /* Construct the constant ytDgkp1 */
726:           VecDot(cg->yk, cg->g_work, &gkp1_yk);
727:           /* Construct the constant for scaling Dkyk in the update */
728:           tmp = gd/dk_yk;
729:           VecDot(cg->yk, cg->y_work, &tau_k);
730:           tau_k = -tau_k*gd/(dk_yk*dk_yk);
731:           /* beta is the constant which adds the dk contribution */
732:           beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */
733:           /* From HZ2013, modified to account for diagonal scaling*/
734:           VecDot(cg->G_old, cg->d_work, &gd_old);
735:           VecDot(tao->stepdirection, cg->g_work, &gd);
736:           beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
737:           /* Do the update */
738:           VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);
739:         }
740:       }
741:       break;

743:     case CG_DaiKou:
744:       /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 
745:          SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
746:       VecDot(tao->gradient, tao->stepdirection, &gd);
747:       VecDot(cg->G_old, tao->stepdirection, &gd_old);
748:       VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);
749:       snorm = step*dnorm;
750:       cg->yts = dk_yk*step;
751:       if (!cg->diag_scaling){
752:         VecDot(cg->yk, tao->gradient, &gkp1_yk);
753:         TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);
754:         /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
755:         tmp = gd/dk_yk;
756:         beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk;
757:         beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
758:         /* d <- -t*g + beta*t*d */
759:         VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);
760:       } else {
761:         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
762:         cg->yty = ynorm2;
763:         cg->sts = snorm*snorm;
764:         MatSolve(cg->B, tao->gradient, cg->g_work);
765:         MatSolve(cg->B, cg->yk, cg->y_work);
766:         MatSolve(cg->B, tao->stepdirection, cg->d_work);
767:         /* Construct the constant ytDgkp1 */
768:         VecDot(cg->yk, cg->g_wo