Actual source code: gqt.c

  1: #include <petscsys.h>
  2: #include <petscblaslapack.h>

  4: static PetscErrorCode estsv(PetscInt n, PetscReal *r, PetscInt ldr, PetscReal *svmin, PetscReal *z)
  5: {
  6:   PetscBLASInt blas1 = 1, blasn, blasnmi, blasj, blasldr;
  7:   PetscInt     i, j;
  8:   PetscReal    e, temp, w, wm, ynorm, znorm, s, sm;

 10:   PetscFunctionBegin;
 11:   PetscCall(PetscBLASIntCast(n, &blasn));
 12:   PetscCall(PetscBLASIntCast(ldr, &blasldr));
 13:   for (i = 0; i < n; i++) z[i] = 0.0;
 14:   e = PetscAbs(r[0]);
 15:   if (e == 0.0) {
 16:     *svmin = 0.0;
 17:     z[0]   = 1.0;
 18:   } else {
 19:     /* Solve R'*y = e */
 20:     for (i = 0; i < n; i++) {
 21:       /* Scale y. The scaling factor (0.01) reduces the number of scalings */
 22:       if (z[i] >= 0.0) e = -PetscAbs(e);
 23:       else e = PetscAbs(e);

 25:       if (PetscAbs(e - z[i]) > PetscAbs(r[i + ldr * i])) {
 26:         temp = PetscMin(0.01, PetscAbs(r[i + ldr * i])) / PetscAbs(e - z[i]);
 27:         PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, z, &blas1));
 28:         e = temp * e;
 29:       }

 31:       /* Determine the two possible choices of y[i] */
 32:       if (r[i + ldr * i] == 0.0) {
 33:         w = wm = 1.0;
 34:       } else {
 35:         w  = (e - z[i]) / r[i + ldr * i];
 36:         wm = -(e + z[i]) / r[i + ldr * i];
 37:       }

 39:       /*  Chose y[i] based on the predicted value of y[j] for j>i */
 40:       s  = PetscAbs(e - z[i]);
 41:       sm = PetscAbs(e + z[i]);
 42:       for (j = i + 1; j < n; j++) sm += PetscAbs(z[j] + wm * r[i + ldr * j]);
 43:       if (i < n - 1) {
 44:         PetscCall(PetscBLASIntCast(n - i - 1, &blasnmi));
 45:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasnmi, &w, &r[i + ldr * (i + 1)], &blasldr, &z[i + 1], &blas1));
 46:         PetscCallBLAS("BLASasum", s += BLASasum_(&blasnmi, &z[i + 1], &blas1));
 47:       }
 48:       if (s < sm) {
 49:         temp = wm - w;
 50:         w    = wm;
 51:         if (i < n - 1) PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasnmi, &temp, &r[i + ldr * (i + 1)], &blasldr, &z[i + 1], &blas1));
 52:       }
 53:       z[i] = w;
 54:     }

 56:     PetscCallBLAS("BLASnrm2", ynorm = BLASnrm2_(&blasn, z, &blas1));

 58:     /* Solve R*z = y */
 59:     for (j = n - 1; j >= 0; j--) {
 60:       /* Scale z */
 61:       if (PetscAbs(z[j]) > PetscAbs(r[j + ldr * j])) {
 62:         temp = PetscMin(0.01, PetscAbs(r[j + ldr * j] / z[j]));
 63:         PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, z, &blas1));
 64:         ynorm *= temp;
 65:       }
 66:       if (r[j + ldr * j] == 0) {
 67:         z[j] = 1.0;
 68:       } else {
 69:         z[j] = z[j] / r[j + ldr * j];
 70:       }
 71:       temp = -z[j];
 72:       PetscCall(PetscBLASIntCast(j, &blasj));
 73:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasj, &temp, &r[0 + ldr * j], &blas1, z, &blas1));
 74:     }

 76:     /* Compute svmin and normalize z */
 77:     PetscCallBLAS("BLASnrm2", znorm = 1.0 / BLASnrm2_(&blasn, z, &blas1));
 78:     *svmin = ynorm * znorm;
 79:     PetscCallBLAS("BLASscal", BLASscal_(&blasn, &znorm, z, &blas1));
 80:   }
 81:   PetscFunctionReturn(PETSC_SUCCESS);
 82: }

 84: /*
 85: c     ***********
 86: c
 87: c     Subroutine gqt
 88: c
 89: c     Given an n by n symmetric matrix A, an n-vector b, and a
 90: c     positive number delta, this subroutine determines a vector
 91: c     x which approximately minimizes the quadratic function
 92: c
 93: c           f(x) = (1/2)*x'*A*x + b'*x
 94: c
 95: c     subject to the Euclidean norm constraint
 96: c
 97: c           norm(x) <= delta.
 98: c
 99: c     This subroutine computes an approximation x and a Lagrange
100: c     multiplier par such that either par is zero and
101: c
102: c            norm(x) <= (1+rtol)*delta,
103: c
104: c     or par is positive and
105: c
106: c            abs(norm(x) - delta) <= rtol*delta.
107: c
108: c     If xsol is the solution to the problem, the approximation x
109: c     satisfies
110: c
111: c            f(x) <= ((1 - rtol)**2)*f(xsol)
112: c
113: c     The subroutine statement is
114: c
115: c       subroutine gqt(n,a,lda,b,delta,rtol,atol,itmax,
116: c                        par,f,x,info,z,wa1,wa2)
117: c
118: c     where
119: c
120: c       n is an integer variable.
121: c         On entry n is the order of A.
122: c         On exit n is unchanged.
123: c
124: c       a is a double precision array of dimension (lda,n).
125: c         On entry the full upper triangle of a must contain the
126: c            full upper triangle of the symmetric matrix A.
127: c         On exit the array contains the matrix A.
128: c
129: c       lda is an integer variable.
130: c         On entry lda is the leading dimension of the array a.
131: c         On exit lda is unchanged.
132: c
133: c       b is an double precision array of dimension n.
134: c         On entry b specifies the linear term in the quadratic.
135: c         On exit b is unchanged.
136: c
137: c       delta is a double precision variable.
138: c         On entry delta is a bound on the Euclidean norm of x.
139: c         On exit delta is unchanged.
140: c
141: c       rtol is a double precision variable.
142: c         On entry rtol is the relative accuracy desired in the
143: c            solution. Convergence occurs if
144: c
145: c              f(x) <= ((1 - rtol)**2)*f(xsol)
146: c
147: c         On exit rtol is unchanged.
148: c
149: c       atol is a double precision variable.
150: c         On entry atol is the absolute accuracy desired in the
151: c            solution. Convergence occurs when
152: c
153: c              norm(x) <= (1 + rtol)*delta
154: c
155: c              max(-f(x),-f(xsol)) <= atol
156: c
157: c         On exit atol is unchanged.
158: c
159: c       itmax is an integer variable.
160: c         On entry itmax specifies the maximum number of iterations.
161: c         On exit itmax is unchanged.
162: c
163: c       par is a double precision variable.
164: c         On entry par is an initial estimate of the Lagrange
165: c            multiplier for the constraint norm(x) <= delta.
166: c         On exit par contains the final estimate of the multiplier.
167: c
168: c       f is a double precision variable.
169: c         On entry f need not be specified.
170: c         On exit f is set to f(x) at the output x.
171: c
172: c       x is a double precision array of dimension n.
173: c         On entry x need not be specified.
174: c         On exit x is set to the final estimate of the solution.
175: c
176: c       info is an integer variable.
177: c         On entry info need not be specified.
178: c         On exit info is set as follows:
179: c
180: c            info = 1  The function value f(x) has the relative
181: c                      accuracy specified by rtol.
182: c
183: c            info = 2  The function value f(x) has the absolute
184: c                      accuracy specified by atol.
185: c
186: c            info = 3  Rounding errors prevent further progress.
187: c                      On exit x is the best available approximation.
188: c
189: c            info = 4  Failure to converge after itmax iterations.
190: c                      On exit x is the best available approximation.
191: c
192: c       z is a double precision work array of dimension n.
193: c
194: c       wa1 is a double precision work array of dimension n.
195: c
196: c       wa2 is a double precision work array of dimension n.
197: c
198: c     Subprograms called
199: c
200: c       MINPACK-2  ......  destsv
201: c
202: c       LAPACK  .........  dpotrf
203: c
204: c       Level 1 BLAS  ...  daxpy, dcopy, ddot, dnrm2, dscal
205: c
206: c       Level 2 BLAS  ...  dtrmv, dtrsv
207: c
208: c     MINPACK-2 Project. October 1993.
209: c     Argonne National Laboratory and University of Minnesota.
210: c     Brett M. Averick, Richard Carter, and Jorge J. More'
211: c
212: c     ***********
213: */
214: PetscErrorCode gqt(PetscInt n, PetscReal *a, PetscInt lda, PetscReal *b, PetscReal delta, PetscReal rtol, PetscReal atol, PetscInt itmax, PetscReal *retpar, PetscReal *retf, PetscReal *x, PetscInt *retinfo, PetscInt *retits, PetscReal *z, PetscReal *wa1, PetscReal *wa2)
215: {
216:   PetscReal    f = 0.0, p001 = 0.001, p5 = 0.5, minusone = -1, delta2 = delta * delta;
217:   PetscInt     iter, j, rednc, info;
218:   PetscBLASInt indef;
219:   PetscBLASInt blas1 = 1, blasn, iblas, blaslda, blasldap1, blasinfo;
220:   PetscReal    alpha, anorm, bnorm, parc, parf, parl, pars, par = *retpar, paru, prod, rxnorm, rznorm = 0.0, temp, xnorm;

222:   PetscFunctionBegin;
223:   PetscCall(PetscBLASIntCast(n, &blasn));
224:   PetscCall(PetscBLASIntCast(lda, &blaslda));
225:   PetscCall(PetscBLASIntCast(lda + 1, &blasldap1));
226:   parf   = 0.0;
227:   xnorm  = 0.0;
228:   rxnorm = 0.0;
229:   rednc  = 0;
230:   for (j = 0; j < n; j++) {
231:     x[j] = 0.0;
232:     z[j] = 0.0;
233:   }

235:   /* Copy the diagonal and save A in its lower triangle */
236:   PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, a, &blasldap1, wa1, &blas1));
237:   for (j = 0; j < n - 1; j++) {
238:     PetscCall(PetscBLASIntCast(n - j - 1, &iblas));
239:     PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + lda * (j + 1)], &blaslda, &a[j + 1 + lda * j], &blas1));
240:   }

242:   /* Calculate the l1-norm of A, the Gershgorin row sums, and the
243:    l2-norm of b */
244:   anorm = 0.0;
245:   for (j = 0; j < n; j++) {
246:     PetscCallBLAS("BLASasum", wa2[j] = BLASasum_(&blasn, &a[0 + lda * j], &blas1));
247:     CHKMEMQ;
248:     anorm = PetscMax(anorm, wa2[j]);
249:   }
250:   for (j = 0; j < n; j++) wa2[j] = wa2[j] - PetscAbs(wa1[j]);
251:   PetscCallBLAS("BLASnrm2", bnorm = BLASnrm2_(&blasn, b, &blas1));
252:   CHKMEMQ;
253:   /* Calculate a lower bound, pars, for the domain of the problem.
254:    Also calculate an upper bound, paru, and a lower bound, parl,
255:    for the Lagrange multiplier. */
256:   pars = parl = paru = -anorm;
257:   for (j = 0; j < n; j++) {
258:     pars = PetscMax(pars, -wa1[j]);
259:     parl = PetscMax(parl, wa1[j] + wa2[j]);
260:     paru = PetscMax(paru, -wa1[j] + wa2[j]);
261:   }
262:   parl = PetscMax(bnorm / delta - parl, pars);
263:   parl = PetscMax(0.0, parl);
264:   paru = PetscMax(0.0, bnorm / delta + paru);

266:   /* If the input par lies outside of the interval (parl, paru),
267:    set par to the closer endpoint. */

269:   par = PetscMax(par, parl);
270:   par = PetscMin(par, paru);

272:   /* Special case: parl == paru */
273:   paru = PetscMax(paru, (1.0 + rtol) * parl);

275:   /* Beginning of an iteration */

277:   info = 0;
278:   for (iter = 1; iter <= itmax; iter++) {
279:     /* Safeguard par */
280:     if (par <= pars && paru > 0) par = PetscMax(p001, PetscSqrtScalar(parl / paru)) * paru;

282:     /* Copy the lower triangle of A into its upper triangle and  compute A + par*I */

284:     for (j = 0; j < n - 1; j++) {
285:       PetscCall(PetscBLASIntCast(n - j - 1, &iblas));
286:       PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + 1 + j * lda], &blas1, &a[j + (j + 1) * lda], &blaslda));
287:     }
288:     for (j = 0; j < n; j++) a[j + j * lda] = wa1[j] + par;

290:     /* Attempt the Cholesky factorization of A without referencing the lower triangular part. */
291:     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("U", &blasn, a, &blaslda, &indef));

293:     /* Case 1: A + par*I is pos. def. */
294:     if (indef == 0) {
295:       /* Compute an approximate solution x and save the last value of par with A + par*I pos. def. */

297:       parf = par;
298:       PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, b, &blas1, wa2, &blas1));
299:       PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
300:       PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
301:       PetscCallBLAS("BLASnrm2", rxnorm = BLASnrm2_(&blasn, wa2, &blas1));
302:       PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
303:       PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);

305:       PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, wa2, &blas1, x, &blas1));
306:       PetscCallBLAS("BLASscal", BLASscal_(&blasn, &minusone, x, &blas1));
307:       PetscCallBLAS("BLASnrm2", xnorm = BLASnrm2_(&blasn, x, &blas1));
308:       CHKMEMQ;

310:       /* Test for convergence */
311:       if (PetscAbs(xnorm - delta) <= rtol * delta || (par == 0 && xnorm <= (1.0 + rtol) * delta)) info = 1;

313:       /* Compute a direction of negative curvature and use this information to improve pars. */
314:       PetscCall(estsv(n, a, lda, &rznorm, z));
315:       CHKMEMQ;
316:       pars = PetscMax(pars, par - rznorm * rznorm);

318:       /* Compute a negative curvature solution of the form x + alpha*z,  where norm(x+alpha*z)==delta */

320:       rednc = 0;
321:       if (xnorm < delta) {
322:         /* Compute alpha */
323:         PetscCallBLAS("BLASdot", prod = BLASdot_(&blasn, z, &blas1, x, &blas1) / delta);
324:         temp  = (delta - xnorm) * ((delta + xnorm) / delta);
325:         alpha = temp / (PetscAbs(prod) + PetscSqrtScalar(prod * prod + temp / delta));
326:         if (prod >= 0) alpha = PetscAbs(alpha);
327:         else alpha = -PetscAbs(alpha);

329:         /* Test to decide if the negative curvature step produces a larger reduction than with z=0 */
330:         rznorm = PetscAbs(alpha) * rznorm;
331:         if ((rznorm * rznorm + par * xnorm * xnorm) / (delta2) <= par) rednc = 1;
332:         /* Test for convergence */
333:         if (p5 * rznorm * rznorm / delta2 <= rtol * (1.0 - p5 * rtol) * (par + rxnorm * rxnorm / delta2)) {
334:           info = 1;
335:         } else if (info == 0 && (p5 * (par + rxnorm * rxnorm / delta2) <= atol / delta2)) {
336:           info = 2;
337:         }
338:       }

340:       /* Compute the Newton correction parc to par. */
341:       if (xnorm == 0) {
342:         parc = -par;
343:       } else {
344:         PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, x, &blas1, wa2, &blas1));
345:         temp = 1.0 / xnorm;
346:         PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, wa2, &blas1));
347:         PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
348:         PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
349:         PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&blasn, wa2, &blas1));
350:         parc = (xnorm - delta) / (delta * temp * temp);
351:       }

353:       /* update parl or paru */
354:       if (xnorm > delta) {
355:         parl = PetscMax(parl, par);
356:       } else if (xnorm < delta) {
357:         paru = PetscMin(paru, par);
358:       }
359:     } else {
360:       /* Case 2: A + par*I is not pos. def. */

362:       /* Use the rank information from the Cholesky decomposition to update par. */

364:       if (indef > 1) {
365:         /* Restore column indef to A + par*I. */
366:         iblas = indef - 1;
367:         PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[indef - 1 + 0 * lda], &blaslda, &a[0 + (indef - 1) * lda], &blas1));
368:         a[indef - 1 + (indef - 1) * lda] = wa1[indef - 1] + par;

370:         /* compute parc. */
371:         PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[0 + (indef - 1) * lda], &blas1, wa2, &blas1));
372:         PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &iblas, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
373:         PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
374:         PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, wa2, &blas1, &a[0 + (indef - 1) * lda], &blas1));
375:         PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&iblas, &a[0 + (indef - 1) * lda], &blas1));
376:         CHKMEMQ;
377:         a[indef - 1 + (indef - 1) * lda] -= temp * temp;
378:         PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &iblas, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
379:         PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
380:       }

382:       wa2[indef - 1] = -1.0;
383:       iblas          = indef;
384:       PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&iblas, wa2, &blas1));
385:       parc = -a[indef - 1 + (indef - 1) * lda] / (temp * temp);
386:       pars = PetscMax(pars, par + parc);

388:       /* If necessary, increase paru slightly.
389:        This is needed because in some exceptional situations
390:        paru is the optimal value of par. */

392:       paru = PetscMax(paru, (1.0 + rtol) * pars);
393:     }

395:     /* Use pars to update parl */
396:     parl = PetscMax(parl, pars);

398:     /* Test for converged. */
399:     if (info == 0) {
400:       if (iter == itmax) info = 4;
401:       if (paru <= (1.0 + p5 * rtol) * pars) info = 3;
402:       if (paru == 0.0) info = 2;
403:     }

405:     /* If exiting, store the best approximation and restore
406:      the upper triangle of A. */

408:     if (info != 0) {
409:       /* Compute the best current estimates for x and f. */
410:       par = parf;
411:       f   = -p5 * (rxnorm * rxnorm + par * xnorm * xnorm);
412:       if (rednc) {
413:         f = -p5 * (rxnorm * rxnorm + par * delta * delta - rznorm * rznorm);
414:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &alpha, z, &blas1, x, &blas1));
415:       }
416:       /* Restore the upper triangle of A */
417:       for (j = 0; j < n; j++) {
418:         PetscCall(PetscBLASIntCast(n - j - 1, &iblas));
419:         PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + 1 + j * lda], &blas1, &a[j + (j + 1) * lda], &blaslda));
420:       }
421:       PetscCall(PetscBLASIntCast(lda + 1, &iblas));
422:       PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, wa1, &blas1, a, &iblas));
423:       break;
424:     }
425:     par = PetscMax(parl, par + parc);
426:   }
427:   *retpar  = par;
428:   *retf    = f;
429:   *retinfo = info;
430:   *retits  = iter;
431:   CHKMEMQ;
432:   PetscFunctionReturn(PETSC_SUCCESS);
433: }