Actual source code: gqt.c
1: #include <petscsys.h>
2: #include <petscblaslapack.h>
4: static PetscErrorCode estsv(PetscInt n, PetscReal *r, PetscInt ldr, PetscReal *svmin, PetscReal *z)
5: {
6: PetscBLASInt blas1 = 1, blasn, blasnmi, blasj, blasldr;
7: PetscInt i, j;
8: PetscReal e, temp, w, wm, ynorm, znorm, s, sm;
10: PetscFunctionBegin;
11: PetscCall(PetscBLASIntCast(n, &blasn));
12: PetscCall(PetscBLASIntCast(ldr, &blasldr));
13: for (i = 0; i < n; i++) z[i] = 0.0;
14: e = PetscAbs(r[0]);
15: if (e == 0.0) {
16: *svmin = 0.0;
17: z[0] = 1.0;
18: } else {
19: /* Solve R'*y = e */
20: for (i = 0; i < n; i++) {
21: /* Scale y. The scaling factor (0.01) reduces the number of scalings */
22: if (z[i] >= 0.0) e = -PetscAbs(e);
23: else e = PetscAbs(e);
25: if (PetscAbs(e - z[i]) > PetscAbs(r[i + ldr * i])) {
26: temp = PetscMin(0.01, PetscAbs(r[i + ldr * i])) / PetscAbs(e - z[i]);
27: PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, z, &blas1));
28: e = temp * e;
29: }
31: /* Determine the two possible choices of y[i] */
32: if (r[i + ldr * i] == 0.0) {
33: w = wm = 1.0;
34: } else {
35: w = (e - z[i]) / r[i + ldr * i];
36: wm = -(e + z[i]) / r[i + ldr * i];
37: }
39: /* Chose y[i] based on the predicted value of y[j] for j>i */
40: s = PetscAbs(e - z[i]);
41: sm = PetscAbs(e + z[i]);
42: for (j = i + 1; j < n; j++) sm += PetscAbs(z[j] + wm * r[i + ldr * j]);
43: if (i < n - 1) {
44: PetscCall(PetscBLASIntCast(n - i - 1, &blasnmi));
45: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasnmi, &w, &r[i + ldr * (i + 1)], &blasldr, &z[i + 1], &blas1));
46: PetscCallBLAS("BLASasum", s += BLASasum_(&blasnmi, &z[i + 1], &blas1));
47: }
48: if (s < sm) {
49: temp = wm - w;
50: w = wm;
51: if (i < n - 1) PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasnmi, &temp, &r[i + ldr * (i + 1)], &blasldr, &z[i + 1], &blas1));
52: }
53: z[i] = w;
54: }
56: PetscCallBLAS("BLASnrm2", ynorm = BLASnrm2_(&blasn, z, &blas1));
58: /* Solve R*z = y */
59: for (j = n - 1; j >= 0; j--) {
60: /* Scale z */
61: if (PetscAbs(z[j]) > PetscAbs(r[j + ldr * j])) {
62: temp = PetscMin(0.01, PetscAbs(r[j + ldr * j] / z[j]));
63: PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, z, &blas1));
64: ynorm *= temp;
65: }
66: if (r[j + ldr * j] == 0) {
67: z[j] = 1.0;
68: } else {
69: z[j] = z[j] / r[j + ldr * j];
70: }
71: temp = -z[j];
72: PetscCall(PetscBLASIntCast(j, &blasj));
73: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasj, &temp, &r[0 + ldr * j], &blas1, z, &blas1));
74: }
76: /* Compute svmin and normalize z */
77: PetscCallBLAS("BLASnrm2", znorm = 1.0 / BLASnrm2_(&blasn, z, &blas1));
78: *svmin = ynorm * znorm;
79: PetscCallBLAS("BLASscal", BLASscal_(&blasn, &znorm, z, &blas1));
80: }
81: PetscFunctionReturn(PETSC_SUCCESS);
82: }
84: /*
85: c ***********
86: c
87: c Subroutine gqt
88: c
89: c Given an n by n symmetric matrix A, an n-vector b, and a
90: c positive number delta, this subroutine determines a vector
91: c x which approximately minimizes the quadratic function
92: c
93: c f(x) = (1/2)*x'*A*x + b'*x
94: c
95: c subject to the Euclidean norm constraint
96: c
97: c norm(x) <= delta.
98: c
99: c This subroutine computes an approximation x and a Lagrange
100: c multiplier par such that either par is zero and
101: c
102: c norm(x) <= (1+rtol)*delta,
103: c
104: c or par is positive and
105: c
106: c abs(norm(x) - delta) <= rtol*delta.
107: c
108: c If xsol is the solution to the problem, the approximation x
109: c satisfies
110: c
111: c f(x) <= ((1 - rtol)**2)*f(xsol)
112: c
113: c The subroutine statement is
114: c
115: c subroutine gqt(n,a,lda,b,delta,rtol,atol,itmax,
116: c par,f,x,info,z,wa1,wa2)
117: c
118: c where
119: c
120: c n is an integer variable.
121: c On entry n is the order of A.
122: c On exit n is unchanged.
123: c
124: c a is a double precision array of dimension (lda,n).
125: c On entry the full upper triangle of a must contain the
126: c full upper triangle of the symmetric matrix A.
127: c On exit the array contains the matrix A.
128: c
129: c lda is an integer variable.
130: c On entry lda is the leading dimension of the array a.
131: c On exit lda is unchanged.
132: c
133: c b is an double precision array of dimension n.
134: c On entry b specifies the linear term in the quadratic.
135: c On exit b is unchanged.
136: c
137: c delta is a double precision variable.
138: c On entry delta is a bound on the Euclidean norm of x.
139: c On exit delta is unchanged.
140: c
141: c rtol is a double precision variable.
142: c On entry rtol is the relative accuracy desired in the
143: c solution. Convergence occurs if
144: c
145: c f(x) <= ((1 - rtol)**2)*f(xsol)
146: c
147: c On exit rtol is unchanged.
148: c
149: c atol is a double precision variable.
150: c On entry atol is the absolute accuracy desired in the
151: c solution. Convergence occurs when
152: c
153: c norm(x) <= (1 + rtol)*delta
154: c
155: c max(-f(x),-f(xsol)) <= atol
156: c
157: c On exit atol is unchanged.
158: c
159: c itmax is an integer variable.
160: c On entry itmax specifies the maximum number of iterations.
161: c On exit itmax is unchanged.
162: c
163: c par is a double precision variable.
164: c On entry par is an initial estimate of the Lagrange
165: c multiplier for the constraint norm(x) <= delta.
166: c On exit par contains the final estimate of the multiplier.
167: c
168: c f is a double precision variable.
169: c On entry f need not be specified.
170: c On exit f is set to f(x) at the output x.
171: c
172: c x is a double precision array of dimension n.
173: c On entry x need not be specified.
174: c On exit x is set to the final estimate of the solution.
175: c
176: c info is an integer variable.
177: c On entry info need not be specified.
178: c On exit info is set as follows:
179: c
180: c info = 1 The function value f(x) has the relative
181: c accuracy specified by rtol.
182: c
183: c info = 2 The function value f(x) has the absolute
184: c accuracy specified by atol.
185: c
186: c info = 3 Rounding errors prevent further progress.
187: c On exit x is the best available approximation.
188: c
189: c info = 4 Failure to converge after itmax iterations.
190: c On exit x is the best available approximation.
191: c
192: c z is a double precision work array of dimension n.
193: c
194: c wa1 is a double precision work array of dimension n.
195: c
196: c wa2 is a double precision work array of dimension n.
197: c
198: c Subprograms called
199: c
200: c MINPACK-2 ...... destsv
201: c
202: c LAPACK ......... dpotrf
203: c
204: c Level 1 BLAS ... daxpy, dcopy, ddot, dnrm2, dscal
205: c
206: c Level 2 BLAS ... dtrmv, dtrsv
207: c
208: c MINPACK-2 Project. October 1993.
209: c Argonne National Laboratory and University of Minnesota.
210: c Brett M. Averick, Richard Carter, and Jorge J. More'
211: c
212: c ***********
213: */
214: PetscErrorCode gqt(PetscInt n, PetscReal *a, PetscInt lda, PetscReal *b, PetscReal delta, PetscReal rtol, PetscReal atol, PetscInt itmax, PetscReal *retpar, PetscReal *retf, PetscReal *x, PetscInt *retinfo, PetscInt *retits, PetscReal *z, PetscReal *wa1, PetscReal *wa2)
215: {
216: PetscReal f = 0.0, p001 = 0.001, p5 = 0.5, minusone = -1, delta2 = delta * delta;
217: PetscInt iter, j, rednc, info;
218: PetscBLASInt indef;
219: PetscBLASInt blas1 = 1, blasn, iblas, blaslda, blasldap1, blasinfo;
220: PetscReal alpha, anorm, bnorm, parc, parf, parl, pars, par = *retpar, paru, prod, rxnorm, rznorm = 0.0, temp, xnorm;
222: PetscFunctionBegin;
223: PetscCall(PetscBLASIntCast(n, &blasn));
224: PetscCall(PetscBLASIntCast(lda, &blaslda));
225: PetscCall(PetscBLASIntCast(lda + 1, &blasldap1));
226: parf = 0.0;
227: xnorm = 0.0;
228: rxnorm = 0.0;
229: rednc = 0;
230: for (j = 0; j < n; j++) {
231: x[j] = 0.0;
232: z[j] = 0.0;
233: }
235: /* Copy the diagonal and save A in its lower triangle */
236: PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, a, &blasldap1, wa1, &blas1));
237: for (j = 0; j < n - 1; j++) {
238: PetscCall(PetscBLASIntCast(n - j - 1, &iblas));
239: PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + lda * (j + 1)], &blaslda, &a[j + 1 + lda * j], &blas1));
240: }
242: /* Calculate the l1-norm of A, the Gershgorin row sums, and the
243: l2-norm of b */
244: anorm = 0.0;
245: for (j = 0; j < n; j++) {
246: PetscCallBLAS("BLASasum", wa2[j] = BLASasum_(&blasn, &a[0 + lda * j], &blas1));
247: CHKMEMQ;
248: anorm = PetscMax(anorm, wa2[j]);
249: }
250: for (j = 0; j < n; j++) wa2[j] = wa2[j] - PetscAbs(wa1[j]);
251: PetscCallBLAS("BLASnrm2", bnorm = BLASnrm2_(&blasn, b, &blas1));
252: CHKMEMQ;
253: /* Calculate a lower bound, pars, for the domain of the problem.
254: Also calculate an upper bound, paru, and a lower bound, parl,
255: for the Lagrange multiplier. */
256: pars = parl = paru = -anorm;
257: for (j = 0; j < n; j++) {
258: pars = PetscMax(pars, -wa1[j]);
259: parl = PetscMax(parl, wa1[j] + wa2[j]);
260: paru = PetscMax(paru, -wa1[j] + wa2[j]);
261: }
262: parl = PetscMax(bnorm / delta - parl, pars);
263: parl = PetscMax(0.0, parl);
264: paru = PetscMax(0.0, bnorm / delta + paru);
266: /* If the input par lies outside of the interval (parl, paru),
267: set par to the closer endpoint. */
269: par = PetscMax(par, parl);
270: par = PetscMin(par, paru);
272: /* Special case: parl == paru */
273: paru = PetscMax(paru, (1.0 + rtol) * parl);
275: /* Beginning of an iteration */
277: info = 0;
278: for (iter = 1; iter <= itmax; iter++) {
279: /* Safeguard par */
280: if (par <= pars && paru > 0) par = PetscMax(p001, PetscSqrtScalar(parl / paru)) * paru;
282: /* Copy the lower triangle of A into its upper triangle and compute A + par*I */
284: for (j = 0; j < n - 1; j++) {
285: PetscCall(PetscBLASIntCast(n - j - 1, &iblas));
286: PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + 1 + j * lda], &blas1, &a[j + (j + 1) * lda], &blaslda));
287: }
288: for (j = 0; j < n; j++) a[j + j * lda] = wa1[j] + par;
290: /* Attempt the Cholesky factorization of A without referencing the lower triangular part. */
291: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("U", &blasn, a, &blaslda, &indef));
293: /* Case 1: A + par*I is pos. def. */
294: if (indef == 0) {
295: /* Compute an approximate solution x and save the last value of par with A + par*I pos. def. */
297: parf = par;
298: PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, b, &blas1, wa2, &blas1));
299: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
300: PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
301: PetscCallBLAS("BLASnrm2", rxnorm = BLASnrm2_(&blasn, wa2, &blas1));
302: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
303: PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
305: PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, wa2, &blas1, x, &blas1));
306: PetscCallBLAS("BLASscal", BLASscal_(&blasn, &minusone, x, &blas1));
307: PetscCallBLAS("BLASnrm2", xnorm = BLASnrm2_(&blasn, x, &blas1));
308: CHKMEMQ;
310: /* Test for convergence */
311: if (PetscAbs(xnorm - delta) <= rtol * delta || (par == 0 && xnorm <= (1.0 + rtol) * delta)) info = 1;
313: /* Compute a direction of negative curvature and use this information to improve pars. */
314: PetscCall(estsv(n, a, lda, &rznorm, z));
315: CHKMEMQ;
316: pars = PetscMax(pars, par - rznorm * rznorm);
318: /* Compute a negative curvature solution of the form x + alpha*z, where norm(x+alpha*z)==delta */
320: rednc = 0;
321: if (xnorm < delta) {
322: /* Compute alpha */
323: PetscCallBLAS("BLASdot", prod = BLASdot_(&blasn, z, &blas1, x, &blas1) / delta);
324: temp = (delta - xnorm) * ((delta + xnorm) / delta);
325: alpha = temp / (PetscAbs(prod) + PetscSqrtScalar(prod * prod + temp / delta));
326: if (prod >= 0) alpha = PetscAbs(alpha);
327: else alpha = -PetscAbs(alpha);
329: /* Test to decide if the negative curvature step produces a larger reduction than with z=0 */
330: rznorm = PetscAbs(alpha) * rznorm;
331: if ((rznorm * rznorm + par * xnorm * xnorm) / (delta2) <= par) rednc = 1;
332: /* Test for convergence */
333: if (p5 * rznorm * rznorm / delta2 <= rtol * (1.0 - p5 * rtol) * (par + rxnorm * rxnorm / delta2)) {
334: info = 1;
335: } else if (info == 0 && (p5 * (par + rxnorm * rxnorm / delta2) <= atol / delta2)) {
336: info = 2;
337: }
338: }
340: /* Compute the Newton correction parc to par. */
341: if (xnorm == 0) {
342: parc = -par;
343: } else {
344: PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, x, &blas1, wa2, &blas1));
345: temp = 1.0 / xnorm;
346: PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, wa2, &blas1));
347: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
348: PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
349: PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&blasn, wa2, &blas1));
350: parc = (xnorm - delta) / (delta * temp * temp);
351: }
353: /* update parl or paru */
354: if (xnorm > delta) {
355: parl = PetscMax(parl, par);
356: } else if (xnorm < delta) {
357: paru = PetscMin(paru, par);
358: }
359: } else {
360: /* Case 2: A + par*I is not pos. def. */
362: /* Use the rank information from the Cholesky decomposition to update par. */
364: if (indef > 1) {
365: /* Restore column indef to A + par*I. */
366: iblas = indef - 1;
367: PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[indef - 1 + 0 * lda], &blaslda, &a[0 + (indef - 1) * lda], &blas1));
368: a[indef - 1 + (indef - 1) * lda] = wa1[indef - 1] + par;
370: /* compute parc. */
371: PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[0 + (indef - 1) * lda], &blas1, wa2, &blas1));
372: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &iblas, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
373: PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
374: PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, wa2, &blas1, &a[0 + (indef - 1) * lda], &blas1));
375: PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&iblas, &a[0 + (indef - 1) * lda], &blas1));
376: CHKMEMQ;
377: a[indef - 1 + (indef - 1) * lda] -= temp * temp;
378: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &iblas, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
379: PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo);
380: }
382: wa2[indef - 1] = -1.0;
383: iblas = indef;
384: PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&iblas, wa2, &blas1));
385: parc = -a[indef - 1 + (indef - 1) * lda] / (temp * temp);
386: pars = PetscMax(pars, par + parc);
388: /* If necessary, increase paru slightly.
389: This is needed because in some exceptional situations
390: paru is the optimal value of par. */
392: paru = PetscMax(paru, (1.0 + rtol) * pars);
393: }
395: /* Use pars to update parl */
396: parl = PetscMax(parl, pars);
398: /* Test for converged. */
399: if (info == 0) {
400: if (iter == itmax) info = 4;
401: if (paru <= (1.0 + p5 * rtol) * pars) info = 3;
402: if (paru == 0.0) info = 2;
403: }
405: /* If exiting, store the best approximation and restore
406: the upper triangle of A. */
408: if (info != 0) {
409: /* Compute the best current estimates for x and f. */
410: par = parf;
411: f = -p5 * (rxnorm * rxnorm + par * xnorm * xnorm);
412: if (rednc) {
413: f = -p5 * (rxnorm * rxnorm + par * delta * delta - rznorm * rznorm);
414: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &alpha, z, &blas1, x, &blas1));
415: }
416: /* Restore the upper triangle of A */
417: for (j = 0; j < n; j++) {
418: PetscCall(PetscBLASIntCast(n - j - 1, &iblas));
419: PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + 1 + j * lda], &blas1, &a[j + (j + 1) * lda], &blaslda));
420: }
421: PetscCall(PetscBLASIntCast(lda + 1, &iblas));
422: PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, wa1, &blas1, a, &iblas));
423: break;
424: }
425: par = PetscMax(parl, par + parc);
426: }
427: *retpar = par;
428: *retf = f;
429: *retinfo = info;
430: *retits = iter;
431: CHKMEMQ;
432: PetscFunctionReturn(PETSC_SUCCESS);
433: }