1 | /////////////////////////////////////////////////////////////////////////////////
|
---|
2 | //
|
---|
3 | // Levenberg - Marquardt non-linear minimization algorithm
|
---|
4 | // Copyright (C) 2004-05 Manolis Lourakis (lourakis at ics forth gr)
|
---|
5 | // Institute of Computer Science, Foundation for Research & Technology - Hellas
|
---|
6 | // Heraklion, Crete, Greece.
|
---|
7 | //
|
---|
8 | // This program is free software; you can redistribute it and/or modify
|
---|
9 | // it under the terms of the GNU General Public License as published by
|
---|
10 | // the Free Software Foundation; either version 2 of the License, or
|
---|
11 | // (at your option) any later version.
|
---|
12 | //
|
---|
13 | // This program is distributed in the hope that it will be useful,
|
---|
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
16 | // GNU General Public License for more details.
|
---|
17 | //
|
---|
18 | /////////////////////////////////////////////////////////////////////////////////
|
---|
19 |
|
---|
20 | #ifndef LM_REAL // not included by lmbc.c
|
---|
21 | #error This file should not be compiled directly!
|
---|
22 | #endif
|
---|
23 |
|
---|
24 |
|
---|
25 | /* precision-specific definitions */
|
---|
26 | #define FUNC_STATE LM_ADD_PREFIX(func_state)
|
---|
27 | #define LNSRCH LM_ADD_PREFIX(lnsrch)
|
---|
28 | #define BOXPROJECT LM_ADD_PREFIX(boxProject)
|
---|
29 | #define BOXSCALE LM_ADD_PREFIX(boxScale)
|
---|
30 | #define LEVMAR_BOX_CHECK LM_ADD_PREFIX(levmar_box_check)
|
---|
31 | #define VECNORM LM_ADD_PREFIX(vecnorm)
|
---|
32 | #define LEVMAR_BC_DER LM_ADD_PREFIX(levmar_bc_der)
|
---|
33 | #define LEVMAR_BC_DIF LM_ADD_PREFIX(levmar_bc_dif)
|
---|
34 | #define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)
|
---|
35 | #define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx)
|
---|
36 | #define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
|
---|
37 | #define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
|
---|
38 | #define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
|
---|
39 | #define LMBC_DIF_DATA LM_ADD_PREFIX(lmbc_dif_data)
|
---|
40 | #define LMBC_DIF_FUNC LM_ADD_PREFIX(lmbc_dif_func)
|
---|
41 | #define LMBC_DIF_JACF LM_ADD_PREFIX(lmbc_dif_jacf)
|
---|
42 |
|
---|
43 | #ifdef HAVE_LAPACK
|
---|
44 | #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
|
---|
45 | #define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
|
---|
46 | #define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
|
---|
47 | #define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
|
---|
48 | #define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
|
---|
49 | #define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK)
|
---|
50 | #else
|
---|
51 | #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack)
|
---|
52 | #endif /* HAVE_LAPACK */
|
---|
53 |
|
---|
54 | #ifdef HAVE_PLASMA
|
---|
55 | #define AX_EQ_B_PLASMA_CHOL LM_ADD_PREFIX(Ax_eq_b_PLASMA_Chol)
|
---|
56 | #endif
|
---|
57 |
|
---|
58 | /* find the median of 3 numbers */
|
---|
59 | #define __MEDIAN3(a, b, c) ( ((a) >= (b))?\
|
---|
60 | ( ((c) >= (a))? (a) : ( ((c) <= (b))? (b) : (c) ) ) : \
|
---|
61 | ( ((c) >= (b))? (b) : ( ((c) <= (a))? (a) : (c) ) ) )
|
---|
62 |
|
---|
63 | /* Projections to feasible set \Omega: P_{\Omega}(y) := arg min { ||x - y|| : x \in \Omega}, y \in R^m */
|
---|
64 |
|
---|
65 | /* project vector p to a box shaped feasible set. p is a mx1 vector.
|
---|
66 | * Either lb, ub can be NULL. If not NULL, they are mx1 vectors
|
---|
67 | */
|
---|
68 | static void BOXPROJECT(LM_REAL *p, LM_REAL *lb, LM_REAL *ub, int m)
|
---|
69 | {
|
---|
70 | register int i;
|
---|
71 |
|
---|
72 | if(!lb){ /* no lower bounds */
|
---|
73 | if(!ub) /* no upper bounds */
|
---|
74 | return;
|
---|
75 | else{ /* upper bounds only */
|
---|
76 | for(i=m; i-->0; )
|
---|
77 | if(p[i]>ub[i]) p[i]=ub[i];
|
---|
78 | }
|
---|
79 | }
|
---|
80 | else
|
---|
81 | if(!ub){ /* lower bounds only */
|
---|
82 | for(i=m; i-->0; )
|
---|
83 | if(p[i]<lb[i]) p[i]=lb[i];
|
---|
84 | }
|
---|
85 | else /* box bounds */
|
---|
86 | for(i=m; i-->0; )
|
---|
87 | p[i]=__MEDIAN3(lb[i], p[i], ub[i]);
|
---|
88 | }
|
---|
89 | #undef __MEDIAN3
|
---|
90 |
|
---|
91 | /* pointwise scaling of bounds with the mx1 vector scl. If div=1 scaling is by 1./scl.
|
---|
92 | * Either lb, ub can be NULL. If not NULL, they are mx1 vectors
|
---|
93 | */
|
---|
94 | static void BOXSCALE(LM_REAL *lb, LM_REAL *ub, LM_REAL *scl, int m, int div)
|
---|
95 | {
|
---|
96 | register int i;
|
---|
97 |
|
---|
98 | if(!lb){ /* no lower bounds */
|
---|
99 | if(!ub) /* no upper bounds */
|
---|
100 | return;
|
---|
101 | else{ /* upper bounds only */
|
---|
102 | if(div){
|
---|
103 | for(i=m; i-->0; )
|
---|
104 | if(ub[i]!=LM_REAL_MAX)
|
---|
105 | ub[i]=ub[i]/scl[i];
|
---|
106 | }else{
|
---|
107 | for(i=m; i-->0; )
|
---|
108 | if(ub[i]!=LM_REAL_MAX)
|
---|
109 | ub[i]=ub[i]*scl[i];
|
---|
110 | }
|
---|
111 | }
|
---|
112 | }
|
---|
113 | else
|
---|
114 | if(!ub){ /* lower bounds only */
|
---|
115 | if(div){
|
---|
116 | for(i=m; i-->0; )
|
---|
117 | if(lb[i]!=LM_REAL_MIN)
|
---|
118 | lb[i]=lb[i]/scl[i];
|
---|
119 | }else{
|
---|
120 | for(i=m; i-->0; )
|
---|
121 | if(lb[i]!=LM_REAL_MIN)
|
---|
122 | lb[i]=lb[i]*scl[i];
|
---|
123 | }
|
---|
124 | }
|
---|
125 | else{ /* box bounds */
|
---|
126 | if(div){
|
---|
127 | for(i=m; i-->0; ){
|
---|
128 | if(ub[i]!=LM_REAL_MAX)
|
---|
129 | ub[i]=ub[i]/scl[i];
|
---|
130 | if(lb[i]!=LM_REAL_MIN)
|
---|
131 | lb[i]=lb[i]/scl[i];
|
---|
132 | }
|
---|
133 | }else{
|
---|
134 | for(i=m; i-->0; ){
|
---|
135 | if(ub[i]!=LM_REAL_MAX)
|
---|
136 | ub[i]=ub[i]*scl[i];
|
---|
137 | if(lb[i]!=LM_REAL_MIN)
|
---|
138 | lb[i]=lb[i]*scl[i];
|
---|
139 | }
|
---|
140 | }
|
---|
141 | }
|
---|
142 | }
|
---|
143 |
|
---|
144 | /* compute the norm of a vector in a manner that avoids overflows
|
---|
145 | */
|
---|
146 | static LM_REAL VECNORM(LM_REAL *x, int n)
|
---|
147 | {
|
---|
148 | #ifdef HAVE_LAPACK
|
---|
149 | #define NRM2 LM_MK_BLAS_NAME(nrm2)
|
---|
150 | extern LM_REAL NRM2(int *n, LM_REAL *dx, int *incx);
|
---|
151 | int one=1;
|
---|
152 |
|
---|
153 | return NRM2(&n, x, &one);
|
---|
154 | #undef NRM2
|
---|
155 | #else // no LAPACK, use the simple method described by Blue in TOMS78
|
---|
156 | register int i;
|
---|
157 | LM_REAL max, sum, tmp;
|
---|
158 |
|
---|
159 | for(i=n, max=0.0; i-->0; )
|
---|
160 | if(x[i]>max) max=x[i];
|
---|
161 | else if(x[i]<-max) max=-x[i];
|
---|
162 |
|
---|
163 | for(i=n, sum=0.0; i-->0; ){
|
---|
164 | tmp=x[i]/max;
|
---|
165 | sum+=tmp*tmp;
|
---|
166 | }
|
---|
167 |
|
---|
168 | return max*(LM_REAL)sqrt(sum);
|
---|
169 | #endif /* HAVE_LAPACK */
|
---|
170 | }
|
---|
171 |
|
---|
172 | struct FUNC_STATE{
|
---|
173 | int n, *nfev;
|
---|
174 | LM_REAL *hx, *x;
|
---|
175 | LM_REAL *lb, *ub;
|
---|
176 | void *adata;
|
---|
177 | };
|
---|
178 |
|
---|
179 | static void
|
---|
180 | LNSRCH(int m, LM_REAL *x, LM_REAL f, LM_REAL *g, LM_REAL *p, LM_REAL alpha, LM_REAL *xpls,
|
---|
181 | LM_REAL *ffpls, void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), struct FUNC_STATE *state,
|
---|
182 | int *mxtake, int *iretcd, LM_REAL stepmx, LM_REAL steptl, LM_REAL *sx)
|
---|
183 | {
|
---|
184 | /* Find a next newton iterate by backtracking line search.
|
---|
185 | * Specifically, finds a \lambda such that for a fixed alpha<0.5 (usually 1e-4),
|
---|
186 | * f(x + \lambda*p) <= f(x) + alpha * \lambda * g^T*p
|
---|
187 | *
|
---|
188 | * Translated (with a few changes) from Schnabel, Koontz & Weiss uncmin.f, v1.3
|
---|
189 | * Main changes include the addition of box projection and modification of the scaling
|
---|
190 | * logic since uncmin.f operates in the original (unscaled) variable space.
|
---|
191 |
|
---|
192 | * PARAMETERS :
|
---|
193 |
|
---|
194 | * m --> dimension of problem (i.e. number of variables)
|
---|
195 | * x(m) --> old iterate: x[k-1]
|
---|
196 | * f --> function value at old iterate, f(x)
|
---|
197 | * g(m) --> gradient at old iterate, g(x), or approximate
|
---|
198 | * p(m) --> non-zero newton step
|
---|
199 | * alpha --> fixed constant < 0.5 for line search (see above)
|
---|
200 | * xpls(m) <-- new iterate x[k]
|
---|
201 | * ffpls <-- function value at new iterate, f(xpls)
|
---|
202 | * func --> name of subroutine to evaluate function
|
---|
203 | * state <--> information other than x and m that func requires.
|
---|
204 | * state is not modified in xlnsrch (but can be modified by func).
|
---|
205 | * iretcd <-- return code
|
---|
206 | * mxtake <-- boolean flag indicating step of maximum length used
|
---|
207 | * stepmx --> maximum allowable step size
|
---|
208 | * steptl --> relative step size at which successive iterates
|
---|
209 | * considered close enough to terminate algorithm
|
---|
210 | * sx(m) --> diagonal scaling matrix for x, can be NULL
|
---|
211 |
|
---|
212 | * internal variables
|
---|
213 |
|
---|
214 | * sln newton length
|
---|
215 | * rln relative length of newton step
|
---|
216 | */
|
---|
217 |
|
---|
218 | register int i, j;
|
---|
219 | int firstback = 1;
|
---|
220 | LM_REAL disc;
|
---|
221 | LM_REAL a3, b;
|
---|
222 | LM_REAL t1, t2, t3, lambda, tlmbda, rmnlmb;
|
---|
223 | LM_REAL scl, rln, sln, slp;
|
---|
224 | LM_REAL tmp1, tmp2;
|
---|
225 | LM_REAL fpls, pfpls = 0., plmbda = 0.; /* -Wall */
|
---|
226 |
|
---|
227 | f*=LM_CNST(0.5);
|
---|
228 | *mxtake = 0;
|
---|
229 | *iretcd = 2;
|
---|
230 | tmp1 = 0.;
|
---|
231 | for (i = m; i-- > 0; )
|
---|
232 | tmp1 += p[i] * p[i];
|
---|
233 | sln = (LM_REAL)sqrt(tmp1);
|
---|
234 | if (sln > stepmx) {
|
---|
235 | /* newton step longer than maximum allowed */
|
---|
236 | scl = stepmx / sln;
|
---|
237 | for (i = m; i-- > 0; ) /* p * scl */
|
---|
238 | p[i]*=scl;
|
---|
239 | sln = stepmx;
|
---|
240 | }
|
---|
241 | for (i = m, slp = rln = 0.; i-- > 0; ){
|
---|
242 | slp+=g[i]*p[i]; /* g^T * p */
|
---|
243 |
|
---|
244 | tmp1 = (FABS(x[i])>=LM_CNST(1.))? FABS(x[i]) : LM_CNST(1.);
|
---|
245 | tmp2 = FABS(p[i])/tmp1;
|
---|
246 | if(rln < tmp2) rln = tmp2;
|
---|
247 | }
|
---|
248 | rmnlmb = steptl / rln;
|
---|
249 | lambda = LM_CNST(1.0);
|
---|
250 |
|
---|
251 | /* check if new iterate satisfactory. generate new lambda if necessary. */
|
---|
252 |
|
---|
253 | for(j = _LSITMAX_; j-- > 0; ) {
|
---|
254 | for (i = m; i-- > 0; )
|
---|
255 | xpls[i] = x[i] + lambda * p[i];
|
---|
256 | BOXPROJECT(xpls, state->lb, state->ub, m); /* project to feasible set */
|
---|
257 |
|
---|
258 | /* evaluate function at new point */
|
---|
259 | if(!sx){
|
---|
260 | (*func)(xpls, state->hx, m, state->n, state->adata); ++(*(state->nfev));
|
---|
261 | }
|
---|
262 | else{
|
---|
263 | for (i = m; i-- > 0; ) xpls[i] *= sx[i];
|
---|
264 | (*func)(xpls, state->hx, m, state->n, state->adata); ++(*(state->nfev));
|
---|
265 | for (i = m; i-- > 0; ) xpls[i] /= sx[i];
|
---|
266 | }
|
---|
267 | /* ### state->hx=state->x-state->hx, tmp1=||state->hx|| */
|
---|
268 | #if 1
|
---|
269 | tmp1=LEVMAR_L2NRMXMY(state->hx, state->x, state->hx, state->n);
|
---|
270 | #else
|
---|
271 | for(i=0, tmp1=0.0; i<state->n; ++i){
|
---|
272 | state->hx[i]=tmp2=state->x[i]-state->hx[i];
|
---|
273 | tmp1+=tmp2*tmp2;
|
---|
274 | }
|
---|
275 | #endif
|
---|
276 | fpls=LM_CNST(0.5)*tmp1; *ffpls=tmp1;
|
---|
277 |
|
---|
278 | if (fpls <= f + slp * alpha * lambda) { /* solution found */
|
---|
279 | *iretcd = 0;
|
---|
280 | if (lambda == LM_CNST(1.) && sln > stepmx * LM_CNST(.99)) *mxtake = 1;
|
---|
281 | return;
|
---|
282 | }
|
---|
283 |
|
---|
284 | /* else : solution not (yet) found */
|
---|
285 |
|
---|
286 | /* First find a point with a finite value */
|
---|
287 |
|
---|
288 | if (lambda < rmnlmb) {
|
---|
289 | /* no satisfactory xpls found sufficiently distinct from x */
|
---|
290 |
|
---|
291 | *iretcd = 1;
|
---|
292 | return;
|
---|
293 | }
|
---|
294 | else { /* calculate new lambda */
|
---|
295 |
|
---|
296 | /* modifications to cover non-finite values */
|
---|
297 | if (!LM_FINITE(fpls)) {
|
---|
298 | lambda *= LM_CNST(0.1);
|
---|
299 | firstback = 1;
|
---|
300 | }
|
---|
301 | else {
|
---|
302 | if (firstback) { /* first backtrack: quadratic fit */
|
---|
303 | tlmbda = -lambda * slp / ((fpls - f - slp) * LM_CNST(2.));
|
---|
304 | firstback = 0;
|
---|
305 | }
|
---|
306 | else { /* all subsequent backtracks: cubic fit */
|
---|
307 | t1 = fpls - f - lambda * slp;
|
---|
308 | t2 = pfpls - f - plmbda * slp;
|
---|
309 | t3 = LM_CNST(1.) / (lambda - plmbda);
|
---|
310 | a3 = LM_CNST(3.) * t3 * (t1 / (lambda * lambda)
|
---|
311 | - t2 / (plmbda * plmbda));
|
---|
312 | b = t3 * (t2 * lambda / (plmbda * plmbda)
|
---|
313 | - t1 * plmbda / (lambda * lambda));
|
---|
314 | disc = b * b - a3 * slp;
|
---|
315 | if (disc > b * b)
|
---|
316 | /* only one positive critical point, must be minimum */
|
---|
317 | tlmbda = (-b + ((a3 < 0)? -(LM_REAL)sqrt(disc): (LM_REAL)sqrt(disc))) /a3;
|
---|
318 | else
|
---|
319 | /* both critical points positive, first is minimum */
|
---|
320 | tlmbda = (-b + ((a3 < 0)? (LM_REAL)sqrt(disc): -(LM_REAL)sqrt(disc))) /a3;
|
---|
321 |
|
---|
322 | if (tlmbda > lambda * LM_CNST(.5))
|
---|
323 | tlmbda = lambda * LM_CNST(.5);
|
---|
324 | }
|
---|
325 | plmbda = lambda;
|
---|
326 | pfpls = fpls;
|
---|
327 | if (tlmbda < lambda * LM_CNST(.1))
|
---|
328 | lambda *= LM_CNST(.1);
|
---|
329 | else
|
---|
330 | lambda = tlmbda;
|
---|
331 | }
|
---|
332 | }
|
---|
333 | }
|
---|
334 | /* this point is reached when the iterations limit is exceeded */
|
---|
335 | *iretcd = 1; /* failed */
|
---|
336 | return;
|
---|
337 | } /* LNSRCH */
|
---|
338 |
|
---|
339 | /*
|
---|
340 | * This function seeks the parameter vector p that best describes the measurements
|
---|
341 | * vector x under box constraints.
|
---|
342 | * More precisely, given a vector function func : R^m --> R^n with n>=m,
|
---|
343 | * it finds p s.t. func(p) ~= x, i.e. the squared second order (i.e. L2) norm of
|
---|
344 | * e=x-func(p) is minimized under the constraints lb[i]<=p[i]<=ub[i].
|
---|
345 | * If no lower bound constraint applies for p[i], use -DBL_MAX/-FLT_MAX for lb[i];
|
---|
346 | * If no upper bound constraint applies for p[i], use DBL_MAX/FLT_MAX for ub[i].
|
---|
347 | *
|
---|
348 | * This function requires an analytic Jacobian. In case the latter is unavailable,
|
---|
349 | * use LEVMAR_BC_DIF() bellow
|
---|
350 | *
|
---|
351 | * Returns the number of iterations (>=0) if successful, LM_ERROR if failed
|
---|
352 | *
|
---|
353 | * For details, see C. Kanzow, N. Yamashita and M. Fukushima: "Levenberg-Marquardt
|
---|
354 | * methods for constrained nonlinear equations with strong local convergence properties",
|
---|
355 | * Journal of Computational and Applied Mathematics 172, 2004, pp. 375-397.
|
---|
356 | * Also, see K. Madsen, H.B. Nielsen and O. Tingleff's lecture notes on
|
---|
357 | * unconstrained Levenberg-Marquardt at http://www.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf
|
---|
358 | *
|
---|
359 | * The algorithm implemented by this function employs projected gradient steps. Since steepest descent
|
---|
360 | * is very sensitive to poor scaling, diagonal scaling has been implemented through the dscl argument:
|
---|
361 | * Instead of minimizing f(p) for p, f(D*q) is minimized for q=D^-1*p, D being a diagonal scaling
|
---|
362 | * matrix whose diagonal equals dscl (see Nocedal-Wright p.27). dscl should contain "typical" magnitudes
|
---|
363 | * for the parameters p. A NULL value for dscl implies no scaling. i.e. D=I.
|
---|
364 | * To account for scaling, the code divides the starting point and box bounds pointwise by dscl. Moreover,
|
---|
365 | * before calling func and jacf the scaling has to be undone (by multiplying), as should be done with
|
---|
366 | * the final point. Note also that jac_q=jac_p*D, where jac_q, jac_p are the jacobians w.r.t. q & p, resp.
|
---|
367 | */
|
---|
368 |
|
---|
369 | int LEVMAR_BC_DER(
|
---|
370 | void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
|
---|
371 | void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata), /* function to evaluate the Jacobian \part x / \part p */
|
---|
372 | LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */
|
---|
373 | LM_REAL *x, /* I: measurement vector. NULL implies a zero vector */
|
---|
374 | int m, /* I: parameter vector dimension (i.e. #unknowns) */
|
---|
375 | int n, /* I: measurement vector dimension */
|
---|
376 | LM_REAL *lb, /* I: vector of lower bounds. If NULL, no lower bounds apply */
|
---|
377 | LM_REAL *ub, /* I: vector of upper bounds. If NULL, no upper bounds apply */
|
---|
378 | LM_REAL *dscl, /* I: diagonal scaling constants. NULL implies no scaling */
|
---|
379 | int itmax, /* I: maximum number of iterations */
|
---|
380 | LM_REAL opts[4], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
|
---|
381 | * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used.
|
---|
382 | * Note that ||J^T e||_inf is computed on free (not equal to lb[i] or ub[i]) variables only.
|
---|
383 | */
|
---|
384 | LM_REAL info[LM_INFO_SZ],
|
---|
385 | /* O: information regarding the minimization. Set to NULL if don't care
|
---|
386 | * info[0]= ||e||_2 at initial p.
|
---|
387 | * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
|
---|
388 | * info[5]= # iterations,
|
---|
389 | * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
|
---|
390 | * 2 - stopped by small Dp
|
---|
391 | * 3 - stopped by itmax
|
---|
392 | * 4 - singular matrix. Restart from current p with increased mu
|
---|
393 | * 5 - no further error reduction is possible. Restart with increased mu
|
---|
394 | * 6 - stopped by small ||e||_2
|
---|
395 | * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
|
---|
396 | * info[7]= # function evaluations
|
---|
397 | * info[8]= # Jacobian evaluations
|
---|
398 | * info[9]= # linear systems solved, i.e. # attempts for reducing error
|
---|
399 | */
|
---|
400 | LM_REAL *work, /* working memory at least LM_BC_DER_WORKSZ() reals large, allocated if NULL */
|
---|
401 | LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
|
---|
402 | void *adata) /* pointer to possibly additional data, passed uninterpreted to func & jacf.
|
---|
403 | * Set to NULL if not needed
|
---|
404 | */
|
---|
405 | {
|
---|
406 | register int i, j, k, l;
|
---|
407 | int worksz, freework=0, issolved;
|
---|
408 | /* temp work arrays */
|
---|
409 | LM_REAL *e, /* nx1 */
|
---|
410 | *hx, /* \hat{x}_i, nx1 */
|
---|
411 | *jacTe, /* J^T e_i mx1 */
|
---|
412 | *jac, /* nxm */
|
---|
413 | *jacTjac, /* mxm */
|
---|
414 | *Dp, /* mx1 */
|
---|
415 | *diag_jacTjac, /* diagonal of J^T J, mx1 */
|
---|
416 | *pDp, /* p + Dp, mx1 */
|
---|
417 | *sp_pDp=NULL; /* dscl*p or dscl*pDp, mx1 */
|
---|
418 |
|
---|
419 | register LM_REAL mu, /* damping constant */
|
---|
420 | tmp; /* mainly used in matrix & vector multiplications */
|
---|
421 | LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
|
---|
422 | LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
|
---|
423 | LM_REAL tau, eps1, eps2, eps2_sq, eps3;
|
---|
424 | LM_REAL init_p_eL2;
|
---|
425 | int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
|
---|
426 | const int nm=n*m;
|
---|
427 |
|
---|
428 | /* variables for constrained LM */
|
---|
429 | struct FUNC_STATE fstate;
|
---|
430 | LM_REAL alpha=LM_CNST(1e-4), beta=LM_CNST(0.9), gamma=LM_CNST(0.99995), rho=LM_CNST(1e-8);
|
---|
431 | LM_REAL t, t0, jacTeDp;
|
---|
432 | LM_REAL tmin=LM_CNST(1e-12), tming=LM_CNST(1e-18); /* minimum step length for LS and PG steps */
|
---|
433 | const LM_REAL tini=LM_CNST(1.0); /* initial step length for LS and PG steps */
|
---|
434 | int nLMsteps=0, nLSsteps=0, nPGsteps=0, gprevtaken=0;
|
---|
435 | int numactive;
|
---|
436 | int (*linsolver)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)=NULL;
|
---|
437 |
|
---|
438 | mu=jacTe_inf=t=0.0; tmin=tmin; /* -Wall */
|
---|
439 |
|
---|
440 | if(n<m){
|
---|
441 | fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
|
---|
442 | return LM_ERROR;
|
---|
443 | }
|
---|
444 |
|
---|
445 | if(!jacf){
|
---|
446 | fprintf(stderr, RCAT("No function specified for computing the Jacobian in ", LEVMAR_BC_DER)
|
---|
447 | RCAT("().\nIf no such function is available, use ", LEVMAR_BC_DIF) RCAT("() rather than ", LEVMAR_BC_DER) "()\n");
|
---|
448 | return LM_ERROR;
|
---|
449 | }
|
---|
450 |
|
---|
451 | if(!LEVMAR_BOX_CHECK(lb, ub, m)){
|
---|
452 | fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): at least one lower bound exceeds the upper one\n"));
|
---|
453 | return LM_ERROR;
|
---|
454 | }
|
---|
455 |
|
---|
456 | if(dscl){ /* check that scaling consts are valid */
|
---|
457 | for(i=m; i-->0; )
|
---|
458 | if(dscl[i]<=0.0){
|
---|
459 | fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): scaling constants should be positive (scale %d: %g <= 0)\n"), i, dscl[i]);
|
---|
460 | return LM_ERROR;
|
---|
461 | }
|
---|
462 |
|
---|
463 | sp_pDp=(LM_REAL *)malloc(m*sizeof(LM_REAL));
|
---|
464 | if(!sp_pDp){
|
---|
465 | fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): memory allocation request failed\n"));
|
---|
466 | return LM_ERROR;
|
---|
467 | }
|
---|
468 | }
|
---|
469 |
|
---|
470 | if(opts){
|
---|
471 | tau=opts[0];
|
---|
472 | eps1=opts[1];
|
---|
473 | eps2=opts[2];
|
---|
474 | eps2_sq=opts[2]*opts[2];
|
---|
475 | eps3=opts[3];
|
---|
476 | }
|
---|
477 | else{ // use default values
|
---|
478 | tau=LM_CNST(LM_INIT_MU);
|
---|
479 | eps1=LM_CNST(LM_STOP_THRESH);
|
---|
480 | eps2=LM_CNST(LM_STOP_THRESH);
|
---|
481 | eps2_sq=LM_CNST(LM_STOP_THRESH)*LM_CNST(LM_STOP_THRESH);
|
---|
482 | eps3=LM_CNST(LM_STOP_THRESH);
|
---|
483 | }
|
---|
484 |
|
---|
485 | if(!work){
|
---|
486 | worksz=LM_BC_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m;
|
---|
487 | work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
|
---|
488 | if(!work){
|
---|
489 | fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): memory allocation request failed\n"));
|
---|
490 | return LM_ERROR;
|
---|
491 | }
|
---|
492 | freework=1;
|
---|
493 | }
|
---|
494 |
|
---|
495 | /* set up work arrays */
|
---|
496 | e=work;
|
---|
497 | hx=e + n;
|
---|
498 | jacTe=hx + n;
|
---|
499 | jac=jacTe + m;
|
---|
500 | jacTjac=jac + nm;
|
---|
501 | Dp=jacTjac + m*m;
|
---|
502 | diag_jacTjac=Dp + m;
|
---|
503 | pDp=diag_jacTjac + m;
|
---|
504 |
|
---|
505 | fstate.n=n;
|
---|
506 | fstate.hx=hx;
|
---|
507 | fstate.x=x;
|
---|
508 | fstate.lb=lb;
|
---|
509 | fstate.ub=ub;
|
---|
510 | fstate.adata=adata;
|
---|
511 | fstate.nfev=&nfev;
|
---|
512 |
|
---|
513 | /* see if starting point is within the feasible set */
|
---|
514 | for(i=0; i<m; ++i)
|
---|
515 | pDp[i]=p[i];
|
---|
516 | BOXPROJECT(p, lb, ub, m); /* project to feasible set */
|
---|
517 | for(i=0; i<m; ++i)
|
---|
518 | if(pDp[i]!=p[i])
|
---|
519 | fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_BC_DER) "()! [%g projected to %g]\n",
|
---|
520 | i, pDp[i], p[i]);
|
---|
521 |
|
---|
522 | /* compute e=x - f(p) and its L2 norm */
|
---|
523 | (*func)(p, hx, m, n, adata); nfev=1;
|
---|
524 | /* ### e=x-hx, p_eL2=||e|| */
|
---|
525 | #if 1
|
---|
526 | p_eL2=LEVMAR_L2NRMXMY(e, x, hx, n);
|
---|
527 | #else
|
---|
528 | for(i=0, p_eL2=0.0; i<n; ++i){
|
---|
529 | e[i]=tmp=x[i]-hx[i];
|
---|
530 | p_eL2+=tmp*tmp;
|
---|
531 | }
|
---|
532 | #endif
|
---|
533 | init_p_eL2=p_eL2;
|
---|
534 | if(!LM_FINITE(p_eL2)) stop=7;
|
---|
535 |
|
---|
536 | if(dscl){
|
---|
537 | /* scale starting point and constraints */
|
---|
538 | for(i=m; i-->0; ) p[i]/=dscl[i];
|
---|
539 | BOXSCALE(lb, ub, dscl, m, 1);
|
---|
540 | }
|
---|
541 |
|
---|
542 | for(k=0; k<itmax && !stop; ++k){
|
---|
543 | /* Note that p and e have been updated at a previous iteration */
|
---|
544 |
|
---|
545 | if(p_eL2<=eps3){ /* error is small */
|
---|
546 | stop=6;
|
---|
547 | break;
|
---|
548 | }
|
---|
549 |
|
---|
550 | /* Compute the Jacobian J at p, J^T J, J^T e, ||J^T e||_inf and ||p||^2.
|
---|
551 | * Since J^T J is symmetric, its computation can be sped up by computing
|
---|
552 | * only its upper triangular part and copying it to the lower part
|
---|
553 | */
|
---|
554 |
|
---|
555 | if(!dscl){
|
---|
556 | (*jacf)(p, jac, m, n, adata); ++njev;
|
---|
557 | }
|
---|
558 | else{
|
---|
559 | for(i=m; i-->0; ) sp_pDp[i]=p[i]*dscl[i];
|
---|
560 | (*jacf)(sp_pDp, jac, m, n, adata); ++njev;
|
---|
561 |
|
---|
562 | /* compute jac*D */
|
---|
563 | for(i=n; i-->0; ){
|
---|
564 | register LM_REAL *jacim;
|
---|
565 |
|
---|
566 | jacim=jac+i*m;
|
---|
567 | for(j=m; j-->0; )
|
---|
568 | jacim[j]*=dscl[j]; // jac[i*m+j]*=dscl[j];
|
---|
569 | }
|
---|
570 | }
|
---|
571 |
|
---|
572 | /* J^T J, J^T e */
|
---|
573 | if(nm<__BLOCKSZ__SQ){ // this is a small problem
|
---|
574 | /* J^T*J_ij = \sum_l J^T_il * J_lj = \sum_l J_li * J_lj.
|
---|
575 | * Thus, the product J^T J can be computed using an outer loop for
|
---|
576 | * l that adds J_li*J_lj to each element ij of the result. Note that
|
---|
577 | * with this scheme, the accesses to J and JtJ are always along rows,
|
---|
578 | * therefore induces less cache misses compared to the straightforward
|
---|
579 | * algorithm for computing the product (i.e., l loop is innermost one).
|
---|
580 | * A similar scheme applies to the computation of J^T e.
|
---|
581 | * However, for large minimization problems (i.e., involving a large number
|
---|
582 | * of unknowns and measurements) for which J/J^T J rows are too large to
|
---|
583 | * fit in the L1 cache, even this scheme incures many cache misses. In
|
---|
584 | * such cases, a cache-efficient blocking scheme is preferable.
|
---|
585 | *
|
---|
586 | * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
|
---|
587 | * performance problem.
|
---|
588 | *
|
---|
589 | * Note that the non-blocking algorithm is faster on small
|
---|
590 | * problems since in this case it avoids the overheads of blocking.
|
---|
591 | */
|
---|
592 | register LM_REAL alpha, *jaclm, *jacTjacim;
|
---|
593 |
|
---|
594 | /* looping downwards saves a few computations */
|
---|
595 | for(i=m*m; i-->0; )
|
---|
596 | jacTjac[i]=0.0;
|
---|
597 | for(i=m; i-->0; )
|
---|
598 | jacTe[i]=0.0;
|
---|
599 |
|
---|
600 | for(l=n; l-->0; ){
|
---|
601 | jaclm=jac+l*m;
|
---|
602 | for(i=m; i-->0; ){
|
---|
603 | jacTjacim=jacTjac+i*m;
|
---|
604 | alpha=jaclm[i]; //jac[l*m+i];
|
---|
605 | for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */
|
---|
606 | jacTjacim[j]+=jaclm[j]*alpha; //jacTjac[i*m+j]+=jac[l*m+j]*alpha
|
---|
607 |
|
---|
608 | /* J^T e */
|
---|
609 | jacTe[i]+=alpha*e[l];
|
---|
610 | }
|
---|
611 | }
|
---|
612 |
|
---|
613 | for(i=m; i-->0; ) /* copy to upper part */
|
---|
614 | for(j=i+1; j<m; ++j)
|
---|
615 | jacTjac[i*m+j]=jacTjac[j*m+i];
|
---|
616 | }
|
---|
617 | else{ // this is a large problem
|
---|
618 | /* Cache efficient computation of J^T J based on blocking
|
---|
619 | */
|
---|
620 | LEVMAR_TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);
|
---|
621 |
|
---|
622 | /* cache efficient computation of J^T e */
|
---|
623 | for(i=0; i<m; ++i)
|
---|
624 | jacTe[i]=0.0;
|
---|
625 |
|
---|
626 | for(i=0; i<n; ++i){
|
---|
627 | register LM_REAL *jacrow;
|
---|
628 |
|
---|
629 | for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
|
---|
630 | jacTe[l]+=jacrow[l]*tmp;
|
---|
631 | }
|
---|
632 | }
|
---|
633 |
|
---|
634 | /* Compute ||J^T e||_inf and ||p||^2. Note that ||J^T e||_inf
|
---|
635 | * is computed for free (i.e. inactive) variables only.
|
---|
636 | * At a local minimum, if p[i]==ub[i] then g[i]>0;
|
---|
637 | * if p[i]==lb[i] g[i]<0; otherwise g[i]=0
|
---|
638 | */
|
---|
639 | for(i=j=numactive=0, p_L2=jacTe_inf=0.0; i<m; ++i){
|
---|
640 | if(ub && p[i]==ub[i]){ ++numactive; if(jacTe[i]>0.0) ++j; }
|
---|
641 | else if(lb && p[i]==lb[i]){ ++numactive; if(jacTe[i]<0.0) ++j; }
|
---|
642 | else if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;
|
---|
643 |
|
---|
644 | diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
|
---|
645 | p_L2+=p[i]*p[i];
|
---|
646 | }
|
---|
647 | //p_L2=sqrt(p_L2);
|
---|
648 |
|
---|
649 | #if 0
|
---|
650 | if(!(k%100)){
|
---|
651 | printf("Current estimate: ");
|
---|
652 | for(i=0; i<m; ++i)
|
---|
653 | printf("%.9g ", p[i]);
|
---|
654 | printf("-- errors %.9g %0.9g, #active %d [%d]\n", jacTe_inf, p_eL2, numactive, j);
|
---|
655 | }
|
---|
656 | #endif
|
---|
657 |
|
---|
658 | /* check for convergence */
|
---|
659 | if(j==numactive && (jacTe_inf <= eps1)){
|
---|
660 | Dp_L2=0.0; /* no increment for p in this case */
|
---|
661 | stop=1;
|
---|
662 | break;
|
---|
663 | }
|
---|
664 |
|
---|
665 | /* compute initial damping factor */
|
---|
666 | if(k==0){
|
---|
667 | if(!lb && !ub){ /* no bounds */
|
---|
668 | for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
|
---|
669 | if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
|
---|
670 | mu=tau*tmp;
|
---|
671 | }
|
---|
672 | else
|
---|
673 | mu=LM_CNST(0.5)*tau*p_eL2; /* use Kanzow's starting mu */
|
---|
674 | }
|
---|
675 |
|
---|
676 | /* determine increment using a combination of adaptive damping, line search and projected gradient search */
|
---|
677 | while(1){
|
---|
678 | /* augment normal equations */
|
---|
679 | for(i=0; i<m; ++i)
|
---|
680 | jacTjac[i*m+i]+=mu;
|
---|
681 |
|
---|
682 | /* solve augmented equations */
|
---|
683 | #ifdef HAVE_LAPACK
|
---|
684 | /* 7 alternatives are available: LU, Cholesky + Cholesky with PLASMA, LDLt, 2 variants of QR decomposition and SVD.
|
---|
685 | * For matrices with dimensions of at least a few hundreds, the PLASMA implementation of Cholesky is the fastest.
|
---|
686 | * From the serial solvers, Cholesky is the fastest but might occasionally be inapplicable due to numerical round-off;
|
---|
687 | * QR is slower but more robust; SVD is the slowest but most robust; LU is quite robust but
|
---|
688 | * slower than LDLt; LDLt offers a good tradeoff between robustness and speed
|
---|
689 | */
|
---|
690 |
|
---|
691 | issolved=AX_EQ_B_BK(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_BK;
|
---|
692 | //issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
|
---|
693 | //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_CHOL;
|
---|
694 | #ifdef HAVE_PLASMA
|
---|
695 | //issolved=AX_EQ_B_PLASMA_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_PLASMA_CHOL;
|
---|
696 | #endif
|
---|
697 | //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_QR;
|
---|
698 | //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); ++nlss; linsolver=(int (*)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m))AX_EQ_B_QRLS;
|
---|
699 | //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_SVD;
|
---|
700 |
|
---|
701 | #else
|
---|
702 | /* use the LU included with levmar */
|
---|
703 | issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
|
---|
704 | #endif /* HAVE_LAPACK */
|
---|
705 |
|
---|
706 | if(issolved){
|
---|
707 | for(i=0; i<m; ++i)
|
---|
708 | pDp[i]=p[i] + Dp[i];
|
---|
709 |
|
---|
710 | /* compute p's new estimate and ||Dp||^2 */
|
---|
711 | BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
|
---|
712 | for(i=0, Dp_L2=0.0; i<m; ++i){
|
---|
713 | Dp[i]=tmp=pDp[i]-p[i];
|
---|
714 | Dp_L2+=tmp*tmp;
|
---|
715 | }
|
---|
716 | //Dp_L2=sqrt(Dp_L2);
|
---|
717 |
|
---|
718 | if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
|
---|
719 | stop=2;
|
---|
720 | break;
|
---|
721 | }
|
---|
722 |
|
---|
723 | if(Dp_L2>=(p_L2+eps2)/(LM_CNST(EPSILON)*LM_CNST(EPSILON))){ /* almost singular */
|
---|
724 | stop=4;
|
---|
725 | break;
|
---|
726 | }
|
---|
727 |
|
---|
728 | if(!dscl){
|
---|
729 | (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
|
---|
730 | }
|
---|
731 | else{
|
---|
732 | for(i=m; i-->0; ) sp_pDp[i]=pDp[i]*dscl[i];
|
---|
733 | (*func)(sp_pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
|
---|
734 | }
|
---|
735 |
|
---|
736 | /* ### hx=x-hx, pDp_eL2=||hx|| */
|
---|
737 | #if 1
|
---|
738 | pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
|
---|
739 | #else
|
---|
740 | for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
|
---|
741 | hx[i]=tmp=x[i]-hx[i];
|
---|
742 | pDp_eL2+=tmp*tmp;
|
---|
743 | }
|
---|
744 | #endif
|
---|
745 | /* the following test ensures that the computation of pDp_eL2 has not overflowed.
|
---|
746 | * Such an overflow does no harm here, thus it is not signalled as an error
|
---|
747 | */
|
---|
748 | if(!LM_FINITE(pDp_eL2) && !LM_FINITE(VECNORM(hx, n))){
|
---|
749 | stop=7;
|
---|
750 | break;
|
---|
751 | }
|
---|
752 |
|
---|
753 | if(pDp_eL2<=gamma*p_eL2){
|
---|
754 | for(i=0, dL=0.0; i<m; ++i)
|
---|
755 | dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);
|
---|
756 |
|
---|
757 | #if 1
|
---|
758 | if(dL>0.0){
|
---|
759 | dF=p_eL2-pDp_eL2;
|
---|
760 | tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0));
|
---|
761 | tmp=LM_CNST(1.0)-tmp*tmp*tmp;
|
---|
762 | mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) );
|
---|
763 | }
|
---|
764 | else{
|
---|
765 | tmp=LM_CNST(0.1)*pDp_eL2; /* pDp_eL2 is the new p_eL2 */
|
---|
766 | mu=(mu>=tmp)? tmp : mu;
|
---|
767 | }
|
---|
768 | #else
|
---|
769 |
|
---|
770 | tmp=LM_CNST(0.1)*pDp_eL2; /* pDp_eL2 is the new p_eL2 */
|
---|
771 | mu=(mu>=tmp)? tmp : mu;
|
---|
772 | #endif
|
---|
773 |
|
---|
774 | nu=2;
|
---|
775 |
|
---|
776 | for(i=0 ; i<m; ++i) /* update p's estimate */
|
---|
777 | p[i]=pDp[i];
|
---|
778 |
|
---|
779 | for(i=0; i<n; ++i) /* update e and ||e||_2 */
|
---|
780 | e[i]=hx[i];
|
---|
781 | p_eL2=pDp_eL2;
|
---|
782 | ++nLMsteps;
|
---|
783 | gprevtaken=0;
|
---|
784 | break;
|
---|
785 | }
|
---|
786 | /* note that if the LM step is not taken, code falls through to the LM line search below */
|
---|
787 | }
|
---|
788 | else{
|
---|
789 |
|
---|
790 | /* the augmented linear system could not be solved, increase mu */
|
---|
791 |
|
---|
792 | mu*=nu;
|
---|
793 | nu2=nu<<1; // 2*nu;
|
---|
794 | if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
|
---|
795 | stop=5;
|
---|
796 | break;
|
---|
797 | }
|
---|
798 | nu=nu2;
|
---|
799 |
|
---|
800 | for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
|
---|
801 | jacTjac[i*m+i]=diag_jacTjac[i];
|
---|
802 |
|
---|
803 | continue; /* solve again with increased nu */
|
---|
804 | }
|
---|
805 |
|
---|
806 | /* if this point is reached, the LM step did not reduce the error;
|
---|
807 | * see if it is a descent direction
|
---|
808 | */
|
---|
809 |
|
---|
810 | /* negate jacTe (i.e. g) & compute g^T * Dp */
|
---|
811 | for(i=0, jacTeDp=0.0; i<m; ++i){
|
---|
812 | jacTe[i]=-jacTe[i];
|
---|
813 | jacTeDp+=jacTe[i]*Dp[i];
|
---|
814 | }
|
---|
815 |
|
---|
816 | if(jacTeDp<=-rho*pow(Dp_L2, LM_CNST(_POW_)/LM_CNST(2.0))){
|
---|
817 | /* Dp is a descent direction; do a line search along it */
|
---|
818 | #if 1
|
---|
819 | /* use Schnabel's backtracking line search; it requires fewer "func" evaluations */
|
---|
820 | {
|
---|
821 | int mxtake, iretcd;
|
---|
822 | LM_REAL stepmx, steptl=LM_CNST(1e3)*(LM_REAL)sqrt(LM_REAL_EPSILON);
|
---|
823 |
|
---|
824 | tmp=(LM_REAL)sqrt(p_L2); stepmx=LM_CNST(1e3)*( (tmp>=LM_CNST(1.0))? tmp : LM_CNST(1.0) );
|
---|
825 |
|
---|
826 | LNSRCH(m, p, p_eL2, jacTe, Dp, alpha, pDp, &pDp_eL2, func, &fstate,
|
---|
827 | &mxtake, &iretcd, stepmx, steptl, dscl); /* NOTE: LNSRCH() updates hx */
|
---|
828 | if(iretcd!=0 || !LM_FINITE(pDp_eL2)) goto gradproj; /* rather inelegant but effective way to handle LNSRCH() failures... */
|
---|
829 | }
|
---|
830 | #else
|
---|
831 | /* use the simpler (but slower!) line search described by Kanzow et al */
|
---|
832 | for(t=tini; t>tmin; t*=beta){
|
---|
833 | for(i=0; i<m; ++i)
|
---|
834 | pDp[i]=p[i] + t*Dp[i];
|
---|
835 | BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
|
---|
836 |
|
---|
837 | if(!dscl){
|
---|
838 | (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + t*Dp */
|
---|
839 | }
|
---|
840 | else{
|
---|
841 | for(i=m; i-->0; ) sp_pDp[i]=pDp[i]*dscl[i];
|
---|
842 | (*func)(sp_pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + t*Dp */
|
---|
843 | }
|
---|
844 |
|
---|
845 | /* compute ||e(pDp)||_2 */
|
---|
846 | /* ### hx=x-hx, pDp_eL2=||hx|| */
|
---|
847 | #if 1
|
---|
848 | pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
|
---|
849 | #else
|
---|
850 | for(i=0, pDp_eL2=0.0; i<n; ++i){
|
---|
851 | hx[i]=tmp=x[i]-hx[i];
|
---|
852 | pDp_eL2+=tmp*tmp;
|
---|
853 | }
|
---|
854 | #endif /* ||e(pDp)||_2 */
|
---|
855 | if(!LM_FINITE(pDp_eL2)) goto gradproj; /* treat as line search failure */
|
---|
856 |
|
---|
857 | //if(LM_CNST(0.5)*pDp_eL2<=LM_CNST(0.5)*p_eL2 + t*alpha*jacTeDp) break;
|
---|
858 | if(pDp_eL2<=p_eL2 + LM_CNST(2.0)*t*alpha*jacTeDp) break;
|
---|
859 | }
|
---|
860 | #endif /* line search alternatives */
|
---|
861 |
|
---|
862 | ++nLSsteps;
|
---|
863 | gprevtaken=0;
|
---|
864 |
|
---|
865 | /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2.
|
---|
866 | * These values are used below to update their corresponding variables
|
---|
867 | */
|
---|
868 | }
|
---|
869 | else{
|
---|
870 | /* Note that this point can also be reached via a goto when LNSRCH() fails. */
|
---|
871 | gradproj:
|
---|
872 |
|
---|
873 | /* jacTe has been negated above. Being a descent direction, it is next used
|
---|
874 | * to make a projected gradient step
|
---|
875 | */
|
---|
876 |
|
---|
877 | /* compute ||g|| */
|
---|
878 | for(i=0, tmp=0.0; i<m; ++i)
|
---|
879 | tmp+=jacTe[i]*jacTe[i];
|
---|
880 | tmp=(LM_REAL)sqrt(tmp);
|
---|
881 | tmp=LM_CNST(100.0)/(LM_CNST(1.0)+tmp);
|
---|
882 | t0=(tmp<=tini)? tmp : tini; /* guard against poor scaling & large steps; see (3.50) in C.T. Kelley's book */
|
---|
883 |
|
---|
884 | /* if the previous step was along the gradient descent, try to use the t employed in that step */
|
---|
885 | for(t=(gprevtaken)? t : t0; t>tming; t*=beta){
|
---|
886 | for(i=0; i<m; ++i)
|
---|
887 | pDp[i]=p[i] - t*jacTe[i];
|
---|
888 | BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
|
---|
889 | for(i=0, Dp_L2=0.0; i<m; ++i){
|
---|
890 | Dp[i]=tmp=pDp[i]-p[i];
|
---|
891 | Dp_L2+=tmp*tmp;
|
---|
892 | }
|
---|
893 |
|
---|
894 | if(!dscl){
|
---|
895 | (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p - t*g */
|
---|
896 | }
|
---|
897 | else{
|
---|
898 | for(i=m; i-->0; ) sp_pDp[i]=pDp[i]*dscl[i];
|
---|
899 | (*func)(sp_pDp, hx, m, n, adata); ++nfev; /* evaluate function at p - t*g */
|
---|
900 | }
|
---|
901 |
|
---|
902 | /* compute ||e(pDp)||_2 */
|
---|
903 | /* ### hx=x-hx, pDp_eL2=||hx|| */
|
---|
904 | #if 1
|
---|
905 | pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
|
---|
906 | #else
|
---|
907 | for(i=0, pDp_eL2=0.0; i<n; ++i){
|
---|
908 | hx[i]=tmp=x[i]-hx[i];
|
---|
909 | pDp_eL2+=tmp*tmp;
|
---|
910 | }
|
---|
911 | #endif
|
---|
912 | /* the following test ensures that the computation of pDp_eL2 has not overflowed.
|
---|
913 | * Such an overflow does no harm here, thus it is not signalled as an error
|
---|
914 | */
|
---|
915 | if(!LM_FINITE(pDp_eL2) && !LM_FINITE(VECNORM(hx, n))){
|
---|
916 | stop=7;
|
---|
917 | goto breaknested;
|
---|
918 | }
|
---|
919 |
|
---|
920 | /* compute ||g^T * Dp||. Note that if pDp has not been altered by projection
|
---|
921 | * (i.e. BOXPROJECT), jacTeDp=-t*||g||^2
|
---|
922 | */
|
---|
923 | for(i=0, jacTeDp=0.0; i<m; ++i)
|
---|
924 | jacTeDp+=jacTe[i]*Dp[i];
|
---|
925 |
|
---|
926 | if(gprevtaken && pDp_eL2<=p_eL2 + LM_CNST(2.0)*LM_CNST(0.99999)*jacTeDp){ /* starting t too small */
|
---|
927 | t=t0;
|
---|
928 | gprevtaken=0;
|
---|
929 | continue;
|
---|
930 | }
|
---|
931 | //if(LM_CNST(0.5)*pDp_eL2<=LM_CNST(0.5)*p_eL2 + alpha*jacTeDp) terminatePGLS;
|
---|
932 | if(pDp_eL2<=p_eL2 + LM_CNST(2.0)*alpha*jacTeDp) goto terminatePGLS;
|
---|
933 |
|
---|
934 | //if(pDp_eL2<=p_eL2 - LM_CNST(2.0)*alpha/t*Dp_L2) goto terminatePGLS; // sufficient decrease condition proposed by Kelley in (5.13)
|
---|
935 | }
|
---|
936 |
|
---|
937 | /* if this point is reached then the gradient line search has failed */
|
---|
938 | gprevtaken=0;
|
---|
939 | break;
|
---|
940 |
|
---|
941 | terminatePGLS:
|
---|
942 |
|
---|
943 | ++nPGsteps;
|
---|
944 | gprevtaken=1;
|
---|
945 | /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2 */
|
---|
946 | }
|
---|
947 |
|
---|
948 | /* update using computed values */
|
---|
949 |
|
---|
950 | for(i=0, Dp_L2=0.0; i<m; ++i){
|
---|
951 | tmp=pDp[i]-p[i];
|
---|
952 | Dp_L2+=tmp*tmp;
|
---|
953 | }
|
---|
954 | //Dp_L2=sqrt(Dp_L2);
|
---|
955 |
|
---|
956 | if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
|
---|
957 | stop=2;
|
---|
958 | break;
|
---|
959 | }
|
---|
960 |
|
---|
961 | for(i=0 ; i<m; ++i) /* update p's estimate */
|
---|
962 | p[i]=pDp[i];
|
---|
963 |
|
---|
964 | for(i=0; i<n; ++i) /* update e and ||e||_2 */
|
---|
965 | e[i]=hx[i];
|
---|
966 | p_eL2=pDp_eL2;
|
---|
967 | break;
|
---|
968 | } /* inner loop */
|
---|
969 | }
|
---|
970 |
|
---|
971 | breaknested: /* NOTE: this point is also reached via an explicit goto! */
|
---|
972 |
|
---|
973 | if(k>=itmax) stop=3;
|
---|
974 |
|
---|
975 | for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
|
---|
976 | jacTjac[i*m+i]=diag_jacTjac[i];
|
---|
977 |
|
---|
978 | if(info){
|
---|
979 | info[0]=init_p_eL2;
|
---|
980 | info[1]=p_eL2;
|
---|
981 | info[2]=jacTe_inf;
|
---|
982 | info[3]=Dp_L2;
|
---|
983 | for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
|
---|
984 | if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
|
---|
985 | info[4]=mu/tmp;
|
---|
986 | info[5]=(LM_REAL)k;
|
---|
987 | info[6]=(LM_REAL)stop;
|
---|
988 | info[7]=(LM_REAL)nfev;
|
---|
989 | info[8]=(LM_REAL)njev;
|
---|
990 | info[9]=(LM_REAL)nlss;
|
---|
991 | }
|
---|
992 |
|
---|
993 | /* covariance matrix */
|
---|
994 | if(covar){
|
---|
995 | LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
|
---|
996 |
|
---|
997 | if(dscl){ /* correct for the scaling */
|
---|
998 | for(i=m; i-->0; )
|
---|
999 | for(j=m; j-->0; )
|
---|
1000 | covar[i*m+j]*=(dscl[i]*dscl[j]);
|
---|
1001 | }
|
---|
1002 | }
|
---|
1003 |
|
---|
1004 | if(freework) free(work);
|
---|
1005 |
|
---|
1006 | #ifdef LINSOLVERS_RETAIN_MEMORY
|
---|
1007 | if(linsolver) (*linsolver)(NULL, NULL, NULL, 0);
|
---|
1008 | #endif
|
---|
1009 |
|
---|
1010 | #if 0
|
---|
1011 | printf("%d LM steps, %d line search, %d projected gradient\n", nLMsteps, nLSsteps, nPGsteps);
|
---|
1012 | #endif
|
---|
1013 |
|
---|
1014 | if(dscl){
|
---|
1015 | /* scale final point and constraints */
|
---|
1016 | for(i=0; i<m; ++i) p[i]*=dscl[i];
|
---|
1017 | BOXSCALE(lb, ub, dscl, m, 0);
|
---|
1018 | free(sp_pDp);
|
---|
1019 | }
|
---|
1020 |
|
---|
1021 | return (stop!=4 && stop!=7)? k : LM_ERROR;
|
---|
1022 | }
|
---|
1023 |
|
---|
1024 | /* following struct & LMBC_DIF_XXX functions won't be necessary if a true secant
|
---|
1025 | * version of LEVMAR_BC_DIF() is implemented...
|
---|
1026 | */
|
---|
1027 | struct LMBC_DIF_DATA{
|
---|
1028 | int ffdif; // nonzero if forward differencing is used
|
---|
1029 | void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata);
|
---|
1030 | LM_REAL *hx, *hxx;
|
---|
1031 | void *adata;
|
---|
1032 | LM_REAL delta;
|
---|
1033 | };
|
---|
1034 |
|
---|
1035 | static void LMBC_DIF_FUNC(LM_REAL *p, LM_REAL *hx, int m, int n, void *data)
|
---|
1036 | {
|
---|
1037 | struct LMBC_DIF_DATA *dta=(struct LMBC_DIF_DATA *)data;
|
---|
1038 |
|
---|
1039 | /* call user-supplied function passing it the user-supplied data */
|
---|
1040 | (*(dta->func))(p, hx, m, n, dta->adata);
|
---|
1041 | }
|
---|
1042 |
|
---|
1043 | static void LMBC_DIF_JACF(LM_REAL *p, LM_REAL *jac, int m, int n, void *data)
|
---|
1044 | {
|
---|
1045 | struct LMBC_DIF_DATA *dta=(struct LMBC_DIF_DATA *)data;
|
---|
1046 |
|
---|
1047 | if(dta->ffdif){
|
---|
1048 | /* evaluate user-supplied function at p */
|
---|
1049 | (*(dta->func))(p, dta->hx, m, n, dta->adata);
|
---|
1050 | LEVMAR_FDIF_FORW_JAC_APPROX(dta->func, p, dta->hx, dta->hxx, dta->delta, jac, m, n, dta->adata);
|
---|
1051 | }
|
---|
1052 | else
|
---|
1053 | LEVMAR_FDIF_CENT_JAC_APPROX(dta->func, p, dta->hx, dta->hxx, dta->delta, jac, m, n, dta->adata);
|
---|
1054 | }
|
---|
1055 |
|
---|
1056 |
|
---|
1057 | /* No Jacobian version of the LEVMAR_BC_DER() function above: the Jacobian is approximated with
|
---|
1058 | * the aid of finite differences (forward or central, see the comment for the opts argument)
|
---|
1059 | * Ideally, this function should be implemented with a secant approach. Currently, it just calls
|
---|
1060 | * LEVMAR_BC_DER()
|
---|
1061 | */
|
---|
1062 | int LEVMAR_BC_DIF(
|
---|
1063 | void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
|
---|
1064 | LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */
|
---|
1065 | LM_REAL *x, /* I: measurement vector. NULL implies a zero vector */
|
---|
1066 | int m, /* I: parameter vector dimension (i.e. #unknowns) */
|
---|
1067 | int n, /* I: measurement vector dimension */
|
---|
1068 | LM_REAL *lb, /* I: vector of lower bounds. If NULL, no lower bounds apply */
|
---|
1069 | LM_REAL *ub, /* I: vector of upper bounds. If NULL, no upper bounds apply */
|
---|
1070 | LM_REAL *dscl, /* I: diagonal scaling constants. NULL implies no scaling */
|
---|
1071 | int itmax, /* I: maximum number of iterations */
|
---|
1072 | LM_REAL opts[5], /* I: opts[0-4] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the
|
---|
1073 | * scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2 and
|
---|
1074 | * the step used in difference approximation to the Jacobian. Set to NULL for defaults to be used.
|
---|
1075 | * If \delta<0, the Jacobian is approximated with central differences which are more accurate
|
---|
1076 | * (but slower!) compared to the forward differences employed by default.
|
---|
1077 | */
|
---|
1078 | LM_REAL info[LM_INFO_SZ],
|
---|
1079 | /* O: information regarding the minimization. Set to NULL if don't care
|
---|
1080 | * info[0]= ||e||_2 at initial p.
|
---|
1081 | * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
|
---|
1082 | * info[5]= # iterations,
|
---|
1083 | * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
|
---|
1084 | * 2 - stopped by small Dp
|
---|
1085 | * 3 - stopped by itmax
|
---|
1086 | * 4 - singular matrix. Restart from current p with increased mu
|
---|
1087 | * 5 - no further error reduction is possible. Restart with increased mu
|
---|
1088 | * 6 - stopped by small ||e||_2
|
---|
1089 | * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
|
---|
1090 | * info[7]= # function evaluations
|
---|
1091 | * info[8]= # Jacobian evaluations
|
---|
1092 | * info[9]= # linear systems solved, i.e. # attempts for reducing error
|
---|
1093 | */
|
---|
1094 | LM_REAL *work, /* working memory at least LM_BC_DIF_WORKSZ() reals large, allocated if NULL */
|
---|
1095 | LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
|
---|
1096 | void *adata) /* pointer to possibly additional data, passed uninterpreted to func.
|
---|
1097 | * Set to NULL if not needed
|
---|
1098 | */
|
---|
1099 | {
|
---|
1100 | struct LMBC_DIF_DATA data;
|
---|
1101 | int ret;
|
---|
1102 |
|
---|
1103 | //fprintf(stderr, RCAT("\nWarning: current implementation of ", LEVMAR_BC_DIF) "() does not use a secant approach!\n\n");
|
---|
1104 |
|
---|
1105 | data.ffdif=!opts || opts[4]>=0.0;
|
---|
1106 |
|
---|
1107 | data.func=func;
|
---|
1108 | data.hx=(LM_REAL *)malloc(2*n*sizeof(LM_REAL)); /* allocate a big chunk in one step */
|
---|
1109 | if(!data.hx){
|
---|
1110 | fprintf(stderr, LCAT(LEVMAR_BC_DIF, "(): memory allocation request failed\n"));
|
---|
1111 | return LM_ERROR;
|
---|
1112 | }
|
---|
1113 | data.hxx=data.hx+n;
|
---|
1114 | data.adata=adata;
|
---|
1115 | data.delta=(opts)? FABS(opts[4]) : (LM_REAL)LM_DIFF_DELTA;
|
---|
1116 |
|
---|
1117 | ret=LEVMAR_BC_DER(LMBC_DIF_FUNC, LMBC_DIF_JACF, p, x, m, n, lb, ub, dscl, itmax, opts, info, work, covar, (void *)&data);
|
---|
1118 |
|
---|
1119 | if(info){ /* correct the number of function calls */
|
---|
1120 | if(data.ffdif)
|
---|
1121 | info[7]+=info[8]*(m+1); /* each Jacobian evaluation costs m+1 function calls */
|
---|
1122 | else
|
---|
1123 | info[7]+=info[8]*(2*m); /* each Jacobian evaluation costs 2*m function calls */
|
---|
1124 | }
|
---|
1125 |
|
---|
1126 | free(data.hx);
|
---|
1127 |
|
---|
1128 | return ret;
|
---|
1129 | }
|
---|
1130 |
|
---|
1131 | /* undefine everything. THIS MUST REMAIN AT THE END OF THE FILE */
|
---|
1132 | #undef FUNC_STATE
|
---|
1133 | #undef LNSRCH
|
---|
1134 | #undef BOXPROJECT
|
---|
1135 | #undef BOXSCALE
|
---|
1136 | #undef LEVMAR_BOX_CHECK
|
---|
1137 | #undef VECNORM
|
---|
1138 | #undef LEVMAR_BC_DER
|
---|
1139 | #undef LMBC_DIF_DATA
|
---|
1140 | #undef LMBC_DIF_FUNC
|
---|
1141 | #undef LMBC_DIF_JACF
|
---|
1142 | #undef LEVMAR_BC_DIF
|
---|
1143 | #undef LEVMAR_FDIF_FORW_JAC_APPROX
|
---|
1144 | #undef LEVMAR_FDIF_CENT_JAC_APPROX
|
---|
1145 | #undef LEVMAR_COVAR
|
---|
1146 | #undef LEVMAR_TRANS_MAT_MAT_MULT
|
---|
1147 | #undef LEVMAR_L2NRMXMY
|
---|
1148 | #undef AX_EQ_B_LU
|
---|
1149 | #undef AX_EQ_B_CHOL
|
---|
1150 | #undef AX_EQ_B_PLASMA_CHOL
|
---|
1151 | #undef AX_EQ_B_QR
|
---|
1152 | #undef AX_EQ_B_QRLS
|
---|
1153 | #undef AX_EQ_B_SVD
|
---|
1154 | #undef AX_EQ_B_BK
|
---|