source: ThirdParty/levmar/src/misc_core.c@ 8ce1a9

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since 8ce1a9 was 8ce1a9, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '5443b10a06f0c125d0ae0500abb09901fda9666b' as 'ThirdParty/levmar'

  • Property mode set to 100644
File size: 24.0 KB
Line 
1/////////////////////////////////////////////////////////////////////////////////
2//
3// Levenberg - Marquardt non-linear minimization algorithm
4// Copyright (C) 2004-05 Manolis Lourakis (lourakis at ics forth gr)
5// Institute of Computer Science, Foundation for Research & Technology - Hellas
6// Heraklion, Crete, Greece.
7//
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 2 of the License, or
11// (at your option) any later version.
12//
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16// GNU General Public License for more details.
17//
18/////////////////////////////////////////////////////////////////////////////////
19
20#ifndef LM_REAL // not included by misc.c
21#error This file should not be compiled directly!
22#endif
23
24
25/* precision-specific definitions */
26#define LEVMAR_CHKJAC LM_ADD_PREFIX(levmar_chkjac)
27#define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)
28#define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx)
29#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
30#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
31#define LEVMAR_STDDEV LM_ADD_PREFIX(levmar_stddev)
32#define LEVMAR_CORCOEF LM_ADD_PREFIX(levmar_corcoef)
33#define LEVMAR_R2 LM_ADD_PREFIX(levmar_R2)
34#define LEVMAR_BOX_CHECK LM_ADD_PREFIX(levmar_box_check)
35#define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
36
37#ifdef HAVE_LAPACK
38#define LEVMAR_PSEUDOINVERSE LM_ADD_PREFIX(levmar_pseudoinverse)
39static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m);
40
41#ifdef __cplusplus
42extern "C" {
43#endif
44/* BLAS matrix multiplication, LAPACK SVD & Cholesky routines */
45#define GEMM LM_MK_BLAS_NAME(gemm)
46/* C := alpha*op( A )*op( B ) + beta*C */
47extern void GEMM(char *transa, char *transb, int *m, int *n, int *k,
48 LM_REAL *alpha, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, LM_REAL *beta, LM_REAL *c, int *ldc);
49
50#define GESVD LM_MK_LAPACK_NAME(gesvd)
51#define GESDD LM_MK_LAPACK_NAME(gesdd)
52extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu,
53 LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info);
54
55/* lapack 3.0 new SVD routine, faster than xgesvd() */
56extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt,
57 LM_REAL *work, int *lwork, int *iwork, int *info);
58
59/* Cholesky decomposition */
60#define POTF2 LM_MK_LAPACK_NAME(potf2)
61extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info);
62#ifdef __cplusplus
63}
64#endif
65
66#define LEVMAR_CHOLESKY LM_ADD_PREFIX(levmar_chol)
67
68#else /* !HAVE_LAPACK */
69#define LEVMAR_LUINVERSE LM_ADD_PREFIX(levmar_LUinverse_noLapack)
70
71static int LEVMAR_LUINVERSE(LM_REAL *A, LM_REAL *B, int m);
72#endif /* HAVE_LAPACK */
73
74/* blocked multiplication of the transpose of the nxm matrix a with itself (i.e. a^T a)
75 * using a block size of bsize. The product is returned in b.
76 * Since a^T a is symmetric, its computation can be sped up by computing only its
77 * upper triangular part and copying it to the lower part.
78 *
79 * More details on blocking can be found at
80 * http://www-2.cs.cmu.edu/afs/cs/academic/class/15213-f02/www/R07/section_a/Recitation07-SectionA.pdf
81 */
82void LEVMAR_TRANS_MAT_MAT_MULT(LM_REAL *a, LM_REAL *b, int n, int m)
83{
84#ifdef HAVE_LAPACK /* use BLAS matrix multiply */
85
86LM_REAL alpha=LM_CNST(1.0), beta=LM_CNST(0.0);
87 /* Fool BLAS to compute a^T*a avoiding transposing a: a is equivalent to a^T in column major,
88 * therefore BLAS computes a*a^T with a and a*a^T in column major, which is equivalent to
89 * computing a^T*a in row major!
90 */
91 GEMM("N", "T", &m, &m, &n, &alpha, a, &m, a, &m, &beta, b, &m);
92
93#else /* no LAPACK, use blocking-based multiply */
94
95register int i, j, k, jj, kk;
96register LM_REAL sum, *bim, *akm;
97const int bsize=__BLOCKSZ__;
98
99#define __MIN__(x, y) (((x)<=(y))? (x) : (y))
100#define __MAX__(x, y) (((x)>=(y))? (x) : (y))
101
102 /* compute upper triangular part using blocking */
103 for(jj=0; jj<m; jj+=bsize){
104 for(i=0; i<m; ++i){
105 bim=b+i*m;
106 for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j)
107 bim[j]=0.0; //b[i*m+j]=0.0;
108 }
109
110 for(kk=0; kk<n; kk+=bsize){
111 for(i=0; i<m; ++i){
112 bim=b+i*m;
113 for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j){
114 sum=0.0;
115 for(k=kk; k<__MIN__(kk+bsize, n); ++k){
116 akm=a+k*m;
117 sum+=akm[i]*akm[j]; //a[k*m+i]*a[k*m+j];
118 }
119 bim[j]+=sum; //b[i*m+j]+=sum;
120 }
121 }
122 }
123 }
124
125 /* copy upper triangular part to the lower one */
126 for(i=0; i<m; ++i)
127 for(j=0; j<i; ++j)
128 b[i*m+j]=b[j*m+i];
129
130#undef __MIN__
131#undef __MAX__
132
133#endif /* HAVE_LAPACK */
134}
135
136/* forward finite difference approximation to the Jacobian of func */
137void LEVMAR_FDIF_FORW_JAC_APPROX(
138 void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata),
139 /* function to differentiate */
140 LM_REAL *p, /* I: current parameter estimate, mx1 */
141 LM_REAL *hx, /* I: func evaluated at p, i.e. hx=func(p), nx1 */
142 LM_REAL *hxx, /* W/O: work array for evaluating func(p+delta), nx1 */
143 LM_REAL delta, /* increment for computing the Jacobian */
144 LM_REAL *jac, /* O: array for storing approximated Jacobian, nxm */
145 int m,
146 int n,
147 void *adata)
148{
149register int i, j;
150LM_REAL tmp;
151register LM_REAL d;
152
153 for(j=0; j<m; ++j){
154 /* determine d=max(1E-04*|p[j]|, delta), see HZ */
155 d=LM_CNST(1E-04)*p[j]; // force evaluation
156 d=FABS(d);
157 if(d<delta)
158 d=delta;
159
160 tmp=p[j];
161 p[j]+=d;
162
163 (*func)(p, hxx, m, n, adata);
164
165 p[j]=tmp; /* restore */
166
167 d=LM_CNST(1.0)/d; /* invert so that divisions can be carried out faster as multiplications */
168 for(i=0; i<n; ++i){
169 jac[i*m+j]=(hxx[i]-hx[i])*d;
170 }
171 }
172}
173
174/* central finite difference approximation to the Jacobian of func */
175void LEVMAR_FDIF_CENT_JAC_APPROX(
176 void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata),
177 /* function to differentiate */
178 LM_REAL *p, /* I: current parameter estimate, mx1 */
179 LM_REAL *hxm, /* W/O: work array for evaluating func(p-delta), nx1 */
180 LM_REAL *hxp, /* W/O: work array for evaluating func(p+delta), nx1 */
181 LM_REAL delta, /* increment for computing the Jacobian */
182 LM_REAL *jac, /* O: array for storing approximated Jacobian, nxm */
183 int m,
184 int n,
185 void *adata)
186{
187register int i, j;
188LM_REAL tmp;
189register LM_REAL d;
190
191 for(j=0; j<m; ++j){
192 /* determine d=max(1E-04*|p[j]|, delta), see HZ */
193 d=LM_CNST(1E-04)*p[j]; // force evaluation
194 d=FABS(d);
195 if(d<delta)
196 d=delta;
197
198 tmp=p[j];
199 p[j]-=d;
200 (*func)(p, hxm, m, n, adata);
201
202 p[j]=tmp+d;
203 (*func)(p, hxp, m, n, adata);
204 p[j]=tmp; /* restore */
205
206 d=LM_CNST(0.5)/d; /* invert so that divisions can be carried out faster as multiplications */
207 for(i=0; i<n; ++i){
208 jac[i*m+j]=(hxp[i]-hxm[i])*d;
209 }
210 }
211}
212
213/*
214 * Check the Jacobian of a n-valued nonlinear function in m variables
215 * evaluated at a point p, for consistency with the function itself.
216 *
217 * Based on fortran77 subroutine CHKDER by
218 * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
219 * Argonne National Laboratory. MINPACK project. March 1980.
220 *
221 *
222 * func points to a function from R^m --> R^n: Given a p in R^m it yields hx in R^n
223 * jacf points to a function implementing the Jacobian of func, whose correctness
224 * is to be tested. Given a p in R^m, jacf computes into the nxm matrix j the
225 * Jacobian of func at p. Note that row i of j corresponds to the gradient of
226 * the i-th component of func, evaluated at p.
227 * p is an input array of length m containing the point of evaluation.
228 * m is the number of variables
229 * n is the number of functions
230 * adata points to possible additional data and is passed uninterpreted
231 * to func, jacf.
232 * err is an array of length n. On output, err contains measures
233 * of correctness of the respective gradients. if there is
234 * no severe loss of significance, then if err[i] is 1.0 the
235 * i-th gradient is correct, while if err[i] is 0.0 the i-th
236 * gradient is incorrect. For values of err between 0.0 and 1.0,
237 * the categorization is less certain. In general, a value of
238 * err[i] greater than 0.5 indicates that the i-th gradient is
239 * probably correct, while a value of err[i] less than 0.5
240 * indicates that the i-th gradient is probably incorrect.
241 *
242 *
243 * The function does not perform reliably if cancellation or
244 * rounding errors cause a severe loss of significance in the
245 * evaluation of a function. therefore, none of the components
246 * of p should be unusually small (in particular, zero) or any
247 * other value which may cause loss of significance.
248 */
249
250void LEVMAR_CHKJAC(
251 void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata),
252 void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata),
253 LM_REAL *p, int m, int n, void *adata, LM_REAL *err)
254{
255LM_REAL factor=LM_CNST(100.0);
256LM_REAL one=LM_CNST(1.0);
257LM_REAL zero=LM_CNST(0.0);
258LM_REAL *fvec, *fjac, *pp, *fvecp, *buf;
259
260register int i, j;
261LM_REAL eps, epsf, temp, epsmch;
262LM_REAL epslog;
263int fvec_sz=n, fjac_sz=n*m, pp_sz=m, fvecp_sz=n;
264
265 epsmch=LM_REAL_EPSILON;
266 eps=(LM_REAL)sqrt(epsmch);
267
268 buf=(LM_REAL *)malloc((fvec_sz + fjac_sz + pp_sz + fvecp_sz)*sizeof(LM_REAL));
269 if(!buf){
270 fprintf(stderr, LCAT(LEVMAR_CHKJAC, "(): memory allocation request failed\n"));
271 exit(1);
272 }
273 fvec=buf;
274 fjac=fvec+fvec_sz;
275 pp=fjac+fjac_sz;
276 fvecp=pp+pp_sz;
277
278 /* compute fvec=func(p) */
279 (*func)(p, fvec, m, n, adata);
280
281 /* compute the Jacobian at p */
282 (*jacf)(p, fjac, m, n, adata);
283
284 /* compute pp */
285 for(j=0; j<m; ++j){
286 temp=eps*FABS(p[j]);
287 if(temp==zero) temp=eps;
288 pp[j]=p[j]+temp;
289 }
290
291 /* compute fvecp=func(pp) */
292 (*func)(pp, fvecp, m, n, adata);
293
294 epsf=factor*epsmch;
295 epslog=(LM_REAL)log10(eps);
296
297 for(i=0; i<n; ++i)
298 err[i]=zero;
299
300 for(j=0; j<m; ++j){
301 temp=FABS(p[j]);
302 if(temp==zero) temp=one;
303
304 for(i=0; i<n; ++i)
305 err[i]+=temp*fjac[i*m+j];
306 }
307
308 for(i=0; i<n; ++i){
309 temp=one;
310 if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i]))
311 temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i]));
312 err[i]=one;
313 if(temp>epsmch && temp<eps)
314 err[i]=((LM_REAL)log10(temp) - epslog)/epslog;
315 if(temp>=eps) err[i]=zero;
316 }
317
318 free(buf);
319
320 return;
321}
322
323#ifdef HAVE_LAPACK
324/*
325 * This function computes the pseudoinverse of a square matrix A
326 * into B using SVD. A and B can coincide
327 *
328 * The function returns 0 in case of error (e.g. A is singular),
329 * the rank of A if successful
330 *
331 * A, B are mxm
332 *
333 */
334static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m)
335{
336LM_REAL *buf=NULL;
337int buf_sz=0;
338static LM_REAL eps=LM_CNST(-1.0);
339
340register int i, j;
341LM_REAL *a, *u, *s, *vt, *work;
342int a_sz, u_sz, s_sz, vt_sz, tot_sz;
343LM_REAL thresh, one_over_denom;
344int info, rank, worksz, *iwork, iworksz;
345
346 /* calculate required memory size */
347 worksz=5*m; // min worksize for GESVD
348 //worksz=m*(7*m+4); // min worksize for GESDD
349 iworksz=8*m;
350 a_sz=m*m;
351 u_sz=m*m; s_sz=m; vt_sz=m*m;
352
353 tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
354
355 buf_sz=tot_sz;
356 buf=(LM_REAL *)malloc(buf_sz);
357 if(!buf){
358 fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n");
359 return 0; /* error */
360 }
361
362 a=buf;
363 u=a+a_sz;
364 s=u+u_sz;
365 vt=s+s_sz;
366 work=vt+vt_sz;
367 iwork=(int *)(work+worksz);
368
369 /* store A (column major!) into a */
370 for(i=0; i<m; i++)
371 for(j=0; j<m; j++)
372 a[i+j*m]=A[i*m+j];
373
374 /* SVD decomposition of A */
375 GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
376 //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);
377
378 /* error treatment */
379 if(info!=0){
380 if(info<0){
381 fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info);
382 }
383 else{
384 fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info);
385 }
386 free(buf);
387 return 0;
388 }
389
390 if(eps<0.0){
391 LM_REAL aux;
392
393 /* compute machine epsilon */
394 for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
395 ;
396 eps*=LM_CNST(2.0);
397 }
398
399 /* compute the pseudoinverse in B */
400 for(i=0; i<a_sz; i++) B[i]=0.0; /* initialize to zero */
401 for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
402 one_over_denom=LM_CNST(1.0)/s[rank];
403
404 for(j=0; j<m; j++)
405 for(i=0; i<m; i++)
406 B[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
407 }
408
409 free(buf);
410
411 return rank;
412}
413#else // no LAPACK
414
415/*
416 * This function computes the inverse of A in B. A and B can coincide
417 *
418 * The function employs LAPACK-free LU decomposition of A to solve m linear
419 * systems A*B_i=I_i, where B_i and I_i are the i-th columns of B and I.
420 *
421 * A and B are mxm
422 *
423 * The function returns 0 in case of error, 1 if successful
424 *
425 */
426static int LEVMAR_LUINVERSE(LM_REAL *A, LM_REAL *B, int m)
427{
428void *buf=NULL;
429int buf_sz=0;
430
431register int i, j, k, l;
432int *idx, maxi=-1, idx_sz, a_sz, x_sz, work_sz, tot_sz;
433LM_REAL *a, *x, *work, max, sum, tmp;
434
435 /* calculate required memory size */
436 idx_sz=m;
437 a_sz=m*m;
438 x_sz=m;
439 work_sz=m;
440 tot_sz=(a_sz + x_sz + work_sz)*sizeof(LM_REAL) + idx_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
441
442 buf_sz=tot_sz;
443 buf=(void *)malloc(tot_sz);
444 if(!buf){
445 fprintf(stderr, RCAT("memory allocation in ", LEVMAR_LUINVERSE) "() failed!\n");
446 return 0; /* error */
447 }
448
449 a=buf;
450 x=a+a_sz;
451 work=x+x_sz;
452 idx=(int *)(work+work_sz);
453
454 /* avoid destroying A by copying it to a */
455 for(i=0; i<a_sz; ++i) a[i]=A[i];
456
457 /* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
458 for(i=0; i<m; ++i){
459 max=0.0;
460 for(j=0; j<m; ++j)
461 if((tmp=FABS(a[i*m+j]))>max)
462 max=tmp;
463 if(max==0.0){
464 fprintf(stderr, RCAT("Singular matrix A in ", LEVMAR_LUINVERSE) "()!\n");
465 free(buf);
466
467 return 0;
468 }
469 work[i]=LM_CNST(1.0)/max;
470 }
471
472 for(j=0; j<m; ++j){
473 for(i=0; i<j; ++i){
474 sum=a[i*m+j];
475 for(k=0; k<i; ++k)
476 sum-=a[i*m+k]*a[k*m+j];
477 a[i*m+j]=sum;
478 }
479 max=0.0;
480 for(i=j; i<m; ++i){
481 sum=a[i*m+j];
482 for(k=0; k<j; ++k)
483 sum-=a[i*m+k]*a[k*m+j];
484 a[i*m+j]=sum;
485 if((tmp=work[i]*FABS(sum))>=max){
486 max=tmp;
487 maxi=i;
488 }
489 }
490 if(j!=maxi){
491 for(k=0; k<m; ++k){
492 tmp=a[maxi*m+k];
493 a[maxi*m+k]=a[j*m+k];
494 a[j*m+k]=tmp;
495 }
496 work[maxi]=work[j];
497 }
498 idx[j]=maxi;
499 if(a[j*m+j]==0.0)
500 a[j*m+j]=LM_REAL_EPSILON;
501 if(j!=m-1){
502 tmp=LM_CNST(1.0)/(a[j*m+j]);
503 for(i=j+1; i<m; ++i)
504 a[i*m+j]*=tmp;
505 }
506 }
507
508 /* The decomposition has now replaced a. Solve the m linear systems using
509 * forward and back substitution
510 */
511 for(l=0; l<m; ++l){
512 for(i=0; i<m; ++i) x[i]=0.0;
513 x[l]=LM_CNST(1.0);
514
515 for(i=k=0; i<m; ++i){
516 j=idx[i];
517 sum=x[j];
518 x[j]=x[i];
519 if(k!=0)
520 for(j=k-1; j<i; ++j)
521 sum-=a[i*m+j]*x[j];
522 else
523 if(sum!=0.0)
524 k=i+1;
525 x[i]=sum;
526 }
527
528 for(i=m-1; i>=0; --i){
529 sum=x[i];
530 for(j=i+1; j<m; ++j)
531 sum-=a[i*m+j]*x[j];
532 x[i]=sum/a[i*m+i];
533 }
534
535 for(i=0; i<m; ++i)
536 B[i*m+l]=x[i];
537 }
538
539 free(buf);
540
541 return 1;
542}
543#endif /* HAVE_LAPACK */
544
545/*
546 * This function computes in C the covariance matrix corresponding to a least
547 * squares fit. JtJ is the approximate Hessian at the solution (i.e. J^T*J, where
548 * J is the Jacobian at the solution), sumsq is the sum of squared residuals
549 * (i.e. goodnes of fit) at the solution, m is the number of parameters (variables)
550 * and n the number of observations. JtJ can coincide with C.
551 *
552 * if JtJ is of full rank, C is computed as sumsq/(n-m)*(JtJ)^-1
553 * otherwise and if LAPACK is available, C=sumsq/(n-r)*(JtJ)^+
554 * where r is JtJ's rank and ^+ denotes the pseudoinverse
555 * The diagonal of C is made up from the estimates of the variances
556 * of the estimated regression coefficients.
557 * See the documentation of routine E04YCF from the NAG fortran lib
558 *
559 * The function returns the rank of JtJ if successful, 0 on error
560 *
561 * A and C are mxm
562 *
563 */
564int LEVMAR_COVAR(LM_REAL *JtJ, LM_REAL *C, LM_REAL sumsq, int m, int n)
565{
566register int i;
567int rnk;
568LM_REAL fact;
569
570#ifdef HAVE_LAPACK
571 rnk=LEVMAR_PSEUDOINVERSE(JtJ, C, m);
572 if(!rnk) return 0;
573#else
574#ifdef _MSC_VER
575#pragma message("LAPACK not available, LU will be used for matrix inversion when computing the covariance; this might be unstable at times")
576#else
577#warning LAPACK not available, LU will be used for matrix inversion when computing the covariance; this might be unstable at times
578#endif // _MSC_VER
579
580 rnk=LEVMAR_LUINVERSE(JtJ, C, m);
581 if(!rnk) return 0;
582
583 rnk=m; /* assume full rank */
584#endif /* HAVE_LAPACK */
585
586 fact=sumsq/(LM_REAL)(n-rnk);
587 for(i=0; i<m*m; ++i)
588 C[i]*=fact;
589
590 return rnk;
591}
592
593/* standard deviation of the best-fit parameter i.
594 * covar is the mxm covariance matrix of the best-fit parameters (see also LEVMAR_COVAR()).
595 *
596 * The standard deviation is computed as \sigma_{i} = \sqrt{C_{ii}}
597 */
598LM_REAL LEVMAR_STDDEV(LM_REAL *covar, int m, int i)
599{
600 return (LM_REAL)sqrt(covar[i*m+i]);
601}
602
603/* Pearson's correlation coefficient of the best-fit parameters i and j.
604 * covar is the mxm covariance matrix of the best-fit parameters (see also LEVMAR_COVAR()).
605 *
606 * The coefficient is computed as \rho_{ij} = C_{ij} / sqrt(C_{ii} C_{jj})
607 */
608LM_REAL LEVMAR_CORCOEF(LM_REAL *covar, int m, int i, int j)
609{
610 return (LM_REAL)(covar[i*m+j]/sqrt(covar[i*m+i]*covar[j*m+j]));
611}
612
613/* coefficient of determination.
614 * see http://en.wikipedia.org/wiki/Coefficient_of_determination
615 */
616LM_REAL LEVMAR_R2(void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata),
617 LM_REAL *p, LM_REAL *x, int m, int n, void *adata)
618{
619register int i;
620register LM_REAL tmp;
621LM_REAL SSerr, // sum of squared errors, i.e. residual sum of squares \sum_i (x_i-hx_i)^2
622 SStot, // \sum_i (x_i-xavg)^2
623 *hx, xavg;
624
625
626 if((hx=(LM_REAL *)malloc(n*sizeof(LM_REAL)))==NULL){
627 fprintf(stderr, RCAT("memory allocation request failed in ", LEVMAR_R2) "()\n");
628 exit(1);
629 }
630
631 /* hx=f(p) */
632 (*func)(p, hx, m, n, adata);
633
634 for(i=n, tmp=0.0; i-->0; )
635 tmp+=x[i];
636 xavg=tmp/(LM_REAL)n;
637
638 if(x)
639 for(i=n, SSerr=SStot=0.0; i-->0; ){
640 tmp=x[i]-hx[i];
641 SSerr+=tmp*tmp;
642
643 tmp=x[i]-xavg;
644 SStot+=tmp*tmp;
645 }
646 else /* x==0 */
647 for(i=n, SSerr=SStot=0.0; i-->0; ){
648 tmp=-hx[i];
649 SSerr+=tmp*tmp;
650
651 tmp=-xavg;
652 SStot+=tmp*tmp;
653 }
654
655 free(hx);
656
657 return LM_CNST(1.0) - SSerr/SStot;
658}
659
660/* check box constraints for consistency */
661int LEVMAR_BOX_CHECK(LM_REAL *lb, LM_REAL *ub, int m)
662{
663register int i;
664
665 if(!lb || !ub) return 1;
666
667 for(i=0; i<m; ++i)
668 if(lb[i]>ub[i]) return 0;
669
670 return 1;
671}
672
673#ifdef HAVE_LAPACK
674
675/* compute the Cholesky decomposition of C in W, s.t. C=W^t W and W is upper triangular */
676int LEVMAR_CHOLESKY(LM_REAL *C, LM_REAL *W, int m)
677{
678register int i, j;
679int info;
680
681 /* copy weights array C to W so that LAPACK won't destroy it;
682 * C is assumed symmetric, hence no transposition is needed
683 */
684 for(i=0, j=m*m; i<j; ++i)
685 W[i]=C[i];
686
687 /* Cholesky decomposition */
688 POTF2("L", (int *)&m, W, (int *)&m, (int *)&info);
689 /* error treatment */
690 if(info!=0){
691 if(info<0){
692 fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in %s\n", -info, LCAT(LEVMAR_CHOLESKY, "()"));
693 }
694 else{
695 fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s()\n", info,
696 RCAT("and the Cholesky factorization could not be completed in ", LEVMAR_CHOLESKY));
697 }
698 return LM_ERROR;
699 }
700
701 /* the decomposition is in the lower part of W (in column-major order!).
702 * zeroing the upper part makes it lower triangular which is equivalent to
703 * upper triangular in row-major order
704 */
705 for(i=0; i<m; i++)
706 for(j=i+1; j<m; j++)
707 W[i+j*m]=0.0;
708
709 return 0;
710}
711#endif /* HAVE_LAPACK */
712
713
714/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e.
715 * e can coincide with either x or y; x can be NULL, in which case it is assumed
716 * to be equal to the zero vector.
717 * Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline
718 * stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html
719 */
720
721LM_REAL LEVMAR_L2NRMXMY(LM_REAL *e, LM_REAL *x, LM_REAL *y, int n)
722{
723const int blocksize=8, bpwr=3; /* 8=2^3 */
724register int i;
725int j1, j2, j3, j4, j5, j6, j7;
726int blockn;
727register LM_REAL sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0;
728
729 /* n may not be divisible by blocksize,
730 * go as near as we can first, then tidy up.
731 */
732 blockn = (n>>bpwr)<<bpwr; /* (n / blocksize) * blocksize; */
733
734 /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */
735 if(x){
736 for(i=blockn-1; i>0; i-=blocksize){
737 e[i ]=x[i ]-y[i ]; sum0+=e[i ]*e[i ];
738 j1=i-1; e[j1]=x[j1]-y[j1]; sum1+=e[j1]*e[j1];
739 j2=i-2; e[j2]=x[j2]-y[j2]; sum2+=e[j2]*e[j2];
740 j3=i-3; e[j3]=x[j3]-y[j3]; sum3+=e[j3]*e[j3];
741 j4=i-4; e[j4]=x[j4]-y[j4]; sum0+=e[j4]*e[j4];
742 j5=i-5; e[j5]=x[j5]-y[j5]; sum1+=e[j5]*e[j5];
743 j6=i-6; e[j6]=x[j6]-y[j6]; sum2+=e[j6]*e[j6];
744 j7=i-7; e[j7]=x[j7]-y[j7]; sum3+=e[j7]*e[j7];
745 }
746
747 /*
748 * There may be some left to do.
749 * This could be done as a simple for() loop,
750 * but a switch is faster (and more interesting)
751 */
752
753 i=blockn;
754 if(i<n){
755 /* Jump into the case at the place that will allow
756 * us to finish off the appropriate number of items.
757 */
758
759 switch(n - i){
760 case 7 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
761 case 6 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
762 case 5 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; ++i;
763 case 4 : e[i]=x[i]-y[i]; sum3+=e[i]*e[i]; ++i;
764 case 3 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
765 case 2 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
766 case 1 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; //++i;
767 }
768 }
769 }
770 else{ /* x==0 */
771 for(i=blockn-1; i>0; i-=blocksize){
772 e[i ]=-y[i ]; sum0+=e[i ]*e[i ];
773 j1=i-1; e[j1]=-y[j1]; sum1+=e[j1]*e[j1];
774 j2=i-2; e[j2]=-y[j2]; sum2+=e[j2]*e[j2];
775 j3=i-3; e[j3]=-y[j3]; sum3+=e[j3]*e[j3];
776 j4=i-4; e[j4]=-y[j4]; sum0+=e[j4]*e[j4];
777 j5=i-5; e[j5]=-y[j5]; sum1+=e[j5]*e[j5];
778 j6=i-6; e[j6]=-y[j6]; sum2+=e[j6]*e[j6];
779 j7=i-7; e[j7]=-y[j7]; sum3+=e[j7]*e[j7];
780 }
781
782 /*
783 * There may be some left to do.
784 * This could be done as a simple for() loop,
785 * but a switch is faster (and more interesting)
786 */
787
788 i=blockn;
789 if(i<n){
790 /* Jump into the case at the place that will allow
791 * us to finish off the appropriate number of items.
792 */
793
794 switch(n - i){
795 case 7 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
796 case 6 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
797 case 5 : e[i]=-y[i]; sum2+=e[i]*e[i]; ++i;
798 case 4 : e[i]=-y[i]; sum3+=e[i]*e[i]; ++i;
799 case 3 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
800 case 2 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
801 case 1 : e[i]=-y[i]; sum2+=e[i]*e[i]; //++i;
802 }
803 }
804 }
805
806 return sum0+sum1+sum2+sum3;
807}
808
809/* undefine everything. THIS MUST REMAIN AT THE END OF THE FILE */
810#undef POTF2
811#undef GESVD
812#undef GESDD
813#undef GEMM
814#undef LEVMAR_PSEUDOINVERSE
815#undef LEVMAR_LUINVERSE
816#undef LEVMAR_BOX_CHECK
817#undef LEVMAR_CHOLESKY
818#undef LEVMAR_COVAR
819#undef LEVMAR_STDDEV
820#undef LEVMAR_CORCOEF
821#undef LEVMAR_R2
822#undef LEVMAR_CHKJAC
823#undef LEVMAR_FDIF_FORW_JAC_APPROX
824#undef LEVMAR_FDIF_CENT_JAC_APPROX
825#undef LEVMAR_TRANS_MAT_MAT_MULT
826#undef LEVMAR_L2NRMXMY
Note: See TracBrowser for help on using the repository browser.