Context Navigation

source: ThirdParty/levmar/src/misc_core.c@ e9f307

Visit:

Candidate_v1.7.0 stable

Last change on this file since e9f307 was 8ce1a9, checked in by Frederik Heber <heber@…>, 9 years ago
Merge commit '5443b10a06f0c125d0ae0500abb09901fda9666b' as 'ThirdParty/levmar'
Property mode set to `100644`
File size: 24.0 KB

Rev	Line
[5443b1]	1	/////////////////////////////////////////////////////////////////////////////////
	2	//
	3	// Levenberg - Marquardt non-linear minimization algorithm
	4	// Copyright (C) 2004-05 Manolis Lourakis (lourakis at ics forth gr)
	5	// Institute of Computer Science, Foundation for Research & Technology - Hellas
	6	// Heraklion, Crete, Greece.
	7	//
	8	// This program is free software; you can redistribute it and/or modify
	9	// it under the terms of the GNU General Public License as published by
	10	// the Free Software Foundation; either version 2 of the License, or
	11	// (at your option) any later version.
	12	//
	13	// This program is distributed in the hope that it will be useful,
	14	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	// GNU General Public License for more details.
	17	//
	18	/////////////////////////////////////////////////////////////////////////////////
	19
	20	#ifndef LM_REAL // not included by misc.c
	21	#error This file should not be compiled directly!
	22	#endif
	23
	24
	25	/* precision-specific definitions */
	26	#define LEVMAR_CHKJAC LM_ADD_PREFIX(levmar_chkjac)
	27	#define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)
	28	#define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx)
	29	#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
	30	#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
	31	#define LEVMAR_STDDEV LM_ADD_PREFIX(levmar_stddev)
	32	#define LEVMAR_CORCOEF LM_ADD_PREFIX(levmar_corcoef)
	33	#define LEVMAR_R2 LM_ADD_PREFIX(levmar_R2)
	34	#define LEVMAR_BOX_CHECK LM_ADD_PREFIX(levmar_box_check)
	35	#define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
	36
	37	#ifdef HAVE_LAPACK
	38	#define LEVMAR_PSEUDOINVERSE LM_ADD_PREFIX(levmar_pseudoinverse)
	39	static int LEVMAR_PSEUDOINVERSE(LM_REAL A, LM_REAL B, int m);
	40
	41	#ifdef __cplusplus
	42	extern "C" {
	43	#endif
	44	/* BLAS matrix multiplication, LAPACK SVD & Cholesky routines */
	45	#define GEMM LM_MK_BLAS_NAME(gemm)
	46	/* C := alphaop( A )op( B ) + betaC /
	47	extern void GEMM(char transa, char transb, int m, int n, int *k,
	48	LM_REAL alpha, LM_REAL a, int lda, LM_REAL b, int ldb, LM_REAL beta, LM_REAL c, int ldc);
	49
	50	#define GESVD LM_MK_LAPACK_NAME(gesvd)
	51	#define GESDD LM_MK_LAPACK_NAME(gesdd)
	52	extern int GESVD(char jobu, char jobvt, int m, int n, LM_REAL a, int lda, LM_REAL s, LM_REAL u, int *ldu,
	53	LM_REAL vt, int ldvt, LM_REAL work, int lwork, int *info);
	54
	55	/* lapack 3.0 new SVD routine, faster than xgesvd() */
	56	extern int GESDD(char jobz, int m, int n, LM_REAL a, int lda, LM_REAL s, LM_REAL u, int ldu, LM_REAL vt, int ldvt,
	57	LM_REAL work, int lwork, int iwork, int info);
	58
	59	/* Cholesky decomposition */
	60	#define POTF2 LM_MK_LAPACK_NAME(potf2)
	61	extern int POTF2(char uplo, int n, LM_REAL a, int lda, int *info);
	62	#ifdef __cplusplus
	63	}
	64	#endif
	65
	66	#define LEVMAR_CHOLESKY LM_ADD_PREFIX(levmar_chol)
	67
	68	#else /* !HAVE_LAPACK */
	69	#define LEVMAR_LUINVERSE LM_ADD_PREFIX(levmar_LUinverse_noLapack)
	70
	71	static int LEVMAR_LUINVERSE(LM_REAL A, LM_REAL B, int m);
	72	#endif /* HAVE_LAPACK */
	73
	74	/* blocked multiplication of the transpose of the nxm matrix a with itself (i.e. a^T a)
	75	* using a block size of bsize. The product is returned in b.
	76	* Since a^T a is symmetric, its computation can be sped up by computing only its
	77	* upper triangular part and copying it to the lower part.
	78	*
	79	* More details on blocking can be found at
	80	* http://www-2.cs.cmu.edu/afs/cs/academic/class/15213-f02/www/R07/section_a/Recitation07-SectionA.pdf
	81	*/
	82	void LEVMAR_TRANS_MAT_MAT_MULT(LM_REAL a, LM_REAL b, int n, int m)
	83	{
	84	#ifdef HAVE_LAPACK /* use BLAS matrix multiply */
	85
	86	LM_REAL alpha=LM_CNST(1.0), beta=LM_CNST(0.0);
	87	/* Fool BLAS to compute a^T*a avoiding transposing a: a is equivalent to a^T in column major,
	88	* therefore BLAS computes aa^T with a and aa^T in column major, which is equivalent to
	89	* computing a^T*a in row major!
	90	*/
	91	GEMM("N", "T", &m, &m, &n, &alpha, a, &m, a, &m, &beta, b, &m);
	92
	93	#else /* no LAPACK, use blocking-based multiply */
	94
	95	register int i, j, k, jj, kk;
	96	register LM_REAL sum, bim, akm;
	97	const int bsize=__BLOCKSZ__;
	98
	99	#define __MIN__(x, y) (((x)<=(y))? (x) : (y))
	100	#define __MAX__(x, y) (((x)>=(y))? (x) : (y))
	101
	102	/* compute upper triangular part using blocking */
	103	for(jj=0; jj<m; jj+=bsize){
	104	for(i=0; i<m; ++i){
	105	bim=b+i*m;
	106	for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j)
	107	bim[j]=0.0; //b[i*m+j]=0.0;
	108	}
	109
	110	for(kk=0; kk<n; kk+=bsize){
	111	for(i=0; i<m; ++i){
	112	bim=b+i*m;
	113	for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j){
	114	sum=0.0;
	115	for(k=kk; k<__MIN__(kk+bsize, n); ++k){
	116	akm=a+k*m;
	117	sum+=akm[i]akm[j]; //a[km+i]a[km+j];
	118	}
	119	bim[j]+=sum; //b[i*m+j]+=sum;
	120	}
	121	}
	122	}
	123	}
	124
	125	/* copy upper triangular part to the lower one */
	126	for(i=0; i<m; ++i)
	127	for(j=0; j<i; ++j)
	128	b[im+j]=b[jm+i];
	129
	130	#undef __MIN__
	131	#undef __MAX__
	132
	133	#endif /* HAVE_LAPACK */
	134	}
	135
	136	/* forward finite difference approximation to the Jacobian of func */
	137	void LEVMAR_FDIF_FORW_JAC_APPROX(
	138	void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
	139	/* function to differentiate */
	140	LM_REAL p, / I: current parameter estimate, mx1 */
	141	LM_REAL hx, / I: func evaluated at p, i.e. hx=func(p), nx1 */
	142	LM_REAL hxx, / W/O: work array for evaluating func(p+delta), nx1 */
	143	LM_REAL delta, /* increment for computing the Jacobian */
	144	LM_REAL jac, / O: array for storing approximated Jacobian, nxm */
	145	int m,
	146	int n,
	147	void *adata)
	148	{
	149	register int i, j;
	150	LM_REAL tmp;
	151	register LM_REAL d;
	152
	153	for(j=0; j<m; ++j){
	154	/* determine d=max(1E-04\|p[j]\|, delta), see HZ /
	155	d=LM_CNST(1E-04)*p[j]; // force evaluation
	156	d=FABS(d);
	157	if(d<delta)
	158	d=delta;
	159
	160	tmp=p[j];
	161	p[j]+=d;
	162
	163	(*func)(p, hxx, m, n, adata);
	164
	165	p[j]=tmp; /* restore */
	166
	167	d=LM_CNST(1.0)/d; /* invert so that divisions can be carried out faster as multiplications */
	168	for(i=0; i<n; ++i){
	169	jac[im+j]=(hxx[i]-hx[i])d;
	170	}
	171	}
	172	}
	173
	174	/* central finite difference approximation to the Jacobian of func */
	175	void LEVMAR_FDIF_CENT_JAC_APPROX(
	176	void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
	177	/* function to differentiate */
	178	LM_REAL p, / I: current parameter estimate, mx1 */
	179	LM_REAL hxm, / W/O: work array for evaluating func(p-delta), nx1 */
	180	LM_REAL hxp, / W/O: work array for evaluating func(p+delta), nx1 */
	181	LM_REAL delta, /* increment for computing the Jacobian */
	182	LM_REAL jac, / O: array for storing approximated Jacobian, nxm */
	183	int m,
	184	int n,
	185	void *adata)
	186	{
	187	register int i, j;
	188	LM_REAL tmp;
	189	register LM_REAL d;
	190
	191	for(j=0; j<m; ++j){
	192	/* determine d=max(1E-04\|p[j]\|, delta), see HZ /
	193	d=LM_CNST(1E-04)*p[j]; // force evaluation
	194	d=FABS(d);
	195	if(d<delta)
	196	d=delta;
	197
	198	tmp=p[j];
	199	p[j]-=d;
	200	(*func)(p, hxm, m, n, adata);
	201
	202	p[j]=tmp+d;
	203	(*func)(p, hxp, m, n, adata);
	204	p[j]=tmp; /* restore */
	205
	206	d=LM_CNST(0.5)/d; /* invert so that divisions can be carried out faster as multiplications */
	207	for(i=0; i<n; ++i){
	208	jac[im+j]=(hxp[i]-hxm[i])d;
	209	}
	210	}
	211	}
	212
	213	/*
	214	* Check the Jacobian of a n-valued nonlinear function in m variables
	215	* evaluated at a point p, for consistency with the function itself.
	216	*
	217	* Based on fortran77 subroutine CHKDER by
	218	* Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
	219	* Argonne National Laboratory. MINPACK project. March 1980.
	220	*
	221	*
	222	* func points to a function from R^m --> R^n: Given a p in R^m it yields hx in R^n
	223	* jacf points to a function implementing the Jacobian of func, whose correctness
	224	* is to be tested. Given a p in R^m, jacf computes into the nxm matrix j the
	225	* Jacobian of func at p. Note that row i of j corresponds to the gradient of
	226	* the i-th component of func, evaluated at p.
	227	* p is an input array of length m containing the point of evaluation.
	228	* m is the number of variables
	229	* n is the number of functions
	230	* adata points to possible additional data and is passed uninterpreted
	231	* to func, jacf.
	232	* err is an array of length n. On output, err contains measures
	233	* of correctness of the respective gradients. if there is
	234	* no severe loss of significance, then if err[i] is 1.0 the
	235	* i-th gradient is correct, while if err[i] is 0.0 the i-th
	236	* gradient is incorrect. For values of err between 0.0 and 1.0,
	237	* the categorization is less certain. In general, a value of
	238	* err[i] greater than 0.5 indicates that the i-th gradient is
	239	* probably correct, while a value of err[i] less than 0.5
	240	* indicates that the i-th gradient is probably incorrect.
	241	*
	242	*
	243	* The function does not perform reliably if cancellation or
	244	* rounding errors cause a severe loss of significance in the
	245	* evaluation of a function. therefore, none of the components
	246	* of p should be unusually small (in particular, zero) or any
	247	* other value which may cause loss of significance.
	248	*/
	249
	250	void LEVMAR_CHKJAC(
	251	void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
	252	void (jacf)(LM_REAL p, LM_REAL j, int m, int n, void adata),
	253	LM_REAL p, int m, int n, void adata, LM_REAL *err)
	254	{
	255	LM_REAL factor=LM_CNST(100.0);
	256	LM_REAL one=LM_CNST(1.0);
	257	LM_REAL zero=LM_CNST(0.0);
	258	LM_REAL fvec, fjac, pp, fvecp, *buf;
	259
	260	register int i, j;
	261	LM_REAL eps, epsf, temp, epsmch;
	262	LM_REAL epslog;
	263	int fvec_sz=n, fjac_sz=n*m, pp_sz=m, fvecp_sz=n;
	264
	265	epsmch=LM_REAL_EPSILON;
	266	eps=(LM_REAL)sqrt(epsmch);
	267
	268	buf=(LM_REAL )malloc((fvec_sz + fjac_sz + pp_sz + fvecp_sz)sizeof(LM_REAL));
	269	if(!buf){
	270	fprintf(stderr, LCAT(LEVMAR_CHKJAC, "(): memory allocation request failed\n"));
	271	exit(1);
	272	}
	273	fvec=buf;
	274	fjac=fvec+fvec_sz;
	275	pp=fjac+fjac_sz;
	276	fvecp=pp+pp_sz;
	277
	278	/* compute fvec=func(p) */
	279	(*func)(p, fvec, m, n, adata);
	280
	281	/* compute the Jacobian at p */
	282	(*jacf)(p, fjac, m, n, adata);
	283
	284	/* compute pp */
	285	for(j=0; j<m; ++j){
	286	temp=eps*FABS(p[j]);
	287	if(temp==zero) temp=eps;
	288	pp[j]=p[j]+temp;
	289	}
	290
	291	/* compute fvecp=func(pp) */
	292	(*func)(pp, fvecp, m, n, adata);
	293
	294	epsf=factor*epsmch;
	295	epslog=(LM_REAL)log10(eps);
	296
	297	for(i=0; i<n; ++i)
	298	err[i]=zero;
	299
	300	for(j=0; j<m; ++j){
	301	temp=FABS(p[j]);
	302	if(temp==zero) temp=one;
	303
	304	for(i=0; i<n; ++i)
	305	err[i]+=tempfjac[im+j];
	306	}
	307
	308	for(i=0; i<n; ++i){
	309	temp=one;
	310	if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i]))
	311	temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i]));
	312	err[i]=one;
	313	if(temp>epsmch && temp<eps)
	314	err[i]=((LM_REAL)log10(temp) - epslog)/epslog;
	315	if(temp>=eps) err[i]=zero;
	316	}
	317
	318	free(buf);
	319
	320	return;
	321	}
	322
	323	#ifdef HAVE_LAPACK
	324	/*
	325	* This function computes the pseudoinverse of a square matrix A
	326	* into B using SVD. A and B can coincide
	327	*
	328	* The function returns 0 in case of error (e.g. A is singular),
	329	* the rank of A if successful
	330	*
	331	* A, B are mxm
	332	*
	333	*/
	334	static int LEVMAR_PSEUDOINVERSE(LM_REAL A, LM_REAL B, int m)
	335	{
	336	LM_REAL *buf=NULL;
	337	int buf_sz=0;
	338	static LM_REAL eps=LM_CNST(-1.0);
	339
	340	register int i, j;
	341	LM_REAL a, u, s, vt, *work;
	342	int a_sz, u_sz, s_sz, vt_sz, tot_sz;
	343	LM_REAL thresh, one_over_denom;
	344	int info, rank, worksz, *iwork, iworksz;
	345
	346	/* calculate required memory size */
	347	worksz=5*m; // min worksize for GESVD
	348	//worksz=m(7m+4); // min worksize for GESDD
	349	iworksz=8*m;
	350	a_sz=m*m;
	351	u_sz=mm; s_sz=m; vt_sz=mm;
	352
	353	tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)sizeof(LM_REAL) + iworkszsizeof(int); /* should be arranged in that order for proper doubles alignment */
	354
	355	buf_sz=tot_sz;
	356	buf=(LM_REAL *)malloc(buf_sz);
	357	if(!buf){
	358	fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n");
	359	return 0; /* error */
	360	}
	361
	362	a=buf;
	363	u=a+a_sz;
	364	s=u+u_sz;
	365	vt=s+s_sz;
	366	work=vt+vt_sz;
	367	iwork=(int *)(work+worksz);
	368
	369	/* store A (column major!) into a */
	370	for(i=0; i<m; i++)
	371	for(j=0; j<m; j++)
	372	a[i+jm]=A[im+j];
	373
	374	/* SVD decomposition of A */
	375	GESVD("A", "A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, &info);
	376	//GESDD("A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, iwork, &info);
	377
	378	/* error treatment */
	379	if(info!=0){
	380	if(info<0){
	381	fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info);
	382	}
	383	else{
	384	fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info);
	385	}
	386	free(buf);
	387	return 0;
	388	}
	389
	390	if(eps<0.0){
	391	LM_REAL aux;
	392
	393	/* compute machine epsilon */
	394	for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
	395	;
	396	eps*=LM_CNST(2.0);
	397	}
	398
	399	/* compute the pseudoinverse in B */
	400	for(i=0; i<a_sz; i++) B[i]=0.0; /* initialize to zero */
	401	for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
	402	one_over_denom=LM_CNST(1.0)/s[rank];
	403
	404	for(j=0; j<m; j++)
	405	for(i=0; i<m; i++)
	406	B[im+j]+=vt[rank+im]u[j+rankm]*one_over_denom;
	407	}
	408
	409	free(buf);
	410
	411	return rank;
	412	}
	413	#else // no LAPACK
	414
	415	/*
	416	* This function computes the inverse of A in B. A and B can coincide
	417	*
	418	* The function employs LAPACK-free LU decomposition of A to solve m linear
	419	* systems A*B_i=I_i, where B_i and I_i are the i-th columns of B and I.
	420	*
	421	* A and B are mxm
	422	*
	423	* The function returns 0 in case of error, 1 if successful
	424	*
	425	*/
	426	static int LEVMAR_LUINVERSE(LM_REAL A, LM_REAL B, int m)
	427	{
	428	void *buf=NULL;
	429	int buf_sz=0;
	430
	431	register int i, j, k, l;
	432	int *idx, maxi=-1, idx_sz, a_sz, x_sz, work_sz, tot_sz;
	433	LM_REAL a, x, *work, max, sum, tmp;
	434
	435	/* calculate required memory size */
	436	idx_sz=m;
	437	a_sz=m*m;
	438	x_sz=m;
	439	work_sz=m;
	440	tot_sz=(a_sz + x_sz + work_sz)sizeof(LM_REAL) + idx_szsizeof(int); /* should be arranged in that order for proper doubles alignment */
	441
	442	buf_sz=tot_sz;
	443	buf=(void *)malloc(tot_sz);
	444	if(!buf){
	445	fprintf(stderr, RCAT("memory allocation in ", LEVMAR_LUINVERSE) "() failed!\n");
	446	return 0; /* error */
	447	}
	448
	449	a=buf;
	450	x=a+a_sz;
	451	work=x+x_sz;
	452	idx=(int *)(work+work_sz);
	453
	454	/* avoid destroying A by copying it to a */
	455	for(i=0; i<a_sz; ++i) a[i]=A[i];
	456
	457	/* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
	458	for(i=0; i<m; ++i){
	459	max=0.0;
	460	for(j=0; j<m; ++j)
	461	if((tmp=FABS(a[i*m+j]))>max)
	462	max=tmp;
	463	if(max==0.0){
	464	fprintf(stderr, RCAT("Singular matrix A in ", LEVMAR_LUINVERSE) "()!\n");
	465	free(buf);
	466
	467	return 0;
	468	}
	469	work[i]=LM_CNST(1.0)/max;
	470	}
	471
	472	for(j=0; j<m; ++j){
	473	for(i=0; i<j; ++i){
	474	sum=a[i*m+j];
	475	for(k=0; k<i; ++k)
	476	sum-=a[im+k]a[k*m+j];
	477	a[i*m+j]=sum;
	478	}
	479	max=0.0;
	480	for(i=j; i<m; ++i){
	481	sum=a[i*m+j];
	482	for(k=0; k<j; ++k)
	483	sum-=a[im+k]a[k*m+j];
	484	a[i*m+j]=sum;
	485	if((tmp=work[i]*FABS(sum))>=max){
	486	max=tmp;
	487	maxi=i;
	488	}
	489	}
	490	if(j!=maxi){
	491	for(k=0; k<m; ++k){
	492	tmp=a[maxi*m+k];
	493	a[maxim+k]=a[jm+k];
	494	a[j*m+k]=tmp;
	495	}
	496	work[maxi]=work[j];
	497	}
	498	idx[j]=maxi;
	499	if(a[j*m+j]==0.0)
	500	a[j*m+j]=LM_REAL_EPSILON;
	501	if(j!=m-1){
	502	tmp=LM_CNST(1.0)/(a[j*m+j]);
	503	for(i=j+1; i<m; ++i)
	504	a[im+j]=tmp;
	505	}
	506	}
	507
	508	/* The decomposition has now replaced a. Solve the m linear systems using
	509	* forward and back substitution
	510	*/
	511	for(l=0; l<m; ++l){
	512	for(i=0; i<m; ++i) x[i]=0.0;
	513	x[l]=LM_CNST(1.0);
	514
	515	for(i=k=0; i<m; ++i){
	516	j=idx[i];
	517	sum=x[j];
	518	x[j]=x[i];
	519	if(k!=0)
	520	for(j=k-1; j<i; ++j)
	521	sum-=a[im+j]x[j];
	522	else
	523	if(sum!=0.0)
	524	k=i+1;
	525	x[i]=sum;
	526	}
	527
	528	for(i=m-1; i>=0; --i){
	529	sum=x[i];
	530	for(j=i+1; j<m; ++j)
	531	sum-=a[im+j]x[j];
	532	x[i]=sum/a[i*m+i];
	533	}
	534
	535	for(i=0; i<m; ++i)
	536	B[i*m+l]=x[i];
	537	}
	538
	539	free(buf);
	540
	541	return 1;
	542	}
	543	#endif /* HAVE_LAPACK */
	544
	545	/*
	546	* This function computes in C the covariance matrix corresponding to a least
	547	* squares fit. JtJ is the approximate Hessian at the solution (i.e. J^T*J, where
	548	* J is the Jacobian at the solution), sumsq is the sum of squared residuals
	549	* (i.e. goodnes of fit) at the solution, m is the number of parameters (variables)
	550	* and n the number of observations. JtJ can coincide with C.
	551	*
	552	* if JtJ is of full rank, C is computed as sumsq/(n-m)*(JtJ)^-1
	553	* otherwise and if LAPACK is available, C=sumsq/(n-r)*(JtJ)^+
	554	* where r is JtJ's rank and ^+ denotes the pseudoinverse
	555	* The diagonal of C is made up from the estimates of the variances
	556	* of the estimated regression coefficients.
	557	* See the documentation of routine E04YCF from the NAG fortran lib
	558	*
	559	* The function returns the rank of JtJ if successful, 0 on error
	560	*
	561	* A and C are mxm
	562	*
	563	*/
	564	int LEVMAR_COVAR(LM_REAL JtJ, LM_REAL C, LM_REAL sumsq, int m, int n)
	565	{
	566	register int i;
	567	int rnk;
	568	LM_REAL fact;
	569
	570	#ifdef HAVE_LAPACK
	571	rnk=LEVMAR_PSEUDOINVERSE(JtJ, C, m);
	572	if(!rnk) return 0;
	573	#else
	574	#ifdef _MSC_VER
	575	#pragma message("LAPACK not available, LU will be used for matrix inversion when computing the covariance; this might be unstable at times")
	576	#else
	577	#warning LAPACK not available, LU will be used for matrix inversion when computing the covariance; this might be unstable at times
	578	#endif // _MSC_VER
	579
	580	rnk=LEVMAR_LUINVERSE(JtJ, C, m);
	581	if(!rnk) return 0;
	582
	583	rnk=m; /* assume full rank */
	584	#endif /* HAVE_LAPACK */
	585
	586	fact=sumsq/(LM_REAL)(n-rnk);
	587	for(i=0; i<m*m; ++i)
	588	C[i]*=fact;
	589
	590	return rnk;
	591	}
	592
	593	/* standard deviation of the best-fit parameter i.
	594	* covar is the mxm covariance matrix of the best-fit parameters (see also LEVMAR_COVAR()).
	595	*
	596	* The standard deviation is computed as \sigma_{i} = \sqrt{C_{ii}}
	597	*/
	598	LM_REAL LEVMAR_STDDEV(LM_REAL *covar, int m, int i)
	599	{
	600	return (LM_REAL)sqrt(covar[i*m+i]);
	601	}
	602
	603	/* Pearson's correlation coefficient of the best-fit parameters i and j.
	604	* covar is the mxm covariance matrix of the best-fit parameters (see also LEVMAR_COVAR()).
	605	*
	606	* The coefficient is computed as \rho_{ij} = C_{ij} / sqrt(C_{ii} C_{jj})
	607	*/
	608	LM_REAL LEVMAR_CORCOEF(LM_REAL *covar, int m, int i, int j)
	609	{
	610	return (LM_REAL)(covar[im+j]/sqrt(covar[im+i]covar[jm+j]));
	611	}
	612
	613	/* coefficient of determination.
	614	* see http://en.wikipedia.org/wiki/Coefficient_of_determination
	615	*/
	616	LM_REAL LEVMAR_R2(void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
	617	LM_REAL p, LM_REAL x, int m, int n, void *adata)
	618	{
	619	register int i;
	620	register LM_REAL tmp;
	621	LM_REAL SSerr, // sum of squared errors, i.e. residual sum of squares \sum_i (x_i-hx_i)^2
	622	SStot, // \sum_i (x_i-xavg)^2
	623	*hx, xavg;
	624
	625
	626	if((hx=(LM_REAL )malloc(nsizeof(LM_REAL)))==NULL){
	627	fprintf(stderr, RCAT("memory allocation request failed in ", LEVMAR_R2) "()\n");
	628	exit(1);
	629	}
	630
	631	/* hx=f(p) */
	632	(*func)(p, hx, m, n, adata);
	633
	634	for(i=n, tmp=0.0; i-->0; )
	635	tmp+=x[i];
	636	xavg=tmp/(LM_REAL)n;
	637
	638	if(x)
	639	for(i=n, SSerr=SStot=0.0; i-->0; ){
	640	tmp=x[i]-hx[i];
	641	SSerr+=tmp*tmp;
	642
	643	tmp=x[i]-xavg;
	644	SStot+=tmp*tmp;
	645	}
	646	else /* x==0 */
	647	for(i=n, SSerr=SStot=0.0; i-->0; ){
	648	tmp=-hx[i];
	649	SSerr+=tmp*tmp;
	650
	651	tmp=-xavg;
	652	SStot+=tmp*tmp;
	653	}
	654
	655	free(hx);
	656
	657	return LM_CNST(1.0) - SSerr/SStot;
	658	}
	659
	660	/* check box constraints for consistency */
	661	int LEVMAR_BOX_CHECK(LM_REAL lb, LM_REAL ub, int m)
	662	{
	663	register int i;
	664
	665	if(!lb \|\| !ub) return 1;
	666
	667	for(i=0; i<m; ++i)
	668	if(lb[i]>ub[i]) return 0;
	669
	670	return 1;
	671	}
	672
	673	#ifdef HAVE_LAPACK
	674
	675	/* compute the Cholesky decomposition of C in W, s.t. C=W^t W and W is upper triangular */
	676	int LEVMAR_CHOLESKY(LM_REAL C, LM_REAL W, int m)
	677	{
	678	register int i, j;
	679	int info;
	680
	681	/* copy weights array C to W so that LAPACK won't destroy it;
	682	* C is assumed symmetric, hence no transposition is needed
	683	*/
	684	for(i=0, j=m*m; i<j; ++i)
	685	W[i]=C[i];
	686
	687	/* Cholesky decomposition */
	688	POTF2("L", (int )&m, W, (int )&m, (int *)&info);
	689	/* error treatment */
	690	if(info!=0){
	691	if(info<0){
	692	fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in %s\n", -info, LCAT(LEVMAR_CHOLESKY, "()"));
	693	}
	694	else{
	695	fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s()\n", info,
	696	RCAT("and the Cholesky factorization could not be completed in ", LEVMAR_CHOLESKY));
	697	}
	698	return LM_ERROR;
	699	}
	700
	701	/* the decomposition is in the lower part of W (in column-major order!).
	702	* zeroing the upper part makes it lower triangular which is equivalent to
	703	* upper triangular in row-major order
	704	*/
	705	for(i=0; i<m; i++)
	706	for(j=i+1; j<m; j++)
	707	W[i+j*m]=0.0;
	708
	709	return 0;
	710	}
	711	#endif /* HAVE_LAPACK */
	712
	713
	714	/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e.
	715	* e can coincide with either x or y; x can be NULL, in which case it is assumed
	716	* to be equal to the zero vector.
	717	* Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline
	718	* stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html
	719	*/
	720
	721	LM_REAL LEVMAR_L2NRMXMY(LM_REAL e, LM_REAL x, LM_REAL *y, int n)
	722	{
	723	const int blocksize=8, bpwr=3; /* 8=2^3 */
	724	register int i;
	725	int j1, j2, j3, j4, j5, j6, j7;
	726	int blockn;
	727	register LM_REAL sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0;
	728
	729	/* n may not be divisible by blocksize,
	730	* go as near as we can first, then tidy up.
	731	*/
	732	blockn = (n>>bpwr)<<bpwr; /* (n / blocksize) * blocksize; */
	733
	734	/* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */
	735	if(x){
	736	for(i=blockn-1; i>0; i-=blocksize){
	737	e[i ]=x[i ]-y[i ]; sum0+=e[i ]*e[i ];
	738	j1=i-1; e[j1]=x[j1]-y[j1]; sum1+=e[j1]*e[j1];
	739	j2=i-2; e[j2]=x[j2]-y[j2]; sum2+=e[j2]*e[j2];
	740	j3=i-3; e[j3]=x[j3]-y[j3]; sum3+=e[j3]*e[j3];
	741	j4=i-4; e[j4]=x[j4]-y[j4]; sum0+=e[j4]*e[j4];
	742	j5=i-5; e[j5]=x[j5]-y[j5]; sum1+=e[j5]*e[j5];
	743	j6=i-6; e[j6]=x[j6]-y[j6]; sum2+=e[j6]*e[j6];
	744	j7=i-7; e[j7]=x[j7]-y[j7]; sum3+=e[j7]*e[j7];
	745	}
	746
	747	/*
	748	* There may be some left to do.
	749	* This could be done as a simple for() loop,
	750	* but a switch is faster (and more interesting)
	751	*/
	752
	753	i=blockn;
	754	if(i<n){
	755	/* Jump into the case at the place that will allow
	756	* us to finish off the appropriate number of items.
	757	*/
	758
	759	switch(n - i){
	760	case 7 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
	761	case 6 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
	762	case 5 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; ++i;
	763	case 4 : e[i]=x[i]-y[i]; sum3+=e[i]*e[i]; ++i;
	764	case 3 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
	765	case 2 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
	766	case 1 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; //++i;
	767	}
	768	}
	769	}
	770	else{ /* x==0 */
	771	for(i=blockn-1; i>0; i-=blocksize){
	772	e[i ]=-y[i ]; sum0+=e[i ]*e[i ];
	773	j1=i-1; e[j1]=-y[j1]; sum1+=e[j1]*e[j1];
	774	j2=i-2; e[j2]=-y[j2]; sum2+=e[j2]*e[j2];
	775	j3=i-3; e[j3]=-y[j3]; sum3+=e[j3]*e[j3];
	776	j4=i-4; e[j4]=-y[j4]; sum0+=e[j4]*e[j4];
	777	j5=i-5; e[j5]=-y[j5]; sum1+=e[j5]*e[j5];
	778	j6=i-6; e[j6]=-y[j6]; sum2+=e[j6]*e[j6];
	779	j7=i-7; e[j7]=-y[j7]; sum3+=e[j7]*e[j7];
	780	}
	781
	782	/*
	783	* There may be some left to do.
	784	* This could be done as a simple for() loop,
	785	* but a switch is faster (and more interesting)
	786	*/
	787
	788	i=blockn;
	789	if(i<n){
	790	/* Jump into the case at the place that will allow
	791	* us to finish off the appropriate number of items.
	792	*/
	793
	794	switch(n - i){
	795	case 7 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
	796	case 6 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
	797	case 5 : e[i]=-y[i]; sum2+=e[i]*e[i]; ++i;
	798	case 4 : e[i]=-y[i]; sum3+=e[i]*e[i]; ++i;
	799	case 3 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
	800	case 2 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
	801	case 1 : e[i]=-y[i]; sum2+=e[i]*e[i]; //++i;
	802	}
	803	}
	804	}
	805
	806	return sum0+sum1+sum2+sum3;
	807	}
	808
	809	/* undefine everything. THIS MUST REMAIN AT THE END OF THE FILE */
	810	#undef POTF2
	811	#undef GESVD
	812	#undef GESDD
	813	#undef GEMM
	814	#undef LEVMAR_PSEUDOINVERSE
	815	#undef LEVMAR_LUINVERSE
	816	#undef LEVMAR_BOX_CHECK
	817	#undef LEVMAR_CHOLESKY
	818	#undef LEVMAR_COVAR
	819	#undef LEVMAR_STDDEV
	820	#undef LEVMAR_CORCOEF
	821	#undef LEVMAR_R2
	822	#undef LEVMAR_CHKJAC
	823	#undef LEVMAR_FDIF_FORW_JAC_APPROX
	824	#undef LEVMAR_FDIF_CENT_JAC_APPROX
	825	#undef LEVMAR_TRANS_MAT_MAT_MULT
	826	#undef LEVMAR_L2NRMXMY

Note: See TracBrowser for help on using the repository browser.

Download in other formats: