Context Navigation

lm_core.c@ 2945e6

Visit:

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.1 Candidate_v1.7.0 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_oldresults ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps stable

Last change on this file since 2945e6 was 8ce1a9, checked in by Frederik Heber <heber@…>, 9 years ago
Merge commit '5443b10a06f0c125d0ae0500abb09901fda9666b' as 'ThirdParty/levmar'
Property mode set to `100644`
File size: 30.5 KB

Rev	Line
[5443b1]	1	/////////////////////////////////////////////////////////////////////////////////
	2	//
	3	// Levenberg - Marquardt non-linear minimization algorithm
	4	// Copyright (C) 2004 Manolis Lourakis (lourakis at ics forth gr)
	5	// Institute of Computer Science, Foundation for Research & Technology - Hellas
	6	// Heraklion, Crete, Greece.
	7	//
	8	// This program is free software; you can redistribute it and/or modify
	9	// it under the terms of the GNU General Public License as published by
	10	// the Free Software Foundation; either version 2 of the License, or
	11	// (at your option) any later version.
	12	//
	13	// This program is distributed in the hope that it will be useful,
	14	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	// GNU General Public License for more details.
	17	//
	18	/////////////////////////////////////////////////////////////////////////////////
	19
	20	#ifndef LM_REAL // not included by lm.c
	21	#error This file should not be compiled directly!
	22	#endif
	23
	24
	25	/* precision-specific definitions */
	26	#define LEVMAR_DER LM_ADD_PREFIX(levmar_der)
	27	#define LEVMAR_DIF LM_ADD_PREFIX(levmar_dif)
	28	#define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)
	29	#define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx)
	30	#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
	31	#define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
	32	#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
	33
	34	#ifdef HAVE_LAPACK
	35	#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
	36	#define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
	37	#define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
	38	#define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
	39	#define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
	40	#define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK)
	41	#else
	42	#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack)
	43	#endif /* HAVE_LAPACK */
	44
	45	#ifdef HAVE_PLASMA
	46	#define AX_EQ_B_PLASMA_CHOL LM_ADD_PREFIX(Ax_eq_b_PLASMA_Chol)
	47	#endif
	48
	49	/*
	50	* This function seeks the parameter vector p that best describes the measurements vector x.
	51	* More precisely, given a vector function func : R^m --> R^n with n>=m,
	52	* it finds p s.t. func(p) ~= x, i.e. the squared second order (i.e. L2) norm of
	53	* e=x-func(p) is minimized.
	54	*
	55	* This function requires an analytic Jacobian. In case the latter is unavailable,
	56	* use LEVMAR_DIF() bellow
	57	*
	58	* Returns the number of iterations (>=0) if successful, LM_ERROR if failed
	59	*
	60	* For more details, see K. Madsen, H.B. Nielsen and O. Tingleff's lecture notes on
	61	* non-linear least squares at http://www.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf
	62	*/
	63
	64	int LEVMAR_DER(
	65	void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
	66	void (jacf)(LM_REAL p, LM_REAL j, int m, int n, void adata), /* function to evaluate the Jacobian \part x / \part p */
	67	LM_REAL p, / I/O: initial parameter estimates. On output has the estimated solution */
	68	LM_REAL x, / I: measurement vector. NULL implies a zero vector */
	69	int m, /* I: parameter vector dimension (i.e. #unknowns) */
	70	int n, /* I: measurement vector dimension */
	71	int itmax, /* I: maximum number of iterations */
	72	LM_REAL opts[4], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
	73	* stopping thresholds for \|\|J^T e\|\|_inf, \|\|Dp\|\|_2 and \|\|e\|\|_2. Set to NULL for defaults to be used
	74	*/
	75	LM_REAL info[LM_INFO_SZ],
	76	/* O: information regarding the minimization. Set to NULL if don't care
	77	* info[0]= \|\|e\|\|_2 at initial p.
	78	* info[1-4]=[ \|\|e\|\|_2, \|\|J^T e\|\|_inf, \|\|Dp\|\|_2, mu/max[J^T J]_ii ], all computed at estimated p.
	79	* info[5]= # iterations,
	80	* info[6]=reason for terminating: 1 - stopped by small gradient J^T e
	81	* 2 - stopped by small Dp
	82	* 3 - stopped by itmax
	83	* 4 - singular matrix. Restart from current p with increased mu
	84	* 5 - no further error reduction is possible. Restart with increased mu
	85	* 6 - stopped by small \|\|e\|\|_2
	86	* 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
	87	* info[7]= # function evaluations
	88	* info[8]= # Jacobian evaluations
	89	* info[9]= # linear systems solved, i.e. # attempts for reducing error
	90	*/
	91	LM_REAL work, / working memory at least LM_DER_WORKSZ() reals large, allocated if NULL */
	92	LM_REAL covar, / O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
	93	void adata) / pointer to possibly additional data, passed uninterpreted to func & jacf.
	94	* Set to NULL if not needed
	95	*/
	96	{
	97	register int i, j, k, l;
	98	int worksz, freework=0, issolved;
	99	/* temp work arrays */
	100	LM_REAL e, / nx1 */
	101	hx, / \hat{x}_i, nx1 */
	102	jacTe, / J^T e_i mx1 */
	103	jac, / nxm */
	104	jacTjac, / mxm */
	105	Dp, / mx1 */
	106	diag_jacTjac, / diagonal of J^T J, mx1 */
	107	pDp; / p + Dp, mx1 */
	108
	109	register LM_REAL mu, /* damping constant */
	110	tmp; /* mainly used in matrix & vector multiplications */
	111	LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* \|\|e(p)\|\|_2, \|\|J^T e\|\|_inf, \|\|e(p+Dp)\|\|_2 */
	112	LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
	113	LM_REAL tau, eps1, eps2, eps2_sq, eps3;
	114	LM_REAL init_p_eL2;
	115	int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
	116	const int nm=n*m;
	117	int (linsolver)(LM_REAL A, LM_REAL B, LM_REAL x, int m)=NULL;
	118
	119	mu=jacTe_inf=0.0; /* -Wall */
	120
	121	if(n<m){
	122	fprintf(stderr, LCAT(LEVMAR_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
	123	return LM_ERROR;
	124	}
	125
	126	if(!jacf){
	127	fprintf(stderr, RCAT("No function specified for computing the Jacobian in ", LEVMAR_DER)
	128	RCAT("().\nIf no such function is available, use ", LEVMAR_DIF) RCAT("() rather than ", LEVMAR_DER) "()\n");
	129	return LM_ERROR;
	130	}
	131
	132	if(opts){
	133	tau=opts[0];
	134	eps1=opts[1];
	135	eps2=opts[2];
	136	eps2_sq=opts[2]*opts[2];
	137	eps3=opts[3];
	138	}
	139	else{ // use default values
	140	tau=LM_CNST(LM_INIT_MU);
	141	eps1=LM_CNST(LM_STOP_THRESH);
	142	eps2=LM_CNST(LM_STOP_THRESH);
	143	eps2_sq=LM_CNST(LM_STOP_THRESH)*LM_CNST(LM_STOP_THRESH);
	144	eps3=LM_CNST(LM_STOP_THRESH);
	145	}
	146
	147	if(!work){
	148	worksz=LM_DER_WORKSZ(m, n); //2n+4m + nm + mm;
	149	work=(LM_REAL )malloc(workszsizeof(LM_REAL)); /* allocate a big chunk in one step */
	150	if(!work){
	151	fprintf(stderr, LCAT(LEVMAR_DER, "(): memory allocation request failed\n"));
	152	return LM_ERROR;
	153	}
	154	freework=1;
	155	}
	156
	157	/* set up work arrays */
	158	e=work;
	159	hx=e + n;
	160	jacTe=hx + n;
	161	jac=jacTe + m;
	162	jacTjac=jac + nm;
	163	Dp=jacTjac + m*m;
	164	diag_jacTjac=Dp + m;
	165	pDp=diag_jacTjac + m;
	166
	167	/* compute e=x - f(p) and its L2 norm */
	168	(*func)(p, hx, m, n, adata); nfev=1;
	169	/* ### e=x-hx, p_eL2=\|\|e\|\| */
	170	#if 1
	171	p_eL2=LEVMAR_L2NRMXMY(e, x, hx, n);
	172	#else
	173	for(i=0, p_eL2=0.0; i<n; ++i){
	174	e[i]=tmp=x[i]-hx[i];
	175	p_eL2+=tmp*tmp;
	176	}
	177	#endif
	178	init_p_eL2=p_eL2;
	179	if(!LM_FINITE(p_eL2)) stop=7;
	180
	181	for(k=0; k<itmax && !stop; ++k){
	182	/* Note that p and e have been updated at a previous iteration */
	183
	184	if(p_eL2<=eps3){ /* error is small */
	185	stop=6;
	186	break;
	187	}
	188
	189	/* Compute the Jacobian J at p, J^T J, J^T e, \|\|J^T e\|\|_inf and \|\|p\|\|^2.
	190	* Since J^T J is symmetric, its computation can be sped up by computing
	191	* only its upper triangular part and copying it to the lower part
	192	*/
	193
	194	(*jacf)(p, jac, m, n, adata); ++njev;
	195
	196	/* J^T J, J^T e */
	197	if(nm<__BLOCKSZ__SQ){ // this is a small problem
	198	/* J^TJ_ij = \sum_l J^T_il J_lj = \sum_l J_li * J_lj.
	199	* Thus, the product J^T J can be computed using an outer loop for
	200	* l that adds J_li*J_lj to each element ij of the result. Note that
	201	* with this scheme, the accesses to J and JtJ are always along rows,
	202	* therefore induces less cache misses compared to the straightforward
	203	* algorithm for computing the product (i.e., l loop is innermost one).
	204	* A similar scheme applies to the computation of J^T e.
	205	* However, for large minimization problems (i.e., involving a large number
	206	* of unknowns and measurements) for which J/J^T J rows are too large to
	207	* fit in the L1 cache, even this scheme incures many cache misses. In
	208	* such cases, a cache-efficient blocking scheme is preferable.
	209	*
	210	* Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
	211	* performance problem.
	212	*
	213	* Note that the non-blocking algorithm is faster on small
	214	* problems since in this case it avoids the overheads of blocking.
	215	*/
	216
	217	/* looping downwards saves a few computations */
	218	register int l;
	219	register LM_REAL alpha, jaclm, jacTjacim;
	220
	221	for(i=m*m; i-->0; )
	222	jacTjac[i]=0.0;
	223	for(i=m; i-->0; )
	224	jacTe[i]=0.0;
	225
	226	for(l=n; l-->0; ){
	227	jaclm=jac+l*m;
	228	for(i=m; i-->0; ){
	229	jacTjacim=jacTjac+i*m;
	230	alpha=jaclm[i]; //jac[l*m+i];
	231	for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */
	232	jacTjacim[j]+=jaclm[j]alpha; //jacTjac[im+j]+=jac[lm+j]alpha
	233
	234	/* J^T e */
	235	jacTe[i]+=alpha*e[l];
	236	}
	237	}
	238
	239	for(i=m; i-->0; ) /* copy to upper part */
	240	for(j=i+1; j<m; ++j)
	241	jacTjac[im+j]=jacTjac[jm+i];
	242
	243	}
	244	else{ // this is a large problem
	245	/* Cache efficient computation of J^T J based on blocking
	246	*/
	247	LEVMAR_TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);
	248
	249	/* cache efficient computation of J^T e */
	250	for(i=0; i<m; ++i)
	251	jacTe[i]=0.0;
	252
	253	for(i=0; i<n; ++i){
	254	register LM_REAL *jacrow;
	255
	256	for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
	257	jacTe[l]+=jacrow[l]*tmp;
	258	}
	259	}
	260
	261	/* Compute \|\|J^T e\|\|_inf and \|\|p\|\|^2 */
	262	for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
	263	if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;
	264
	265	diag_jacTjac[i]=jacTjac[im+i]; / save diagonal entries so that augmentation can be later canceled */
	266	p_L2+=p[i]*p[i];
	267	}
	268	//p_L2=sqrt(p_L2);
	269
	270	#if 0
	271	if(!(k%100)){
	272	printf("Current estimate: ");
	273	for(i=0; i<m; ++i)
	274	printf("%.9g ", p[i]);
	275	printf("-- errors %.9g %0.9g\n", jacTe_inf, p_eL2);
	276	}
	277	#endif
	278
	279	/* check for convergence */
	280	if((jacTe_inf <= eps1)){
	281	Dp_L2=0.0; /* no increment for p in this case */
	282	stop=1;
	283	break;
	284	}
	285
	286	/* compute initial damping factor */
	287	if(k==0){
	288	for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
	289	if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
	290	mu=tau*tmp;
	291	}
	292
	293	/* determine increment using adaptive damping */
	294	while(1){
	295	/* augment normal equations */
	296	for(i=0; i<m; ++i)
	297	jacTjac[i*m+i]+=mu;
	298
	299	/* solve augmented equations */
	300	#ifdef HAVE_LAPACK
	301	/* 7 alternatives are available: LU, Cholesky + Cholesky with PLASMA, LDLt, 2 variants of QR decomposition and SVD.
	302	* For matrices with dimensions of at least a few hundreds, the PLASMA implementation of Cholesky is the fastest.
	303	* From the serial solvers, Cholesky is the fastest but might occasionally be inapplicable due to numerical round-off;
	304	* QR is slower but more robust; SVD is the slowest but most robust; LU is quite robust but
	305	* slower than LDLt; LDLt offers a good tradeoff between robustness and speed
	306	*/
	307
	308	issolved=AX_EQ_B_BK(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_BK;
	309	//issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
	310	//issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_CHOL;
	311	#ifdef HAVE_PLASMA
	312	//issolved=AX_EQ_B_PLASMA_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_PLASMA_CHOL;
	313	#endif
	314	//issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_QR;
	315	//issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); ++nlss; linsolver=(int ()(LM_REAL A, LM_REAL B, LM_REAL x, int m))AX_EQ_B_QRLS;
	316	//issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_SVD;
	317
	318	#else
	319	/* use the LU included with levmar */
	320	issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
	321	#endif /* HAVE_LAPACK */
	322
	323	if(issolved){
	324	/* compute p's new estimate and \|\|Dp\|\|^2 */
	325	for(i=0, Dp_L2=0.0; i<m; ++i){
	326	pDp[i]=p[i] + (tmp=Dp[i]);
	327	Dp_L2+=tmp*tmp;
	328	}
	329	//Dp_L2=sqrt(Dp_L2);
	330
	331	if(Dp_L2<=eps2_sqp_L2){ / relative change in p is small, stop */
	332	//if(Dp_L2<=eps2(p_L2 + eps2)){ / relative change in p is small, stop */
	333	stop=2;
	334	break;
	335	}
	336
	337	if(Dp_L2>=(p_L2+eps2)/(LM_CNST(EPSILON)LM_CNST(EPSILON))){ / almost singular */
	338	//if(Dp_L2>=(p_L2+eps2)/LM_CNST(EPSILON)){ /* almost singular */
	339	stop=4;
	340	break;
	341	}
	342
	343	(func)(pDp, hx, m, n, adata); ++nfev; / evaluate function at p + Dp */
	344	/* compute \|\|e(pDp)\|\|_2 */
	345	/* ### hx=x-hx, pDp_eL2=\|\|hx\|\| */
	346	#if 1
	347	pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
	348	#else
	349	for(i=0, pDp_eL2=0.0; i<n; ++i){
	350	hx[i]=tmp=x[i]-hx[i];
	351	pDp_eL2+=tmp*tmp;
	352	}
	353	#endif
	354	if(!LM_FINITE(pDp_eL2)){ /* sum of squares is not finite, most probably due to a user error.
	355	* This check makes sure that the inner loop does not run indefinitely.
	356	* Thanks to Steve Danauskas for reporting such cases
	357	*/
	358	stop=7;
	359	break;
	360	}
	361
	362	for(i=0, dL=0.0; i<m; ++i)
	363	dL+=Dp[i](muDp[i]+jacTe[i]);
	364
	365	dF=p_eL2-pDp_eL2;
	366
	367	if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
	368	tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0));
	369	tmp=LM_CNST(1.0)-tmptmptmp;
	370	mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) );
	371	nu=2;
	372
	373	for(i=0 ; i<m; ++i) /* update p's estimate */
	374	p[i]=pDp[i];
	375
	376	for(i=0; i<n; ++i) /* update e and \|\|e\|\|_2 */
	377	e[i]=hx[i];
	378	p_eL2=pDp_eL2;
	379	break;
	380	}
	381	}
	382
	383	/* if this point is reached, either the linear system could not be solved or
	384	* the error did not reduce; in any case, the increment must be rejected
	385	*/
	386
	387	mu*=nu;
	388	nu2=nu<<1; // 2*nu;
	389	if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
	390	stop=5;
	391	break;
	392	}
	393	nu=nu2;
	394
	395	for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
	396	jacTjac[i*m+i]=diag_jacTjac[i];
	397	} /* inner loop */
	398	}
	399
	400	if(k>=itmax) stop=3;
	401
	402	for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
	403	jacTjac[i*m+i]=diag_jacTjac[i];
	404
	405	if(info){
	406	info[0]=init_p_eL2;
	407	info[1]=p_eL2;
	408	info[2]=jacTe_inf;
	409	info[3]=Dp_L2;
	410	for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
	411	if(tmp<jacTjac[im+i]) tmp=jacTjac[im+i];
	412	info[4]=mu/tmp;
	413	info[5]=(LM_REAL)k;
	414	info[6]=(LM_REAL)stop;
	415	info[7]=(LM_REAL)nfev;
	416	info[8]=(LM_REAL)njev;
	417	info[9]=(LM_REAL)nlss;
	418	}
	419
	420	/* covariance matrix */
	421	if(covar){
	422	LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
	423	}
	424
	425	if(freework) free(work);
	426
	427	#ifdef LINSOLVERS_RETAIN_MEMORY
	428	if(linsolver) (*linsolver)(NULL, NULL, NULL, 0);
	429	#endif
	430
	431	return (stop!=4 && stop!=7)? k : LM_ERROR;
	432	}
	433
	434
	435	/* Secant version of the LEVMAR_DER() function above: the Jacobian is approximated with
	436	* the aid of finite differences (forward or central, see the comment for the opts argument)
	437	*/
	438	int LEVMAR_DIF(
	439	void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
	440	LM_REAL p, / I/O: initial parameter estimates. On output has the estimated solution */
	441	LM_REAL x, / I: measurement vector. NULL implies a zero vector */
	442	int m, /* I: parameter vector dimension (i.e. #unknowns) */
	443	int n, /* I: measurement vector dimension */
	444	int itmax, /* I: maximum number of iterations */
	445	LM_REAL opts[5], /* I: opts[0-4] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the
	446	* scale factor for initial \mu, stopping thresholds for \|\|J^T e\|\|_inf, \|\|Dp\|\|_2 and \|\|e\|\|_2 and
	447	* the step used in difference approximation to the Jacobian. Set to NULL for defaults to be used.
	448	* If \delta<0, the Jacobian is approximated with central differences which are more accurate
	449	* (but slower!) compared to the forward differences employed by default.
	450	*/
	451	LM_REAL info[LM_INFO_SZ],
	452	/* O: information regarding the minimization. Set to NULL if don't care
	453	* info[0]= \|\|e\|\|_2 at initial p.
	454	* info[1-4]=[ \|\|e\|\|_2, \|\|J^T e\|\|_inf, \|\|Dp\|\|_2, mu/max[J^T J]_ii ], all computed at estimated p.
	455	* info[5]= # iterations,
	456	* info[6]=reason for terminating: 1 - stopped by small gradient J^T e
	457	* 2 - stopped by small Dp
	458	* 3 - stopped by itmax
	459	* 4 - singular matrix. Restart from current p with increased mu
	460	* 5 - no further error reduction is possible. Restart with increased mu
	461	* 6 - stopped by small \|\|e\|\|_2
	462	* 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
	463	* info[7]= # function evaluations
	464	* info[8]= # Jacobian evaluations
	465	* info[9]= # linear systems solved, i.e. # attempts for reducing error
	466	*/
	467	LM_REAL work, / working memory at least LM_DIF_WORKSZ() reals large, allocated if NULL */
	468	LM_REAL covar, / O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
	469	void adata) / pointer to possibly additional data, passed uninterpreted to func.
	470	* Set to NULL if not needed
	471	*/
	472	{
	473	register int i, j, k, l;
	474	int worksz, freework=0, issolved;
	475	/* temp work arrays */
	476	LM_REAL e, / nx1 */
	477	hx, / \hat{x}_i, nx1 */
	478	jacTe, / J^T e_i mx1 */
	479	jac, / nxm */
	480	jacTjac, / mxm */
	481	Dp, / mx1 */
	482	diag_jacTjac, / diagonal of J^T J, mx1 */
	483	pDp, / p + Dp, mx1 */
	484	wrk, / nx1 */
	485	wrk2; / nx1, used only for holding a temporary e vector and when differentiating with central differences */
	486
	487	int using_ffdif=1;
	488
	489	register LM_REAL mu, /* damping constant */
	490	tmp; /* mainly used in matrix & vector multiplications */
	491	LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* \|\|e(p)\|\|_2, \|\|J^T e\|\|_inf, \|\|e(p+Dp)\|\|_2 */
	492	LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
	493	LM_REAL tau, eps1, eps2, eps2_sq, eps3, delta;
	494	LM_REAL init_p_eL2;
	495	int nu, nu2, stop=0, nfev, njap=0, nlss=0, K=(m>=10)? m: 10, updjac, updp=1, newjac;
	496	const int nm=n*m;
	497	int (linsolver)(LM_REAL A, LM_REAL B, LM_REAL x, int m)=NULL;
	498
	499	mu=jacTe_inf=p_L2=0.0; /* -Wall */
	500	updjac=newjac=0; /* -Wall */
	501
	502	if(n<m){
	503	fprintf(stderr, LCAT(LEVMAR_DIF, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
	504	return LM_ERROR;
	505	}
	506
	507	if(opts){
	508	tau=opts[0];
	509	eps1=opts[1];
	510	eps2=opts[2];
	511	eps2_sq=opts[2]*opts[2];
	512	eps3=opts[3];
	513	delta=opts[4];
	514	if(delta<0.0){
	515	delta=-delta; /* make positive */
	516	using_ffdif=0; /* use central differencing */
	517	}
	518	}
	519	else{ // use default values
	520	tau=LM_CNST(LM_INIT_MU);
	521	eps1=LM_CNST(LM_STOP_THRESH);
	522	eps2=LM_CNST(LM_STOP_THRESH);
	523	eps2_sq=LM_CNST(LM_STOP_THRESH)*LM_CNST(LM_STOP_THRESH);
	524	eps3=LM_CNST(LM_STOP_THRESH);
	525	delta=LM_CNST(LM_DIFF_DELTA);
	526	}
	527
	528	if(!work){
	529	worksz=LM_DIF_WORKSZ(m, n); //4n+4m + nm + mm;
	530	work=(LM_REAL )malloc(workszsizeof(LM_REAL)); /* allocate a big chunk in one step */
	531	if(!work){
	532	fprintf(stderr, LCAT(LEVMAR_DIF, "(): memory allocation request failed\n"));
	533	return LM_ERROR;
	534	}
	535	freework=1;
	536	}
	537
	538	/* set up work arrays */
	539	e=work;
	540	hx=e + n;
	541	jacTe=hx + n;
	542	jac=jacTe + m;
	543	jacTjac=jac + nm;
	544	Dp=jacTjac + m*m;
	545	diag_jacTjac=Dp + m;
	546	pDp=diag_jacTjac + m;
	547	wrk=pDp + m;
	548	wrk2=wrk + n;
	549
	550	/* compute e=x - f(p) and its L2 norm */
	551	(*func)(p, hx, m, n, adata); nfev=1;
	552	/* ### e=x-hx, p_eL2=\|\|e\|\| */
	553	#if 1
	554	p_eL2=LEVMAR_L2NRMXMY(e, x, hx, n);
	555	#else
	556	for(i=0, p_eL2=0.0; i<n; ++i){
	557	e[i]=tmp=x[i]-hx[i];
	558	p_eL2+=tmp*tmp;
	559	}
	560	#endif
	561	init_p_eL2=p_eL2;
	562	if(!LM_FINITE(p_eL2)) stop=7;
	563
	564	nu=20; /* force computation of J */
	565
	566	for(k=0; k<itmax && !stop; ++k){
	567	/* Note that p and e have been updated at a previous iteration */
	568
	569	if(p_eL2<=eps3){ /* error is small */
	570	stop=6;
	571	break;
	572	}
	573
	574	/* Compute the Jacobian J at p, J^T J, J^T e, \|\|J^T e\|\|_inf and \|\|p\|\|^2.
	575	* The symmetry of J^T J is again exploited for speed
	576	*/
	577
	578	if((updp && nu>16) \|\| updjac==K){ /* compute difference approximation to J */
	579	if(using_ffdif){ /* use forward differences */
	580	LEVMAR_FDIF_FORW_JAC_APPROX(func, p, hx, wrk, delta, jac, m, n, adata);
	581	++njap; nfev+=m;
	582	}
	583	else{ /* use central differences */
	584	LEVMAR_FDIF_CENT_JAC_APPROX(func, p, wrk, wrk2, delta, jac, m, n, adata);
	585	++njap; nfev+=2*m;
	586	}
	587	nu=2; updjac=0; updp=0; newjac=1;
	588	}
	589
	590	if(newjac){ /* Jacobian has changed, recompute J^T J, J^t e, etc */
	591	newjac=0;
	592
	593	/* J^T J, J^T e */
	594	if(nm<=__BLOCKSZ__SQ){ // this is a small problem
	595	/* J^TJ_ij = \sum_l J^T_il J_lj = \sum_l J_li * J_lj.
	596	* Thus, the product J^T J can be computed using an outer loop for
	597	* l that adds J_li*J_lj to each element ij of the result. Note that
	598	* with this scheme, the accesses to J and JtJ are always along rows,
	599	* therefore induces less cache misses compared to the straightforward
	600	* algorithm for computing the product (i.e., l loop is innermost one).
	601	* A similar scheme applies to the computation of J^T e.
	602	* However, for large minimization problems (i.e., involving a large number
	603	* of unknowns and measurements) for which J/J^T J rows are too large to
	604	* fit in the L1 cache, even this scheme incures many cache misses. In
	605	* such cases, a cache-efficient blocking scheme is preferable.
	606	*
	607	* Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
	608	* performance problem.
	609	*
	610	* Note that the non-blocking algorithm is faster on small
	611	* problems since in this case it avoids the overheads of blocking.
	612	*/
	613	register int l;
	614	register LM_REAL alpha, jaclm, jacTjacim;
	615
	616	/* looping downwards saves a few computations */
	617	for(i=m*m; i-->0; )
	618	jacTjac[i]=0.0;
	619	for(i=m; i-->0; )
	620	jacTe[i]=0.0;
	621
	622	for(l=n; l-->0; ){
	623	jaclm=jac+l*m;
	624	for(i=m; i-->0; ){
	625	jacTjacim=jacTjac+i*m;
	626	alpha=jaclm[i]; //jac[l*m+i];
	627	for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */
	628	jacTjacim[j]+=jaclm[j]alpha; //jacTjac[im+j]+=jac[lm+j]alpha
	629
	630	/* J^T e */
	631	jacTe[i]+=alpha*e[l];
	632	}
	633	}
	634
	635	for(i=m; i-->0; ) /* copy to upper part */
	636	for(j=i+1; j<m; ++j)
	637	jacTjac[im+j]=jacTjac[jm+i];
	638	}
	639	else{ // this is a large problem
	640	/* Cache efficient computation of J^T J based on blocking
	641	*/
	642	LEVMAR_TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);
	643
	644	/* cache efficient computation of J^T e */
	645	for(i=0; i<m; ++i)
	646	jacTe[i]=0.0;
	647
	648	for(i=0; i<n; ++i){
	649	register LM_REAL *jacrow;
	650
	651	for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
	652	jacTe[l]+=jacrow[l]*tmp;
	653	}
	654	}
	655
	656	/* Compute \|\|J^T e\|\|_inf and \|\|p\|\|^2 */
	657	for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
	658	if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;
	659
	660	diag_jacTjac[i]=jacTjac[im+i]; / save diagonal entries so that augmentation can be later canceled */
	661	p_L2+=p[i]*p[i];
	662	}
	663	//p_L2=sqrt(p_L2);
	664	}
	665
	666	#if 0
	667	if(!(k%100)){
	668	printf("Current estimate: ");
	669	for(i=0; i<m; ++i)
	670	printf("%.9g ", p[i]);
	671	printf("-- errors %.9g %0.9g\n", jacTe_inf, p_eL2);
	672	}
	673	#endif
	674
	675	/* check for convergence */
	676	if((jacTe_inf <= eps1)){
	677	Dp_L2=0.0; /* no increment for p in this case */
	678	stop=1;
	679	break;
	680	}
	681
	682	/* compute initial damping factor */
	683	if(k==0){
	684	for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
	685	if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
	686	mu=tau*tmp;
	687	}
	688
	689	/* determine increment using adaptive damping */
	690
	691	/* augment normal equations */
	692	for(i=0; i<m; ++i)
	693	jacTjac[i*m+i]+=mu;
	694
	695	/* solve augmented equations */
	696	#ifdef HAVE_LAPACK
	697	/* 7 alternatives are available: LU, Cholesky + Cholesky with PLASMA, LDLt, 2 variants of QR decomposition and SVD.
	698	* For matrices with dimensions of at least a few hundreds, the PLASMA implementation of Cholesky is the fastest.
	699	* From the serial solvers, Cholesky is the fastest but might occasionally be inapplicable due to numerical round-off;
	700	* QR is slower but more robust; SVD is the slowest but most robust; LU is quite robust but
	701	* slower than LDLt; LDLt offers a good tradeoff between robustness and speed
	702	*/
	703
	704	issolved=AX_EQ_B_BK(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_BK;
	705	//issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
	706	//issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_CHOL;
	707	#ifdef HAVE_PLASMA
	708	//issolved=AX_EQ_B_PLASMA_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_PLASMA_CHOL;
	709	#endif
	710	//issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_QR;
	711	//issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); ++nlss; linsolver=(int ()(LM_REAL A, LM_REAL B, LM_REAL x, int m))AX_EQ_B_QRLS;
	712	//issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_SVD;
	713	#else
	714	/* use the LU included with levmar */
	715	issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
	716	#endif /* HAVE_LAPACK */
	717
	718	if(issolved){
	719	/* compute p's new estimate and \|\|Dp\|\|^2 */
	720	for(i=0, Dp_L2=0.0; i<m; ++i){
	721	pDp[i]=p[i] + (tmp=Dp[i]);
	722	Dp_L2+=tmp*tmp;
	723	}
	724	//Dp_L2=sqrt(Dp_L2);
	725
	726	if(Dp_L2<=eps2_sqp_L2){ / relative change in p is small, stop */
	727	//if(Dp_L2<=eps2(p_L2 + eps2)){ / relative change in p is small, stop */
	728	stop=2;
	729	break;
	730	}
	731
	732	if(Dp_L2>=(p_L2+eps2)/(LM_CNST(EPSILON)LM_CNST(EPSILON))){ / almost singular */
	733	//if(Dp_L2>=(p_L2+eps2)/LM_CNST(EPSILON)){ /* almost singular */
	734	stop=4;
	735	break;
	736	}
	737
	738	(func)(pDp, wrk, m, n, adata); ++nfev; / evaluate function at p + Dp */
	739	/* compute \|\|e(pDp)\|\|_2 */
	740	/* ### wrk2=x-wrk, pDp_eL2=\|\|wrk2\|\| */
	741	#if 1
	742	pDp_eL2=LEVMAR_L2NRMXMY(wrk2, x, wrk, n);
	743	#else
	744	for(i=0, pDp_eL2=0.0; i<n; ++i){
	745	wrk2[i]=tmp=x[i]-wrk[i];
	746	pDp_eL2+=tmp*tmp;
	747	}
	748	#endif
	749	if(!LM_FINITE(pDp_eL2)){ /* sum of squares is not finite, most probably due to a user error.
	750	* This check makes sure that the loop terminates early in the case
	751	* of invalid input. Thanks to Steve Danauskas for suggesting it
	752	*/
	753
	754	stop=7;
	755	break;
	756	}
	757
	758	dF=p_eL2-pDp_eL2;
	759	if(updp \|\| dF>0){ /* update jac */
	760	for(i=0; i<n; ++i){
	761	for(l=0, tmp=0.0; l<m; ++l)
	762	tmp+=jac[im+l]Dp[l]; /* (J * Dp)[i] */
	763	tmp=(wrk[i] - hx[i] - tmp)/Dp_L2; /* (f(p+dp)[i] - f(p)[i] - (J * Dp)[i])/(dp^Tdp) /
	764	for(j=0; j<m; ++j)
	765	jac[im+j]+=tmpDp[j];
	766	}
	767	++updjac;
	768	newjac=1;
	769	}
	770
	771	for(i=0, dL=0.0; i<m; ++i)
	772	dL+=Dp[i](muDp[i]+jacTe[i]);
	773
	774	if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
	775	tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0));
	776	tmp=LM_CNST(1.0)-tmptmptmp;
	777	mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) );
	778	nu=2;
	779
	780	for(i=0 ; i<m; ++i) /* update p's estimate */
	781	p[i]=pDp[i];
	782
	783	for(i=0; i<n; ++i){ /* update e, hx and \|\|e\|\|_2 */
	784	e[i]=wrk2[i]; //x[i]-wrk[i];
	785	hx[i]=wrk[i];
	786	}
	787	p_eL2=pDp_eL2;
	788	updp=1;
	789	continue;
	790	}
	791	}
	792
	793	/* if this point is reached, either the linear system could not be solved or
	794	* the error did not reduce; in any case, the increment must be rejected
	795	*/
	796
	797	mu*=nu;
	798	nu2=nu<<1; // 2*nu;
	799	if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
	800	stop=5;
	801	break;
	802	}
	803	nu=nu2;
	804
	805	for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
	806	jacTjac[i*m+i]=diag_jacTjac[i];
	807	}
	808
	809	if(k>=itmax) stop=3;
	810
	811	for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
	812	jacTjac[i*m+i]=diag_jacTjac[i];
	813
	814	if(info){
	815	info[0]=init_p_eL2;
	816	info[1]=p_eL2;
	817	info[2]=jacTe_inf;
	818	info[3]=Dp_L2;
	819	for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
	820	if(tmp<jacTjac[im+i]) tmp=jacTjac[im+i];
	821	info[4]=mu/tmp;
	822	info[5]=(LM_REAL)k;
	823	info[6]=(LM_REAL)stop;
	824	info[7]=(LM_REAL)nfev;
	825	info[8]=(LM_REAL)njap;
	826	info[9]=(LM_REAL)nlss;
	827	}
	828
	829	/* covariance matrix */
	830	if(covar){
	831	LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
	832	}
	833
	834
	835	if(freework) free(work);
	836
	837	#ifdef LINSOLVERS_RETAIN_MEMORY
	838	if(linsolver) (*linsolver)(NULL, NULL, NULL, 0);
	839	#endif
	840
	841	return (stop!=4 && stop!=7)? k : LM_ERROR;
	842	}
	843
	844	/* undefine everything. THIS MUST REMAIN AT THE END OF THE FILE */
	845	#undef LEVMAR_DER
	846	#undef LEVMAR_DIF
	847	#undef LEVMAR_FDIF_FORW_JAC_APPROX
	848	#undef LEVMAR_FDIF_CENT_JAC_APPROX
	849	#undef LEVMAR_COVAR
	850	#undef LEVMAR_TRANS_MAT_MAT_MULT
	851	#undef LEVMAR_L2NRMXMY
	852	#undef AX_EQ_B_LU
	853	#undef AX_EQ_B_CHOL
	854	#undef AX_EQ_B_PLASMA_CHOL
	855	#undef AX_EQ_B_QR
	856	#undef AX_EQ_B_QRLS
	857	#undef AX_EQ_B_SVD
	858	#undef AX_EQ_B_BK

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/levmar/src/lm_core.c@ 2945e6

Download in other formats: