Context Navigation

Axb_core.c@ 7516f6

Visit:

Action_Thermostats Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.1 ChemicalSpaceEvaluator Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Exclude_Hydrogens_annealWithBondGraph Fix_Verbose_Codepatterns ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion Gui_displays_atomic_force_velocity JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool PythonUI_with_named_parameters Recreated_GuiChecks StoppableMakroAction TremoloParser_IncreasedPrecision

Last change on this file since 7516f6 was 8ce1a9, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '5443b10a06f0c125d0ae0500abb09901fda9666b' as 'ThirdParty/levmar'
Property mode set to `100644`
File size: 35.5 KB

Line
1	/////////////////////////////////////////////////////////////////////////////////
2	//
3	// Solution of linear systems involved in the Levenberg - Marquardt
4	// minimization algorithm
5	// Copyright (C) 2004 Manolis Lourakis (lourakis at ics forth gr)
6	// Institute of Computer Science, Foundation for Research & Technology - Hellas
7	// Heraklion, Crete, Greece.
8	//
9	// This program is free software; you can redistribute it and/or modify
10	// it under the terms of the GNU General Public License as published by
11	// the Free Software Foundation; either version 2 of the License, or
12	// (at your option) any later version.
13	//
14	// This program is distributed in the hope that it will be useful,
15	// but WITHOUT ANY WARRANTY; without even the implied warranty of
16	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	// GNU General Public License for more details.
18	//
19	/////////////////////////////////////////////////////////////////////////////////
20
21
22	/* Solvers for the linear systems Ax=b. Solvers should NOT modify their A & B arguments! */
23
24
25	#ifndef LM_REAL // not included by Axb.c
26	#error This file should not be compiled directly!
27	#endif
28
29
30	#ifdef LINSOLVERS_RETAIN_MEMORY
31	#define __STATIC__ static
32	#else
33	#define __STATIC__ // empty
34	#endif /* LINSOLVERS_RETAIN_MEMORY */
35
36	#ifdef HAVE_LAPACK
37
38	/* prototypes of LAPACK routines */
39
40	#define GEQRF LM_MK_LAPACK_NAME(geqrf)
41	#define ORGQR LM_MK_LAPACK_NAME(orgqr)
42	#define TRTRS LM_MK_LAPACK_NAME(trtrs)
43	#define POTF2 LM_MK_LAPACK_NAME(potf2)
44	#define POTRF LM_MK_LAPACK_NAME(potrf)
45	#define POTRS LM_MK_LAPACK_NAME(potrs)
46	#define GETRF LM_MK_LAPACK_NAME(getrf)
47	#define GETRS LM_MK_LAPACK_NAME(getrs)
48	#define GESVD LM_MK_LAPACK_NAME(gesvd)
49	#define GESDD LM_MK_LAPACK_NAME(gesdd)
50	#define SYTRF LM_MK_LAPACK_NAME(sytrf)
51	#define SYTRS LM_MK_LAPACK_NAME(sytrs)
52	#define PLASMA_POSV LM_CAT_(PLASMA_, LM_ADD_PREFIX(posv))
53
54	#ifdef __cplusplus
55	extern "C" {
56	#endif
57	/* QR decomposition */
58	extern int GEQRF(int m, int n, LM_REAL a, int lda, LM_REAL tau, LM_REAL work, int lwork, int info);
59	extern int ORGQR(int m, int n, int k, LM_REAL a, int lda, LM_REAL tau, LM_REAL work, int lwork, int *info);
60
61	/* solution of triangular systems */
62	extern int TRTRS(char uplo, char trans, char diag, int n, int nrhs, LM_REAL a, int lda, LM_REAL b, int ldb, int info);
63
64	/* Cholesky decomposition and systems solution */
65	extern int POTF2(char uplo, int n, LM_REAL a, int lda, int *info);
66	extern int POTRF(char uplo, int n, LM_REAL a, int lda, int info); / block version of dpotf2 */
67	extern int POTRS(char uplo, int n, int nrhs, LM_REAL a, int lda, LM_REAL b, int ldb, int info);
68
69	/* LU decomposition and systems solution */
70	extern int GETRF(int m, int n, LM_REAL a, int lda, int ipiv, int info);
71	extern int GETRS(char trans, int n, int nrhs, LM_REAL a, int lda, int ipiv, LM_REAL b, int ldb, int *info);
72
73	/* Singular Value Decomposition (SVD) */
74	extern int GESVD(char jobu, char jobvt, int m, int n, LM_REAL a, int lda, LM_REAL s, LM_REAL u, int *ldu,
75	LM_REAL vt, int ldvt, LM_REAL work, int lwork, int *info);
76
77	/* lapack 3.0 new SVD routine, faster than xgesvd().
78	* In case that your version of LAPACK does not include them, use the above two older routines
79	*/
80	extern int GESDD(char jobz, int m, int n, LM_REAL a, int lda, LM_REAL s, LM_REAL u, int ldu, LM_REAL vt, int ldvt,
81	LM_REAL work, int lwork, int iwork, int info);
82
83	/* LDLt/UDUt factorization and systems solution */
84	extern int SYTRF(char uplo, int n, LM_REAL a, int lda, int ipiv, LM_REAL work, int lwork, int info);
85	extern int SYTRS(char uplo, int n, int nrhs, LM_REAL a, int lda, int ipiv, LM_REAL b, int ldb, int *info);
86	#ifdef __cplusplus
87	}
88	#endif
89
90	/* precision-specific definitions */
91	#define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
92	#define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
93	#define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
94	#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
95	#define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
96	#define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK)
97	#define AX_EQ_B_PLASMA_CHOL LM_ADD_PREFIX(Ax_eq_b_PLASMA_Chol)
98
99	/*
100	* This function returns the solution of Ax = b
101	*
102	* The function is based on QR decomposition with explicit computation of Q:
103	* If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
104	* Q R x = b or R x = Q^T b.
105	* The last equation can be solved directly.
106	*
107	* A is mxm, b is mx1
108	*
109	* The function returns 0 in case of error, 1 if successful
110	*
111	* This function is often called repetitively to solve problems of identical
112	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
113	* retained between calls and free'd-malloc'ed when not of the appropriate size.
114	* A call with NULL as the first argument forces this memory to be released.
115	*/
116	int AX_EQ_B_QR(LM_REAL A, LM_REAL B, LM_REAL *x, int m)
117	{
118	__STATIC__ LM_REAL *buf=NULL;
119	__STATIC__ int buf_sz=0;
120
121	static int nb=0; /* no __STATIC__ decl. here! */
122
123	LM_REAL a, tau, r, work;
124	int a_sz, tau_sz, r_sz, tot_sz;
125	register int i, j;
126	int info, worksz, nrhs=1;
127	register LM_REAL sum;
128
129	if(!A)
130	#ifdef LINSOLVERS_RETAIN_MEMORY
131	{
132	if(buf) free(buf);
133	buf=NULL;
134	buf_sz=0;
135
136	return 1;
137	}
138	#else
139	return 1; /* NOP */
140	#endif /* LINSOLVERS_RETAIN_MEMORY */
141
142	/* calculate required memory size */
143	a_sz=m*m;
144	tau_sz=m;
145	r_sz=mm; / only the upper triangular part really needed */
146	if(!nb){
147	LM_REAL tmp;
148
149	worksz=-1; // workspace query; optimal size is returned in tmp
150	GEQRF((int )&m, (int )&m, NULL, (int )&m, NULL, (LM_REAL )&tmp, (int )&worksz, (int )&info);
151	nb=((int)tmp)/m; // optimal worksize is m*nb
152	}
153	worksz=nb*m;
154	tot_sz=a_sz + tau_sz + r_sz + worksz;
155
156	#ifdef LINSOLVERS_RETAIN_MEMORY
157	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
158	if(buf) free(buf); /* free previously allocated memory */
159
160	buf_sz=tot_sz;
161	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
162	if(!buf){
163	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
164	exit(1);
165	}
166	}
167	#else
168	buf_sz=tot_sz;
169	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
170	if(!buf){
171	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
172	exit(1);
173	}
174	#endif /* LINSOLVERS_RETAIN_MEMORY */
175
176	a=buf;
177	tau=a+a_sz;
178	r=tau+tau_sz;
179	work=r+r_sz;
180
181	/* store A (column major!) into a */
182	for(i=0; i<m; i++)
183	for(j=0; j<m; j++)
184	a[i+jm]=A[im+j];
185
186	/* QR decomposition of A */
187	GEQRF((int )&m, (int )&m, a, (int )&m, tau, work, (int )&worksz, (int *)&info);
188	/* error treatment */
189	if(info!=0){
190	if(info<0){
191	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QR) "()\n", -info);
192	exit(1);
193	}
194	else{
195	fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QR) "()\n", info);
196	#ifndef LINSOLVERS_RETAIN_MEMORY
197	free(buf);
198	#endif
199
200	return 0;
201	}
202	}
203
204	/* R is stored in the upper triangular part of a; copy it in r so that ORGQR() below won't destroy it */
205	memcpy(r, a, r_sz*sizeof(LM_REAL));
206
207	/* compute Q using the elementary reflectors computed by the above decomposition */
208	ORGQR((int )&m, (int )&m, (int )&m, a, (int )&m, tau, work, (int )&worksz, (int )&info);
209	if(info!=0){
210	if(info<0){
211	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", AX_EQ_B_QR) "()\n", -info);
212	exit(1);
213	}
214	else{
215	fprintf(stderr, RCAT("Unknown LAPACK error (%d) in ", AX_EQ_B_QR) "()\n", info);
216	#ifndef LINSOLVERS_RETAIN_MEMORY
217	free(buf);
218	#endif
219
220	return 0;
221	}
222	}
223
224	/* Q is now in a; compute Q^T b in x */
225	for(i=0; i<m; i++){
226	for(j=0, sum=0.0; j<m; j++)
227	sum+=a[im+j]B[j];
228	x[i]=sum;
229	}
230
231	/* solve the linear system R x = Q^t b */
232	TRTRS("U", "N", "N", (int )&m, (int )&nrhs, r, (int )&m, x, (int )&m, &info);
233	/* error treatment */
234	if(info!=0){
235	if(info<0){
236	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QR) "()\n", -info);
237	exit(1);
238	}
239	else{
240	fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QR) "()\n", info);
241	#ifndef LINSOLVERS_RETAIN_MEMORY
242	free(buf);
243	#endif
244
245	return 0;
246	}
247	}
248
249	#ifndef LINSOLVERS_RETAIN_MEMORY
250	free(buf);
251	#endif
252
253	return 1;
254	}
255
256	/*
257	* This function returns the solution of min_x \|\|Ax - b\|\|
258	*
259	* \|\| . \|\| is the second order (i.e. L2) norm. This is a least squares technique that
260	* is based on QR decomposition:
261	* If A=Q R with Q orthogonal and R upper triangular, the normal equations become
262	* (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b.
263	* This amounts to solving R^T y = A^T b for y and then R x = y for x
264	* Note that Q does not need to be explicitly computed
265	*
266	* A is mxn, b is mx1
267	*
268	* The function returns 0 in case of error, 1 if successful
269	*
270	* This function is often called repetitively to solve problems of identical
271	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
272	* retained between calls and free'd-malloc'ed when not of the appropriate size.
273	* A call with NULL as the first argument forces this memory to be released.
274	*/
275	int AX_EQ_B_QRLS(LM_REAL A, LM_REAL B, LM_REAL *x, int m, int n)
276	{
277	__STATIC__ LM_REAL *buf=NULL;
278	__STATIC__ int buf_sz=0;
279
280	static int nb=0; /* no __STATIC__ decl. here! */
281
282	LM_REAL a, tau, r, work;
283	int a_sz, tau_sz, r_sz, tot_sz;
284	register int i, j;
285	int info, worksz, nrhs=1;
286	register LM_REAL sum;
287
288	if(!A)
289	#ifdef LINSOLVERS_RETAIN_MEMORY
290	{
291	if(buf) free(buf);
292	buf=NULL;
293	buf_sz=0;
294
295	return 1;
296	}
297	#else
298	return 1; /* NOP */
299	#endif /* LINSOLVERS_RETAIN_MEMORY */
300
301	if(m<n){
302	fprintf(stderr, RCAT("Normal equations require that the number of rows is greater than number of columns in ", AX_EQ_B_QRLS) "() [%d x %d]! -- try transposing\n", m, n);
303	exit(1);
304	}
305
306	/* calculate required memory size */
307	a_sz=m*n;
308	tau_sz=n;
309	r_sz=n*n;
310	if(!nb){
311	LM_REAL tmp;
312
313	worksz=-1; // workspace query; optimal size is returned in tmp
314	GEQRF((int )&m, (int )&m, NULL, (int )&m, NULL, (LM_REAL )&tmp, (int )&worksz, (int )&info);
315	nb=((int)tmp)/m; // optimal worksize is m*nb
316	}
317	worksz=nb*m;
318	tot_sz=a_sz + tau_sz + r_sz + worksz;
319
320	#ifdef LINSOLVERS_RETAIN_MEMORY
321	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
322	if(buf) free(buf); /* free previously allocated memory */
323
324	buf_sz=tot_sz;
325	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
326	if(!buf){
327	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
328	exit(1);
329	}
330	}
331	#else
332	buf_sz=tot_sz;
333	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
334	if(!buf){
335	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
336	exit(1);
337	}
338	#endif /* LINSOLVERS_RETAIN_MEMORY */
339
340	a=buf;
341	tau=a+a_sz;
342	r=tau+tau_sz;
343	work=r+r_sz;
344
345	/* store A (column major!) into a */
346	for(i=0; i<m; i++)
347	for(j=0; j<n; j++)
348	a[i+jm]=A[in+j];
349
350	/* compute A^T b in x */
351	for(i=0; i<n; i++){
352	for(j=0, sum=0.0; j<m; j++)
353	sum+=A[jn+i]B[j];
354	x[i]=sum;
355	}
356
357	/* QR decomposition of A */
358	GEQRF((int )&m, (int )&n, a, (int )&m, tau, work, (int )&worksz, (int *)&info);
359	/* error treatment */
360	if(info!=0){
361	if(info<0){
362	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", -info);
363	exit(1);
364	}
365	else{
366	fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", info);
367	#ifndef LINSOLVERS_RETAIN_MEMORY
368	free(buf);
369	#endif
370
371	return 0;
372	}
373	}
374
375	/* R is stored in the upper triangular part of a. Note that a is mxn while r nxn */
376	for(j=0; j<n; j++){
377	for(i=0; i<=j; i++)
378	r[i+jn]=a[i+jm];
379
380	/* lower part is zero */
381	for(i=j+1; i<n; i++)
382	r[i+j*n]=0.0;
383	}
384
385	/* solve the linear system R^T y = A^t b */
386	TRTRS("U", "T", "N", (int )&n, (int )&nrhs, r, (int )&n, x, (int )&n, &info);
387	/* error treatment */
388	if(info!=0){
389	if(info<0){
390	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
391	exit(1);
392	}
393	else{
394	fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
395	#ifndef LINSOLVERS_RETAIN_MEMORY
396	free(buf);
397	#endif
398
399	return 0;
400	}
401	}
402
403	/* solve the linear system R x = y */
404	TRTRS("U", "N", "N", (int )&n, (int )&nrhs, r, (int )&n, x, (int )&n, &info);
405	/* error treatment */
406	if(info!=0){
407	if(info<0){
408	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
409	exit(1);
410	}
411	else{
412	fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
413	#ifndef LINSOLVERS_RETAIN_MEMORY
414	free(buf);
415	#endif
416
417	return 0;
418	}
419	}
420
421	#ifndef LINSOLVERS_RETAIN_MEMORY
422	free(buf);
423	#endif
424
425	return 1;
426	}
427
428	/*
429	* This function returns the solution of Ax=b
430	*
431	* The function assumes that A is symmetric & postive definite and employs
432	* the Cholesky decomposition:
433	* If A=L L^T with L lower triangular, the system to be solved becomes
434	* (L L^T) x = b
435	* This amounts to solving L y = b for y and then L^T x = y for x
436	*
437	* A is mxm, b is mx1
438	*
439	* The function returns 0 in case of error, 1 if successful
440	*
441	* This function is often called repetitively to solve problems of identical
442	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
443	* retained between calls and free'd-malloc'ed when not of the appropriate size.
444	* A call with NULL as the first argument forces this memory to be released.
445	*/
446	int AX_EQ_B_CHOL(LM_REAL A, LM_REAL B, LM_REAL *x, int m)
447	{
448	__STATIC__ LM_REAL *buf=NULL;
449	__STATIC__ int buf_sz=0;
450
451	LM_REAL *a;
452	int a_sz, tot_sz;
453	int info, nrhs=1;
454
455	if(!A)
456	#ifdef LINSOLVERS_RETAIN_MEMORY
457	{
458	if(buf) free(buf);
459	buf=NULL;
460	buf_sz=0;
461
462	return 1;
463	}
464	#else
465	return 1; /* NOP */
466	#endif /* LINSOLVERS_RETAIN_MEMORY */
467
468	/* calculate required memory size */
469	a_sz=m*m;
470	tot_sz=a_sz;
471
472	#ifdef LINSOLVERS_RETAIN_MEMORY
473	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
474	if(buf) free(buf); /* free previously allocated memory */
475
476	buf_sz=tot_sz;
477	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
478	if(!buf){
479	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
480	exit(1);
481	}
482	}
483	#else
484	buf_sz=tot_sz;
485	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
486	if(!buf){
487	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
488	exit(1);
489	}
490	#endif /* LINSOLVERS_RETAIN_MEMORY */
491
492	a=buf;
493
494	/* store A into a and B into x. A is assumed symmetric,
495	* hence no transposition is needed
496	*/
497	memcpy(a, A, a_sz*sizeof(LM_REAL));
498	memcpy(x, B, m*sizeof(LM_REAL));
499
500	/* Cholesky decomposition of A */
501	//POTF2("L", (int )&m, a, (int )&m, (int *)&info);
502	POTRF("L", (int )&m, a, (int )&m, (int *)&info);
503	/* error treatment */
504	if(info!=0){
505	if(info<0){
506	fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) "/", POTRF) " in ",
507	AX_EQ_B_CHOL) "()\n", -info);
508	exit(1);
509	}
510	else{
511	fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", info);
512	#ifndef LINSOLVERS_RETAIN_MEMORY
513	free(buf);
514	#endif
515
516	return 0;
517	}
518	}
519
520	/* solve using the computed Cholesky in one lapack call */
521	POTRS("L", (int )&m, (int )&nrhs, a, (int )&m, x, (int )&m, &info);
522	if(info<0){
523	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
524	exit(1);
525	}
526
527	#if 0
528	/* alternative: solve the linear system L y = b ... */
529	TRTRS("L", "N", "N", (int )&m, (int )&nrhs, a, (int )&m, x, (int )&m, &info);
530	/* error treatment */
531	if(info!=0){
532	if(info<0){
533	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
534	exit(1);
535	}
536	else{
537	fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
538	#ifndef LINSOLVERS_RETAIN_MEMORY
539	free(buf);
540	#endif
541
542	return 0;
543	}
544	}
545
546	/* ... solve the linear system L^T x = y */
547	TRTRS("L", "T", "N", (int )&m, (int )&nrhs, a, (int )&m, x, (int )&m, &info);
548	/* error treatment */
549	if(info!=0){
550	if(info<0){
551	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) "in ", AX_EQ_B_CHOL) "()\n", -info);
552	exit(1);
553	}
554	else{
555	fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
556	#ifndef LINSOLVERS_RETAIN_MEMORY
557	free(buf);
558	#endif
559
560	return 0;
561	}
562	}
563	#endif /* 0 */
564
565	#ifndef LINSOLVERS_RETAIN_MEMORY
566	free(buf);
567	#endif
568
569	return 1;
570	}
571
572	#ifdef HAVE_PLASMA
573
574	/* Linear algebra using PLASMA parallel library for multicore CPUs.
575	* http://icl.cs.utk.edu/plasma/
576	*
577	* WARNING: BLAS multithreading should be disabled, e.g. setenv MKL_NUM_THREADS 1
578	*/
579
580	#ifndef _LM_PLASMA_MISC_
581	/* avoid multiple inclusion of helper code */
582	#define _LM_PLASMA_MISC_
583
584	#include <plasma.h>
585	#include <cblas.h>
586	#include <lapacke.h>
587	#include <plasma_tmg.h>
588	#include <core_blas.h>
589
590	/* programmatically determine the number of cores on the current machine */
591	#ifdef _WIN32
592	#include <windows.h>
593	#elif __linux
594	#include <unistd.h>
595	#endif
596	static int getnbcores()
597	{
598	#ifdef _WIN32
599	SYSTEM_INFO sysinfo;
600	GetSystemInfo(&sysinfo);
601	return sysinfo.dwNumberOfProcessors;
602	#elif __linux
603	return sysconf(_SC_NPROCESSORS_ONLN);
604	#else // unknown system
605	return 2<<1; // will be halved by right shift below
606	#endif
607	}
608
609	static int PLASMA_ncores=-(getnbcores()>>1); // >0 if PLASMA initialized, <0 otherwise
610
611	/* user-specified number of cores */
612	void levmar_PLASMA_setnbcores(int cores)
613	{
614	PLASMA_ncores=(cores>0)? -cores : ((cores)? cores : -2);
615	}
616	#endif /* _LM_PLASMA_MISC_ */
617
618	/*
619	* This function returns the solution of Ax=b
620	*
621	* The function assumes that A is symmetric & positive definite and employs the
622	* Cholesky decomposition implemented by PLASMA for homogeneous multicore processors.
623	*
624	* A is mxm, b is mx1
625	*
626	* The function returns 0 in case of error, 1 if successfull
627	*
628	* This function is often called repetitively to solve problems of identical
629	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
630	* retained between calls and free'd-malloc'ed when not of the appropriate size.
631	* A call with NULL as the first argument forces this memory to be released.
632	*/
633	int AX_EQ_B_PLASMA_CHOL(LM_REAL A, LM_REAL B, LM_REAL *x, int m)
634	{
635	__STATIC__ LM_REAL *buf=NULL;
636	__STATIC__ int buf_sz=0;
637
638	LM_REAL *a;
639	int a_sz, tot_sz;
640	int info, nrhs=1;
641
642	if(A==NULL){
643	#ifdef LINSOLVERS_RETAIN_MEMORY
644	if(buf) free(buf);
645	buf=NULL;
646	buf_sz=0;
647	#endif /* LINSOLVERS_RETAIN_MEMORY */
648
649	PLASMA_Finalize();
650	PLASMA_ncores=-PLASMA_ncores;
651
652	return 1;
653	}
654
655	/* calculate required memory size */
656	a_sz=m*m;
657	tot_sz=a_sz;
658
659	#ifdef LINSOLVERS_RETAIN_MEMORY
660	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
661	if(buf) free(buf); /* free previously allocated memory */
662
663	buf_sz=tot_sz;
664	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
665	if(!buf){
666	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
667	exit(1);
668	}
669	}
670	#else
671	buf_sz=tot_sz;
672	buf=(LM_REAL )malloc(buf_szsizeof(LM_REAL));
673	if(!buf){
674	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
675	exit(1);
676	}
677	#endif /* LINSOLVERS_RETAIN_MEMORY */
678
679	a=buf;
680
681	/* store A into a and B into x; A is assumed to be symmetric,
682	* hence no transposition is needed
683	*/
684	memcpy(a, A, a_sz*sizeof(LM_REAL));
685	memcpy(x, B, m*sizeof(LM_REAL));
686
687	/* initialize PLASMA */
688	if(PLASMA_ncores<0){
689	PLASMA_ncores=-PLASMA_ncores;
690	PLASMA_Init(PLASMA_ncores);
691	fprintf(stderr, RCAT("\n", AX_EQ_B_PLASMA_CHOL) "(): PLASMA is running on %d cores.\n\n", PLASMA_ncores);
692	}
693
694	/* Solve the linear system */
695	info=PLASMA_POSV(PlasmaLower, m, 1, a, m, x, m);
696	/* error treatment */
697	if(info!=0){
698	if(info<0){
699	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", PLASMA_POSV) " in ",
700	AX_EQ_B_PLASMA_CHOL) "()\n", -info);
701	exit(1);
702	}
703	else{
704	fprintf(stderr, RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\n"
705	"the factorization could not be completed for ", PLASMA_POSV) " in ", AX_EQ_B_CHOL) "()\n", info);
706	#ifndef LINSOLVERS_RETAIN_MEMORY
707	free(buf);
708	#endif
709	return 0;
710	}
711	}
712
713	#ifndef LINSOLVERS_RETAIN_MEMORY
714	free(buf);
715	#endif
716
717	return 1;
718	}
719	#endif /* HAVE_PLASMA */
720
721	/*
722	* This function returns the solution of Ax = b
723	*
724	* The function employs LU decomposition:
725	* If A=L U with L lower and U upper triangular, then the original system
726	* amounts to solving
727	* L y = b, U x = y
728	*
729	* A is mxm, b is mx1
730	*
731	* The function returns 0 in case of error, 1 if successful
732	*
733	* This function is often called repetitively to solve problems of identical
734	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
735	* retained between calls and free'd-malloc'ed when not of the appropriate size.
736	* A call with NULL as the first argument forces this memory to be released.
737	*/
738	int AX_EQ_B_LU(LM_REAL A, LM_REAL B, LM_REAL *x, int m)
739	{
740	__STATIC__ LM_REAL *buf=NULL;
741	__STATIC__ int buf_sz=0;
742
743	int a_sz, ipiv_sz, tot_sz;
744	register int i, j;
745	int info, *ipiv, nrhs=1;
746	LM_REAL *a;
747
748	if(!A)
749	#ifdef LINSOLVERS_RETAIN_MEMORY
750	{
751	if(buf) free(buf);
752	buf=NULL;
753	buf_sz=0;
754
755	return 1;
756	}
757	#else
758	return 1; /* NOP */
759	#endif /* LINSOLVERS_RETAIN_MEMORY */
760
761	/* calculate required memory size */
762	ipiv_sz=m;
763	a_sz=m*m;
764	tot_sz=a_szsizeof(LM_REAL) + ipiv_szsizeof(int); /* should be arranged in that order for proper doubles alignment */
765
766	#ifdef LINSOLVERS_RETAIN_MEMORY
767	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
768	if(buf) free(buf); /* free previously allocated memory */
769
770	buf_sz=tot_sz;
771	buf=(LM_REAL *)malloc(buf_sz);
772	if(!buf){
773	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
774	exit(1);
775	}
776	}
777	#else
778	buf_sz=tot_sz;
779	buf=(LM_REAL *)malloc(buf_sz);
780	if(!buf){
781	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
782	exit(1);
783	}
784	#endif /* LINSOLVERS_RETAIN_MEMORY */
785
786	a=buf;
787	ipiv=(int *)(a+a_sz);
788
789	/* store A (column major!) into a and B into x */
790	for(i=0; i<m; i++){
791	for(j=0; j<m; j++)
792	a[i+jm]=A[im+j];
793
794	x[i]=B[i];
795	}
796
797	/* LU decomposition for A */
798	GETRF((int )&m, (int )&m, a, (int )&m, ipiv, (int )&info);
799	if(info!=0){
800	if(info<0){
801	fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info);
802	exit(1);
803	}
804	else{
805	fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n");
806	#ifndef LINSOLVERS_RETAIN_MEMORY
807	free(buf);
808	#endif
809
810	return 0;
811	}
812	}
813
814	/* solve the system with the computed LU */
815	GETRS("N", (int )&m, (int )&nrhs, a, (int )&m, ipiv, x, (int )&m, (int *)&info);
816	if(info!=0){
817	if(info<0){
818	fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info);
819	exit(1);
820	}
821	else{
822	fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n");
823	#ifndef LINSOLVERS_RETAIN_MEMORY
824	free(buf);
825	#endif
826
827	return 0;
828	}
829	}
830
831	#ifndef LINSOLVERS_RETAIN_MEMORY
832	free(buf);
833	#endif
834
835	return 1;
836	}
837
838	/*
839	* This function returns the solution of Ax = b
840	*
841	* The function is based on SVD decomposition:
842	* If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
843	* (U D V^T) x = b or x=V D^{-1} U^T b
844	* Note that V D^{-1} U^T is the pseudoinverse A^+
845	*
846	* A is mxm, b is mx1.
847	*
848	* The function returns 0 in case of error, 1 if successful
849	*
850	* This function is often called repetitively to solve problems of identical
851	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
852	* retained between calls and free'd-malloc'ed when not of the appropriate size.
853	* A call with NULL as the first argument forces this memory to be released.
854	*/
855	int AX_EQ_B_SVD(LM_REAL A, LM_REAL B, LM_REAL *x, int m)
856	{
857	__STATIC__ LM_REAL *buf=NULL;
858	__STATIC__ int buf_sz=0;
859	static LM_REAL eps=LM_CNST(-1.0);
860
861	register int i, j;
862	LM_REAL a, u, s, vt, *work;
863	int a_sz, u_sz, s_sz, vt_sz, tot_sz;
864	LM_REAL thresh, one_over_denom;
865	register LM_REAL sum;
866	int info, rank, worksz, *iwork, iworksz;
867
868	if(!A)
869	#ifdef LINSOLVERS_RETAIN_MEMORY
870	{
871	if(buf) free(buf);
872	buf=NULL;
873	buf_sz=0;
874
875	return 1;
876	}
877	#else
878	return 1; /* NOP */
879	#endif /* LINSOLVERS_RETAIN_MEMORY */
880
881	/* calculate required memory size */
882	#if 1 /* use optimal size */
883	worksz=-1; // workspace query. Keep in mind that GESDD requires more memory than GESVD
884	/* note that optimal work size is returned in thresh */
885	GESVD("A", "A", (int )&m, (int )&m, NULL, (int )&m, NULL, NULL, (int )&m, NULL, (int )&m, (LM_REAL )&thresh, (int *)&worksz, &info);
886	//GESDD("A", (int )&m, (int )&m, NULL, (int )&m, NULL, NULL, (int )&m, NULL, (int )&m, (LM_REAL )&thresh, (int *)&worksz, NULL, &info);
887	worksz=(int)thresh;
888	#else /* use minimum size */
889	worksz=5*m; // min worksize for GESVD
890	//worksz=m(7m+4); // min worksize for GESDD
891	#endif
892	iworksz=8*m;
893	a_sz=m*m;
894	u_sz=mm; s_sz=m; vt_sz=mm;
895
896	tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)sizeof(LM_REAL) + iworkszsizeof(int); /* should be arranged in that order for proper doubles alignment */
897
898	#ifdef LINSOLVERS_RETAIN_MEMORY
899	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
900	if(buf) free(buf); /* free previously allocated memory */
901
902	buf_sz=tot_sz;
903	buf=(LM_REAL *)malloc(buf_sz);
904	if(!buf){
905	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
906	exit(1);
907	}
908	}
909	#else
910	buf_sz=tot_sz;
911	buf=(LM_REAL *)malloc(buf_sz);
912	if(!buf){
913	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
914	exit(1);
915	}
916	#endif /* LINSOLVERS_RETAIN_MEMORY */
917
918	a=buf;
919	u=a+a_sz;
920	s=u+u_sz;
921	vt=s+s_sz;
922	work=vt+vt_sz;
923	iwork=(int *)(work+worksz);
924
925	/* store A (column major!) into a */
926	for(i=0; i<m; i++)
927	for(j=0; j<m; j++)
928	a[i+jm]=A[im+j];
929
930	/* SVD decomposition of A */
931	GESVD("A", "A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, &info);
932	//GESDD("A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, iwork, &info);
933
934	/* error treatment */
935	if(info!=0){
936	if(info<0){
937	fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", -info);
938	exit(1);
939	}
940	else{
941	fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%d]\n", info);
942	#ifndef LINSOLVERS_RETAIN_MEMORY
943	free(buf);
944	#endif
945
946	return 0;
947	}
948	}
949
950	if(eps<0.0){
951	LM_REAL aux;
952
953	/* compute machine epsilon */
954	for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
955	;
956	eps*=LM_CNST(2.0);
957	}
958
959	/* compute the pseudoinverse in a */
960	for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */
961	for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
962	one_over_denom=LM_CNST(1.0)/s[rank];
963
964	for(j=0; j<m; j++)
965	for(i=0; i<m; i++)
966	a[im+j]+=vt[rank+im]u[j+rankm]*one_over_denom;
967	}
968
969	/* compute A^+ b in x */
970	for(i=0; i<m; i++){
971	for(j=0, sum=0.0; j<m; j++)
972	sum+=a[im+j]B[j];
973	x[i]=sum;
974	}
975
976	#ifndef LINSOLVERS_RETAIN_MEMORY
977	free(buf);
978	#endif
979
980	return 1;
981	}
982
983	/*
984	* This function returns the solution of Ax = b for a real symmetric matrix A
985	*
986	* The function is based on LDLT factorization with the pivoting
987	* strategy of Bunch and Kaufman:
988	* A is factored as LDL^T where L is lower triangular and
989	* D symmetric and block diagonal (aka spectral decomposition,
990	* Banachiewicz factorization, modified Cholesky factorization)
991	*
992	* A is mxm, b is mx1.
993	*
994	* The function returns 0 in case of error, 1 if successfull
995	*
996	* This function is often called repetitively to solve problems of identical
997	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
998	* retained between calls and free'd-malloc'ed when not of the appropriate size.
999	* A call with NULL as the first argument forces this memory to be released.
1000	*/
1001	int AX_EQ_B_BK(LM_REAL A, LM_REAL B, LM_REAL *x, int m)
1002	{
1003	__STATIC__ LM_REAL *buf=NULL;
1004	__STATIC__ int buf_sz=0, nb=0;
1005
1006	LM_REAL a, work;
1007	int a_sz, ipiv_sz, work_sz, tot_sz;
1008	int info, *ipiv, nrhs=1;
1009
1010	if(!A)
1011	#ifdef LINSOLVERS_RETAIN_MEMORY
1012	{
1013	if(buf) free(buf);
1014	buf=NULL;
1015	buf_sz=0;
1016
1017	return 1;
1018	}
1019	#else
1020	return 1; /* NOP */
1021	#endif /* LINSOLVERS_RETAIN_MEMORY */
1022
1023	/* calculate required memory size */
1024	ipiv_sz=m;
1025	a_sz=m*m;
1026	if(!nb){
1027	LM_REAL tmp;
1028
1029	work_sz=-1; // workspace query; optimal size is returned in tmp
1030	SYTRF("L", (int )&m, NULL, (int )&m, NULL, (LM_REAL )&tmp, (int )&work_sz, (int *)&info);
1031	nb=((int)tmp)/m; // optimal worksize is m*nb
1032	}
1033	work_sz=(nb!=-1)? nb*m : 1;
1034	tot_sz=(a_sz + work_sz)sizeof(LM_REAL) + ipiv_szsizeof(int); /* should be arranged in that order for proper doubles alignment */
1035
1036	#ifdef LINSOLVERS_RETAIN_MEMORY
1037	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
1038	if(buf) free(buf); /* free previously allocated memory */
1039
1040	buf_sz=tot_sz;
1041	buf=(LM_REAL *)malloc(buf_sz);
1042	if(!buf){
1043	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
1044	exit(1);
1045	}
1046	}
1047	#else
1048	buf_sz=tot_sz;
1049	buf=(LM_REAL *)malloc(buf_sz);
1050	if(!buf){
1051	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
1052	exit(1);
1053	}
1054	#endif /* LINSOLVERS_RETAIN_MEMORY */
1055
1056	a=buf;
1057	work=a+a_sz;
1058	ipiv=(int *)(work+work_sz);
1059
1060	/* store A into a and B into x; A is assumed to be symmetric, hence
1061	* the column and row major order representations are the same
1062	*/
1063	memcpy(a, A, a_sz*sizeof(LM_REAL));
1064	memcpy(x, B, m*sizeof(LM_REAL));
1065
1066	/* LDLt factorization for A */
1067	SYTRF("L", (int )&m, a, (int )&m, ipiv, work, (int )&work_sz, (int )&info);
1068	if(info!=0){
1069	if(info<0){
1070	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRF) " in ", AX_EQ_B_BK) "()\n", -info);
1071	exit(1);
1072	}
1073	else{
1074	fprintf(stderr, RCAT(RCAT("LAPACK error: singular block diagonal matrix D for", SYTRF) " in ", AX_EQ_B_BK)"() [D(%d, %d) is zero]\n", info, info);
1075	#ifndef LINSOLVERS_RETAIN_MEMORY
1076	free(buf);
1077	#endif
1078
1079	return 0;
1080	}
1081	}
1082
1083	/* solve the system with the computed factorization */
1084	SYTRS("L", (int )&m, (int )&nrhs, a, (int )&m, ipiv, x, (int )&m, (int *)&info);
1085	if(info<0){
1086	fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRS) " in ", AX_EQ_B_BK) "()\n", -info);
1087	exit(1);
1088	}
1089
1090	#ifndef LINSOLVERS_RETAIN_MEMORY
1091	free(buf);
1092	#endif
1093
1094	return 1;
1095	}
1096
1097	/* undefine all. IT MUST REMAIN IN THIS POSITION IN FILE */
1098	#undef AX_EQ_B_QR
1099	#undef AX_EQ_B_QRLS
1100	#undef AX_EQ_B_CHOL
1101	#undef AX_EQ_B_LU
1102	#undef AX_EQ_B_SVD
1103	#undef AX_EQ_B_BK
1104	#undef AX_EQ_B_PLASMA_CHOL
1105
1106	#undef GEQRF
1107	#undef ORGQR
1108	#undef TRTRS
1109	#undef POTF2
1110	#undef POTRF
1111	#undef POTRS
1112	#undef GETRF
1113	#undef GETRS
1114	#undef GESVD
1115	#undef GESDD
1116	#undef SYTRF
1117	#undef SYTRS
1118	#undef PLASMA_POSV
1119
1120	#else // no LAPACK
1121
1122	/* precision-specific definitions */
1123	#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack)
1124
1125	/*
1126	* This function returns the solution of Ax = b
1127	*
1128	* The function employs LU decomposition followed by forward/back substitution (see
1129	* also the LAPACK-based LU solver above)
1130	*
1131	* A is mxm, b is mx1
1132	*
1133	* The function returns 0 in case of error, 1 if successful
1134	*
1135	* This function is often called repetitively to solve problems of identical
1136	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
1137	* retained between calls and free'd-malloc'ed when not of the appropriate size.
1138	* A call with NULL as the first argument forces this memory to be released.
1139	*/
1140	int AX_EQ_B_LU(LM_REAL A, LM_REAL B, LM_REAL *x, int m)
1141	{
1142	__STATIC__ void *buf=NULL;
1143	__STATIC__ int buf_sz=0;
1144
1145	register int i, j, k;
1146	int *idx, maxi=-1, idx_sz, a_sz, work_sz, tot_sz;
1147	LM_REAL a, work, max, sum, tmp;
1148
1149	if(!A)
1150	#ifdef LINSOLVERS_RETAIN_MEMORY
1151	{
1152	if(buf) free(buf);
1153	buf=NULL;
1154	buf_sz=0;
1155
1156	return 1;
1157	}
1158	#else
1159	return 1; /* NOP */
1160	#endif /* LINSOLVERS_RETAIN_MEMORY */
1161
1162	/* calculate required memory size */
1163	idx_sz=m;
1164	a_sz=m*m;
1165	work_sz=m;
1166	tot_sz=(a_sz+work_sz)sizeof(LM_REAL) + idx_szsizeof(int); /* should be arranged in that order for proper doubles alignment */
1167
1168	#ifdef LINSOLVERS_RETAIN_MEMORY
1169	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
1170	if(buf) free(buf); /* free previously allocated memory */
1171
1172	buf_sz=tot_sz;
1173	buf=(void *)malloc(tot_sz);
1174	if(!buf){
1175	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
1176	exit(1);
1177	}
1178	}
1179	#else
1180	buf_sz=tot_sz;
1181	buf=(void *)malloc(tot_sz);
1182	if(!buf){
1183	fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
1184	exit(1);
1185	}
1186	#endif /* LINSOLVERS_RETAIN_MEMORY */
1187
1188	a=buf;
1189	work=a+a_sz;
1190	idx=(int *)(work+work_sz);
1191
1192	/* avoid destroying A, B by copying them to a, x resp. */
1193	memcpy(a, A, a_sz*sizeof(LM_REAL));
1194	memcpy(x, B, m*sizeof(LM_REAL));
1195
1196	/* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
1197	for(i=0; i<m; ++i){
1198	max=0.0;
1199	for(j=0; j<m; ++j)
1200	if((tmp=FABS(a[i*m+j]))>max)
1201	max=tmp;
1202	if(max==0.0){
1203	fprintf(stderr, RCAT("Singular matrix A in ", AX_EQ_B_LU) "()!\n");
1204	#ifndef LINSOLVERS_RETAIN_MEMORY
1205	free(buf);
1206	#endif
1207
1208	return 0;
1209	}
1210	work[i]=LM_CNST(1.0)/max;
1211	}
1212
1213	for(j=0; j<m; ++j){
1214	for(i=0; i<j; ++i){
1215	sum=a[i*m+j];
1216	for(k=0; k<i; ++k)
1217	sum-=a[im+k]a[k*m+j];
1218	a[i*m+j]=sum;
1219	}
1220	max=0.0;
1221	for(i=j; i<m; ++i){
1222	sum=a[i*m+j];
1223	for(k=0; k<j; ++k)
1224	sum-=a[im+k]a[k*m+j];
1225	a[i*m+j]=sum;
1226	if((tmp=work[i]*FABS(sum))>=max){
1227	max=tmp;
1228	maxi=i;
1229	}
1230	}
1231	if(j!=maxi){
1232	for(k=0; k<m; ++k){
1233	tmp=a[maxi*m+k];
1234	a[maxim+k]=a[jm+k];
1235	a[j*m+k]=tmp;
1236	}
1237	work[maxi]=work[j];
1238	}
1239	idx[j]=maxi;
1240	if(a[j*m+j]==0.0)
1241	a[j*m+j]=LM_REAL_EPSILON;
1242	if(j!=m-1){
1243	tmp=LM_CNST(1.0)/(a[j*m+j]);
1244	for(i=j+1; i<m; ++i)
1245	a[im+j]=tmp;
1246	}
1247	}
1248
1249	/* The decomposition has now replaced a. Solve the linear system using
1250	* forward and back substitution
1251	*/
1252	for(i=k=0; i<m; ++i){
1253	j=idx[i];
1254	sum=x[j];
1255	x[j]=x[i];
1256	if(k!=0)
1257	for(j=k-1; j<i; ++j)
1258	sum-=a[im+j]x[j];
1259	else
1260	if(sum!=0.0)
1261	k=i+1;
1262	x[i]=sum;
1263	}
1264
1265	for(i=m-1; i>=0; --i){
1266	sum=x[i];
1267	for(j=i+1; j<m; ++j)
1268	sum-=a[im+j]x[j];
1269	x[i]=sum/a[i*m+i];
1270	}
1271
1272	#ifndef LINSOLVERS_RETAIN_MEMORY
1273	free(buf);
1274	#endif
1275
1276	return 1;
1277	}
1278
1279	/* undefine all. IT MUST REMAIN IN THIS POSITION IN FILE */
1280	#undef AX_EQ_B_LU
1281
1282	#endif /* HAVE_LAPACK */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/levmar/src/Axb_core.c@ 7516f6

Download in other formats: