/** \file mymath.c
 * Linear algebra mathematical routines.
 * Small library of often needed mathematical routines such as hard-coded
 * vector VP3(), scalar SP(), matrix products RMat33Vec3(), RMatMat33(), RVec3Mat33(),
 * multiplication with scalar SM(), Euclidean distance Dist(), inverse RMatReci3(),
 * transposed RTranspose3(), modulo Rest(), nullifying NV(), SetArrayToDouble0(),
 * gamma function gammln(), gaussian error function derf(), integration via
 * Simpsons Rule Simps().\n
 * Also contains routines for printing matrices PrintCMat330(), PrintRMat330() and vectors
 * PrintCVec30(), PrintRVec30() to stderr.\n
 * All specialized for 3x3 real or complex ones.\n
 * Rather specialized is RotateToAlign() which is needed in transforming the whole coordinate
 * system in order to align a certain vector.
 * 
  Project: ParallelCarParrinello
 \author Jan Hamaekers
 \date 2000

  File: mymath.c
  $Id: mymath.c,v 1.25 2007-03-29 13:38:30 foo Exp $
*/

#include<stdlib.h>
#include<stdio.h>
#include<stddef.h>
#include<math.h>
#include<string.h>
#include"mymath.h"

// use double precision fft when we have it
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef HAVE_DFFTW_H
#include "dfftw.h"
#else
#include "fftw.h"
#endif

#ifdef HAVE_GSL_GSL_SF_ERF_H
#include "gsl/gsl_sf_erf.h"
#endif


/** Efficiently compute x^n by binary exponentiation (square and multiply).
 * The magnitude of the exponent is kept in an unsigned integer, so that
 * \a n == INT_MIN (whose negation does not fit into an int) is handled as well.
 * \param x base
 * \param n integer exponent, may be negative
 * \return \f$x^n\f$; for negative \a n this is \f$1/x^{|n|}\f$
 */
#ifdef HAVE_INLINE
inline double tpow(double x, int n)
#else
double tpow(double x, int n)
#endif
{
  double y = 1;
  const int neg = (n < 0);
  /* 0u - (unsigned)n yields |n| even for INT_MIN; negating the int itself would overflow */
  unsigned int e = neg ? 0u - (unsigned int)n : (unsigned int)n;

  while (e) {
    if (e & 1u) y *= x;   /* current bit set: multiply the matching power of x in */
    x *= x;               /* x, x^2, x^4, x^8, ... */
    e >>= 1;
  }
  return neg ? 1.0/y : y;
}


/** Modulo function with non-negative result.
 * Behaves like the C operator \c %, but a negative remainder is shifted by
 * |\a m| so that the result lies in [0, |m|-1] for any sign of \a n and \a m.
 * \param n dividend
 * \param m divisor, must not be 0
 * \return \a n modulo |\a m|, always >= 0
 */
#ifdef HAVE_INLINE
inline int Rest(int n, int m) /* plain modulo function, result >= 0 */
#else
int Rest(int n, int m) /* plain modulo function, result >= 0 */
#endif
{
  const int q = n%m;
  if (q >= 0) return (q);
  /* q < 0 and |q| < |m|: add |m|; subtracting a negative m avoids negating it */
  return (m < 0) ? (q - m) : (q + m);
}

/* Calculations */

/** Real 3x3 inverse of matrix.
 * Every entry of \a B is a signed RDET2() term built from four entries of
 * \a A, scaled by 1/RDET3(A) (RDET2/RDET3 come from mymath.h and are presumably
 * the 2x2 and 3x3 determinants, i.e. this is the adjugate formula).
 * \a B is written entry by entry while \a A is still read, so the two arrays
 * must not be the same.
 * \param B	inverse matrix array (set by function, untouched on error)
 * \param A matrix array to be inverted
 * \return 1 - error: RDET3(A) is exactly zero, 0 - success
 */
#ifdef HAVE_INLINE
inline int RMatReci3(double B[NDIM_NDIM], const double A[NDIM_NDIM])
#else
int RMatReci3(double B[NDIM_NDIM], const double A[NDIM_NDIM])
#endif
{
  const double det = RDET3(A);
  double scale;
  if (det == 0.0)
    return 1;  // RDET3(A) yields precisely zero if A irregular
  scale = 1./det;
  B[0] =  scale*RDET2(A[4],A[5],A[7],A[8]);		// A_11
  B[1] = -scale*RDET2(A[1],A[2],A[7],A[8]);		// A_12
  B[2] =  scale*RDET2(A[1],A[2],A[4],A[5]);		// A_13
  B[3] = -scale*RDET2(A[3],A[5],A[6],A[8]);		// A_21
  B[4] =  scale*RDET2(A[0],A[2],A[6],A[8]);		// A_22
  B[5] = -scale*RDET2(A[0],A[2],A[3],A[5]);		// A_23
  B[6] =  scale*RDET2(A[3],A[4],A[6],A[7]);		// A_31
  B[7] = -scale*RDET2(A[0],A[1],A[6],A[7]);		// A_32
  B[8] =  scale*RDET2(A[0],A[1],A[3],A[4]);		// A_33
  return 0;
}

/** Real 3x3 Matrix multiplication.
 * Computes C[3*c+r] = A[r]*B[3*c] + A[r+3]*B[3*c+1] + A[r+6]*B[3*c+2] for
 * r,c = 0..2, i.e. matrix1 * matrix2 when entry (row r, column c) is stored at
 * index 3*c+r.
 * \a C is filled while \a A and \a B are still read, so it must not alias them.
 * \param C	product matrix
 * \param A matrix1 array
 * \param B matrix2 array
 */
#ifdef HAVE_INLINE
inline void RMatMat33(double C[NDIM*NDIM], const double A[NDIM*NDIM], const double B[NDIM*NDIM])
#else
void RMatMat33(double C[NDIM*NDIM], const double A[NDIM*NDIM], const double B[NDIM*NDIM])
#endif
{
  int c;
  /* one block of three consecutive entries of C (and B) per pass; stride is fixed to 3 */
  for (c = 0; c < 3; c++) {
    const double *b = &B[3*c];
    double *out = &C[3*c];
    out[0] = A[0]*b[0]+A[3]*b[1]+A[6]*b[2];
    out[1] = A[1]*b[0]+A[4]*b[1]+A[7]*b[2];
    out[2] = A[2]*b[0]+A[5]*b[1]+A[8]*b[2];
  }
}

/** Real 3x3 Matrix vector multiplication.
 * Computes C[r] = M[r]*V[0] + M[r+3]*V[1] + M[r+6]*V[2] for r = 0..2.
 * \a C is written component by component while \a V is still read, so
 * \a C must not alias \a V.
 * \param C resulting vector
 * \param M	matrix array
 * \param V vector array
 */
#ifdef HAVE_INLINE
inline void RMat33Vec3(double C[NDIM], const double M[NDIM*NDIM], const double V[NDIM])
#else
void RMat33Vec3(double C[NDIM], const double M[NDIM*NDIM], const double V[NDIM])
#endif
{
  int r;
  for (r = 0; r < 3; r++) {
    /* entries r, r+3, r+6 of M are combined with V[0], V[1], V[2] */
    C[r] = M[r]*V[0]+M[r+3]*V[1]+M[r+6]*V[2];
  }
}

/** Real 3x3 vector Matrix multiplication.
 * Computes C[c] = V[0]*M[3*c] + V[1]*M[3*c+1] + V[2]*M[3*c+2] for c = 0..2.
 * \a C is written component by component while \a V is still read, so
 * \a C must not alias \a V.
 * \param C resulting vector
 * \param V vector array
 * \param M	matrix array
 */
#ifdef HAVE_INLINE
inline void RVec3Mat33(double C[NDIM], const double V[NDIM], const double M[NDIM*NDIM])
#else
void RVec3Mat33(double C[NDIM], const double V[NDIM], const double M[NDIM*NDIM])
#endif
{
  int c;
  for (c = 0; c < 3; c++) {
    const double *m = &M[3*c];   /* three consecutive entries of M per result component */
    C[c] = V[0]*m[0]+V[1]*m[1]+V[2]*m[2];
  }
}

/** Real 3-dim vector product (cross product).
 * Computes V = A x B, i.e. V[i] = A[j]*B[k] - A[k]*B[j] with (i,j,k) running
 * through the cyclic permutations of (0,1,2).
 * \a V is written component by component while \a A and \a B are still read,
 * so \a V must not alias either of them.
 * \param V	resulting orthogonal vector
 * \param A vector1 array
 * \param B vector2 array
 */
#ifdef HAVE_INLINE
inline void VP3(double V[NDIM], double A[NDIM], double B[NDIM])
#else
void VP3(double V[NDIM], double A[NDIM], double B[NDIM])
#endif
{
  int i;
  for (i = 0; i < 3; i++) {
    const int j = (i+1)%3;   /* cyclic successor of i */
    const int k = (i+2)%3;   /* cyclic successor of j */
    V[i] = A[j]*B[k]-A[k]*B[j];
  }
}

/** Real transposition of 3x3 Matrix.
 * Transposes in place by exchanging the three off-diagonal index pairs
 * (1,3), (2,6) and (5,7); the diagonal entries 0, 4 and 8 stay where they are.
 * \param *A Matrix (changed)
 */
#ifdef HAVE_INLINE
inline void RTranspose3(double *A)
#else
void RTranspose3(double *A)
#endif
{
  /* index pairs that are mirror images of each other across the diagonal */
  const int mirror[3][2] = { {1, 3}, {2, 6}, {5, 7} };
  int k;
  for (k = 0; k < 3; k++) {
    const int lo = mirror[k][0];
    const int hi = mirror[k][1];
    const double keep = A[lo];
    A[lo] = A[hi];
    A[hi] = keep;
  }
}

/** Scalar product.
 * Accumulates a[0]*b[0] + a[1]*b[1] + ... in ascending index order.
 * \param *a first vector
 * \param *b second vector
 * \param n dimension
 * \return scalar product of a with b, 0 if \a n <= 0
 */
#ifdef HAVE_INLINE
inline double SP(const double *a, const double *b, const int n)
#else
double SP(const double *a, const double *b, const int n)
#endif
{
  double acc = 0;
  int k = 0;
  while (k < n) {
    acc += a[k] * b[k];
    k++;
  }
  return acc;
}

/** Euclidian distance.
 * \param *a first vector
 * \param *b second vector
 * \param n dimension
 * \return sqrt(a-b)
 */
#ifdef HAVE_INLINE
inline double Dist(const double *a, const double *b, const int n)
#else
double Dist(const double *a, const double *b, const int n)
#endif
{
  int i;
  double dummyDist = 0;
  for (i = 0; i < n; i++) {
    dummyDist += (a[i]-b[i])*(a[i]-b[i]);
  }
  return (sqrt(dummyDist));
}
     

/** Multiplication with real scalar.
 * Scales the first \a n components of \a a in place.
 * \param *a vector	(changed)
 * \param c scalar
 * \param n dimension
 */
#ifdef HAVE_INLINE
inline void SM(double *a, const double c, const int n)
#else
void SM(double *a, const double c, const int n)
#endif
{
  int k = 0;
  while (k < n) {
    a[k] *= c;
    ++k;
  }
}

/** Nullify vector.
 * Sets the first \a n components of vector \a a to zero.
 * \param *a vector (changed)
 * \param n dimension
 */
#ifdef HAVE_INLINE
inline void NV(double *a, const int n)
#else
void NV(double *a, const int n)
#endif
{
  int k = 0;
  if (n <= 0) return;   /* nothing to clear */
  do {
    a[k] = 0;
  } while (++k < n);
}

/** Differential step sum.
 * Sums up \a n entries of array \a dx, taking every \a incx -th one:
 * dx[0] + dx[incx] + ... + dx[(n-1)*incx].
 * For \a incx <= 0 only dx[0] is returned.
 * \param n number of steps
 * \param *dx incremental value array
 * \param incx step width
 * \return sum of the selected entries, 0 if \a n <= 0
 * \sa Simps
 */
#ifdef HAVE_INLINE
inline double dSum(int n, double *dx, int incx)
#else
double dSum(int n, double *dx, int incx)
#endif
{
  double total;
  int idx;
  int past;   /* first index beyond the last entry to be added */
  if (n <= 0)
    return 0.0;
  total = dx[0];
  past = n*incx;
  for (idx = incx; idx < past; idx += incx) {
    total += dx[idx];
  }
  return total;
}

/** Simpson formula for integration.
 * \a f is replaced piecewise by polynomials of 2nd degree in order
 * to approximate the integral (composite Simpson rule).
 * The rule needs an odd number of sampling points; for even \a n a message
 * is written to stderr and the formula is still evaluated as it stands.
 * For \a n < 1 there are no samples: a message is written and 0 is returned
 * without touching \a f.
 * \param n number of sampling points f[0] ... f[n-1], should be odd
 * \param *f function value array
 * \param h spacing between two neighbouring sampling points
 * \return \f$\int_a^b f(x) dx \approx \frac{h}{3} (y_0 + 4 y_1 + 2 y_2 + 4 y_3 + ... + 2 y_{n-3} + 4 y_{n-2} + y_{n-1})\f$
 * \sa dSum() - used by this function.
 */
#ifdef HAVE_INLINE
inline double Simps(int n, double *f, double h)
#else
double Simps(int n, double *f, double h)
#endif
{
  double res;
  const int nm12=(n-1)/2;   /* number of odd-indexed (weight 4) samples */
  if (n < 1 || nm12*2 != n-1) {
    fprintf(stderr,"Simps: wrong n in Simps\n");
  }
  if (n < 1) return(0.0);   /* f[0] and f[n-1] do not exist */
  /* weight 4: f[1], f[3], ..., f[n-2]; weight 2: f[2], f[4], ..., f[n-3]; weight 1: both ends */
  res = 4.*dSum(nm12,&f[1],2)+2.*dSum(nm12-1,&f[2],2)+f[0]+f[n-1];
  return(res*h/3.);
}

/* derf */

#ifndef HAVE_GSL_GSL_SF_ERF_H
/** Logarithm of Gamma function.
 * \param xx x-value for function
 * \return ln(gamma(xx))
 * \note formula and coefficients are taken from "Numerical Receipes in C"
 */
static double gammln(double xx) {
  int j;
  double x,tmp,ser;
  double stp = 2.50662827465;
  double cof[6] = { 76.18009173,-86.50532033,24.01409822,-1.231739516,.120858003e-2,-.536382e-5 };
  x = xx -1.;
  tmp = x+5.5;
  tmp = (x+0.5)*log(tmp)-tmp;
  ser = 1.;
  for(j=0;j<6;j++) {
    x+=1.0;
    ser+=cof[j]/x;
  }
  return(tmp+log(stp*ser));
}

/** Series used by gammp().
 * Evaluates the regularized incomplete gamma function by its power series
 * \f$ e^{-x} x^a / \Gamma(a) \sum_n x^n / (a (a+1) \cdots (a+n)) \f$.
 * The summation stops as soon as a term drops below 1e-7 relative to the sum,
 * at the latest after 100 terms.
 * \param a exponent parameter (derf() uses 0.5)
 * \param x upper integration limit
 * \return value of the series; 0 for \a x <= 0. For \a x == 0 this is returned
 *         directly instead of being obtained via log(0).
 * \note formula and coefficients are taken from "Numerical Recipes in C"
 * \warning maximum precision 1e-7
 */
static double gser(double a, double x) {
  double gln,ap,sum,del;
  int n;
  if (x <= 0.) {
    return(0.0);   /* x == 0: the integral is empty; x < 0: outside the domain */
  }
  gln = gammln(a);
  ap=a;
  sum=1./a;
  del=sum;
  for (n=1;n<=100;n++) {
    ap += 1.;
    del *=x/ap;
    sum += del;
    if(fabs(del) < fabs(sum)*1.e-7) break;   /* converged */
  }
  /* also reached without convergence after 100 terms: best estimate is returned */
  return(sum*exp(-x+a*log(x)-gln));
}

/** Continued fraction used by gammp().
 * Evaluates the continued fraction whose value gammp() subtracts from 1.
 * The iteration stops once two successive approximants agree to a relative
 * 1e-7, at the latest after 100 steps.
 * \param a exponent parameter (derf() uses 0.5)
 * \param x lower integration limit; gammp() calls this only for x >= a+1
 * \return value of the continued fraction times exp(-x) x^a / gamma(a)
 * \note formula and coefficients are taken from "Numerical Recipes in C"
 * \warning maximum precision 1e-7
 */
static double gcf(double a, double x) {
  double gln = gammln(a);
  double gold = 0.0;   /* approximant of the previous step */
  double a0 = 1.;
  double a1 = x;
  double b0 = 0.;
  double b1 = 1.;
  double fac = 1.;     /* renormalization factor, keeps the recurrence from overflowing */
  double an,ana,anf,g=0.0;
  int n;
  for (n=1; n <= 100; n++) {
    an = n;
    ana = an-a;
    a0=(a1+a0*ana)*fac;
    b0=(b1+b0*ana)*fac;
    anf=an*fac;
    a1=x*a0+anf*a1;
    b1=x*b0+anf*b1;
    if(a1 != 0.) {
      fac=1./a1;
      g=b1*fac;
      if (fabs((g-gold)/g)<1.e-7) break;   /* converged */
      gold = g;   /* remember this approximant, otherwise the test above can never succeed */
    }
  }
  /* also reached without convergence after 100 steps: best estimate is returned */
  return(exp(-x+a*log(x)-gln)*g);
}

/** Incomplete gamma function.
 * Either calculated via series gser() or via continued fraction gcf()
 * Needed by derf()
 * \f[
 * 	gammp(a,x) = \frac{1}{\Gamma(a)} \int_0^x t^{a-1} \exp(-t) dt
 * \f]
 * \param a
 * \param x
 * \return f(a,x) =  (x < 1+a) ?  gser(a,x) : 1-gcf(a,x)
 * \note formula and coefficients are taken from "Numerical Recipes in C"
 */
static double gammp(double a, double x) {
  if (x < a+1.)
    return(gser(a,x));   /* small x: use the series */
  return(1.-gcf(a,x));   /* otherwise: one minus the continued fraction */
}
#endif

/** Error function of integrated normal distribution.
 * Either realized via GSL function gsl_sf_erf (when HAVE_GSL_GSL_SF_ERF_H is
 * defined) or via gammp()
 * \f[
  	erf(x) = \frac{2}{\sqrt{\pi}} \int^x_0 \exp(-t^2) dt
  				 = \pi^{-1/2} \gamma(\frac{1}{2},x^2)
 * \f]
 * \param x
 * \return f(x) = sign(x) * gammp(0.5,x^2)
 * \sa gammp
 */
#ifdef HAVE_INLINE
inline double derf(double x)
#else
double derf(double x)
#endif
{
#ifdef HAVE_GSL_GSL_SF_ERF_H
  // call gsl instead of numerical recipes routines
  return(gsl_sf_erf(x));
#else
  // gammp() only sees x^2, so the sign of x is put back afterwards
  const double magnitude = gammp(0.5,x*x);
  return((x < 0) ? -magnitude : magnitude);
#endif
}

/** Sets array to zero.
 * Writes 0.0 to the first \a n elements; does nothing for \a n <= 0.
 * \param *a pointer to the double array
 * \param n number of array elements
 */
#ifdef HAVE_INLINE
inline void SetArrayToDouble0(double *a, int n)
#else
void SetArrayToDouble0(double *a, int n)
#endif
{
  double *cursor = a;
  int left;
  for (left = n; left > 0; left--) {
    *cursor++ = 0.0;
  }
}

/** Print complex 3x3 matrix.
 * Checks if matrix has only zero entries; if not, all entries are written to
 * stderr on one line as " (re im)" each.
 * \param M matrix array
 */
void PrintCMat330(fftw_complex M[NDIM_NDIM])
{
  int k;
  int nonzero = 0;
  for (k = 0; k < NDIM_NDIM; k++) {
    if (M[k].re != 0.0 || M[k].im != 0.0) {
      nonzero = 1;
      break;   /* one non-zero entry is enough to trigger the output */
    }
  }
  if (!nonzero)
    return;
  for (k = 0; k < NDIM_NDIM; k++) {
    fprintf(stderr," (%f %f)", M[k].re, M[k].im);
  }
  fprintf(stderr,"\n");
}

/** Print real 3x3 matrix.
 * Checks if matrix has only zero entries; if not, all entries are written to
 * stderr on one line, separated by blanks.
 * \param M matrix array
 */
void PrintRMat330(fftw_real M[NDIM_NDIM])
{
  int k;
  int nonzero = 0;
  for (k = 0; k < NDIM_NDIM; k++) {
    if (M[k] != 0.0) {
      nonzero = 1;
      break;   /* one non-zero entry is enough to trigger the output */
    }
  }
  if (!nonzero)
    return;
  for (k = 0; k < NDIM_NDIM; k++) {
    fprintf(stderr," %f", M[k]);
  }
  fprintf(stderr,"\n");
}

/** Print complex 3-dim vector.
 * Checks if vector has only zero entries; if not, all components are written
 * to stderr on one line as " (re im)" each.
 * \param M vector array
 */
void PrintCVec30(fftw_complex M[NDIM])
{
  int k;
  int nonzero = 0;
  for (k = 0; k < NDIM; k++) {
    if (M[k].re != 0.0 || M[k].im != 0.0) {
      nonzero = 1;
      break;   /* one non-zero component is enough to trigger the output */
    }
  }
  if (!nonzero)
    return;
  for (k = 0; k < NDIM; k++) {
    fprintf(stderr," (%f %f)", M[k].re, M[k].im);
  }
  fprintf(stderr,"\n");
}

/** Print real 3-dim vector.
 * Checks if vector has only zero entries; if not, all components are written
 * to stderr on one line, separated by blanks.
 * \param M vector array
 */
void PrintRVec30(fftw_real M[NDIM])
{
  int k;
  int nonzero = 0;
  for (k = 0; k < NDIM; k++) {
    if (M[k] != 0.0) {
      nonzero = 1;
      break;   /* one non-zero component is enough to trigger the output */
    }
  }
  if (!nonzero)
    return;
  for (k = 0; k < NDIM; k++) {
    fprintf(stderr," %f", M[k]);
  }
  fprintf(stderr,"\n");
}

/** Rotates \a matrix, such that simultaneously given \a vector is aligned with z axis.
 * Is used to rotate the unit cell in case of an external magnetic field. This field
 * is rotated so that it aligns with z axis in order to simplify necessary perturbation
 * calculations (only one component of each perturbed wave function necessary then).
 *
 * Two elementary rotations are built from the components of \a vector: one by
 * \a alpha mixing components 0 and 1, then one by \a beta mixing components 1 and 2.
 * Both are applied via RMatMat33() to \a matrix (overwritten in place) and
 * accumulated into \a Q, which starts out as identity. Finally every entry of
 * \a matrix and \a Q whose magnitude does not exceed MYEPSILON is set to zero.
 * \a vector itself is only read.
 * \param Q return rotation matrix
 * \param matrix which is transformed under the above rotation \a Q (changed)
 * \param vector which is aligned with z axis by rotation \a Q
 * \note NOTE(review): the arrays are fftw_real but are handed to RMatMat33(), which
 *       takes double arrays - presumably fftw_real is double in this build; confirm.
 */
void RotateToAlign(fftw_real Q[NDIM_NDIM], fftw_real matrix[NDIM_NDIM], fftw_real vector[NDIM]) {
  double tmp[NDIM_NDIM], Q1[NDIM_NDIM], Qtmp[NDIM_NDIM];
  double alpha, beta, new_y;
  int i,j ;
    
  // calculate rotation angles
  // NOTE(review): the components are compared signed against MYEPSILON, so any
  // negative vector[0] selects alpha = 0 and any negative vector[1] selects PI/2;
  // confirm that this is intended and not a missing fabs().
  if (vector[0] < MYEPSILON) {
    alpha = 0;
  } else if (vector[1] > MYEPSILON) {
      alpha = atan(-vector[0]/vector[1]);
    } else alpha = PI/2; 
  // component 1 of the vector after the first rotation; it determines the second angle
  new_y = -sin(alpha)*vector[0]+cos(alpha)*vector[1];
  // NOTE(review): same signed comparison as above - a negative new_y (e.g. after
  // alpha = PI/2 with positive vector[0]) yields beta = 0; confirm.
  if (new_y < MYEPSILON) {
    beta = 0;
  } else if (vector[2] > MYEPSILON) {
      beta = atan(-new_y/vector[2]);//asin(-vector[1]/vector[2]);
    } else beta = PI/2;

  // create temporary matrix copy
  // set Q to identity
  // (the copy in tmp is overwritten by the first RMatMat33() call below before it is read)
  for (i=0;i<NDIM;i++)
    for (j=0;j<NDIM;j++) {
      Q[i*NDIM+j] = (i == j) ? 1 : 0;
      tmp[i*NDIM+j] = matrix[i*NDIM+j];
    }
  
  // construct rotation matrices
  // first rotation: angle alpha, mixes components 0 and 1, leaves component 2 untouched
  Q1[0] = cos(alpha);
  Q1[1] = sin(alpha);
  Q1[2] = 0;
  Q1[3] = -sin(alpha);
  Q1[4] = cos(alpha);
  Q1[5] = 0;
  Q1[6] = 0;
  Q1[7] = 0;
  Q1[8] = 1;
  // apply rotation and store
  // results go to the scratch arrays tmp and Qtmp, since RMatMat33() must not
  // write to an array it is still reading from
  RMatMat33(tmp,Q1,matrix);
  RMatMat33(Qtmp,Q1,Q);

  // second rotation: angle beta, mixes components 1 and 2, leaves component 0 untouched
  Q1[0] = 1;
  Q1[1] = 0;
  Q1[2] = 0;
  Q1[3] = 0;
  Q1[4] = cos(beta);
  Q1[5] = sin(beta);
  Q1[6] = 0;
  Q1[7] = -sin(beta);
  Q1[8] = cos(beta);
  // apply rotation and store
  // this time from the scratch arrays back into the caller's matrix and Q
  RMatMat33(matrix,Q1,tmp);
  RMatMat33(Q,Q1,Qtmp);

  // in order to avoid unnecessary calculations, set everything below epsilon to zero
  for (i=0;i<NDIM_NDIM;i++) {
    matrix[i] = (fabs(matrix[i]) > MYEPSILON) ? matrix[i] : 0;
    Q[i] = (fabs(Q[i]) > MYEPSILON) ? Q[i] : 0;
  }
}
