#include #include #include #include #include using namespace std; using namespace sc; extern "C" { void F77_PDSTEQR(int *n, double *d, double *e, double *z, int *ldz, int *nz, double *work, int *info); void F77_DCOPY(int *n, double *dx, int *incx, double *dy, int *incy); double F77_DNRM2(int *n, double *dx, int *incx); double F77_DDOT(int *n, double *dx, int *incx, double *dy, int *incy); void F77_DSCAL(int *n, double *da, double *dx, int *incx); void F77_DAXPY(int *n, double *da, double *dx, int *incx, double *dy, int *incy); } namespace sc { static void dist_diagonalize_(int n, int m, double *a, double *d, double *e, double *sigma, double *z, double *v, double *w, int *ind, const Ref&); static void pflip(int id,int n,int m,int p,double *ar,double *ac,double *w, const Ref&); static void ptred2_(double *a, int *lda, int *n, int *m, int *p, int *id, double *d, double *e, double *z, double *work, const Ref& grp); static void ptred_single(double *a,int *lda,int *n,int *m,int *p,int *id, double *d,double *e,double *z,double *work); static void ptred_parallel(double *a, int *lda, int *n, int *m, int *p, int *id, double *d, double *e, double *z, double *work, const Ref&); /* ******************************************************** */ /* Function of this subroutine : */ /* Diagonalize a real, symmetric matrix */ /* */ /* Parameters : */ /* n - size of the matrix */ /* m - number of locally held columns */ /* a[n][m] - locally held submatrix */ /* d[n] - returned with eigenvalues */ /* v[n][m] - returned with eigenvectors */ /* -------------------------------------------------------- */ void dist_diagonalize(int n, int m, double *a, double *d, double *v, const Ref &grp) { double *e = new double[n]; double *sigma = new double[n]; double *z = new double[n*m]; double *w = new double[3*n]; int *ind = new int[n]; dist_diagonalize_(n, m, a, d, e, sigma, z, v, w, ind, grp); delete[] e; delete[] sigma; delete[] z; delete[] w; delete[] ind; } /* ******************************************************** */ /* Function of this subroutine : */ /* Diagonalize a real, symmetric matrix */ /* */ /* Parameters : */ /* n - size of the matrix */ /* m - number of locally held columns */ /* a[n][m] - locally held submatrix */ /* d[n] - returned with eigenvalues */ /* e[n] - scratch space */ /* sigma[n]- scratch space */ /* z[m][n] - scratch space */ /* v[n][m] - returned with eigenvectors */ /* w[3*n] - scratch space */ /* ind[n] - scratch space (integer) */ /* -------------------------------------------------------- */ static void dist_diagonalize_(int n, int m, double *a, double *d, double *e, double *sigma, double *z, double *v, double *w, int *ind, const Ref& grp) { int i,info,one=1; int nproc = grp->n(); int id = grp->me(); /* reduce A to tridiagonal matrix using Householder transformation */ ptred2_(&a[0],&n,&n,&m,&nproc,&id,&d[0],&e[0],&z[0],&w[0],grp); /* diagonalize tridiagonal matrix using implicit QL method */ for (i=1; i& grp) { int i,k,r,dpsize=sizeof(double),one=1; i = 0; for (k=0; kraw_bcast(&w[0], n*dpsize, r); F77_DCOPY(&m,&w[id],&p,&ac[k],&n); } } /*******************************************************************/ static void ptred2_(double *a, int *lda, int *n, int *m, int *p, int *id, double *d, double *e, double *z, double *work, const Ref& grp) { if (*p==1) ptred_single(a, lda, n, m, p, id, d, e, z, work); else ptred_parallel(a, lda, n, m, p, id, d, e, z, work, grp); } /* ******************************************************** */ /* Function of this subroutine : */ /* tridiagonalize a real, symmetric matrix using */ /* Householder transformation */ /* Parameters : */ /* a[lda][m] - locally held submatrix */ /* lda - leading dimension of array a */ /* n - size of the matrix a */ /* m - number of locally held columns */ /* p - number of nodes used */ /* id - my node id */ /* on return : */ /* d[n] - the diagonal of the tridiagonal result */ /* e[n] - the offdiagonal of the result(e[1]-e[n-1]) */ /* z[m][n] - m rows of transformation matrix */ /* matrix a will be destroyed */ /* -------------------------------------------------------- */ static void ptred_single(double *a,int *lda,int *n,int *m,int *p,int *id, double *d,double *e,double *z,double *work) { double alpha=0.0, beta, gamma, alpha2; double oobeta; int i,j,k,l,ld,r; int slda, sn, sm, sp, sid, q, inc=1; /* extract parameters and get cube information */ slda = *lda; sn = *n; sm = *m; sp = *p; sid = *id; /* initialize eigenvector matrix to be identity */ i = sn * sm; alpha2 = 0.0; j = 0; F77_DCOPY(&i,&alpha2,&j,&z[0],&inc); ld = sid; for (i=0; i& grp) { int i, j, k, l, ld, r, dpsize = sizeof(double); int kp1l; int slda, sn, sm, sp, sid, q, inc = 1; double alpha=0.0, beta=0.0, gamma, alpha2; double oobeta, atemp; /* extract parameters and get cube information */ slda = *lda; sn = *n; sm = *m; sp = *p; sid = *id; /* initialize eigenvector matrix to be identity */ i = sn * sm; alpha2 = 0.0; j = 0; F77_DCOPY(&i, &alpha2, &j, &z[0], &inc); ld = sid; for (i = 0; i < sm; i++) { z[ld * sm + i] = 1.0; ld += sp; } /* start reduction - one column at a time */ l = 0; ld = sid; d[0] = 0.0; e[0] = 0.0; if (sid == 0) d[0] = a[0]; for (k = 1; k <= sn - 1; k++) { /* Use a Householder reflection to zero a(i,k), i = k+2,..., n . * Let a = (0, ..., 0, a(k+1,k) ... a(n,k))', * u = a/norm(a) + (k+1-st unit vector), * beta = -u(k+1) = -norm(u)**2/2, * H = I + u*u'/beta. * Replace A by H*A*H. * Store u in D(K+1) through D(N). * The root node, r, is the owner of column k. */ r = (k - 1) % sp; if (sid == r) { kp1l=l*slda+k; q = sn - k; atemp = a[l * slda + ld]; alpha = F77_DNRM2(&q, &a[kp1l], &inc); if (a[kp1l] < 0.0) alpha = -alpha; if (alpha != 0.0) { alpha2 = 1.0 / alpha; F77_DSCAL(&q, &alpha2, &a[kp1l], &inc); a[kp1l] += 1.0; } grp->raw_bcast(&a[kp1l], (sn - k) * dpsize, r); /* this is the deferred update of the eigenvector matrix. It was * deferred from the last step to accelerate the sending of the Householder * vector. Don't do this on the first step. */ if (k != 1) { int ik = k - 1; /* ik is a temporary index to the previous step */ int nmik = sn - ik; if (beta != 0.0) { for (i = 0; i < sm; i++) { gamma = F77_DDOT(&nmik, &d[ik], &inc, &z[ik * sm + i], &sm) / beta; F77_DAXPY(&nmik, &gamma, &d[ik], &inc, &z[ik * sm + i], &sm); } } e[ik] = 0.0; d[ik] = atemp; } /* now resume normal service */ F77_DCOPY(&q, &a[kp1l], &inc, &d[k], &inc); l++; ld += sp; } else { grp->raw_bcast(&d[k], (sn - k) * dpsize, r); } beta = -d[k]; if (beta != 0.0) { /* Symmetric matrix times vector, v = A*u. */ /* Store v in E(K+1) through E(N) . */ alpha2 = 0.0; j = 0; q = sn - k; F77_DCOPY(&q, &alpha2, &j, &e[k], &inc); i = ld; for (j = l; j < sm; j++) { int ij=j*slda+i; q = sn - i; e[i] += F77_DDOT(&q, &a[ij], &inc, &d[i], &inc); q--; F77_DAXPY(&q, &d[i], &a[ij+1], &inc, &e[i + 1], &inc); i += sp; } grp->sum(&e[k], sn-k, work); /* v = v/beta * gamma = v'*u/(2*beta) * v = v + gamma*u */ q = sn - k; alpha2 = 1.0 / beta; F77_DSCAL(&q, &alpha2, &e[k], &inc); gamma = 0.5 * F77_DDOT(&q, &d[k], &inc, &e[k], &inc) / beta; F77_DAXPY(&q, &gamma, &d[k], &inc, &e[k], &inc); /* Rank two update of A, compute only lower half. */ /* A = A + u'*v + v'*u = H*A*H */ i = ld; for (j = l; j < sm; j++) { double *atmp= &a[j*slda+i]; q = sn - i; F77_DAXPY(&q, &d[i], &e[i], &inc, atmp, &inc); F77_DAXPY(&q, &e[i], &d[i], &inc, atmp, &inc); i += sp; } /* Accumulate m rows of transformation matrix. * Z = Z*H * * if I have next column, defer updating */ if (sid != k%sp || k == sn - 1) { q = sn - k; oobeta = 1.0 / beta; for (i = 0; i < sm; i++) { gamma = F77_DDOT(&q, &d[k], &inc, &z[k * sm + i], &sm) * oobeta; F77_DAXPY(&q, &gamma, &d[k], &inc, &z[k * sm + i], &sm); } } } /* another bit of calcs to be deferred */ if (sid != k%sp || k == sn - 1) { d[k] = 0.0; e[k] = 0.0; if (sid == k%sp) d[k] = a[l * slda + ld]; if (sid == r) e[k] = -alpha; } } /* collect the whole tridiagonal matrix at every node */ grp->sum(d, sn, work); grp->sum(e, sn, work); } }