| [0b990d] | 1 | 
 | 
|---|
 | 2 | #include <iostream>
 | 
|---|
 | 3 | 
 | 
|---|
 | 4 | #include <math.h>
 | 
|---|
 | 5 | 
 | 
|---|
 | 6 | #include <util/group/message.h>
 | 
|---|
 | 7 | 
 | 
|---|
 | 8 | #include <math/scmat/disthql.h>
 | 
|---|
 | 9 | 
 | 
|---|
 | 10 | #include <math/scmat/f77sym.h>
 | 
|---|
 | 11 | 
 | 
|---|
 | 12 | using namespace std;
 | 
|---|
 | 13 | using namespace sc;
 | 
|---|
 | 14 | 
 | 
|---|
 | 15 | extern "C" {
 | 
|---|
 | 16 |   void F77_PDSTEQR(int *n, double *d, double *e,
 | 
|---|
 | 17 |                 double *z, int *ldz, int *nz, double *work,
 | 
|---|
 | 18 |                 int *info);
 | 
|---|
 | 19 |   void F77_DCOPY(int *n, double *dx, int *incx, double *dy, int *incy);
 | 
|---|
 | 20 |   double F77_DNRM2(int *n, double *dx, int *incx);
 | 
|---|
 | 21 |   double F77_DDOT(int *n, double *dx, int *incx, double *dy, int *incy);
 | 
|---|
 | 22 |   void F77_DSCAL(int *n, double *da, double *dx, int *incx);
 | 
|---|
 | 23 |   void F77_DAXPY(int *n, double *da, double *dx, int *incx, double *dy, int *incy);
 | 
|---|
 | 24 | }
 | 
|---|
 | 25 | 
 | 
|---|
 | 26 | namespace sc {
 | 
|---|
 | 27 | 
 | 
|---|
 | 28 | static void dist_diagonalize_(int n, int m, double *a, double *d, double *e,
 | 
|---|
 | 29 |                               double *sigma, double *z, double *v, double *w,
 | 
|---|
 | 30 |                               int *ind, const Ref<MessageGrp>&);
 | 
|---|
 | 31 | 
 | 
|---|
 | 32 | static void pflip(int id,int n,int m,int p,double *ar,double *ac,double *w,
 | 
|---|
 | 33 |                   const Ref<MessageGrp>&);
 | 
|---|
 | 34 | 
 | 
|---|
 | 35 | static void
 | 
|---|
 | 36 | ptred2_(double *a, int *lda, int *n, int *m, int *p, int *id,
 | 
|---|
 | 37 |         double *d, double *e, double *z, double *work,
 | 
|---|
 | 38 |         const Ref<MessageGrp>& grp);
 | 
|---|
 | 39 | 
 | 
|---|
 | 40 | static void
 | 
|---|
 | 41 | ptred_single(double *a,int *lda,int *n,int *m,int *p,int *id,
 | 
|---|
 | 42 |              double *d,double *e,double *z,double *work);
 | 
|---|
 | 43 | static void
 | 
|---|
 | 44 | ptred_parallel(double *a, int *lda, int *n, int *m, int *p, int *id,
 | 
|---|
 | 45 |                double *d, double *e, double *z, double *work,
 | 
|---|
 | 46 |                const Ref<MessageGrp>&);
 | 
|---|
 | 47 | 
 | 
|---|
 | 48 | /* ******************************************************** */
 | 
|---|
 | 49 | /* Function of this subroutine :                            */ 
 | 
|---|
 | 50 | /*    Diagonalize a real, symmetric matrix                  */ 
 | 
|---|
 | 51 | /*                                                          */
 | 
|---|
 | 52 | /* Parameters :                                             */
 | 
|---|
 | 53 | /*    n   - size of the matrix                              */
 | 
|---|
 | 54 | /*    m   - number of locally held columns                  */
 | 
|---|
 | 55 | /*    a[n][m] - locally held submatrix                      */
 | 
|---|
 | 56 | /*    d[n]    - returned with eigenvalues                   */
 | 
|---|
 | 57 | /*    v[n][m] - returned with eigenvectors                  */
 | 
|---|
 | 58 | /* -------------------------------------------------------- */
 | 
|---|
 | 59 | void
 | 
|---|
 | 60 | dist_diagonalize(int n, int m, double *a, double *d, double *v,
 | 
|---|
 | 61 |                  const Ref<MessageGrp> &grp)
 | 
|---|
 | 62 | {
 | 
|---|
 | 63 |   double *e = new double[n];
 | 
|---|
 | 64 |   double *sigma = new double[n];
 | 
|---|
 | 65 |   double *z = new double[n*m];
 | 
|---|
 | 66 |   double *w = new double[3*n];
 | 
|---|
 | 67 |   int *ind = new int[n];
 | 
|---|
 | 68 |   dist_diagonalize_(n, m, a, d, e, sigma, z, v, w, ind, grp);
 | 
|---|
 | 69 |   delete[] e;
 | 
|---|
 | 70 |   delete[] sigma;
 | 
|---|
 | 71 |   delete[] z;
 | 
|---|
 | 72 |   delete[] w;
 | 
|---|
 | 73 |   delete[] ind;
 | 
|---|
 | 74 | }
 | 
|---|
 | 75 | 
 | 
|---|
 | 76 | /* ******************************************************** */
 | 
|---|
 | 77 | /* Function of this subroutine :                            */ 
 | 
|---|
 | 78 | /*    Diagonalize a real, symmetric matrix                  */ 
 | 
|---|
 | 79 | /*                                                          */
 | 
|---|
 | 80 | /* Parameters :                                             */
 | 
|---|
 | 81 | /*    n   - size of the matrix                              */
 | 
|---|
 | 82 | /*    m   - number of locally held columns                  */
 | 
|---|
 | 83 | /*    a[n][m] - locally held submatrix                      */
 | 
|---|
 | 84 | /*    d[n]    - returned with eigenvalues                   */
 | 
|---|
 | 85 | /*    e[n]    - scratch space                               */
 | 
|---|
 | 86 | /*    sigma[n]- scratch space                               */
 | 
|---|
 | 87 | /*    z[m][n] - scratch space                               */
 | 
|---|
 | 88 | /*    v[n][m] - returned with eigenvectors                  */
 | 
|---|
 | 89 | /*    w[3*n]  - scratch space                               */
 | 
|---|
 | 90 | /*    ind[n]  - scratch space (integer)                     */
 | 
|---|
 | 91 | /* -------------------------------------------------------- */
 | 
|---|
 | 92 | static void
 | 
|---|
 | 93 | dist_diagonalize_(int n, int m, double *a, double *d, double *e,
 | 
|---|
 | 94 |                   double *sigma, double *z, double *v, double *w, int *ind,
 | 
|---|
 | 95 |                   const Ref<MessageGrp>& grp)
 | 
|---|
 | 96 | {
 | 
|---|
 | 97 |   int i,info,one=1;
 | 
|---|
 | 98 |   int nproc = grp->n();
 | 
|---|
 | 99 |   int id = grp->me();
 | 
|---|
 | 100 | 
 | 
|---|
 | 101 |  /* reduce A to tridiagonal matrix using Householder transformation */
 | 
|---|
 | 102 | 
 | 
|---|
 | 103 |   ptred2_(&a[0],&n,&n,&m,&nproc,&id,&d[0],&e[0],&z[0],&w[0],grp);
 | 
|---|
 | 104 | 
 | 
|---|
 | 105 |  /* diagonalize tridiagonal matrix using implicit QL method */
 | 
|---|
 | 106 | 
 | 
|---|
 | 107 |    for (i=1; i<n; i++) e[i-1] = e[i];
 | 
|---|
 | 108 |    F77_PDSTEQR(&n, d, e, z, &m, &m, w, &info);
 | 
|---|
 | 109 | 
 | 
|---|
 | 110 |  /* rearrange the eigenvectors by transposition */
 | 
|---|
 | 111 | 
 | 
|---|
 | 112 |   i = m * n;
 | 
|---|
 | 113 |   F77_DCOPY(&i,&z[0],&one,&a[0],&one);
 | 
|---|
 | 114 |   pflip(id,n,m,nproc,&a[0],&v[0],&w[0],grp);
 | 
|---|
 | 115 | }
 | 
|---|
 | 116 | 
 | 
|---|
 | 117 | /* ******************************************************** */
 | 
|---|
 | 118 | /* Function : transpose matrix                              */
 | 
|---|
 | 119 | /* -------------------------------------------------------- */
 | 
|---|
 | 120 | 
 | 
|---|
 | 121 | static void
 | 
|---|
 | 122 | pflip(int id,int n,int m,int p,double *ar,double *ac,double *w,
 | 
|---|
 | 123 |       const Ref<MessageGrp>& grp)
 | 
|---|
 | 124 | {
 | 
|---|
 | 125 |   int i,k,r,dpsize=sizeof(double),one=1;
 | 
|---|
 | 126 | 
 | 
|---|
 | 127 |   i = 0;
 | 
|---|
 | 128 |   for (k=0; k<n; k++) {
 | 
|---|
 | 129 |     r = k % p;
 | 
|---|
 | 130 |     if (id == r) {
 | 
|---|
 | 131 |       F77_DCOPY(&n,&ar[i],&m,&w[0],&one);
 | 
|---|
 | 132 |       i++;
 | 
|---|
 | 133 |     }
 | 
|---|
 | 134 |     grp->raw_bcast(&w[0], n*dpsize, r);
 | 
|---|
 | 135 |     F77_DCOPY(&m,&w[id],&p,&ac[k],&n);
 | 
|---|
 | 136 |   }
 | 
|---|
 | 137 | }
 | 
|---|
 | 138 | 
 | 
|---|
 | 139 | /*******************************************************************/
 | 
|---|
 | 140 | 
 | 
|---|
 | 141 | static void
 | 
|---|
 | 142 | ptred2_(double *a, int *lda, int *n, int *m, int *p, int *id,
 | 
|---|
 | 143 |         double *d, double *e, double *z, double *work,
 | 
|---|
 | 144 |         const Ref<MessageGrp>& grp)
 | 
|---|
 | 145 | {
 | 
|---|
 | 146 |   if (*p==1)
 | 
|---|
 | 147 |     ptred_single(a, lda, n, m, p, id, d, e, z, work);
 | 
|---|
 | 148 |   else
 | 
|---|
 | 149 |     ptred_parallel(a, lda, n, m, p, id, d, e, z, work, grp);
 | 
|---|
 | 150 | }
 | 
|---|
 | 151 | 
 | 
|---|
 | 152 | /* ******************************************************** */
 | 
|---|
 | 153 | /* Function of this subroutine :                            */ 
 | 
|---|
 | 154 | /*    tridiagonalize a real, symmetric matrix using         */ 
 | 
|---|
 | 155 | /*    Householder transformation                            */
 | 
|---|
 | 156 | /* Parameters :                                             */
 | 
|---|
 | 157 | /*    a[lda][m] - locally held submatrix                    */
 | 
|---|
 | 158 | /*    lda - leading dimension of array a                    */
 | 
|---|
 | 159 | /*    n   - size of the matrix a                            */
 | 
|---|
 | 160 | /*    m   - number of locally held columns                  */
 | 
|---|
 | 161 | /*    p   - number of nodes used                            */
 | 
|---|
 | 162 | /*    id  - my node id                                      */
 | 
|---|
 | 163 | /*  on return :                                             */
 | 
|---|
 | 164 | /*    d[n]    - the diagonal of the tridiagonal result      */
 | 
|---|
 | 165 | /*    e[n]    - the offdiagonal of the result(e[1]-e[n-1])  */
 | 
|---|
 | 166 | /*    z[m][n] - m rows of transformation matrix             */
 | 
|---|
 | 167 | /*    matrix a will be destroyed                            */
 | 
|---|
 | 168 | /* -------------------------------------------------------- */
 | 
|---|
 | 169 | 
 | 
|---|
 | 170 | static void
 | 
|---|
 | 171 | ptred_single(double *a,int *lda,int *n,int *m,int *p,int *id,
 | 
|---|
 | 172 |              double *d,double *e,double *z,double *work)
 | 
|---|
 | 173 | {
 | 
|---|
 | 174 |    double  alpha=0.0, beta, gamma, alpha2; 
 | 
|---|
 | 175 |    double  oobeta;
 | 
|---|
 | 176 |    int     i,j,k,l,ld,r;
 | 
|---|
 | 177 |    int     slda, sn, sm, sp, sid, q, inc=1;
 | 
|---|
 | 178 | 
 | 
|---|
 | 179 |    /* extract parameters and get  cube information */
 | 
|---|
 | 180 | 
 | 
|---|
 | 181 |    slda = *lda;
 | 
|---|
 | 182 |    sn = *n;
 | 
|---|
 | 183 |    sm = *m;
 | 
|---|
 | 184 |    sp = *p;
 | 
|---|
 | 185 |    sid = *id;
 | 
|---|
 | 186 | 
 | 
|---|
 | 187 |    /* initialize eigenvector matrix to be identity */
 | 
|---|
 | 188 | 
 | 
|---|
 | 189 |    i = sn * sm;
 | 
|---|
 | 190 |    alpha2 = 0.0;
 | 
|---|
 | 191 |    j = 0;
 | 
|---|
 | 192 |    F77_DCOPY(&i,&alpha2,&j,&z[0],&inc);
 | 
|---|
 | 193 |    ld = sid;
 | 
|---|
 | 194 |    for (i=0; i<sm; i++) {
 | 
|---|
 | 195 |       z[ld*sm+i] = 1.0;
 | 
|---|
 | 196 |       ld += sp; 
 | 
|---|
 | 197 |    }
 | 
|---|
 | 198 | 
 | 
|---|
 | 199 |    /* start reduction - one column at a time */
 | 
|---|
 | 200 | 
 | 
|---|
 | 201 |    l = 0;
 | 
|---|
 | 202 |    ld = sid;
 | 
|---|
 | 203 |    d[0] = 0.0;
 | 
|---|
 | 204 |    e[0] = 0.0;
 | 
|---|
 | 205 |    if (sid == 0) d[0] = a[0];
 | 
|---|
 | 206 |    for (k=1; k<=sn-1; k++) {
 | 
|---|
 | 207 |       r = (k-1) % sp;
 | 
|---|
 | 208 | 
 | 
|---|
 | 209 |       /* Use a Householder reflection to zero a(i,k), i = k+2,..., n .*/
 | 
|---|
 | 210 |       /* Let  a = (0, ..., 0, a(k+1,k) ... a(n,k))',                  */
 | 
|---|
 | 211 |       /*      u =  a/norm(a) + (k+1-st unit vector),                  */
 | 
|---|
 | 212 |       /*      beta = -u(k+1) = -norm(u)**2/2 ,                        */
 | 
|---|
 | 213 |       /*      H = I + u*u'/beta .                                     */
 | 
|---|
 | 214 |       /* Replace  A  by  H*A*H .                                      */
 | 
|---|
 | 215 |       /* Store u in D(K+1) through D(N) .                             */
 | 
|---|
 | 216 |       /* The root node, r, is the owner of column k.                  */
 | 
|---|
 | 217 | 
 | 
|---|
 | 218 |       if (sid == r) {
 | 
|---|
 | 219 |          q = sn - k;      
 | 
|---|
 | 220 |          alpha = F77_DNRM2(&q,&a[l*slda+k],&inc);
 | 
|---|
 | 221 |          if (a[l*slda+k] < 0.0) alpha = -alpha;
 | 
|---|
 | 222 |          if (alpha != 0.0) {
 | 
|---|
 | 223 |             alpha2 = 1.0 / alpha;
 | 
|---|
 | 224 |             F77_DSCAL(&q,&alpha2,&a[l*slda+k],&inc);
 | 
|---|
 | 225 |             a[l*slda+k] += 1.0;
 | 
|---|
 | 226 |          }
 | 
|---|
 | 227 |          F77_DCOPY(&q,&a[l*slda+k],&inc,&d[k],&inc);
 | 
|---|
 | 228 |          l++;
 | 
|---|
 | 229 |          ld += sp;
 | 
|---|
 | 230 |       }
 | 
|---|
 | 231 | 
 | 
|---|
 | 232 |       beta = -d[k];
 | 
|---|
 | 233 |       if (beta != 0.0) {
 | 
|---|
 | 234 | 
 | 
|---|
 | 235 |          /* Symmetric matrix times vector,  v = A*u.*/
 | 
|---|
 | 236 |          /* Store  v  in  E(K+1) through E(N) .     */
 | 
|---|
 | 237 | 
 | 
|---|
 | 238 |          alpha2 = 0.0;
 | 
|---|
 | 239 |          j = 0;
 | 
|---|
 | 240 |          q = sn - k;
 | 
|---|
 | 241 |          F77_DCOPY(&q,&alpha2,&j,&e[k],&inc);
 | 
|---|
 | 242 |          i = ld;
 | 
|---|
 | 243 |          for (j=l; j<sm; j++) {
 | 
|---|
 | 244 |             q = sn - i;
 | 
|---|
 | 245 |             e[i] = e[i] + F77_DDOT(&q,&a[j*slda+i],&inc,&d[i],&inc);
 | 
|---|
 | 246 |             q = sn - i - 1;
 | 
|---|
 | 247 |             F77_DAXPY(&q,&d[i],&a[slda*j+i+1],&inc,&e[i+1],&inc);
 | 
|---|
 | 248 |             i += sp;
 | 
|---|
 | 249 |          }
 | 
|---|
 | 250 | 
 | 
|---|
 | 251 |          /* v = v/beta            */
 | 
|---|
 | 252 |          /* gamma = v'*u/(2*beta) */
 | 
|---|
 | 253 |          /* v = v + gamma*u       */
 | 
|---|
 | 254 | 
 | 
|---|
 | 255 |          if (sid == r) {
 | 
|---|
 | 256 |             q = sn - k;
 | 
|---|
 | 257 |             alpha2 = 1.0 / beta;
 | 
|---|
 | 258 |             F77_DSCAL(&q,&alpha2,&e[k],&inc);
 | 
|---|
 | 259 |             gamma = 0.5*F77_DDOT(&q,&d[k],&inc,&e[k],&inc)/beta;
 | 
|---|
 | 260 |             F77_DAXPY(&q,&gamma,&d[k],&inc,&e[k],&inc);
 | 
|---|
 | 261 |          }
 | 
|---|
 | 262 | 
 | 
|---|
 | 263 |          /* Rank two update of A, compute only lower half. */
 | 
|---|
 | 264 |          /* A  =  A + u'*v + v'*u  =  H*A*H                */
 | 
|---|
 | 265 | 
 | 
|---|
 | 266 |          i = ld;
 | 
|---|
 | 267 |          for (j=l; j<sm; j++) {
 | 
|---|
 | 268 |             q = sn - i;
 | 
|---|
 | 269 |             F77_DAXPY(&q,&d[i],&e[i],&inc,&a[j*slda+i],&inc);
 | 
|---|
 | 270 |             F77_DAXPY(&q,&e[i],&d[i],&inc,&a[j*slda+i],&inc);
 | 
|---|
 | 271 |             i += sp;
 | 
|---|
 | 272 |          }
 | 
|---|
 | 273 |          q = sn - k;
 | 
|---|
 | 274 |          oobeta=1.0/beta;
 | 
|---|
 | 275 |          for (i=0; i<sm; i++) {
 | 
|---|
 | 276 |             gamma = F77_DDOT(&q,&d[k],&inc,&z[k*sm+i],&sm)*oobeta;
 | 
|---|
 | 277 |             F77_DAXPY(&q,&gamma,&d[k],&inc,&z[k*sm+i],&sm);
 | 
|---|
 | 278 |          }
 | 
|---|
 | 279 |       }
 | 
|---|
 | 280 | 
 | 
|---|
 | 281 |       d[k] = 0.0;
 | 
|---|
 | 282 |       e[k] = 0.0;
 | 
|---|
 | 283 |       if (sid == (k % sp)) d[k] = a[l*slda+ld];
 | 
|---|
 | 284 |       if (sid == r) e[k] = -alpha;
 | 
|---|
 | 285 |    }
 | 
|---|
 | 286 |    r = 0;
 | 
|---|
 | 287 | 
 | 
|---|
 | 288 | }
 | 
|---|
 | 289 | 
 | 
|---|
 | 290 | /*
 | 
|---|
 | 291 |  * Function of this subroutine :
 | 
|---|
 | 292 |  * tridiagonalize a real, symmetric matrix using
 | 
|---|
 | 293 |  * Householder transformation
 | 
|---|
 | 294 |  *
 | 
|---|
 | 295 |  * Parameters :
 | 
|---|
 | 296 |  *   a[lda][m] - locally held submatrix
 | 
|---|
 | 297 |  *   lda - leading dimension of array a
 | 
|---|
 | 298 |  *   n   - size of the matrix a
 | 
|---|
 | 299 |  *   m   - number of locally held columns
 | 
|---|
 | 300 |  *   p   - number of nodes used
 | 
|---|
 | 301 |  *   id  - my node id
 | 
|---|
 | 302 |  *
 | 
|---|
 | 303 |  * on return :
 | 
|---|
 | 304 |  *   d[n]    - the diagonal of the tridiagonal result
 | 
|---|
 | 305 |  *   e[n]    - the offdiagonal of the result(e[1]-e[n-1])
 | 
|---|
 | 306 |  *   z[m][n] - m rows of transformation matrix
 | 
|---|
 | 307 |  *   matrix a will be destroyed 
 | 
|---|
 | 308 |  *
 | 
|---|
 | 309 |  * merge C code from libdmt with FORTRAN code modified by R. Chamberlain
 | 
|---|
 | 310 |  * FORTRAN COMMENTS:
 | 
|---|
 | 311 |  *    This version dated 5/4/92
 | 
|---|
 | 312 |  *    Richard Chamberlain, Intel Supercomputer Systems Division
 | 
|---|
 | 313 |  *    Improvements:
 | 
|---|
 | 314 |  *      1. gdcomb of Robert van de Geijn used.
 | 
|---|
 | 315 |  *      2. look-ahead distribution of Householder vector. Here the node
 | 
|---|
 | 316 |  *        containing the next Householder vector defers updating the
 | 
|---|
 | 317 |  *        eigenvector matrix until the next Householder vector is sent.
 | 
|---|
 | 318 |  */
 | 
|---|
 | 319 | 
 | 
|---|
 | 320 | static void
 | 
|---|
 | 321 | ptred_parallel(double *a, int *lda, int *n, int *m, int *p, int *id,
 | 
|---|
 | 322 |                double *d, double *e, double *z, double *work,
 | 
|---|
 | 323 |                const Ref<MessageGrp>& grp)
 | 
|---|
 | 324 | {
 | 
|---|
 | 325 |   int i, j, k, l, ld, r, dpsize = sizeof(double);
 | 
|---|
 | 326 |   int kp1l;
 | 
|---|
 | 327 |   int slda, sn, sm, sp, sid, q, inc = 1;
 | 
|---|
 | 328 |   double alpha=0.0, beta=0.0, gamma, alpha2;
 | 
|---|
 | 329 |   double oobeta, atemp;
 | 
|---|
 | 330 | 
 | 
|---|
 | 331 |   /* extract parameters and get  cube information */
 | 
|---|
 | 332 | 
 | 
|---|
 | 333 |   slda = *lda;
 | 
|---|
 | 334 |   sn = *n;
 | 
|---|
 | 335 |   sm = *m;
 | 
|---|
 | 336 |   sp = *p;
 | 
|---|
 | 337 |   sid = *id;
 | 
|---|
 | 338 | 
 | 
|---|
 | 339 |   /* initialize eigenvector matrix to be identity */
 | 
|---|
 | 340 | 
 | 
|---|
 | 341 |   i = sn * sm;
 | 
|---|
 | 342 |   alpha2 = 0.0;
 | 
|---|
 | 343 |   j = 0;
 | 
|---|
 | 344 |   F77_DCOPY(&i, &alpha2, &j, &z[0], &inc);
 | 
|---|
 | 345 |   ld = sid;
 | 
|---|
 | 346 |   for (i = 0; i < sm; i++) {
 | 
|---|
 | 347 |     z[ld * sm + i] = 1.0;
 | 
|---|
 | 348 |     ld += sp;
 | 
|---|
 | 349 |     }
 | 
|---|
 | 350 | 
 | 
|---|
 | 351 |   /* start reduction - one column at a time */
 | 
|---|
 | 352 | 
 | 
|---|
 | 353 |   l = 0;
 | 
|---|
 | 354 |   ld = sid;
 | 
|---|
 | 355 |   d[0] = 0.0;
 | 
|---|
 | 356 |   e[0] = 0.0;
 | 
|---|
 | 357 |   if (sid == 0) d[0] = a[0];
 | 
|---|
 | 358 | 
 | 
|---|
 | 359 |   for (k = 1; k <= sn - 1; k++) {
 | 
|---|
 | 360 | 
 | 
|---|
 | 361 |     /* Use a Householder reflection to zero a(i,k), i = k+2,..., n .
 | 
|---|
 | 362 |      * Let  a = (0, ..., 0, a(k+1,k) ... a(n,k))',
 | 
|---|
 | 363 |      * u =  a/norm(a) + (k+1-st unit vector),
 | 
|---|
 | 364 |      * beta = -u(k+1) = -norm(u)**2/2,
 | 
|---|
 | 365 |      * H = I + u*u'/beta.
 | 
|---|
 | 366 |      * Replace  A  by  H*A*H.
 | 
|---|
 | 367 |      * Store u in D(K+1) through D(N).
 | 
|---|
 | 368 |      * The root node, r, is the owner of column k.                  
 | 
|---|
 | 369 |      */
 | 
|---|
 | 370 | 
 | 
|---|
 | 371 |     r = (k - 1) % sp;
 | 
|---|
 | 372 |     if (sid == r) {
 | 
|---|
 | 373 |       kp1l=l*slda+k;
 | 
|---|
 | 374 |       q = sn - k;
 | 
|---|
 | 375 |       atemp = a[l * slda + ld];
 | 
|---|
 | 376 |       alpha = F77_DNRM2(&q, &a[kp1l], &inc);
 | 
|---|
 | 377 |       if (a[kp1l] < 0.0) alpha = -alpha;
 | 
|---|
 | 378 |       if (alpha != 0.0) {
 | 
|---|
 | 379 |         alpha2 = 1.0 / alpha;
 | 
|---|
 | 380 |         F77_DSCAL(&q, &alpha2, &a[kp1l], &inc);
 | 
|---|
 | 381 |         a[kp1l] += 1.0;
 | 
|---|
 | 382 |         }
 | 
|---|
 | 383 | 
 | 
|---|
 | 384 |       grp->raw_bcast(&a[kp1l], (sn - k) * dpsize, r);
 | 
|---|
 | 385 | 
 | 
|---|
 | 386 |    /* this is the deferred update of the eigenvector matrix. It was
 | 
|---|
 | 387 |     * deferred from the last step to accelerate the sending of the Householder
 | 
|---|
 | 388 |     * vector. Don't do this on the first step.
 | 
|---|
 | 389 |     */
 | 
|---|
 | 390 |       if (k != 1) {
 | 
|---|
 | 391 |         int ik = k - 1; /* ik is a temporary index to the previous step */
 | 
|---|
 | 392 |         int nmik = sn - ik;
 | 
|---|
 | 393 | 
 | 
|---|
 | 394 |         if (beta != 0.0) {
 | 
|---|
 | 395 |           for (i = 0; i < sm; i++) {
 | 
|---|
 | 396 |             gamma = F77_DDOT(&nmik, &d[ik], &inc, &z[ik * sm + i], &sm) / beta;
 | 
|---|
 | 397 |             F77_DAXPY(&nmik, &gamma, &d[ik], &inc, &z[ik * sm + i], &sm);
 | 
|---|
 | 398 |             }
 | 
|---|
 | 399 |           }
 | 
|---|
 | 400 |         e[ik] = 0.0;
 | 
|---|
 | 401 |         d[ik] = atemp;
 | 
|---|
 | 402 |         }
 | 
|---|
 | 403 | 
 | 
|---|
 | 404 |    /* now resume normal service */
 | 
|---|
 | 405 |       F77_DCOPY(&q, &a[kp1l], &inc, &d[k], &inc);
 | 
|---|
 | 406 |       l++;
 | 
|---|
 | 407 |       ld += sp;
 | 
|---|
 | 408 |       }
 | 
|---|
 | 409 |     else {
 | 
|---|
 | 410 |       grp->raw_bcast(&d[k], (sn - k) * dpsize, r);
 | 
|---|
 | 411 |       }
 | 
|---|
 | 412 | 
 | 
|---|
 | 413 |     beta = -d[k];
 | 
|---|
 | 414 |     if (beta != 0.0) {
 | 
|---|
 | 415 | 
 | 
|---|
 | 416 |       /* Symmetric matrix times vector,  v = A*u. */
 | 
|---|
 | 417 |       /* Store  v  in  E(K+1) through E(N) .     */
 | 
|---|
 | 418 | 
 | 
|---|
 | 419 |       alpha2 = 0.0;
 | 
|---|
 | 420 |       j = 0;
 | 
|---|
 | 421 |       q = sn - k;
 | 
|---|
 | 422 |       F77_DCOPY(&q, &alpha2, &j, &e[k], &inc);
 | 
|---|
 | 423 |       i = ld;
 | 
|---|
 | 424 |       for (j = l; j < sm; j++) {
 | 
|---|
 | 425 |         int ij=j*slda+i;
 | 
|---|
 | 426 |         q = sn - i;
 | 
|---|
 | 427 |         e[i] += F77_DDOT(&q, &a[ij], &inc, &d[i], &inc);
 | 
|---|
 | 428 |         q--;
 | 
|---|
 | 429 |         F77_DAXPY(&q, &d[i], &a[ij+1], &inc, &e[i + 1], &inc);
 | 
|---|
 | 430 |         i += sp;
 | 
|---|
 | 431 |         }
 | 
|---|
 | 432 |       grp->sum(&e[k], sn-k, work);
 | 
|---|
 | 433 | 
 | 
|---|
 | 434 |       /* v = v/beta
 | 
|---|
 | 435 |        * gamma = v'*u/(2*beta)
 | 
|---|
 | 436 |        * v = v + gamma*u
 | 
|---|
 | 437 |        */
 | 
|---|
 | 438 | 
 | 
|---|
 | 439 |       q = sn - k;
 | 
|---|
 | 440 |       alpha2 = 1.0 / beta;
 | 
|---|
 | 441 |       F77_DSCAL(&q, &alpha2, &e[k], &inc);
 | 
|---|
 | 442 |       gamma = 0.5 * F77_DDOT(&q, &d[k], &inc, &e[k], &inc) / beta;
 | 
|---|
 | 443 |       F77_DAXPY(&q, &gamma, &d[k], &inc, &e[k], &inc);
 | 
|---|
 | 444 | 
 | 
|---|
 | 445 |       /* Rank two update of A, compute only lower half. */
 | 
|---|
 | 446 |       /* A  =  A + u'*v + v'*u  =  H*A*H                */
 | 
|---|
 | 447 | 
 | 
|---|
 | 448 |       i = ld;
 | 
|---|
 | 449 |       for (j = l; j < sm; j++) {
 | 
|---|
 | 450 |         double *atmp= &a[j*slda+i];
 | 
|---|
 | 451 |         q = sn - i;
 | 
|---|
 | 452 |         F77_DAXPY(&q, &d[i], &e[i], &inc, atmp, &inc);
 | 
|---|
 | 453 |         F77_DAXPY(&q, &e[i], &d[i], &inc, atmp, &inc);
 | 
|---|
 | 454 |         i += sp;
 | 
|---|
 | 455 |         }
 | 
|---|
 | 456 | 
 | 
|---|
 | 457 |       /*  Accumulate m rows of transformation matrix.
 | 
|---|
 | 458 |        *  Z = Z*H
 | 
|---|
 | 459 |        *
 | 
|---|
 | 460 |        * if I have next column, defer updating
 | 
|---|
 | 461 |        */
 | 
|---|
 | 462 | 
 | 
|---|
 | 463 |       if (sid != k%sp || k == sn - 1) {
 | 
|---|
 | 464 |         q = sn - k;
 | 
|---|
 | 465 |         oobeta = 1.0 / beta;
 | 
|---|
 | 466 |         for (i = 0; i < sm; i++) {
 | 
|---|
 | 467 |           gamma = F77_DDOT(&q, &d[k], &inc, &z[k * sm + i], &sm) * oobeta;
 | 
|---|
 | 468 |           F77_DAXPY(&q, &gamma, &d[k], &inc, &z[k * sm + i], &sm);
 | 
|---|
 | 469 |           }
 | 
|---|
 | 470 |         }
 | 
|---|
 | 471 |       }
 | 
|---|
 | 472 | 
 | 
|---|
 | 473 |    /* another bit of calcs to be deferred */
 | 
|---|
 | 474 |     if (sid != k%sp || k == sn - 1) {
 | 
|---|
 | 475 |       d[k] = 0.0;
 | 
|---|
 | 476 |       e[k] = 0.0;
 | 
|---|
 | 477 |       if (sid == k%sp) d[k] = a[l * slda + ld];
 | 
|---|
 | 478 |       if (sid == r) e[k] = -alpha;
 | 
|---|
 | 479 |       }
 | 
|---|
 | 480 |     }
 | 
|---|
 | 481 | 
 | 
|---|
 | 482 |   /* collect the whole tridiagonal matrix at every node */
 | 
|---|
 | 483 | 
 | 
|---|
 | 484 |   grp->sum(d, sn, work);
 | 
|---|
 | 485 |   grp->sum(e, sn, work);
 | 
|---|
 | 486 |   }
 | 
|---|
 | 487 | 
 | 
|---|
 | 488 | }
 | 
|---|