source: ThirdParty/mpqc_open/src/lib/math/scmat/disthql.cc@ b7e5b0

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since b7e5b0 was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 14.6 KB
Line 
1
2#include <iostream>
3
4#include <math.h>
5
6#include <util/group/message.h>
7
8#include <math/scmat/disthql.h>
9
10#include <math/scmat/f77sym.h>
11
12using namespace std;
13using namespace sc;
14
15extern "C" {
16 void F77_PDSTEQR(int *n, double *d, double *e,
17 double *z, int *ldz, int *nz, double *work,
18 int *info);
19 void F77_DCOPY(int *n, double *dx, int *incx, double *dy, int *incy);
20 double F77_DNRM2(int *n, double *dx, int *incx);
21 double F77_DDOT(int *n, double *dx, int *incx, double *dy, int *incy);
22 void F77_DSCAL(int *n, double *da, double *dx, int *incx);
23 void F77_DAXPY(int *n, double *da, double *dx, int *incx, double *dy, int *incy);
24}
25
26namespace sc {
27
28static void dist_diagonalize_(int n, int m, double *a, double *d, double *e,
29 double *sigma, double *z, double *v, double *w,
30 int *ind, const Ref<MessageGrp>&);
31
32static void pflip(int id,int n,int m,int p,double *ar,double *ac,double *w,
33 const Ref<MessageGrp>&);
34
35static void
36ptred2_(double *a, int *lda, int *n, int *m, int *p, int *id,
37 double *d, double *e, double *z, double *work,
38 const Ref<MessageGrp>& grp);
39
40static void
41ptred_single(double *a,int *lda,int *n,int *m,int *p,int *id,
42 double *d,double *e,double *z,double *work);
43static void
44ptred_parallel(double *a, int *lda, int *n, int *m, int *p, int *id,
45 double *d, double *e, double *z, double *work,
46 const Ref<MessageGrp>&);
47
48/* ******************************************************** */
49/* Function of this subroutine : */
50/* Diagonalize a real, symmetric matrix */
51/* */
52/* Parameters : */
53/* n - size of the matrix */
54/* m - number of locally held columns */
55/* a[n][m] - locally held submatrix */
56/* d[n] - returned with eigenvalues */
57/* v[n][m] - returned with eigenvectors */
58/* -------------------------------------------------------- */
59void
60dist_diagonalize(int n, int m, double *a, double *d, double *v,
61 const Ref<MessageGrp> &grp)
62{
63 double *e = new double[n];
64 double *sigma = new double[n];
65 double *z = new double[n*m];
66 double *w = new double[3*n];
67 int *ind = new int[n];
68 dist_diagonalize_(n, m, a, d, e, sigma, z, v, w, ind, grp);
69 delete[] e;
70 delete[] sigma;
71 delete[] z;
72 delete[] w;
73 delete[] ind;
74}
75
76/* ******************************************************** */
77/* Function of this subroutine : */
78/* Diagonalize a real, symmetric matrix */
79/* */
80/* Parameters : */
81/* n - size of the matrix */
82/* m - number of locally held columns */
83/* a[n][m] - locally held submatrix */
84/* d[n] - returned with eigenvalues */
85/* e[n] - scratch space */
86/* sigma[n]- scratch space */
87/* z[m][n] - scratch space */
88/* v[n][m] - returned with eigenvectors */
89/* w[3*n] - scratch space */
90/* ind[n] - scratch space (integer) */
91/* -------------------------------------------------------- */
92static void
93dist_diagonalize_(int n, int m, double *a, double *d, double *e,
94 double *sigma, double *z, double *v, double *w, int *ind,
95 const Ref<MessageGrp>& grp)
96{
97 int i,info,one=1;
98 int nproc = grp->n();
99 int id = grp->me();
100
101 /* reduce A to tridiagonal matrix using Householder transformation */
102
103 ptred2_(&a[0],&n,&n,&m,&nproc,&id,&d[0],&e[0],&z[0],&w[0],grp);
104
105 /* diagonalize tridiagonal matrix using implicit QL method */
106
107 for (i=1; i<n; i++) e[i-1] = e[i];
108 F77_PDSTEQR(&n, d, e, z, &m, &m, w, &info);
109
110 /* rearrange the eigenvectors by transposition */
111
112 i = m * n;
113 F77_DCOPY(&i,&z[0],&one,&a[0],&one);
114 pflip(id,n,m,nproc,&a[0],&v[0],&w[0],grp);
115}
116
117/* ******************************************************** */
118/* Function : transpose matrix */
119/* -------------------------------------------------------- */
120
121static void
122pflip(int id,int n,int m,int p,double *ar,double *ac,double *w,
123 const Ref<MessageGrp>& grp)
124{
125 int i,k,r,dpsize=sizeof(double),one=1;
126
127 i = 0;
128 for (k=0; k<n; k++) {
129 r = k % p;
130 if (id == r) {
131 F77_DCOPY(&n,&ar[i],&m,&w[0],&one);
132 i++;
133 }
134 grp->raw_bcast(&w[0], n*dpsize, r);
135 F77_DCOPY(&m,&w[id],&p,&ac[k],&n);
136 }
137}
138
139/*******************************************************************/
140
141static void
142ptred2_(double *a, int *lda, int *n, int *m, int *p, int *id,
143 double *d, double *e, double *z, double *work,
144 const Ref<MessageGrp>& grp)
145{
146 if (*p==1)
147 ptred_single(a, lda, n, m, p, id, d, e, z, work);
148 else
149 ptred_parallel(a, lda, n, m, p, id, d, e, z, work, grp);
150}
151
152/* ******************************************************** */
153/* Function of this subroutine : */
154/* tridiagonalize a real, symmetric matrix using */
155/* Householder transformation */
156/* Parameters : */
157/* a[lda][m] - locally held submatrix */
158/* lda - leading dimension of array a */
159/* n - size of the matrix a */
160/* m - number of locally held columns */
161/* p - number of nodes used */
162/* id - my node id */
163/* on return : */
164/* d[n] - the diagonal of the tridiagonal result */
165/* e[n] - the offdiagonal of the result(e[1]-e[n-1]) */
166/* z[m][n] - m rows of transformation matrix */
167/* matrix a will be destroyed */
168/* -------------------------------------------------------- */
169
170static void
171ptred_single(double *a,int *lda,int *n,int *m,int *p,int *id,
172 double *d,double *e,double *z,double *work)
173{
174 double alpha=0.0, beta, gamma, alpha2;
175 double oobeta;
176 int i,j,k,l,ld,r;
177 int slda, sn, sm, sp, sid, q, inc=1;
178
179 /* extract parameters and get cube information */
180
181 slda = *lda;
182 sn = *n;
183 sm = *m;
184 sp = *p;
185 sid = *id;
186
187 /* initialize eigenvector matrix to be identity */
188
189 i = sn * sm;
190 alpha2 = 0.0;
191 j = 0;
192 F77_DCOPY(&i,&alpha2,&j,&z[0],&inc);
193 ld = sid;
194 for (i=0; i<sm; i++) {
195 z[ld*sm+i] = 1.0;
196 ld += sp;
197 }
198
199 /* start reduction - one column at a time */
200
201 l = 0;
202 ld = sid;
203 d[0] = 0.0;
204 e[0] = 0.0;
205 if (sid == 0) d[0] = a[0];
206 for (k=1; k<=sn-1; k++) {
207 r = (k-1) % sp;
208
209 /* Use a Householder reflection to zero a(i,k), i = k+2,..., n .*/
210 /* Let a = (0, ..., 0, a(k+1,k) ... a(n,k))', */
211 /* u = a/norm(a) + (k+1-st unit vector), */
212 /* beta = -u(k+1) = -norm(u)**2/2 , */
213 /* H = I + u*u'/beta . */
214 /* Replace A by H*A*H . */
215 /* Store u in D(K+1) through D(N) . */
216 /* The root node, r, is the owner of column k. */
217
218 if (sid == r) {
219 q = sn - k;
220 alpha = F77_DNRM2(&q,&a[l*slda+k],&inc);
221 if (a[l*slda+k] < 0.0) alpha = -alpha;
222 if (alpha != 0.0) {
223 alpha2 = 1.0 / alpha;
224 F77_DSCAL(&q,&alpha2,&a[l*slda+k],&inc);
225 a[l*slda+k] += 1.0;
226 }
227 F77_DCOPY(&q,&a[l*slda+k],&inc,&d[k],&inc);
228 l++;
229 ld += sp;
230 }
231
232 beta = -d[k];
233 if (beta != 0.0) {
234
235 /* Symmetric matrix times vector, v = A*u.*/
236 /* Store v in E(K+1) through E(N) . */
237
238 alpha2 = 0.0;
239 j = 0;
240 q = sn - k;
241 F77_DCOPY(&q,&alpha2,&j,&e[k],&inc);
242 i = ld;
243 for (j=l; j<sm; j++) {
244 q = sn - i;
245 e[i] = e[i] + F77_DDOT(&q,&a[j*slda+i],&inc,&d[i],&inc);
246 q = sn - i - 1;
247 F77_DAXPY(&q,&d[i],&a[slda*j+i+1],&inc,&e[i+1],&inc);
248 i += sp;
249 }
250
251 /* v = v/beta */
252 /* gamma = v'*u/(2*beta) */
253 /* v = v + gamma*u */
254
255 if (sid == r) {
256 q = sn - k;
257 alpha2 = 1.0 / beta;
258 F77_DSCAL(&q,&alpha2,&e[k],&inc);
259 gamma = 0.5*F77_DDOT(&q,&d[k],&inc,&e[k],&inc)/beta;
260 F77_DAXPY(&q,&gamma,&d[k],&inc,&e[k],&inc);
261 }
262
263 /* Rank two update of A, compute only lower half. */
264 /* A = A + u'*v + v'*u = H*A*H */
265
266 i = ld;
267 for (j=l; j<sm; j++) {
268 q = sn - i;
269 F77_DAXPY(&q,&d[i],&e[i],&inc,&a[j*slda+i],&inc);
270 F77_DAXPY(&q,&e[i],&d[i],&inc,&a[j*slda+i],&inc);
271 i += sp;
272 }
273 q = sn - k;
274 oobeta=1.0/beta;
275 for (i=0; i<sm; i++) {
276 gamma = F77_DDOT(&q,&d[k],&inc,&z[k*sm+i],&sm)*oobeta;
277 F77_DAXPY(&q,&gamma,&d[k],&inc,&z[k*sm+i],&sm);
278 }
279 }
280
281 d[k] = 0.0;
282 e[k] = 0.0;
283 if (sid == (k % sp)) d[k] = a[l*slda+ld];
284 if (sid == r) e[k] = -alpha;
285 }
286 r = 0;
287
288}
289
290/*
291 * Function of this subroutine :
292 * tridiagonalize a real, symmetric matrix using
293 * Householder transformation
294 *
295 * Parameters :
296 * a[lda][m] - locally held submatrix
297 * lda - leading dimension of array a
298 * n - size of the matrix a
299 * m - number of locally held columns
300 * p - number of nodes used
301 * id - my node id
302 *
303 * on return :
304 * d[n] - the diagonal of the tridiagonal result
305 * e[n] - the offdiagonal of the result(e[1]-e[n-1])
306 * z[m][n] - m rows of transformation matrix
307 * matrix a will be destroyed
308 *
309 * merge C code from libdmt with FORTRAN code modified by R. Chamberlain
310 * FORTRAN COMMENTS:
311 * This version dated 5/4/92
312 * Richard Chamberlain, Intel Supercomputer Systems Division
313 * Improvements:
314 * 1. gdcomb of Robert van de Geijn used.
315 * 2. look-ahead distribution of Householder vector. Here the node
316 * containing the next Householder vector defers updating the
317 * eigenvector matrix until the next Householder vector is sent.
318 */
319
320static void
321ptred_parallel(double *a, int *lda, int *n, int *m, int *p, int *id,
322 double *d, double *e, double *z, double *work,
323 const Ref<MessageGrp>& grp)
324{
325 int i, j, k, l, ld, r, dpsize = sizeof(double);
326 int kp1l;
327 int slda, sn, sm, sp, sid, q, inc = 1;
328 double alpha=0.0, beta=0.0, gamma, alpha2;
329 double oobeta, atemp;
330
331 /* extract parameters and get cube information */
332
333 slda = *lda;
334 sn = *n;
335 sm = *m;
336 sp = *p;
337 sid = *id;
338
339 /* initialize eigenvector matrix to be identity */
340
341 i = sn * sm;
342 alpha2 = 0.0;
343 j = 0;
344 F77_DCOPY(&i, &alpha2, &j, &z[0], &inc);
345 ld = sid;
346 for (i = 0; i < sm; i++) {
347 z[ld * sm + i] = 1.0;
348 ld += sp;
349 }
350
351 /* start reduction - one column at a time */
352
353 l = 0;
354 ld = sid;
355 d[0] = 0.0;
356 e[0] = 0.0;
357 if (sid == 0) d[0] = a[0];
358
359 for (k = 1; k <= sn - 1; k++) {
360
361 /* Use a Householder reflection to zero a(i,k), i = k+2,..., n .
362 * Let a = (0, ..., 0, a(k+1,k) ... a(n,k))',
363 * u = a/norm(a) + (k+1-st unit vector),
364 * beta = -u(k+1) = -norm(u)**2/2,
365 * H = I + u*u'/beta.
366 * Replace A by H*A*H.
367 * Store u in D(K+1) through D(N).
368 * The root node, r, is the owner of column k.
369 */
370
371 r = (k - 1) % sp;
372 if (sid == r) {
373 kp1l=l*slda+k;
374 q = sn - k;
375 atemp = a[l * slda + ld];
376 alpha = F77_DNRM2(&q, &a[kp1l], &inc);
377 if (a[kp1l] < 0.0) alpha = -alpha;
378 if (alpha != 0.0) {
379 alpha2 = 1.0 / alpha;
380 F77_DSCAL(&q, &alpha2, &a[kp1l], &inc);
381 a[kp1l] += 1.0;
382 }
383
384 grp->raw_bcast(&a[kp1l], (sn - k) * dpsize, r);
385
386 /* this is the deferred update of the eigenvector matrix. It was
387 * deferred from the last step to accelerate the sending of the Householder
388 * vector. Don't do this on the first step.
389 */
390 if (k != 1) {
391 int ik = k - 1; /* ik is a temporary index to the previous step */
392 int nmik = sn - ik;
393
394 if (beta != 0.0) {
395 for (i = 0; i < sm; i++) {
396 gamma = F77_DDOT(&nmik, &d[ik], &inc, &z[ik * sm + i], &sm) / beta;
397 F77_DAXPY(&nmik, &gamma, &d[ik], &inc, &z[ik * sm + i], &sm);
398 }
399 }
400 e[ik] = 0.0;
401 d[ik] = atemp;
402 }
403
404 /* now resume normal service */
405 F77_DCOPY(&q, &a[kp1l], &inc, &d[k], &inc);
406 l++;
407 ld += sp;
408 }
409 else {
410 grp->raw_bcast(&d[k], (sn - k) * dpsize, r);
411 }
412
413 beta = -d[k];
414 if (beta != 0.0) {
415
416 /* Symmetric matrix times vector, v = A*u. */
417 /* Store v in E(K+1) through E(N) . */
418
419 alpha2 = 0.0;
420 j = 0;
421 q = sn - k;
422 F77_DCOPY(&q, &alpha2, &j, &e[k], &inc);
423 i = ld;
424 for (j = l; j < sm; j++) {
425 int ij=j*slda+i;
426 q = sn - i;
427 e[i] += F77_DDOT(&q, &a[ij], &inc, &d[i], &inc);
428 q--;
429 F77_DAXPY(&q, &d[i], &a[ij+1], &inc, &e[i + 1], &inc);
430 i += sp;
431 }
432 grp->sum(&e[k], sn-k, work);
433
434 /* v = v/beta
435 * gamma = v'*u/(2*beta)
436 * v = v + gamma*u
437 */
438
439 q = sn - k;
440 alpha2 = 1.0 / beta;
441 F77_DSCAL(&q, &alpha2, &e[k], &inc);
442 gamma = 0.5 * F77_DDOT(&q, &d[k], &inc, &e[k], &inc) / beta;
443 F77_DAXPY(&q, &gamma, &d[k], &inc, &e[k], &inc);
444
445 /* Rank two update of A, compute only lower half. */
446 /* A = A + u'*v + v'*u = H*A*H */
447
448 i = ld;
449 for (j = l; j < sm; j++) {
450 double *atmp= &a[j*slda+i];
451 q = sn - i;
452 F77_DAXPY(&q, &d[i], &e[i], &inc, atmp, &inc);
453 F77_DAXPY(&q, &e[i], &d[i], &inc, atmp, &inc);
454 i += sp;
455 }
456
457 /* Accumulate m rows of transformation matrix.
458 * Z = Z*H
459 *
460 * if I have next column, defer updating
461 */
462
463 if (sid != k%sp || k == sn - 1) {
464 q = sn - k;
465 oobeta = 1.0 / beta;
466 for (i = 0; i < sm; i++) {
467 gamma = F77_DDOT(&q, &d[k], &inc, &z[k * sm + i], &sm) * oobeta;
468 F77_DAXPY(&q, &gamma, &d[k], &inc, &z[k * sm + i], &sm);
469 }
470 }
471 }
472
473 /* another bit of calcs to be deferred */
474 if (sid != k%sp || k == sn - 1) {
475 d[k] = 0.0;
476 e[k] = 0.0;
477 if (sid == k%sp) d[k] = a[l * slda + ld];
478 if (sid == r) e[k] = -alpha;
479 }
480 }
481
482 /* collect the whole tridiagonal matrix at every node */
483
484 grp->sum(d, sn, work);
485 grp->sum(e, sn, work);
486 }
487
488}
Note: See TracBrowser for help on using the repository browser.