1 | //
|
---|
2 | // csgrade12.cc
|
---|
3 | // based on: csgrad.cc
|
---|
4 | //
|
---|
5 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
6 | //
|
---|
7 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
8 | // Maintainer: LPS
|
---|
9 | //
|
---|
10 | // This file is part of the SC Toolkit.
|
---|
11 | //
|
---|
12 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
13 | // it under the terms of the GNU Library General Public License as published by
|
---|
14 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
15 | // any later version.
|
---|
16 | //
|
---|
17 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
18 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
20 | // GNU Library General Public License for more details.
|
---|
21 | //
|
---|
22 | // You should have received a copy of the GNU Library General Public License
|
---|
23 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
24 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
25 | //
|
---|
26 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
27 | //
|
---|
28 |
|
---|
29 | #ifdef __GNUC__
|
---|
30 | #pragma implementation
|
---|
31 | #endif
|
---|
32 |
|
---|
33 | #include <math.h>
|
---|
34 |
|
---|
35 | #include <util/misc/formio.h>
|
---|
36 | #include <chemistry/qc/basis/petite.h>
|
---|
37 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
38 | #include <chemistry/qc/mbpt/csgrade12.h>
|
---|
39 | #include <chemistry/qc/basis/distshpair.h>
|
---|
40 |
|
---|
41 | #include <chemistry/qc/mbpt/util.h>
|
---|
42 |
|
---|
43 | using namespace std;
|
---|
44 | using namespace sc;
|
---|
45 |
|
---|
46 | extern BiggestContribs biggest_ints_1;
|
---|
47 |
|
---|
48 | #define PRINT1Q 0
|
---|
49 |
|
---|
50 | CSGradErep12Qtr::CSGradErep12Qtr(int mythread_a, int nthread_a,
|
---|
51 | int me_a, int nproc_a,
|
---|
52 | const Ref<MemoryGrp> &mem_a,
|
---|
53 | const Ref<MessageGrp> &msg_a,
|
---|
54 | const Ref<ThreadLock> &lock_a,
|
---|
55 | const Ref<GaussianBasisSet> &basis_a,
|
---|
56 | const Ref<TwoBodyInt> &tbint_a,
|
---|
57 | int nocc_a,
|
---|
58 | double **scf_vector_a,
|
---|
59 | double tol_a, int debug_a,
|
---|
60 | int dynamic_a, double print_percent_a,
|
---|
61 | DistShellPair::SharedData *shellpair_shared_data,
|
---|
62 | int usep4):
|
---|
63 | shellpair_shared_data_(shellpair_shared_data)
|
---|
64 | {
|
---|
65 | msg = msg_a;
|
---|
66 | mythread = mythread_a;
|
---|
67 | nthread = nthread_a;
|
---|
68 | lock = lock_a;
|
---|
69 | basis = basis_a;
|
---|
70 | tbint = tbint_a;
|
---|
71 | nocc = nocc_a;
|
---|
72 | me = me_a;
|
---|
73 | nproc = nproc_a;
|
---|
74 | tol = tol_a;
|
---|
75 | mem = mem_a;
|
---|
76 | scf_vector = scf_vector_a;
|
---|
77 | debug = debug_a;
|
---|
78 | dynamic_ = dynamic_a;
|
---|
79 | print_percent_ = print_percent_a;
|
---|
80 | usep4_ = usep4;
|
---|
81 |
|
---|
82 | aoint_computed = 0;
|
---|
83 | timer = new RegionTimer();
|
---|
84 | }
|
---|
85 |
|
---|
86 | CSGradErep12Qtr::~CSGradErep12Qtr()
|
---|
87 | {
|
---|
88 | }
|
---|
89 |
|
---|
90 | void
|
---|
91 | CSGradErep12Qtr::run()
|
---|
92 | {
|
---|
93 | int P,Q,R,S;
|
---|
94 | int p,q,r,s;
|
---|
95 | int np,nq,nr,ns;
|
---|
96 | int bf1,bf2,bf3,bf4;
|
---|
97 | int p_offset,q_offset,r_offset,s_offset;
|
---|
98 | int offset;
|
---|
99 | int nfuncmax = basis->max_nfunction_in_shell();
|
---|
100 | int nshell = basis->nshell();
|
---|
101 | int nbasis = basis->nbasis();
|
---|
102 | double dtol = pow(2.0,tol);
|
---|
103 | double *iqjs_ptr;
|
---|
104 | double *iqrs_ptr, *iprs_ptr;
|
---|
105 | double *c_pi, *c_qi;
|
---|
106 | double tmpval;
|
---|
107 | int i,j;
|
---|
108 | double *iqjs_contrib; // local contributions to integral_iqjs
|
---|
109 | double *iqjr_contrib; // local contributions to integral_iqjr
|
---|
110 |
|
---|
111 | const double *intbuf = tbint->buffer();
|
---|
112 |
|
---|
113 | iqjs_contrib = mem->malloc_local_double(nbasis*nfuncmax);
|
---|
114 | iqjr_contrib = mem->malloc_local_double(nbasis*nfuncmax);
|
---|
115 |
|
---|
116 | double *integral_iqrs; // quarter transformed two-el integrals
|
---|
117 | lock->lock();
|
---|
118 | integral_iqrs = new double[ni*nbasis*nfuncmax*nfuncmax];
|
---|
119 | lock->unlock();
|
---|
120 |
|
---|
121 | int work_per_thread = ((nshell*(nshell+1))/2)/(nproc*nthread);
|
---|
122 | int print_interval = work_per_thread/100;
|
---|
123 | int time_interval = work_per_thread/10;
|
---|
124 | int print_index = 0;
|
---|
125 | if (print_interval == 0) print_interval = 1;
|
---|
126 | if (time_interval == 0) time_interval = 1;
|
---|
127 | if (work_per_thread == 0) work_per_thread = 1;
|
---|
128 |
|
---|
129 | if (debug) {
|
---|
130 | lock->lock();
|
---|
131 | ExEnv::outn() << scprintf("%d:%d: starting get_task loop",me,mythread) << endl;
|
---|
132 | lock->unlock();
|
---|
133 | }
|
---|
134 |
|
---|
135 | // Use petite list for symmetry utilization
|
---|
136 | Ref<PetiteList> p4list = tbint->integral()->petite_list();
|
---|
137 |
|
---|
138 | DistShellPair shellpairs(msg,nthread,mythread,lock,basis,basis,dynamic_,
|
---|
139 | shellpair_shared_data_);
|
---|
140 | shellpairs.set_print_percent(print_percent_);
|
---|
141 | shellpairs.set_debug(debug);
|
---|
142 | if (debug) shellpairs.set_print_percent(1);
|
---|
143 | S = 0;
|
---|
144 | R = 0;
|
---|
145 | while (shellpairs.get_task(S,R)) {
|
---|
146 | ns = basis->shell(S).nfunction();
|
---|
147 | s_offset = basis->shell_to_function(S);
|
---|
148 |
|
---|
149 | nr = basis->shell(R).nfunction();
|
---|
150 | r_offset = basis->shell_to_function(R);
|
---|
151 |
|
---|
152 | if (debug > 1 && (print_index++)%print_interval == 0) {
|
---|
153 | lock->lock();
|
---|
154 | ExEnv::outn() << scprintf("%d:%d: (PQ|%d %d) %d%%",
|
---|
155 | me,mythread,R,S,(100*print_index)/work_per_thread)
|
---|
156 | << endl;
|
---|
157 | lock->unlock();
|
---|
158 | }
|
---|
159 | if (debug > 1 && (print_index)%time_interval == 0) {
|
---|
160 | lock->lock();
|
---|
161 | ExEnv::outn() << scprintf("timer for %d:%d:",me,mythread) << endl;
|
---|
162 | timer->print();
|
---|
163 | lock->unlock();
|
---|
164 | }
|
---|
165 |
|
---|
166 | bzerofast(integral_iqrs, ni*nbasis*nfuncmax*nfuncmax);
|
---|
167 |
|
---|
168 | for (Q=0; Q<nshell; Q++) {
|
---|
169 | nq = basis->shell(Q).nfunction();
|
---|
170 | q_offset = basis->shell_to_function(Q);
|
---|
171 | for (P=0; P<=Q; P++) {
|
---|
172 | np = basis->shell(P).nfunction();
|
---|
173 | p_offset = basis->shell_to_function(P);
|
---|
174 |
|
---|
175 | // check if symmetry unique and compute degeneracy
|
---|
176 | int deg;
|
---|
177 | if (usep4_) deg = p4list->in_p4(P,Q,R,S);
|
---|
178 | else deg = 1;
|
---|
179 | double symfac = (double) deg;
|
---|
180 | if (deg == 0)
|
---|
181 | continue;
|
---|
182 |
|
---|
183 | if (tbint->log2_shell_bound(P,Q,R,S) < tol) {
|
---|
184 | continue; // skip ereps less than tol
|
---|
185 | }
|
---|
186 |
|
---|
187 | aoint_computed++;
|
---|
188 |
|
---|
189 | timer->enter("erep");
|
---|
190 | tbint->compute_shell(P,Q,R,S);
|
---|
191 | timer->exit("erep");
|
---|
192 |
|
---|
193 | timer->enter("1. q.t.");
|
---|
194 | // Begin first quarter transformation;
|
---|
195 | // generate (iq|rs) for i active
|
---|
196 |
|
---|
197 | offset = nr*ns*nbasis;
|
---|
198 | const double *pqrs_ptr = intbuf;
|
---|
199 | for (bf1 = 0; bf1 < np; bf1++) {
|
---|
200 | p = p_offset + bf1;
|
---|
201 | for (bf2 = 0; bf2 < nq; bf2++) {
|
---|
202 | q = q_offset + bf2;
|
---|
203 |
|
---|
204 | if (q < p) {
|
---|
205 | pqrs_ptr = &intbuf[ns*nr*(bf2+1 + nq*bf1)];
|
---|
206 | continue; // skip to next q value
|
---|
207 | }
|
---|
208 |
|
---|
209 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
210 | r = r_offset + bf3;
|
---|
211 |
|
---|
212 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
213 | s = s_offset + bf4;
|
---|
214 |
|
---|
215 | if (s < r) {
|
---|
216 | pqrs_ptr++;
|
---|
217 | continue; // skip to next bf4 value
|
---|
218 | }
|
---|
219 |
|
---|
220 | if (fabs(*pqrs_ptr) > dtol) {
|
---|
221 | iprs_ptr = &integral_iqrs[bf4 + ns*(p + nbasis*bf3)];
|
---|
222 | iqrs_ptr = &integral_iqrs[bf4 + ns*(q + nbasis*bf3)];
|
---|
223 | c_qi = &scf_vector[q][i_offset];
|
---|
224 | c_pi = &scf_vector[p][i_offset];
|
---|
225 | tmpval = *pqrs_ptr;
|
---|
226 | // multiply each integral by its symmetry degeneracy factor
|
---|
227 | tmpval *= symfac;
|
---|
228 | for (i=0; i<ni; i++) {
|
---|
229 | *iprs_ptr += *c_qi++*tmpval;
|
---|
230 | iprs_ptr += offset;
|
---|
231 | if (p != q) {
|
---|
232 | *iqrs_ptr += *c_pi++*tmpval;
|
---|
233 | iqrs_ptr += offset;
|
---|
234 | }
|
---|
235 | } // exit i loop
|
---|
236 | } // endif
|
---|
237 |
|
---|
238 | pqrs_ptr++;
|
---|
239 | } // exit bf4 loop
|
---|
240 | } // exit bf3 loop
|
---|
241 | } // exit bf2 loop
|
---|
242 | } // exit bf1 loop
|
---|
243 | // end of first quarter transformation
|
---|
244 | timer->exit("1. q.t.");
|
---|
245 |
|
---|
246 | } // exit P loop
|
---|
247 | } // exit Q loop
|
---|
248 |
|
---|
249 | #if PRINT1Q
|
---|
250 | {
|
---|
251 | lock->lock();
|
---|
252 | double *tmp = integral_iqrs;
|
---|
253 | for (int i = 0; i<ni; i++) {
|
---|
254 | for (int r = 0; r<nr; r++) {
|
---|
255 | for (int q = 0; q<nbasis; q++) {
|
---|
256 | for (int s = 0; s<ns; s++) {
|
---|
257 | printf("1Q: (%d %d|%d %d) = %12.8f\n",
|
---|
258 | i,q,r+r_offset,s+s_offset,*tmp);
|
---|
259 | tmp++;
|
---|
260 | }
|
---|
261 | }
|
---|
262 | }
|
---|
263 | }
|
---|
264 | lock->unlock();
|
---|
265 | }
|
---|
266 | #endif
|
---|
267 | #if PRINT_BIGGEST_INTS
|
---|
268 | {
|
---|
269 | lock->lock();
|
---|
270 | double *tmp = integral_iqrs;
|
---|
271 | for (int i = 0; i<ni; i++) {
|
---|
272 | for (int r = 0; r<nr; r++) {
|
---|
273 | for (int q = 0; q<nbasis; q++) {
|
---|
274 | for (int s = 0; s<ns; s++) {
|
---|
275 | if (i+i_offset==104) {
|
---|
276 | biggest_ints_1.insert(*tmp,i+i_offset,q,r+r_offset,s+s_offset);
|
---|
277 | }
|
---|
278 | tmp++;
|
---|
279 | }
|
---|
280 | }
|
---|
281 | }
|
---|
282 | }
|
---|
283 | lock->unlock();
|
---|
284 | }
|
---|
285 | #endif
|
---|
286 |
|
---|
287 | timer->enter("2. q.t.");
|
---|
288 | // Begin second quarter transformation;
|
---|
289 | // generate (iq|jr) for i active and j active or frozen
|
---|
290 | for (i=0; i<ni; i++) {
|
---|
291 | for (j=0; j<nocc; j++) {
|
---|
292 |
|
---|
293 | bzerofast(iqjs_contrib, nbasis*nfuncmax);
|
---|
294 | bzerofast(iqjr_contrib, nbasis*nfuncmax);
|
---|
295 |
|
---|
296 | for (bf1=0; bf1<ns; bf1++) {
|
---|
297 | s = s_offset + bf1;
|
---|
298 | double *c_sj = &scf_vector[s][j];
|
---|
299 | double *iqjr_ptr = iqjr_contrib;
|
---|
300 | for (bf2=0; bf2<nr; bf2++) {
|
---|
301 | r = r_offset + bf2;
|
---|
302 | if (r > s) {
|
---|
303 | break; // skip to next bf1 value
|
---|
304 | }
|
---|
305 | double c_rj = scf_vector[r][j];
|
---|
306 | iqjs_ptr = &iqjs_contrib[bf1*nbasis];
|
---|
307 | iqrs_ptr = &integral_iqrs[bf1 + ns*nbasis*(bf2 + nr*i)];
|
---|
308 | for (q=0; q<nbasis; q++) {
|
---|
309 | *iqjs_ptr++ += c_rj * *iqrs_ptr;
|
---|
310 | if (r != s) *iqjr_ptr += *c_sj * *iqrs_ptr;
|
---|
311 | iqjr_ptr++;
|
---|
312 | iqrs_ptr += ns;
|
---|
313 | } // exit q loop
|
---|
314 | } // exit bf2 loop
|
---|
315 | } // exit bf1 loop
|
---|
316 |
|
---|
317 | // We now have contributions to iqjs and iqjr for one pair i,j,
|
---|
318 | // all q, r in R and s in S; send iqjs and iqjr to the node
|
---|
319 | // (ij_proc) which is going to have this ij pair
|
---|
320 | int ij_proc = (i*nocc + j)%nproc;
|
---|
321 | int ij_index = (i*nocc + j)/nproc;
|
---|
322 |
|
---|
323 | // Sum the iqjs_contrib to the appropriate place
|
---|
324 | size_t ij_offset = size_t(nbasis)*(s_offset + size_t(nbasis)*ij_index);
|
---|
325 | mem->sum_reduction_on_node(iqjs_contrib,
|
---|
326 | ij_offset, ns*nbasis, ij_proc);
|
---|
327 |
|
---|
328 | ij_offset = size_t(nbasis)*(r_offset + size_t(nbasis)*ij_index);
|
---|
329 | mem->sum_reduction_on_node(iqjr_contrib,
|
---|
330 | ij_offset, nr*nbasis, ij_proc);
|
---|
331 |
|
---|
332 | } // exit j loop
|
---|
333 | } // exit i loop
|
---|
334 | // end of second quarter transformation
|
---|
335 | timer->exit("2. q.t.");
|
---|
336 |
|
---|
337 | } // exit while get_task
|
---|
338 |
|
---|
339 | if (debug) {
|
---|
340 | lock->lock();
|
---|
341 | ExEnv::outn() << scprintf("%d:%d: done with get_task loop",me,mythread) << endl;
|
---|
342 | lock->unlock();
|
---|
343 | }
|
---|
344 |
|
---|
345 | lock->lock();
|
---|
346 | delete[] integral_iqrs;
|
---|
347 | mem->free_local_double(iqjs_contrib);
|
---|
348 | mem->free_local_double(iqjr_contrib);
|
---|
349 | lock->unlock();
|
---|
350 | }
|
---|
351 |
|
---|
352 | ////////////////////////////////////////////////////////////////////////////
|
---|
353 |
|
---|
354 | // Local Variables:
|
---|
355 | // mode: c++
|
---|
356 | // c-file-style: "CLJ-CONDENSED"
|
---|
357 | // End:
|
---|