1 | //
|
---|
2 | // csgrad34qb.cc
|
---|
3 | // based on: csgrad.cc
|
---|
4 | //
|
---|
5 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
6 | //
|
---|
7 | // Author: Ida Nielsen <ibniels@ca.sandia.gov>
|
---|
8 | // Maintainer: LPS
|
---|
9 | //
|
---|
10 | // This file is part of the SC Toolkit.
|
---|
11 | //
|
---|
12 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
13 | // it under the terms of the GNU Library General Public License as published by
|
---|
14 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
15 | // any later version.
|
---|
16 | //
|
---|
17 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
18 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
20 | // GNU Library General Public License for more details.
|
---|
21 | //
|
---|
22 | // You should have received a copy of the GNU Library General Public License
|
---|
23 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
24 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
25 | //
|
---|
26 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
27 | //
|
---|
28 |
|
---|
29 | #ifdef __GNUC__
|
---|
30 | #pragma implementation
|
---|
31 | #endif
|
---|
32 |
|
---|
33 | #include <math.h>
|
---|
34 |
|
---|
35 | #include <util/misc/formio.h>
|
---|
36 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
37 | #include <chemistry/qc/mbpt/csgrad34qb.h>
|
---|
38 | #include <chemistry/qc/mbpt/util.h>
|
---|
39 | #include <chemistry/qc/basis/distshpair.h>
|
---|
40 |
|
---|
41 | using namespace std;
|
---|
42 | using namespace sc;
|
---|
43 |
|
---|
44 | CSGrad34Qbtr::CSGrad34Qbtr(int mythread_a, int nthread_a,
|
---|
45 | int me_a, int nproc_a,
|
---|
46 | const Ref<MemoryGrp> &mem_a,
|
---|
47 | const Ref<MessageGrp> &msg_a,
|
---|
48 | const Ref<ThreadLock> &lock_a,
|
---|
49 | const Ref<GaussianBasisSet> &basis_a,
|
---|
50 | const Ref<TwoBodyInt> &tbint_a,
|
---|
51 | const Ref<TwoBodyDerivInt> &tbintder_a,
|
---|
52 | int nocc_a, int nfzc_a,
|
---|
53 | double **scf_vector_a,
|
---|
54 | double tol_a, int debug_a,
|
---|
55 | int dynamic_a, double print_percent_a,
|
---|
56 | DistShellPair::SharedData *shellpair_shared_data,
|
---|
57 | int dograd_a, int natom_a):
|
---|
58 | shellpair_shared_data_(shellpair_shared_data)
|
---|
59 | {
|
---|
60 | msg = msg_a;
|
---|
61 | mythread = mythread_a;
|
---|
62 | nthread = nthread_a;
|
---|
63 | lock = lock_a;
|
---|
64 | basis = basis_a;
|
---|
65 | tbint = tbint_a;
|
---|
66 | tbintder = tbintder_a;
|
---|
67 | nocc = nocc_a;
|
---|
68 | nfzc = nfzc_a;
|
---|
69 | me = me_a;
|
---|
70 | nproc = nproc_a;
|
---|
71 | tol = tol_a;
|
---|
72 | mem = mem_a;
|
---|
73 | scf_vector = scf_vector_a;
|
---|
74 | debug = debug_a;
|
---|
75 | dynamic_ = dynamic_a;
|
---|
76 | print_percent_ = print_percent_a;
|
---|
77 | dograd = dograd_a;
|
---|
78 | natom = natom_a;
|
---|
79 |
|
---|
80 | Lpi = 0;
|
---|
81 |
|
---|
82 | aointder_computed = 0;
|
---|
83 | timer = new RegionTimer();
|
---|
84 |
|
---|
85 | ginter = new double*[natom];
|
---|
86 | for (int i=0; i<natom; i++) {
|
---|
87 | ginter[i] = new double[3];
|
---|
88 | }
|
---|
89 | }
|
---|
90 |
|
---|
91 | CSGrad34Qbtr::~CSGrad34Qbtr()
|
---|
92 | {
|
---|
93 | delete[] Lpi;
|
---|
94 | for (int i=0; i<natom; i++) {
|
---|
95 | delete[] ginter[i];
|
---|
96 | }
|
---|
97 | delete[] ginter;
|
---|
98 | }
|
---|
99 |
|
---|
100 | void
|
---|
101 | CSGrad34Qbtr::run()
|
---|
102 | {
|
---|
103 | ////////////////////////////////////////////////////////////
|
---|
104 | // Perform third and fourth quarter back-transformation and
|
---|
105 | // compute contribution to gradient from non-sep 2PDM
|
---|
106 | ////////////////////////////////////////////////////////////
|
---|
107 |
|
---|
108 | int P, Q, R, S;
|
---|
109 | int p, q, r, s;
|
---|
110 | int offset, p_offset, q_offset, r_offset, s_offset;
|
---|
111 | int np, nq, nr, ns;
|
---|
112 | int nbasis = basis->nbasis();
|
---|
113 | int nshell = basis->nshell();
|
---|
114 | int nfuncmax = basis->max_nfunction_in_shell();;
|
---|
115 | int i, j;
|
---|
116 | int jloop;
|
---|
117 | int ij_proc, ij_index;
|
---|
118 | int int_index;
|
---|
119 | int index;
|
---|
120 | int bf1, bf2, bf3, bf4;
|
---|
121 | int nocc_act = nocc - nfzc;
|
---|
122 | int qp, sr;
|
---|
123 | int factor_pqrs;
|
---|
124 | double c_rj;
|
---|
125 | double pqrs;
|
---|
126 | double tmpval;
|
---|
127 | double dtol = 1.0e-10;
|
---|
128 | double *c_sj, *c_pi, *c_qi;
|
---|
129 | double *gammabuf;
|
---|
130 | double *gamma_iqrs, *gamma_pqrs;
|
---|
131 | double *gamma_iqjs_ptr, *gamma_irjq_ptr;
|
---|
132 | double *gamma_iqrs_ptr, *gamma_iqsr_ptr, *gamma_iprs_ptr;
|
---|
133 | double *gamma_pqrs_ptr;
|
---|
134 | double *lpi_ptr, *lqi_ptr;
|
---|
135 | double *grad_ptr1, *grad_ptr2;
|
---|
136 | const double *intbuf = tbint->buffer();
|
---|
137 | const double *intderbuf = tbintder->buffer(); // AO integral derivative buffer
|
---|
138 |
|
---|
139 | delete[] Lpi;
|
---|
140 | Lpi = new double[nbasis*ni];
|
---|
141 |
|
---|
142 | // Initialize Lpi and ginter
|
---|
143 | memset(Lpi, 0, sizeof(double)*basis->nbasis()*ni);
|
---|
144 | for (i=0; i<natom; i++) memset(ginter[i], 0, sizeof(double)*3);
|
---|
145 |
|
---|
146 | MemoryGrpBuf<double> membuf_remote(mem);
|
---|
147 |
|
---|
148 | gamma_iqrs = new double[ni*nbasis*nfuncmax*nfuncmax];
|
---|
149 | if (!gamma_iqrs) {
|
---|
150 | ExEnv::errn() << "Could not allocate gamma_iqrs" << endl;
|
---|
151 | abort();
|
---|
152 | }
|
---|
153 |
|
---|
154 | gamma_pqrs = new double[nfuncmax*nfuncmax*nfuncmax*nfuncmax];
|
---|
155 | if (!gamma_pqrs) {
|
---|
156 | ExEnv::errn() << "Could not allocate gamma_pqrs" << endl;
|
---|
157 | abort();
|
---|
158 | }
|
---|
159 |
|
---|
160 | DerivCenters der_centers;
|
---|
161 |
|
---|
162 | DistShellPair shellpairs(msg,nthread,mythread,lock,basis,basis,dynamic_,
|
---|
163 | shellpair_shared_data_);
|
---|
164 | shellpairs.set_print_percent(print_percent_);
|
---|
165 | shellpairs.set_debug(debug);
|
---|
166 | if (debug) shellpairs.set_print_percent(1);
|
---|
167 | S = 0;
|
---|
168 | R = 0;
|
---|
169 | while (shellpairs.get_task(S,R)) {
|
---|
170 | // If both PQRS and PQRS derivative are zero, skip this S,R pair
|
---|
171 | // NB: The test is done after assigning an SR pair, and, when
|
---|
172 | // using static load balancing, this may create some load imbalance
|
---|
173 | // if more SR pairs are discarded in some threads than in others
|
---|
174 | if (tbint->log2_shell_bound(R,S) < tol
|
---|
175 | && (dograd && tbintder->log2_shell_bound(R,S) < tol)) continue;
|
---|
176 |
|
---|
177 | ns = basis->shell(S).nfunction();
|
---|
178 | s_offset = basis->shell_to_function(S);
|
---|
179 | nr = basis->shell(R).nfunction();
|
---|
180 | r_offset = basis->shell_to_function(R);
|
---|
181 |
|
---|
182 | timer->enter("3. q.b.t.");
|
---|
183 | // Begin third quarter back-transformation.
|
---|
184 |
|
---|
185 | bzerofast(gamma_iqrs,ni*nbasis*nfuncmax*nfuncmax);
|
---|
186 |
|
---|
187 | for (i=0; i<ni; i++) {
|
---|
188 | for (jloop=me; jloop<me+nocc_act; jloop++) {
|
---|
189 | // stagger j's to minimize contention
|
---|
190 | j = jloop%nocc_act + nfzc; // j runs from nfzc to nocc
|
---|
191 | ij_proc = (i*nocc + j)%nproc; // ij_proc has this ij pair
|
---|
192 | ij_index = (i*nocc + j)/nproc;
|
---|
193 |
|
---|
194 | offset = s_offset*nbasis + ij_index*nbasis*nbasis;
|
---|
195 | // Send for elements gamma_iqjs, if necessary
|
---|
196 | gammabuf = (double*) membuf_remote.readonly_on_node(offset,
|
---|
197 | nbasis * ns,
|
---|
198 | ij_proc);
|
---|
199 | for (bf1=0; bf1<nr; bf1++) {
|
---|
200 | c_rj = scf_vector[bf1 + r_offset][j];
|
---|
201 | gamma_iqjs_ptr = gammabuf;
|
---|
202 | for (bf2=0; bf2<ns; bf2++) {
|
---|
203 | gamma_iqrs_ptr = &gamma_iqrs[bf2 + ns*nbasis*(bf1 + nr*i)];
|
---|
204 | for (q=0; q<nbasis; q++) {
|
---|
205 | *gamma_iqrs_ptr += c_rj * *gamma_iqjs_ptr++;
|
---|
206 | gamma_iqrs_ptr += ns;
|
---|
207 | } // exit q loop
|
---|
208 | } // exit bf2 loop
|
---|
209 | } // exit bf1 loop
|
---|
210 |
|
---|
211 | membuf_remote.release();
|
---|
212 |
|
---|
213 | offset = r_offset*nbasis + ij_index*nbasis*nbasis;
|
---|
214 | // Send for elements gamma_irjq, if necessary
|
---|
215 | gammabuf = (double*) membuf_remote.readonly_on_node(offset,
|
---|
216 | nbasis*nr,
|
---|
217 | ij_proc);
|
---|
218 | for (bf1=0; bf1<ns; bf1++) {
|
---|
219 | s = bf1 + s_offset;
|
---|
220 | c_sj = &scf_vector[s][j];
|
---|
221 | gamma_irjq_ptr = gammabuf;
|
---|
222 | for (bf2=0; bf2<nr; bf2++) {
|
---|
223 | r = bf2 + r_offset;
|
---|
224 | if (r != s) {
|
---|
225 | gamma_iqsr_ptr = &gamma_iqrs[bf1 + ns*nbasis*(bf2 + nr*i)];
|
---|
226 | for (q=0; q<nbasis; q++) {
|
---|
227 | *gamma_iqsr_ptr += *c_sj * *gamma_irjq_ptr++;
|
---|
228 | gamma_iqsr_ptr += ns;
|
---|
229 | } // exit q loop
|
---|
230 | } // endif
|
---|
231 | else gamma_irjq_ptr += nbasis;
|
---|
232 | } // exit bf2 loop
|
---|
233 | } // exit bf1 loop
|
---|
234 |
|
---|
235 | membuf_remote.release();
|
---|
236 |
|
---|
237 | } // exit j loop
|
---|
238 | } // exit i loop
|
---|
239 |
|
---|
240 | // end of third quarter back-transformation
|
---|
241 | // we now have gamma_iqrs (symmetrized)
|
---|
242 | // for i-batch, all q, s in S, r in R
|
---|
243 | timer->exit("3. q.b.t.");
|
---|
244 |
|
---|
245 | // only do this if integral is nonzero
|
---|
246 | if (tbint->log2_shell_bound(R,S) >= tol) {
|
---|
247 |
|
---|
248 | // Compute contrib to Laj from (ov|vv) integrals
|
---|
249 | // (done in AO basis to avoid generating (ov|vv);
|
---|
250 | // here, generate Lpi for i-batch; later, transform
|
---|
251 | // Lpi to get contribution to Laj
|
---|
252 | timer->enter("(ov|vv) contrib to Laj");
|
---|
253 | for (Q=0; Q<nshell; Q++) {
|
---|
254 | nq = basis->shell(Q).nfunction();
|
---|
255 | q_offset = basis->shell_to_function(Q);
|
---|
256 | for (P=0; P<=Q; P++) {
|
---|
257 | np = basis->shell(P).nfunction();
|
---|
258 | p_offset = basis->shell_to_function(P);
|
---|
259 | // if (scf_erep_bound(P,Q,R,S) < tol) {
|
---|
260 | // continue; // skip ereps less than tol
|
---|
261 | // }
|
---|
262 | if (tbint->log2_shell_bound(P,Q,R,S) < tol) {
|
---|
263 | continue; // skip ereps less than tol
|
---|
264 | }
|
---|
265 | timer->enter("erep");
|
---|
266 | tbint->compute_shell(P,Q,R,S);
|
---|
267 | timer->exit("erep");
|
---|
268 |
|
---|
269 | offset = nr*ns*nbasis;
|
---|
270 | int_index = 0;
|
---|
271 |
|
---|
272 | for (bf1 = 0; bf1 < np; bf1++) {
|
---|
273 | p = p_offset + bf1;
|
---|
274 | for (bf2 = 0; bf2 < nq; bf2++) {
|
---|
275 | q = q_offset + bf2;
|
---|
276 |
|
---|
277 | if (q < p) {
|
---|
278 | int_index = ns*nr*(bf2+1 + nq*bf1);
|
---|
279 | continue; // skip to next q value
|
---|
280 | }
|
---|
281 |
|
---|
282 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
283 | r = r_offset + bf3;
|
---|
284 |
|
---|
285 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
286 |
|
---|
287 | if (fabs(intbuf[int_index]) > dtol) {
|
---|
288 | s = s_offset + bf4;
|
---|
289 |
|
---|
290 | if (s < r) {
|
---|
291 | int_index++;
|
---|
292 | continue; // skip to next bf4 value
|
---|
293 | }
|
---|
294 |
|
---|
295 | gamma_iqrs_ptr = &gamma_iqrs[bf4 + ns*(q + nbasis*bf3)];
|
---|
296 | gamma_iprs_ptr = &gamma_iqrs[bf4 + ns*(p + nbasis*bf3)];
|
---|
297 | pqrs = intbuf[int_index];
|
---|
298 |
|
---|
299 | lpi_ptr = &Lpi[p*ni];
|
---|
300 | lqi_ptr = &Lpi[q*ni];
|
---|
301 |
|
---|
302 | for (i=0; i<ni; i++) {
|
---|
303 | *lpi_ptr++ -= pqrs**gamma_iqrs_ptr;
|
---|
304 | if (p != q) {
|
---|
305 | *lqi_ptr++ -= pqrs**gamma_iprs_ptr;
|
---|
306 | }
|
---|
307 | gamma_iqrs_ptr += offset;
|
---|
308 | gamma_iprs_ptr += offset;
|
---|
309 | } // exit i loop
|
---|
310 | } // endif
|
---|
311 |
|
---|
312 | int_index++;
|
---|
313 | } // exit bf4 loop
|
---|
314 | } // exit bf3 loop
|
---|
315 | } // exit bf2 loop
|
---|
316 | } // exit bf1 loop
|
---|
317 |
|
---|
318 | } // exit P loop
|
---|
319 | } // exit Q loop
|
---|
320 | timer->exit("(ov|vv) contrib to Laj");
|
---|
321 | } // endif
|
---|
322 |
|
---|
323 | if (!dograd) continue;
|
---|
324 |
|
---|
325 | if (tbintder->log2_shell_bound(R,S) >= tol) {
|
---|
326 |
|
---|
327 | for (Q=0; Q<=S; Q++) {
|
---|
328 | nq = basis->shell(Q).nfunction();
|
---|
329 | q_offset = basis->shell_to_function(Q);
|
---|
330 |
|
---|
331 | for (P=0; P<=(Q==S ? R:Q); P++) {
|
---|
332 | np = basis->shell(P).nfunction();
|
---|
333 | p_offset = basis->shell_to_function(P);
|
---|
334 |
|
---|
335 | // If integral derivative is less than threshold skip to next P
|
---|
336 | if (tbintder->log2_shell_bound(P,Q,R,S) < tol) continue;
|
---|
337 | aointder_computed++;
|
---|
338 |
|
---|
339 | timer->enter("4. q.b.t.");
|
---|
340 | bzerofast(gamma_pqrs,nfuncmax*nfuncmax*nfuncmax*nfuncmax);
|
---|
341 |
|
---|
342 | offset = nr*ns*nbasis;
|
---|
343 |
|
---|
344 | // Begin fourth quarter back-transformation
|
---|
345 | gamma_pqrs_ptr = gamma_pqrs;
|
---|
346 | for (bf1=0; bf1<np; bf1++) {
|
---|
347 | p = bf1 + p_offset;
|
---|
348 | for (bf2=0; bf2<nr; bf2++) {
|
---|
349 | for (bf3=0; bf3<nq; bf3++) {
|
---|
350 | q = bf3 + q_offset;
|
---|
351 | for (bf4=0; bf4<ns; bf4++) {
|
---|
352 | c_pi = &scf_vector[p][i_offset];
|
---|
353 | c_qi = &scf_vector[q][i_offset];
|
---|
354 | gamma_iqrs_ptr = &gamma_iqrs[bf4 + ns*(q + nbasis*bf2)];
|
---|
355 | gamma_iprs_ptr = &gamma_iqrs[bf4 + ns*(p + nbasis*bf2)];
|
---|
356 | tmpval = 0.0;
|
---|
357 | for (i=0; i<ni; i++) {
|
---|
358 | tmpval += *c_pi * *gamma_iqrs_ptr;
|
---|
359 | if (p!=q) tmpval += *c_qi * *gamma_iprs_ptr;
|
---|
360 | c_pi++;
|
---|
361 | c_qi++;
|
---|
362 | gamma_iqrs_ptr += offset;
|
---|
363 | gamma_iprs_ptr += offset;
|
---|
364 | } // exit i loop
|
---|
365 | *gamma_pqrs_ptr += tmpval;
|
---|
366 | gamma_pqrs_ptr++;
|
---|
367 | } // exit bf4 loop
|
---|
368 | } // exit bf3 loop
|
---|
369 | } // exit bf2 loop
|
---|
370 | } // exit bf1 loop
|
---|
371 | // end of fourth quarter back-transformation
|
---|
372 | timer->exit("4. q.b.t.");
|
---|
373 | // (we now have the contribution from one i-batch to the
|
---|
374 | // non-separable part of the 2PDM for one shell block PQRS)
|
---|
375 |
|
---|
376 | // Evaluate derivative integrals
|
---|
377 | timer->enter("erep derivs");
|
---|
378 | tbintder->compute_shell(P,Q,R,S,der_centers);
|
---|
379 | timer->exit("erep derivs");
|
---|
380 |
|
---|
381 | // Compute contribution to gradient from non-sep 2PDM
|
---|
382 | // (i.e., contract derivative integrals with gamma_pqrs)
|
---|
383 | int_index = 0;
|
---|
384 | timer->enter("non-sep 2PDM contrib.");
|
---|
385 | for (int derset=0; derset<der_centers.n(); derset++) {
|
---|
386 | for (int xyz=0; xyz<3; xyz++) {
|
---|
387 | grad_ptr1 = &ginter[der_centers.atom(derset)][xyz];
|
---|
388 | grad_ptr2 = &ginter[der_centers.omitted_atom()][xyz];
|
---|
389 | for (bf1=0; bf1<np; bf1++) {
|
---|
390 | p = bf1 + p_offset;
|
---|
391 | for (bf2=0; bf2<nq; bf2++) {
|
---|
392 | q = bf2 + q_offset;
|
---|
393 | qp = q*(q+1)/2 + p;
|
---|
394 | for (bf3=0; bf3<nr; bf3++) {
|
---|
395 | r = bf3 + r_offset;
|
---|
396 | gamma_pqrs_ptr = &gamma_pqrs[ns*(bf2 + nq*(bf3 + nr*bf1))];
|
---|
397 | for (bf4=0; bf4<ns; bf4++) {
|
---|
398 | s = bf4 + s_offset;
|
---|
399 | sr = s*(s+1)/2 + r;
|
---|
400 | if (q == s && p == r) factor_pqrs = 1;
|
---|
401 | else factor_pqrs = 2;
|
---|
402 | tmpval = intderbuf[int_index]*factor_pqrs**gamma_pqrs_ptr;
|
---|
403 | gamma_pqrs_ptr++;
|
---|
404 | if (q>=p && s>=r && (P != R || Q != S || sr >= qp)) {
|
---|
405 | *grad_ptr1 += tmpval;
|
---|
406 | if (der_centers.has_omitted_center())
|
---|
407 | *grad_ptr2 -= tmpval;
|
---|
408 | }
|
---|
409 | int_index++;
|
---|
410 | } // exit bf4 loop
|
---|
411 | } // exit bf3 loop
|
---|
412 | } // exit bf2 loop
|
---|
413 | } // exit bf1 loop
|
---|
414 | } // exit xyz loop
|
---|
415 | } // exit derset loop
|
---|
416 | timer->exit("non-sep 2PDM contrib.");
|
---|
417 |
|
---|
418 | } // exit P loop
|
---|
419 | } // exit Q loop
|
---|
420 | } // endif
|
---|
421 | } // end while
|
---|
422 |
|
---|
423 | delete[] gamma_iqrs;
|
---|
424 | delete[] gamma_pqrs;
|
---|
425 |
|
---|
426 | }
|
---|
427 |
|
---|
428 | ////////////////////////////////////////////////////////////////////////////
|
---|
429 |
|
---|
430 | // Local Variables:
|
---|
431 | // mode: c++
|
---|
432 | // c-file-style: "CLJ-CONDENSED"
|
---|
433 | // End:
|
---|