1 | //
|
---|
2 | // csgrad.cc
|
---|
3 | //
|
---|
4 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
5 | //
|
---|
6 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
7 | // Maintainer: LPS
|
---|
8 | //
|
---|
9 | // This file is part of the SC Toolkit.
|
---|
10 | //
|
---|
11 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
12 | // it under the terms of the GNU Library General Public License as published by
|
---|
13 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
14 | // any later version.
|
---|
15 | //
|
---|
16 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | // GNU Library General Public License for more details.
|
---|
20 | //
|
---|
21 | // You should have received a copy of the GNU Library General Public License
|
---|
22 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
24 | //
|
---|
25 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
26 | //
|
---|
27 |
|
---|
28 | #include <stdlib.h>
|
---|
29 | #include <math.h>
|
---|
30 | #include <limits.h>
|
---|
31 |
|
---|
32 | #include <util/misc/formio.h>
|
---|
33 | #include <util/misc/timer.h>
|
---|
34 | #include <util/group/memory.h>
|
---|
35 | #include <util/group/message.h>
|
---|
36 | #include <util/class/class.h>
|
---|
37 | #include <util/state/state.h>
|
---|
38 | #include <math/scmat/matrix.h>
|
---|
39 | #include <math/scmat/blocked.h>
|
---|
40 | #include <chemistry/molecule/molecule.h>
|
---|
41 | #include <chemistry/qc/basis/integral.h>
|
---|
42 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
43 | #include <chemistry/qc/mbpt/mbpt.h>
|
---|
44 | #include <chemistry/qc/mbpt/util.h>
|
---|
45 | #include <chemistry/qc/mbpt/csgrade12.h>
|
---|
46 | #include <chemistry/qc/mbpt/csgrad34qb.h>
|
---|
47 | #include <chemistry/qc/mbpt/csgrads2pdm.h>
|
---|
48 |
|
---|
49 | using namespace std;
|
---|
50 | using namespace sc;
|
---|
51 |
|
---|
52 | #define SINGLE_THREAD_E12 0
|
---|
53 | #define SINGLE_THREAD_QBT34 0
|
---|
54 | #define SINGLE_THREAD_S2PDM 0
|
---|
55 |
|
---|
56 | #define PRINT2Q 0
|
---|
57 | #define PRINT3Q 0
|
---|
58 | #define PRINT4Q 0
|
---|
59 | #if PRINT_BIGGEST_INTS
|
---|
60 | BiggestContribs biggest_ints_1(4,40);
|
---|
61 | #endif
|
---|
62 |
|
---|
63 | #define WRITE_DOUBLES 0
|
---|
64 |
|
---|
65 | static void sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2);
|
---|
66 | static void zero_gradients(double **f, int n1, int n2);
|
---|
67 | static void accum_gradients(double **g, double **f, int n1, int n2);
|
---|
68 |
|
---|
69 | #define PRINT1Q 0
|
---|
70 |
|
---|
71 | #if PRINT_CONTRIB
|
---|
72 | static void
|
---|
73 | sw(int&i,int&j)
|
---|
74 | {
|
---|
75 | int tmp = i;
|
---|
76 | i = j;
|
---|
77 | j = tmp;
|
---|
78 | }
|
---|
79 |
|
---|
80 | static void
|
---|
81 | print_contrib(double tmpval, int num, int onum,
|
---|
82 | int P,int Q,int R,int S, int p,int q,int r,int s)
|
---|
83 | {
|
---|
84 |
|
---|
85 | printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
86 | num, P, Q, R, S, p, q, r, s, tmpval);
|
---|
87 | printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
88 | onum, P, Q, R, S, p, q, r, s, -tmpval);
|
---|
89 |
|
---|
90 | if (p < q) {
|
---|
91 | sw(p,q); sw(P,Q);
|
---|
92 | }
|
---|
93 | if (r < s) {
|
---|
94 | sw(r,s); sw(R,S);
|
---|
95 | }
|
---|
96 | if (p < r || (p == r && q < s)) {
|
---|
97 | sw(P,R); sw(p,r);
|
---|
98 | sw(Q,S); sw(q,s);
|
---|
99 | }
|
---|
100 |
|
---|
101 | printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
102 | num, P, Q, R, S, p, q, r, s, tmpval);
|
---|
103 | printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
104 | onum, P, Q, R, S, p, q, r, s, -tmpval);
|
---|
105 | }
|
---|
106 | #endif
|
---|
107 |
|
---|
108 | void
|
---|
109 | MBPT2::compute_cs_grad()
|
---|
110 | {
|
---|
111 |
|
---|
112 | // New version of MP2 gradient program which uses the full
|
---|
113 | // permutational symmetry of the two-electron integral derivatives
|
---|
114 |
|
---|
115 | Ref<SCMatrixKit> kit = basis()->matrixkit();
|
---|
116 |
|
---|
117 | int do_d2_ = 1; // if true, compute d2 diagnostic
|
---|
118 |
|
---|
119 | int nij; // number of i,j pairs on a node (for e.g., mo_int)
|
---|
120 | double *mo_int; // MO integrals of type (ov|ov)
|
---|
121 | // (and these integrals divided by
|
---|
122 | // orbital energy denominators)
|
---|
123 | double *integral_iqjs; // half-transformed integrals
|
---|
124 |
|
---|
125 | int nocc_act, nvir_act;
|
---|
126 | int i, j, k;
|
---|
127 | int ii, bb;
|
---|
128 | int x, y;
|
---|
129 | int a, b, c;
|
---|
130 | int nshell;
|
---|
131 | int offset;
|
---|
132 | int ik_offset;
|
---|
133 | int i_offset;
|
---|
134 | int npass, pass;
|
---|
135 | int tmpint;
|
---|
136 | int np, nq, nr, ns;
|
---|
137 | int P, Q, R, S;
|
---|
138 | int p, q, r, s;
|
---|
139 | int bf1, bf2, bf3, bf4;
|
---|
140 | int index;
|
---|
141 | int me;
|
---|
142 | int nproc;
|
---|
143 | int rest;
|
---|
144 | int p_offset, q_offset, r_offset, s_offset;
|
---|
145 |
|
---|
146 | int aoint_computed = 0;
|
---|
147 | int aointder_computed = 0;
|
---|
148 | int xyz;
|
---|
149 | int natom = molecule()->natom(); // the number of atoms
|
---|
150 | int int_index;
|
---|
151 | size_t mem_static; // static memory in bytes
|
---|
152 | int ij_proc; // the processor which has ij pair
|
---|
153 | int ij_index; // of the ij pairs on a proc, this ij pair is number ij_index
|
---|
154 | // (i.e., ij_index < nij)
|
---|
155 | int ik_proc; // the processor which has ik pair
|
---|
156 | int ik_index;
|
---|
157 | int jloop, kloop;
|
---|
158 |
|
---|
159 | int ni;
|
---|
160 |
|
---|
161 | double *evals; // scf eigenvalues
|
---|
162 | double *iajb_ptr, *ibja_ptr, *iakb_ptr, *ibka_ptr;
|
---|
163 | double *iajc_ptr, *ibjc_ptr, *icjb_ptr, *icja_ptr;
|
---|
164 | double *ijkb_ptr, *ibkj_ptr;
|
---|
165 | double pqrs;
|
---|
166 | double *c_sa, c_rj;
|
---|
167 | double *c_pi, *c_qi, *c_sj;
|
---|
168 | double *c_qx, *c_qa, *c_sb, *c_pa, *c_pq, *c_sy;
|
---|
169 | double delta_ijab, delta_ijbc, delta_ijac;
|
---|
170 | double ecorr_mp2 = 0.0;
|
---|
171 | double escf;
|
---|
172 | double emp2=0.0;
|
---|
173 | int tol; // log2 of the erep tolerance
|
---|
174 | // (erep < 2^tol => discard)
|
---|
175 | double *Wkj=0,*Wab=0,*Waj=0;// occ-occ, vir-vir and vir-occ parts of
|
---|
176 | // second order correction to MP2
|
---|
177 | // energy weighted density matrix
|
---|
178 | double *Pkj=0,*Pab=0; // occ-occ and vir-vir parts of second order
|
---|
179 | // correction to MP2 density matrix
|
---|
180 | double *d2occ_mat, *d2vir_mat; // matrices for computation of D2 diagnostic
|
---|
181 | double *Laj=0; // MP2 Lagrangian
|
---|
182 | double *Lpi; // contrib to MP2 Lagrangian partially in AO basis
|
---|
183 | double *pkj_ptr=0, *pab_ptr;
|
---|
184 | double *d2occ_mat_ptr;
|
---|
185 | double *d2vir_mat_ptr;
|
---|
186 | double *wkj_ptr, *wjk_ptr, *wab_ptr, *wba_ptr, *waj_ptr=0;
|
---|
187 | double *laj_ptr, *lpi_ptr, *lqi_ptr;
|
---|
188 | double *gamma_iajs, *gamma_iajs_tmp;
|
---|
189 | // partially back-transformed non-sep 2PDM's
|
---|
190 | double *gamma_iqjs_tmp;
|
---|
191 | double *gamma_iajs_ptr;
|
---|
192 | double *gamma_iqjs_ptr;
|
---|
193 | double *gammabuf; // buffer used for sending elements of gamma_iqjs
|
---|
194 | double *mo_intbuf; // buffer used for sending mo integrals
|
---|
195 | double tmpval, tmpval1;
|
---|
196 | double *P2AO, *W2AO;
|
---|
197 | double *p2ao_ptr, *w2ao_ptr;
|
---|
198 | double *PHF, *WHF;
|
---|
199 | double *phf_ptr, *whf_ptr;
|
---|
200 | double *PMP2, *WMP2;
|
---|
201 | double *pmp2_ptr, *wmp2_ptr;
|
---|
202 |
|
---|
203 | double *ixjs_tmp; // three-quarter transformed two-el integrals
|
---|
204 | double *integral_ixjs; // all three-quarter transformed two-el integrals
|
---|
205 | double *integral_iajy; // mo integrals (y = any MO)
|
---|
206 | double *integral_ikja; // mo integrals
|
---|
207 | double *integral_iqjs_ptr;
|
---|
208 | double *iajy_ptr;
|
---|
209 | double *ixjs_ptr;
|
---|
210 | double *ikja_ptr;
|
---|
211 | double *iajs_ptr, *ikjs_ptr;
|
---|
212 |
|
---|
213 | double **gradient=0, *gradient_dat=0; // The MP2 gradient
|
---|
214 | double **hf_gradient=0, *hf_gradient_dat=0; // The HF gradient
|
---|
215 | double **ginter=0; // Intermediates for the MP2 gradient
|
---|
216 | double **hf_ginter=0; // Intermediates for the HF gradient
|
---|
217 | double d2o, d2v, d2_diag;
|
---|
218 |
|
---|
219 | BiggestContribs biggest_coefs(5,10);
|
---|
220 | CharacterTable ct = molecule()->point_group()->char_table();
|
---|
221 |
|
---|
222 | #if PRINT_BIGGEST_INTS
|
---|
223 | BiggestContribs biggest_ints_2(4,40);
|
---|
224 | BiggestContribs biggest_ints_2s(4,40);
|
---|
225 | BiggestContribs biggest_ints_3a(4,40);
|
---|
226 | BiggestContribs biggest_ints_3(4,40);
|
---|
227 | #endif
|
---|
228 |
|
---|
229 | int dograd = gradient_needed();
|
---|
230 |
|
---|
231 | tim_enter("mp2-mem");
|
---|
232 |
|
---|
233 | nfuncmax = basis()->max_nfunction_in_shell();
|
---|
234 |
|
---|
235 | nshell = basis()->nshell();
|
---|
236 |
|
---|
237 | me = msg_->me();
|
---|
238 |
|
---|
239 | if (me == 0) {
|
---|
240 | ExEnv::out0() << endl << indent
|
---|
241 | << "Entered memgrp based MP2 routine" << endl;
|
---|
242 | }
|
---|
243 |
|
---|
244 | nproc = msg_->n();
|
---|
245 | if (me == 0)
|
---|
246 | ExEnv::out0() << indent << scprintf("nproc = %i", nproc) << endl;
|
---|
247 |
|
---|
248 | tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
|
---|
249 |
|
---|
250 | nocc = 0;
|
---|
251 | for (i=0; i<oso_dimension()->n(); i++) {
|
---|
252 | if (reference_->occupation(i) == 2.0) nocc++;
|
---|
253 | }
|
---|
254 |
|
---|
255 | nocc_act = nocc - nfzc;
|
---|
256 | nvir = noso - nocc;
|
---|
257 | nvir_act = nvir - nfzv;
|
---|
258 |
|
---|
259 | // Do a few preliminary tests to make sure the desired calculation
|
---|
260 | // can be done (and appears to be meaningful!)
|
---|
261 |
|
---|
262 | if (nocc_act <= 0) {
|
---|
263 | if (me == 0) {
|
---|
264 | ExEnv::err0() << "There are no active occupied orbitals; program exiting" << endl;
|
---|
265 | }
|
---|
266 | abort();
|
---|
267 | }
|
---|
268 |
|
---|
269 | if (nvir_act <= 0) {
|
---|
270 | if (me == 0) {
|
---|
271 | ExEnv::err0() << "There are no active virtual orbitals; program exiting" << endl;
|
---|
272 | }
|
---|
273 | abort();
|
---|
274 | }
|
---|
275 |
|
---|
276 | if (restart_orbital_memgrp_) {
|
---|
277 | if (!dograd && !do_d1_ && !do_d2_) {
|
---|
278 | ExEnv::out0() << indent
|
---|
279 | << scprintf("Restarting at orbital %d with partial energy %18.14f",
|
---|
280 | restart_orbital_memgrp_, restart_ecorr_)
|
---|
281 | << endl;
|
---|
282 | ecorr_mp2 = restart_ecorr_;
|
---|
283 | }
|
---|
284 | else {
|
---|
285 | ExEnv::out0() << indent
|
---|
286 | << "Restart requested but not possible with gradients, D1, or D2"
|
---|
287 | << endl;
|
---|
288 | restart_ecorr_ = 0.0;
|
---|
289 | restart_orbital_memgrp_ = 0;
|
---|
290 | }
|
---|
291 | }
|
---|
292 | else {
|
---|
293 | restart_ecorr_ = 0.0;
|
---|
294 | }
|
---|
295 |
|
---|
296 | ////////////////////////////////////////////////////////
|
---|
297 | // Compute batch size ni for mp2 loops;
|
---|
298 | //
|
---|
299 | // The following arrays are kept throughout (all of type double):
|
---|
300 | // scf_vector, gradient, ginter, Pkj, Pab, Wkj, Wab, Waj, Laj
|
---|
301 | // and memory allocated for these arrays and integral evaluators
|
---|
302 | // is called mem_static
|
---|
303 | //
|
---|
304 | ////////////////////////////////////////////////////////
|
---|
305 | if (me == 0) {
|
---|
306 | mem_static = nbasis*noso; // scf vector
|
---|
307 | mem_static += 2*nbasis*nfuncmax; // iqjs & iqjr
|
---|
308 | if (dograd) {
|
---|
309 | mem_static += 9*natom; // gradient & ginter & hf_ginter
|
---|
310 | mem_static += (nocc*(nocc+1))/2; // Pkj
|
---|
311 | mem_static += (nvir*(nvir+1))/2; // Pab
|
---|
312 | mem_static += nocc*nocc; // Wkj
|
---|
313 | mem_static += nvir*nvir; // Wab
|
---|
314 | mem_static += 2*nocc*nvir; // Waj & Laj
|
---|
315 | if (do_d2_) {
|
---|
316 | mem_static += (nocc_act*(nocc_act+1))/2; // d2occ_mat
|
---|
317 | mem_static += (nvir_act*(nvir_act+1))/2; // d2vir_mat
|
---|
318 | }
|
---|
319 | }
|
---|
320 | else if (do_d1_) {
|
---|
321 | mem_static += nocc*nvir; // partial Laj
|
---|
322 | }
|
---|
323 | mem_static *= sizeof(double);
|
---|
324 | int nthreads = thr_->nthread();
|
---|
325 | mem_static += nthreads * integral()->storage_required_eri(basis()); // integral evaluators
|
---|
326 | ni = compute_cs_batchsize(mem_static, nocc_act-restart_orbital_memgrp_);
|
---|
327 | }
|
---|
328 |
|
---|
329 | if (max_norb_ > 0 && ni > max_norb_) {
|
---|
330 | ExEnv::out0() << indent
|
---|
331 | << "\"max_norb\" set: could have done "
|
---|
332 | << ni << " orbitals per pass otherwise."
|
---|
333 | << endl;
|
---|
334 | ni = max_norb_;
|
---|
335 | }
|
---|
336 |
|
---|
337 | // Send value of ni and mem_static to other nodes
|
---|
338 | msg_->bcast(ni);
|
---|
339 | double dmem_static = mem_static;
|
---|
340 | msg_->bcast(dmem_static);
|
---|
341 | mem_static = size_t(dmem_static);
|
---|
342 |
|
---|
343 | // Compute the storage to be used by the integral routines (required plus optional)
|
---|
344 | size_t dyn_mem = distsize_to_size(compute_cs_dynamic_memory(ni,nocc_act));
|
---|
345 | int mem_remaining;
|
---|
346 | if (mem_alloc <= (dyn_mem + mem_static)) mem_remaining = 0;
|
---|
347 | else mem_remaining = mem_alloc - dyn_mem - mem_static;
|
---|
348 | mem_remaining += thr_->nthread() * integral()->storage_required_eri(basis());
|
---|
349 |
|
---|
350 | ExEnv::out0() << indent
|
---|
351 | << "Memory available per node: " << mem_alloc << " Bytes"
|
---|
352 | << endl;
|
---|
353 | ExEnv::out0() << indent
|
---|
354 | << "Static memory used per node: " << mem_static << " Bytes"
|
---|
355 | << endl;
|
---|
356 | ExEnv::out0() << indent
|
---|
357 | << "Total memory used per node: " << dyn_mem+mem_static << " Bytes"
|
---|
358 | << endl;
|
---|
359 | ExEnv::out0() << indent
|
---|
360 | << "Memory required for one pass: "
|
---|
361 | << compute_cs_dynamic_memory(nocc_act,nocc_act)+mem_static
|
---|
362 | << " Bytes"
|
---|
363 | << endl;
|
---|
364 | ExEnv::out0() << indent
|
---|
365 | << "Minimum memory required: "
|
---|
366 | << compute_cs_dynamic_memory(1,nocc_act)+mem_static
|
---|
367 | << " Bytes"
|
---|
368 | << endl;
|
---|
369 | ExEnv::out0() << indent
|
---|
370 | << "Batch size: " << ni
|
---|
371 | << endl;
|
---|
372 |
|
---|
373 | if (ni == 0) {
|
---|
374 | ExEnv::err0() << "Batch size is 0: more memory or processors are needed"
|
---|
375 | << endl;
|
---|
376 | abort();
|
---|
377 | }
|
---|
378 |
|
---|
379 | if (dynamic_) {
|
---|
380 | ExEnv::out0() << indent << "Using dynamic load balancing." << endl;
|
---|
381 | }
|
---|
382 |
|
---|
383 | if (ni == nocc_act-restart_orbital_memgrp_) {
|
---|
384 | npass = 1;
|
---|
385 | rest = 0;
|
---|
386 | }
|
---|
387 | else {
|
---|
388 | rest = (nocc_act-restart_orbital_memgrp_)%ni;
|
---|
389 | npass = (nocc_act-restart_orbital_memgrp_ - rest)/ni + 1;
|
---|
390 | if (rest == 0) npass--;
|
---|
391 | }
|
---|
392 |
|
---|
393 | if (me == 0) {
|
---|
394 | ExEnv::out0() << indent
|
---|
395 | << scprintf(" npass rest nbasis nshell nfuncmax") << endl;
|
---|
396 | ExEnv::out0() << indent
|
---|
397 | << scprintf(" %-4i %-3i %-5i %-4i %-3i",
|
---|
398 | npass,rest,nbasis,nshell,nfuncmax)
|
---|
399 | << endl;
|
---|
400 | ExEnv::out0() << indent
|
---|
401 | << scprintf(" nocc nvir nfzc nfzv") << endl;
|
---|
402 | ExEnv::out0() << indent
|
---|
403 | << scprintf(" %-4i %-4i %-4i %-4i",
|
---|
404 | nocc,nvir,nfzc,nfzv)
|
---|
405 | << endl;
|
---|
406 | }
|
---|
407 |
|
---|
408 | int nijmax = 0;
|
---|
409 | index = 0;
|
---|
410 | for (i=0; i<ni; i++) {
|
---|
411 | for (j=0; j<nocc; j++) {
|
---|
412 | if (index++ % nproc == me) nijmax++;
|
---|
413 | }
|
---|
414 | }
|
---|
415 |
|
---|
416 | ////////////////////////////////////////////////
|
---|
417 | // The scf vector is distributed between nodes;
|
---|
418 | // put a copy of the scf vector on each node;
|
---|
419 | ////////////////////////////////////////////////
|
---|
420 |
|
---|
421 | escf = reference_->energy();
|
---|
422 | hf_energy_ = escf;
|
---|
423 |
|
---|
424 | RefDiagSCMatrix occ;
|
---|
425 | RefSCMatrix Scf_Vec;
|
---|
426 | RefDiagSCMatrix evalmat;
|
---|
427 | eigen(evalmat, Scf_Vec, occ);
|
---|
428 |
|
---|
429 | if (debug_ > 1) {
|
---|
430 | evalmat.print("eigenvalues");
|
---|
431 | Scf_Vec.print("eigenvectors");
|
---|
432 | }
|
---|
433 |
|
---|
434 | double *scf_vector_dat = new double[nbasis*noso];
|
---|
435 | Scf_Vec.t()->convert(scf_vector_dat);
|
---|
436 |
|
---|
437 | evals = new double[noso];
|
---|
438 | double** scf_vector = new double*[nbasis];
|
---|
439 | for (i=0; i<nbasis; i++) {
|
---|
440 | scf_vector[i] = &scf_vector_dat[i*noso];
|
---|
441 | }
|
---|
442 | for (i=0; i<noso; i++) {
|
---|
443 | evals[i] = evalmat(i);
|
---|
444 | }
|
---|
445 |
|
---|
446 | Scf_Vec = 0;
|
---|
447 | evalmat = 0;
|
---|
448 |
|
---|
449 | //////////////////////////////////////////////////////////////
|
---|
450 | // Allocate storage for various arrays needed for gradients
|
---|
451 | // (Pkj and Pab are symmetric, so store only lower triangle)
|
---|
452 | //////////////////////////////////////////////////////////////
|
---|
453 |
|
---|
454 | if (dograd) {
|
---|
455 | Pkj = (double*) malloc((nocc*(nocc+1)/2)*sizeof(double));
|
---|
456 | Pab = (double*) malloc((nvir*(nvir+1)/2)*sizeof(double));
|
---|
457 | Wkj = (double*) malloc(nocc*nocc*sizeof(double));
|
---|
458 | Wab = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
459 | Waj = (double*) malloc(nvir*nocc*sizeof(double));
|
---|
460 | if (do_d2_) {
|
---|
461 | d2occ_mat = new double[nocc_act*(nocc_act+1)/2];
|
---|
462 | d2vir_mat = new double[nvir_act*(nvir_act+1)/2];
|
---|
463 | }
|
---|
464 |
|
---|
465 | gradient_dat = new double[natom*3];
|
---|
466 | gradient = new double*[natom];
|
---|
467 | for (i=0; i<natom; i++) {
|
---|
468 | gradient[i] = &gradient_dat[i*3];
|
---|
469 | }
|
---|
470 |
|
---|
471 | hf_gradient_dat = new double[natom*3];
|
---|
472 | hf_gradient = new double*[natom];
|
---|
473 | for (i=0; i<natom; i++) {
|
---|
474 | hf_gradient[i] = &hf_gradient_dat[i*3];
|
---|
475 | }
|
---|
476 |
|
---|
477 | ginter = new double*[natom];
|
---|
478 | for (i=0; i<natom; i++) {
|
---|
479 | ginter[i] = new double[3];
|
---|
480 | for (xyz=0; xyz<3; xyz++) ginter[i][xyz] = 0;
|
---|
481 | }
|
---|
482 |
|
---|
483 | hf_ginter = new double*[natom];
|
---|
484 | for (i=0; i<natom; i++) {
|
---|
485 | hf_ginter[i] = new double[3];
|
---|
486 | for (xyz=0; xyz<3; xyz++) hf_ginter[i][xyz] = 0;
|
---|
487 | }
|
---|
488 |
|
---|
489 | //////////////////////////////
|
---|
490 | // Initialize various arrays
|
---|
491 | //////////////////////////////
|
---|
492 |
|
---|
493 | bzerofast(Pkj,nocc*(nocc+1)/2);
|
---|
494 | bzerofast(Wkj,nocc*nocc);
|
---|
495 | bzerofast(Pab,nvir*(nvir+1)/2);
|
---|
496 | bzerofast(Wab,nvir*nvir);
|
---|
497 | bzerofast(Waj,nvir*nocc);
|
---|
498 | if (do_d2_) {
|
---|
499 | bzerofast(d2occ_mat,nocc_act*(nocc_act+1)/2);
|
---|
500 | bzerofast(d2vir_mat,nvir_act*(nvir_act+1)/2);
|
---|
501 | }
|
---|
502 |
|
---|
503 | if (me == 0) zero_gradients(gradient, natom, 3);
|
---|
504 | if (me == 0) zero_gradients(hf_gradient, natom, 3);
|
---|
505 | }
|
---|
506 |
|
---|
507 | if (dograd || do_d1_) {
|
---|
508 | Laj = (double*) malloc(nvir*nocc*sizeof(double));
|
---|
509 | bzerofast(Laj,nvir*nocc);
|
---|
510 | }
|
---|
511 |
|
---|
512 | if (debug_ > 2 && me == 0) {
|
---|
513 | for (j=0; j<noso; j++) {
|
---|
514 | ExEnv::out0() << indent
|
---|
515 | << scprintf("eigenvalue[%3d] = %15.10lf", j, evals[j]);
|
---|
516 | if (j < nfzc) ExEnv::out0() << " (frozen docc)";
|
---|
517 | else if (j < nocc_act + nfzc) ExEnv::out0() << " (active docc)";
|
---|
518 | else if (j < nvir_act + nocc_act + nfzc) ExEnv::out0() << " (active uocc)";
|
---|
519 | else ExEnv::out0() << " (frozen uocc)";
|
---|
520 | ExEnv::out0() << endl;
|
---|
521 | }
|
---|
522 | }
|
---|
523 |
|
---|
524 | /////////////////////////////////////
|
---|
525 | // Begin MP2 loops
|
---|
526 | /////////////////////////////////////
|
---|
527 |
|
---|
528 | // debug print
|
---|
529 | if (debug_ && me == 0) {
|
---|
530 | ExEnv::out0() << indent
|
---|
531 | << scprintf("node %i, begin loop over i-batches",me) << endl;
|
---|
532 | }
|
---|
533 | // end of debug print
|
---|
534 |
|
---|
535 | // Initialize the integrals
|
---|
536 | integral()->set_storage(mem_remaining);
|
---|
537 | tbints_ = new Ref<TwoBodyInt>[thr_->nthread()];
|
---|
538 | for (i=0; i<thr_->nthread(); i++) {
|
---|
539 | tbints_[i] = integral()->electron_repulsion();
|
---|
540 | }
|
---|
541 | if (dograd || do_d1_) {
|
---|
542 | tbintder_ = new Ref<TwoBodyDerivInt>[thr_->nthread()];
|
---|
543 | for (i=0; i<thr_->nthread(); i++) {
|
---|
544 | tbintder_[i] = integral()->electron_repulsion_deriv();
|
---|
545 | }
|
---|
546 | }
|
---|
547 |
|
---|
548 | int mem_integral_intermediates = integral()->storage_used();
|
---|
549 | int mem_integral_storage = (mem_remaining - mem_integral_intermediates) / thr_->nthread();
|
---|
550 | if (mem_integral_storage<0) mem_integral_storage = 0;
|
---|
551 | for (i=0; i<thr_->nthread(); i++) {
|
---|
552 | tbints_[i]->set_integral_storage(mem_integral_storage);
|
---|
553 | }
|
---|
554 |
|
---|
555 | ExEnv::out0() << endl << indent
|
---|
556 | << scprintf("Memory used for integral intermediates: %i Bytes",
|
---|
557 | mem_integral_intermediates)
|
---|
558 | << endl;
|
---|
559 | ExEnv::out0() << indent
|
---|
560 | << scprintf("Memory used for integral storage: %i Bytes",
|
---|
561 | mem_integral_storage)
|
---|
562 | << endl;
|
---|
563 |
|
---|
564 | if (mem.null()) {
|
---|
565 | ExEnv::errn() << "MBPT2: memory group not initialized" << endl;
|
---|
566 | abort();
|
---|
567 | }
|
---|
568 |
|
---|
569 | mem->set_localsize(size_t(nijmax)*nbasis*nbasis*sizeof(double));
|
---|
570 | ExEnv::out0() << indent
|
---|
571 | << "Size of global distributed array: "
|
---|
572 | << mem->totalsize()
|
---|
573 | << " Bytes"
|
---|
574 | << endl;
|
---|
575 |
|
---|
576 | MemoryGrpBuf<double> membuf_remote(mem);
|
---|
577 |
|
---|
578 | int usep4 = !dograd;
|
---|
579 |
|
---|
580 | Ref<ThreadLock> lock = thr_->new_lock();
|
---|
581 | CSGradErep12Qtr** e12thread = new CSGradErep12Qtr*[thr_->nthread()];
|
---|
582 | DistShellPair::SharedData sp_e_data, sp_g_data;
|
---|
583 | for (i=0; i<thr_->nthread(); i++) {
|
---|
584 | e12thread[i] = new CSGradErep12Qtr(i, thr_->nthread(), me, nproc,
|
---|
585 | mem, msg_, lock, basis(), tbints_[i],
|
---|
586 | nocc, scf_vector, tol, debug_,
|
---|
587 | dynamic_, print_percent_,
|
---|
588 | &sp_e_data, usep4);
|
---|
589 | }
|
---|
590 |
|
---|
591 | CSGrad34Qbtr** qbt34thread;
|
---|
592 | if (dograd || do_d1_) {
|
---|
593 | qbt34thread = new CSGrad34Qbtr*[thr_->nthread()];
|
---|
594 | for (i=0; i<thr_->nthread(); i++) {
|
---|
595 | qbt34thread[i] = new CSGrad34Qbtr(i, thr_->nthread(), me, nproc,
|
---|
596 | mem, msg_, lock, basis(), tbints_[i],
|
---|
597 | tbintder_[i], nocc, nfzc, scf_vector,
|
---|
598 | tol, debug_, dynamic_, print_percent_,
|
---|
599 | &sp_g_data, dograd, natom);
|
---|
600 | }
|
---|
601 | }
|
---|
602 |
|
---|
603 | tim_enter("mp2 passes");
|
---|
604 | for (pass=0; pass<npass; pass++) {
|
---|
605 |
|
---|
606 | if (me == 0) {
|
---|
607 | ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
|
---|
608 | }
|
---|
609 |
|
---|
610 | i_offset = restart_orbital_memgrp_ + pass*ni + nfzc;
|
---|
611 | if ((pass == npass - 1) && (rest != 0)) ni = rest;
|
---|
612 |
|
---|
613 | // Compute number of of i,j pairs on each node for
|
---|
614 | // two-el integrals and non-sep 2PDM elements
|
---|
615 | index = 0;
|
---|
616 | nij = 0;
|
---|
617 | for (i=0; i<ni; i++) {
|
---|
618 | for (j=0; j<nocc; j++) {
|
---|
619 | if (index++ % nproc == me) nij++;
|
---|
620 | }
|
---|
621 | }
|
---|
622 |
|
---|
623 | // debug print
|
---|
624 | if (debug_)
|
---|
625 | ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
|
---|
626 | // end of debug print
|
---|
627 |
|
---|
628 | mem->sync(); // This must be here or gamma non-sep will be wrong when running
|
---|
629 | // on multiple processors with more than one pass
|
---|
630 |
|
---|
631 | r_offset = 0;
|
---|
632 |
|
---|
633 | // Allocate and initialize some arrays
|
---|
634 | // (done here to avoid having these arrays
|
---|
635 | // overlap with arrays allocated later)
|
---|
636 |
|
---|
637 | // Allocate (and initialize) some arrays
|
---|
638 |
|
---|
639 | integral_iqjs = (double*) mem->localdata();
|
---|
640 |
|
---|
641 | bzerofast(integral_iqjs, nij*nbasis*nbasis);
|
---|
642 |
|
---|
643 | integral_iqjs = 0;
|
---|
644 |
|
---|
645 | mem->sync();
|
---|
646 |
|
---|
647 | index = 0;
|
---|
648 |
|
---|
649 | if (me == 0) {
|
---|
650 | ExEnv::out0() << indent
|
---|
651 | << scprintf("Begin loop over shells (erep, 1.+2. q.t.)") << endl;
|
---|
652 | }
|
---|
653 |
|
---|
654 | // Do the two eletron integrals and the first two quarter transformations
|
---|
655 | tim_enter("erep+1.qt+2.qt");
|
---|
656 | sp_e_data.init();
|
---|
657 | for (i=0; i<thr_->nthread(); i++) {
|
---|
658 | e12thread[i]->set_i_offset(i_offset);
|
---|
659 | e12thread[i]->set_ni(ni);
|
---|
660 | thr_->add_thread(i,e12thread[i]);
|
---|
661 | # if SINGLE_THREAD_E12
|
---|
662 | e12thread[i]->run();
|
---|
663 | # endif
|
---|
664 | }
|
---|
665 | # if !SINGLE_THREAD_E12
|
---|
666 | thr_->start_threads();
|
---|
667 | thr_->wait_threads();
|
---|
668 | # endif
|
---|
669 | tim_exit("erep+1.qt+2.qt");
|
---|
670 |
|
---|
671 | if (me == 0) {
|
---|
672 | ExEnv::out0() << indent << "End of loop over shells" << endl;
|
---|
673 | }
|
---|
674 |
|
---|
675 | mem->sync(); // Make sure iqjs is complete on each node before continuing
|
---|
676 |
|
---|
677 | integral_iqjs = (double*) mem->localdata();
|
---|
678 |
|
---|
679 | #if PRINT2Q
|
---|
680 | if (me == 0) {
|
---|
681 | int index = 0;
|
---|
682 | int ij_index = 0;
|
---|
683 | for (int i = 0; i<ni; i++) {
|
---|
684 | for (int j = 0; j<nocc; j++) {
|
---|
685 | if (index++ % nproc == me) {
|
---|
686 | if (j >= nfzc) {
|
---|
687 | double *integral_ij_offset = integral_iqjs + nbasis*nbasis*ij_index;
|
---|
688 | for (int s = 0; s<nbasis; s++) {
|
---|
689 | double *integral_ijsq_ptr = integral_ij_offset + s*nbasis;
|
---|
690 | for (int q = 0; q<nbasis; q++) {
|
---|
691 | printf("2Q: (%d %d|%d %d) = %12.8f\n",
|
---|
692 | i,q,j,s,*integral_ijsq_ptr);
|
---|
693 | *integral_ijsq_ptr++;
|
---|
694 | }
|
---|
695 | }
|
---|
696 | }
|
---|
697 | ij_index++;
|
---|
698 | }
|
---|
699 | }
|
---|
700 | }
|
---|
701 | }
|
---|
702 | #endif
|
---|
703 |
|
---|
704 | // Allocate and initialize some arrays
|
---|
705 | ixjs_tmp = new double[nbasis];
|
---|
706 |
|
---|
707 | if (me == 0) {
|
---|
708 | ExEnv::out0() << indent << "Begin third q.t." << endl;
|
---|
709 | }
|
---|
710 |
|
---|
711 | tim_enter("3. q.t.");
|
---|
712 | // Begin third quarter transformation;
|
---|
713 | // generate (ix|js) for i act, j act or frz, and x any MO (act or frz)
|
---|
714 | index = 0;
|
---|
715 | ij_index = 0;
|
---|
716 | for (i=0; i<ni; i++) {
|
---|
717 | for (j=0; j<nocc; j++) {
|
---|
718 | if (index++ % nproc == me) {
|
---|
719 |
|
---|
720 | for (s=0; s<nbasis; s++) {
|
---|
721 |
|
---|
722 | bzerofast(ixjs_tmp, nbasis);
|
---|
723 | for (q=0; q<nbasis; q++) {
|
---|
724 | integral_iqjs_ptr = &integral_iqjs[q + nbasis*(s + nbasis*ij_index)];
|
---|
725 | ixjs_ptr = ixjs_tmp;
|
---|
726 | c_qx = scf_vector[q];
|
---|
727 | tmpval = *integral_iqjs_ptr;
|
---|
728 | #if PRINT_BIGGEST_INTS
|
---|
729 | biggest_ints_2.insert(tmpval,i+i_offset,j,s,q);
|
---|
730 | if ((i+i_offset==104 && j == 1)
|
---|
731 | ||(i+i_offset==104 && j == 2)) {
|
---|
732 | biggest_ints_2s.insert(tmpval,i+i_offset,j,s,q);
|
---|
733 | }
|
---|
734 | #endif
|
---|
735 | for (x=0; x<noso; x++) {
|
---|
736 | *ixjs_ptr++ += *c_qx++ * tmpval;
|
---|
737 | }
|
---|
738 | } // exit q loop
|
---|
739 |
|
---|
740 | // Put ixjs into integral_iqjs, while overwriting what was there;
|
---|
741 | // i.e., integral_iqjs will now contain three-quarter transformed
|
---|
742 | // integrals ixjs
|
---|
743 | integral_iqjs_ptr = &integral_iqjs[nbasis*(s + nbasis*ij_index)];
|
---|
744 | ixjs_ptr = ixjs_tmp;
|
---|
745 | for (x=0; x<noso; x++) {
|
---|
746 | #if PRINT_BIGGEST_INTS
|
---|
747 | if (x>=nocc) {
|
---|
748 | biggest_ints_3a.insert(*ixjs_ptr,i+i_offset,j,s,x-nocc);
|
---|
749 | }
|
---|
750 | #endif
|
---|
751 | *integral_iqjs_ptr++ = *ixjs_ptr++;
|
---|
752 | }
|
---|
753 | } // exit s loop
|
---|
754 | ij_index++;
|
---|
755 | } // endif
|
---|
756 | } // exit j loop
|
---|
757 | } // exit i loop
|
---|
758 | // end of third quarter transformation
|
---|
759 | tim_exit("3. q.t.");
|
---|
760 |
|
---|
761 | if (me == 0) {
|
---|
762 | ExEnv::out0() << indent << "End of third q.t." << endl;
|
---|
763 | }
|
---|
764 |
|
---|
765 | delete[] ixjs_tmp;
|
---|
766 |
|
---|
767 | // The array of half-transformed integrals integral_iqjs has now
|
---|
768 | // been overwritten by three-quarter transformed integrals ixjs;
|
---|
769 | // rename the array integral_ixjs, where x = any MO
|
---|
770 | integral_ixjs = integral_iqjs;
|
---|
771 |
|
---|
772 | #if PRINT3Q
|
---|
773 | if (me == 0) {
|
---|
774 | int index = 0;
|
---|
775 | int ij_index = 0;
|
---|
776 | for (int i = 0; i<ni; i++) {
|
---|
777 | for (int j = 0; j<nocc; j++) {
|
---|
778 | if (index++ % nproc == me) {
|
---|
779 | if (j >= nfzc) {
|
---|
780 | double *integral_ij_offset = integral_ixjs + nbasis*nbasis*ij_index;
|
---|
781 | for (int s = 0; s<nbasis; s++) {
|
---|
782 | double *integral_ijsx_ptr = integral_ij_offset + s*nbasis;
|
---|
783 | for (int x = 0; x<noso; x++) {
|
---|
784 | printf("3Q: (%d %d|%d %d) = %12.8f\n",
|
---|
785 | i,x,j,s,*integral_ijsx_ptr);
|
---|
786 | *integral_ijsx_ptr++;
|
---|
787 | }
|
---|
788 | }
|
---|
789 | }
|
---|
790 | ij_index++;
|
---|
791 | }
|
---|
792 | }
|
---|
793 | }
|
---|
794 | }
|
---|
795 | #endif
|
---|
796 |
|
---|
797 | integral_iajy = new double[noso];
|
---|
798 | // in iajy: i act; a,j act or frz; y act or frz occ or virt.
|
---|
799 | integral_ikja = new double[nvir_act];
|
---|
800 | // in ikja: i,j act; k act or frz; a act.
|
---|
801 |
|
---|
802 | if (me == 0) {
|
---|
803 | ExEnv::out0() << indent << "Begin fourth q.t." << endl;
|
---|
804 | }
|
---|
805 |
|
---|
806 | // Begin fourth quarter transformation
|
---|
807 | // generating MO integrals (ov|ov), (ov|oo) and (oo|ov)
|
---|
808 | tim_enter("4. q.t.");
|
---|
809 | index = 0;
|
---|
810 | ij_index = 0;
|
---|
811 | for (i=0; i<ni; i++) {
|
---|
812 | for (j=0; j<nocc; j++) {
|
---|
813 | if (index++ % nproc == me) {
|
---|
814 |
|
---|
815 | for (a=0; a<nvir; a++) {
|
---|
816 | bzerofast(integral_iajy, noso);
|
---|
817 | iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
|
---|
818 | for (s=0; s<nbasis; s++) {
|
---|
819 | c_sy = scf_vector[s];
|
---|
820 | iajy_ptr = integral_iajy;
|
---|
821 | tmpval = *iajs_ptr;
|
---|
822 | #if PRINT_BIGGEST_INTS
|
---|
823 | biggest_ints_3.insert(tmpval,i+i_offset,j,s,a);
|
---|
824 | if ((i+i_offset==105 && j == 2 && s == 170 && a == 3)
|
---|
825 | ||(i+i_offset==102 && j == 2 && s == 170 && a == 2)) {
|
---|
826 | ExEnv::outn() << scprintf("3/4: %3d %3d %3d %3d: %16.10f",
|
---|
827 | i+i_offset, j, s, x-nocc)
|
---|
828 | << endl;
|
---|
829 | }
|
---|
830 | #endif
|
---|
831 | for (y=0; y<noso; y++) {
|
---|
832 | *iajy_ptr++ += *c_sy++ * tmpval;
|
---|
833 | } // exit y loop
|
---|
834 | iajs_ptr += nbasis;
|
---|
835 | } // exit s loop
|
---|
836 | // Put integral_iajy into ixjs for one i,a,j while
|
---|
837 | // overwriting elements of ixjs
|
---|
838 | iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
|
---|
839 | iajy_ptr = integral_iajy;
|
---|
840 | for (y=0; y<noso; y++) {
|
---|
841 | *iajs_ptr = *iajy_ptr++;
|
---|
842 | iajs_ptr += nbasis;
|
---|
843 | } // exit y loop
|
---|
844 | } // exit a loop
|
---|
845 |
|
---|
846 | // this is only needed for gradients
|
---|
847 | if ((dograd || do_d1_) && j >= nfzc) {
|
---|
848 | for (k=0; k<nocc; k++) {
|
---|
849 | bzerofast(integral_ikja, nvir_act);
|
---|
850 | ikjs_ptr = &integral_ixjs[k + nbasis*nbasis*ij_index];
|
---|
851 | for (s=0; s<nbasis; s++) {
|
---|
852 | c_sa = &scf_vector[s][nocc];
|
---|
853 | ikja_ptr = integral_ikja;
|
---|
854 | tmpval = *ikjs_ptr;
|
---|
855 | for (a=0; a<nvir_act; a++) {
|
---|
856 | *ikja_ptr++ += *c_sa++ * tmpval;
|
---|
857 | } // exit a loop
|
---|
858 | ikjs_ptr += nbasis;
|
---|
859 | } // exit s loop
|
---|
860 | // Put integral_ikja into ixjs for one i,k,j while
|
---|
861 | // overwriting elements of ixjs
|
---|
862 | ikjs_ptr = &integral_ixjs[k + nbasis*(nocc + nbasis*ij_index)];
|
---|
863 | ikja_ptr = integral_ikja;
|
---|
864 | for (a=0; a<nvir_act; a++) {
|
---|
865 | *ikjs_ptr = *ikja_ptr++;
|
---|
866 | ikjs_ptr += nbasis;
|
---|
867 | } // exit a loop
|
---|
868 | } // exit k loop
|
---|
869 | } //endif
|
---|
870 |
|
---|
871 | ij_index++;
|
---|
872 | } // endif
|
---|
873 | } // exit j loop
|
---|
874 | } // exit i loop
|
---|
875 | // end of fourth quarter transformation
|
---|
876 | tim_exit("4. q.t.");
|
---|
877 |
|
---|
878 | if (me == 0) {
|
---|
879 | ExEnv::out0() << indent << "End of fourth q.t." << endl;
|
---|
880 | }
|
---|
881 |
|
---|
882 | // The array integral_ixjs has now been overwritten by MO integrals
|
---|
883 | // iajy and ikja, so rename the array mo_int
|
---|
884 | mo_int = integral_ixjs;
|
---|
885 |
|
---|
886 | delete[] integral_iajy;
|
---|
887 | delete[] integral_ikja;
|
---|
888 |
|
---|
889 | // Divide the (ia|jb) MO integrals by the term
|
---|
890 | // evals[i]+evals[j]-evals[a]-evals[b]
|
---|
891 | // and keep these integrals in mo_int;
|
---|
892 | // do this only for active i, j, a, and b
|
---|
893 | tim_enter("divide (ia|jb)'s");
|
---|
894 |
|
---|
895 | index = 0;
|
---|
896 | ij_index = 0;
|
---|
897 | for (i=0; i<ni; i++) {
|
---|
898 | ii = i+i_offset;
|
---|
899 | for (j=0; j<nocc; j++) {
|
---|
900 | if (index++ % nproc == me) {
|
---|
901 | if (j>=nfzc) {
|
---|
902 | for (b=0; b<nvir_act; b++) {
|
---|
903 | iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
|
---|
904 | bb = b+nocc;
|
---|
905 | for (a=0; a<nvir_act; a++) {
|
---|
906 | #if PRINT4Q
|
---|
907 | printf("4Q: (%d %d|%d %d) = %12.8f\n",
|
---|
908 | i,a+nocc,j,b+nocc,*iajb_ptr);
|
---|
909 | #endif
|
---|
910 | // Zero out nonsymmetric integral, else divide by denominators
|
---|
911 | if (usep4 && ( symorb_irrep_[ii] ^
|
---|
912 | symorb_irrep_[j] ^
|
---|
913 | symorb_irrep_[a+nocc] ^
|
---|
914 | symorb_irrep_[bb]) ) {
|
---|
915 | *iajb_ptr++ = 0.0;
|
---|
916 | }
|
---|
917 | else
|
---|
918 | *iajb_ptr++ /= evals[ii]+evals[j]-evals[a+nocc]-evals[bb];
|
---|
919 | } // exit a loop
|
---|
920 | } // exit b loop
|
---|
921 | } // endif
|
---|
922 | ij_index++;
|
---|
923 | } // endif
|
---|
924 | } // exit j loop
|
---|
925 | } // exit i loop
|
---|
926 | tim_exit("divide (ia|jb)'s");
|
---|
927 |
|
---|
928 | // We now have the fully transformed integrals (ia|jb)
|
---|
929 | // divided by the proper orbital energy denominators
|
---|
930 | // for one batch of i, all j<nocc, and all a<nvir and b<nvir,
|
---|
931 | // where i, j, a, and b are all active;
|
---|
932 | // compute contribution to the MP2 correlation energy
|
---|
933 | // from these integrals
|
---|
934 |
|
---|
935 | #if WRITE_DOUBLES
|
---|
936 | if (nproc > 1 || npass > 1) {
|
---|
937 | ExEnv::outn() << "csgrad.cc: WRITE_DOUBLES set but case not allowed" << endl;
|
---|
938 | abort();
|
---|
939 | }
|
---|
940 | ExEnv::outn() << "csgrad.cc: WRITING DOUBLES: CHECK ORDER" << endl;
|
---|
941 | char *doutname = SCFormIO::fileext_to_filename(".mp2");
|
---|
942 | FILE *dout = fopen(doutname,"w");
|
---|
943 | delete[] doutname;
|
---|
944 | fwrite(&nocc_act, sizeof(int), 1, dout);
|
---|
945 | fwrite(&nvir_act, sizeof(int), 1, dout);
|
---|
946 | for (j=nfzc; j<nocc; j++) {
|
---|
947 | for (b=0; b<nvir_act; b++) {
|
---|
948 | for (i=0; i<ni; i++) {
|
---|
949 | ij_index = nocc*i + j;
|
---|
950 | iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
|
---|
951 | fwrite(iajb_ptr, sizeof(double), nvir_act, dout);
|
---|
952 | for (a=0; a<nvir_act; a++) {
|
---|
953 | if (fabs(iajb_ptr[a])>1.0e-8) {
|
---|
954 | ExEnv::outn() << scprintf(" Djbia(%2d %2d %2d %2d) = %12.8f",
|
---|
955 | j+1-nfzc,b+1,i+1,a+1,iajb_ptr[a])
|
---|
956 | << endl;
|
---|
957 | }
|
---|
958 | }
|
---|
959 | }
|
---|
960 | }
|
---|
961 | }
|
---|
962 | fclose(dout);
|
---|
963 | #endif
|
---|
964 |
|
---|
965 | tim_enter("compute ecorr");
|
---|
966 |
|
---|
967 | index = 0;
|
---|
968 | ij_index = 0;
|
---|
969 | for (i=0; i<ni; i++) {
|
---|
970 | double ecorr_i = 0.0;
|
---|
971 | for (j=0; j<nocc; j++) {
|
---|
972 | double ecorr_ij = 0.0;
|
---|
973 | if (index++ % nproc == me) {
|
---|
974 |
|
---|
975 | if (j>=nfzc) {
|
---|
976 | for (b=0; b<nvir_act; b++) {
|
---|
977 | iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
|
---|
978 | ibja_ptr = &mo_int[b+nocc + nbasis*(nocc + nbasis*ij_index)];
|
---|
979 | for (a=0; a<nvir_act; a++) {
|
---|
980 | delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
|
---|
981 | // only include determinants with unique coefficients
|
---|
982 | if (a>=b && i_offset+i>=j) {
|
---|
983 | if (a>b && i_offset+i>j) {
|
---|
984 | // aaaa or bbbb
|
---|
985 | biggest_coefs.insert(*iajb_ptr - *ibja_ptr,
|
---|
986 | i_offset+i,j,a,b,1111);
|
---|
987 | // aabb or bbaa or abba or baab
|
---|
988 | biggest_coefs.insert(*ibja_ptr,i_offset+i,j,b,a,1212);
|
---|
989 | } // endif
|
---|
990 | // aabb or bbaa or abba or baab
|
---|
991 | biggest_coefs.insert(*iajb_ptr,i_offset+i,j,a,b,1212);
|
---|
992 | } // endif
|
---|
993 |
|
---|
994 | tmpval = *iajb_ptr*(2**iajb_ptr - *ibja_ptr)*delta_ijab;
|
---|
995 | ecorr_mp2 += tmpval;
|
---|
996 | if (debug_) ecorr_ij += tmpval;
|
---|
997 | iajb_ptr++;
|
---|
998 | ibja_ptr += nbasis;;
|
---|
999 | } // exit a loop
|
---|
1000 | } // exit b loop
|
---|
1001 | } // endif
|
---|
1002 | ij_index++;
|
---|
1003 | } // endif
|
---|
1004 | if (debug_) {
|
---|
1005 | msg_->sum(ecorr_ij);
|
---|
1006 | ecorr_i += ecorr_ij;
|
---|
1007 | ExEnv::out0() << indent
|
---|
1008 | << scprintf("correlation energy for pair %3d %3d = %16.12f",
|
---|
1009 | i+i_offset, j, ecorr_ij)
|
---|
1010 | << endl;
|
---|
1011 | }
|
---|
1012 | } // exit j loop
|
---|
1013 | if (debug_) {
|
---|
1014 | ExEnv::out0() << indent
|
---|
1015 | << scprintf("correlation energy for orbital %3d = %16.12f",
|
---|
1016 | i+i_offset, ecorr_i)
|
---|
1017 | << endl;
|
---|
1018 | }
|
---|
1019 | } // exit i loop
|
---|
1020 | tim_exit("compute ecorr");
|
---|
1021 |
|
---|
1022 | // debug print
|
---|
1023 | if (debug_ && me == 0) {
|
---|
1024 | ExEnv::out0() << indent << "End of ecorr" << endl;
|
---|
1025 | }
|
---|
1026 | // end of debug print
|
---|
1027 |
|
---|
1028 | if (npass > 1 && pass < npass - 1) {
|
---|
1029 | double passe = ecorr_mp2;
|
---|
1030 | msg_->sum(passe);
|
---|
1031 | ExEnv::out0() << indent
|
---|
1032 | << "Partial correlation energy for pass " << pass << ":" << endl;
|
---|
1033 | ExEnv::out0() << indent
|
---|
1034 | << scprintf(" restart_ecorr = %18.14f", passe)
|
---|
1035 | << endl;
|
---|
1036 | ExEnv::out0() << indent
|
---|
1037 | << scprintf(" restart_orbital_memgrp = %d", ((pass+1) * ni))
|
---|
1038 | << endl;
|
---|
1039 | }
|
---|
1040 |
|
---|
1041 | integral_iqjs = 0;
|
---|
1042 | mem->sync(); // Make sure MO integrals are complete on all nodes before continuing
|
---|
1043 |
|
---|
1044 | // don't go beyond this point if only the energy is needed
|
---|
1045 | if (!dograd && !do_d1_) continue;
|
---|
1046 |
|
---|
1047 | mo_int = (double*) mem->localdata();
|
---|
1048 |
|
---|
1049 | if (!dograd) goto compute_L;
|
---|
1050 |
|
---|
1051 | // Update the matrices Pkj and Wkj with
|
---|
1052 | // contributions from (occ vir|occ vir) integrals
|
---|
1053 | index = 0;
|
---|
1054 | ij_index = 0;
|
---|
1055 | tim_enter("Pkj and Wkj");
|
---|
1056 | for (i=0; i<ni; i++) {
|
---|
1057 | for (j=0; j<nocc; j++) {
|
---|
1058 | if (index++ % nproc == me) {
|
---|
1059 |
|
---|
1060 | if (j>=nfzc) {
|
---|
1061 | for (kloop=me; kloop<me+nocc; kloop++) {
|
---|
1062 | // stagger k's to minimize contention
|
---|
1063 | k = kloop%nocc;
|
---|
1064 | if (k<=j) pkj_ptr = &Pkj[j*(j+1)/2 + k];
|
---|
1065 | if (do_d2_) {
|
---|
1066 | if (k<=j && k>=nfzc) {
|
---|
1067 | d2occ_mat_ptr = &d2occ_mat[(j-nfzc)*(j-nfzc+1)/2 + k-nfzc];
|
---|
1068 | }
|
---|
1069 | }
|
---|
1070 | wjk_ptr = &Wkj[j*nocc + k];
|
---|
1071 | // Send for iakb, if necessary
|
---|
1072 | ik_index = (i*nocc + k)/nproc;
|
---|
1073 | ik_proc = (i*nocc + k)%nproc;
|
---|
1074 | ik_offset = nocc + nocc*nbasis + nbasis*nbasis*ik_index;
|
---|
1075 | mo_intbuf = (double*) membuf_remote.readonly_on_node(ik_offset,
|
---|
1076 | nbasis*nvir-nocc,
|
---|
1077 | ik_proc);
|
---|
1078 | for (a=0; a<nvir_act; a++) {
|
---|
1079 | ibja_ptr = &mo_int[nocc + nbasis*(a+nocc + nbasis*ij_index)];
|
---|
1080 | iajb_ptr = &mo_int[a+nocc + nbasis*(nocc + nbasis*ij_index)];
|
---|
1081 | iakb_ptr = &mo_intbuf[a];
|
---|
1082 | for (b=0; b<nvir_act; b++) {
|
---|
1083 | tmpval1 = *iakb_ptr * *iajb_ptr;
|
---|
1084 | tmpval = 2**iakb_ptr * *ibja_ptr++ - 4*tmpval1;
|
---|
1085 | // tmpval = 2**iakb_ptr * (*ibja_ptr++ - 2 * *iajb_ptr);
|
---|
1086 | iakb_ptr += nbasis;
|
---|
1087 | iajb_ptr += nbasis;
|
---|
1088 | if (k<nfzc && k<=j) *pkj_ptr += tmpval/(evals[k]-evals[j]);
|
---|
1089 | else if (k<=j) {
|
---|
1090 | *pkj_ptr += tmpval;
|
---|
1091 | if (do_d2_) *d2occ_mat_ptr += tmpval1;
|
---|
1092 | }
|
---|
1093 | if (k>=nfzc) {
|
---|
1094 | delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
|
---|
1095 | *wjk_ptr += tmpval*delta_ijab;
|
---|
1096 | }
|
---|
1097 | } // exit b loop
|
---|
1098 | } // exit a loop
|
---|
1099 | mo_intbuf = 0;
|
---|
1100 | membuf_remote.release();
|
---|
1101 | } // end kloop loop
|
---|
1102 | } // endif
|
---|
1103 |
|
---|
1104 | ij_index++;
|
---|
1105 | } // endif
|
---|
1106 | } // exit j loop
|
---|
1107 | } // exit i loop
|
---|
1108 | tim_exit("Pkj and Wkj");
|
---|
1109 |
|
---|
1110 | // debug print
|
---|
1111 | if (debug_ && me == 0) {
|
---|
1112 | ExEnv::out0() << indent << "End of Pkj and Wkj" << endl;
|
---|
1113 | }
|
---|
1114 | // end of debug print
|
---|
1115 |
|
---|
1116 | // Update the matrices Pab and Wab with
|
---|
1117 | // contributions from (occ vir|occ vir) integrals
|
---|
1118 | tim_enter("Pab and Wab");
|
---|
1119 | index = 0;
|
---|
1120 | ij_index = 0;
|
---|
1121 | for (i=0; i<ni; i++) {
|
---|
1122 | for (j=0; j<nocc; j++) {
|
---|
1123 | if (index++ % nproc == me) {
|
---|
1124 | if (j>=nfzc) {
|
---|
1125 |
|
---|
1126 | offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
|
---|
1127 | for (a=0; a<nvir_act; a++) {
|
---|
1128 | pab_ptr = &Pab[a*(a+1)/2];
|
---|
1129 | if (do_d2_) d2vir_mat_ptr = &d2vir_mat[a*(a+1)/2];
|
---|
1130 | for (b=0; b<=a; b++) { // active-active part of Pab and Wab
|
---|
1131 | wab_ptr = &Wab[a*nvir + b];
|
---|
1132 | wba_ptr = &Wab[b*nvir + a];
|
---|
1133 | ibjc_ptr = &mo_int[offset + b];
|
---|
1134 | icjb_ptr = &mo_int[offset + b*nbasis];
|
---|
1135 | iajc_ptr = &mo_int[offset + a];
|
---|
1136 | icja_ptr = &mo_int[offset + a*nbasis];
|
---|
1137 | for (c=0; c<nvir_act; c++) {
|
---|
1138 | tmpval = *iajc_ptr**ibjc_ptr;
|
---|
1139 | if (do_d2_) *d2vir_mat_ptr += tmpval;
|
---|
1140 | *pab_ptr += 4*tmpval - 2**iajc_ptr * *icjb_ptr;
|
---|
1141 | // *pab_ptr += 2**iajc_ptr * (2 * *ibjc_ptr - *icjb_ptr);
|
---|
1142 |
|
---|
1143 | if (a == b) {
|
---|
1144 | delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
|
---|
1145 | *wab_ptr += 2**ibjc_ptr*(*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
|
---|
1146 | }
|
---|
1147 | else {
|
---|
1148 | delta_ijbc = evals[i_offset+i]+evals[j]-evals[nocc+b]-evals[nocc+c];
|
---|
1149 | delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
|
---|
1150 | *wab_ptr += 2**ibjc_ptr * (*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
|
---|
1151 | *wba_ptr += 2**iajc_ptr * (*icjb_ptr - 2 * *ibjc_ptr)*delta_ijbc;
|
---|
1152 | }
|
---|
1153 | iajc_ptr += nbasis;
|
---|
1154 | ibjc_ptr += nbasis;
|
---|
1155 | icja_ptr++;
|
---|
1156 | icjb_ptr++;
|
---|
1157 | } // exit c loop
|
---|
1158 | pab_ptr++;
|
---|
1159 | d2vir_mat_ptr++;
|
---|
1160 | } // exit b loop
|
---|
1161 | } // exit a loop
|
---|
1162 |
|
---|
1163 | for (a=0; a<nfzv; a++) { // active-frozen part of Pab
|
---|
1164 | pab_ptr = &Pab[(a+nvir_act)*(a+nvir_act+1)/2];
|
---|
1165 | for (b=0; b<nvir_act; b++) {
|
---|
1166 | tmpval = evals[nocc+b] - evals[nocc+nvir_act+a];
|
---|
1167 | ibjc_ptr = &mo_int[offset+b];
|
---|
1168 | iajc_ptr = &mo_int[offset+a+nvir_act];
|
---|
1169 | icja_ptr = &mo_int[offset+(a+nvir_act)*nbasis];
|
---|
1170 | for (c=0; c<nvir_act; c++) {
|
---|
1171 | *pab_ptr += 2**ibjc_ptr*(2**iajc_ptr - *icja_ptr++)/tmpval;
|
---|
1172 | ibjc_ptr += nbasis;
|
---|
1173 | iajc_ptr += nbasis;
|
---|
1174 | } // exit c loop
|
---|
1175 | pab_ptr++;
|
---|
1176 | } // exit b loop
|
---|
1177 | } // exit a loop
|
---|
1178 |
|
---|
1179 | } // endif
|
---|
1180 | ij_index++;
|
---|
1181 | } // endif
|
---|
1182 | } // exit j loop
|
---|
1183 | } // exit i loop
|
---|
1184 | tim_exit("Pab and Wab");
|
---|
1185 |
|
---|
1186 | // debug print
|
---|
1187 | if (debug_ && me == 0) {
|
---|
1188 | ExEnv::out0() << indent << "End of Pab and Wab" << endl;
|
---|
1189 | }
|
---|
1190 | // end of debug print
|
---|
1191 |
|
---|
1192 | compute_L:
|
---|
1193 |
|
---|
1194 | ///////////////////////////////////////
|
---|
1195 | // Update Waj and Laj with contrib.
|
---|
1196 | // from (oo|ov) and (ov|oo) integrals;
|
---|
1197 | // here a is active and j is active or
|
---|
1198 | // frozen
|
---|
1199 | ///////////////////////////////////////
|
---|
1200 | tim_enter("Waj and Laj");
|
---|
1201 |
|
---|
1202 | // (oo|ov) contribution
|
---|
1203 | index = 0;
|
---|
1204 | ik_index = 0;
|
---|
1205 | for (i=0; i<ni; i++) {
|
---|
1206 | for (k=0; k<nocc; k++) {
|
---|
1207 | if (index++ % nproc == me) {
|
---|
1208 | if (k>=nfzc) {
|
---|
1209 | offset = nbasis*nocc + nbasis*nbasis*ik_index;
|
---|
1210 | for (j=0; j<nocc; j++) {
|
---|
1211 | for (b=0; b<nvir_act; b++) {
|
---|
1212 | ibka_ptr = &mo_int[b+nocc + offset];
|
---|
1213 | ijkb_ptr = &mo_int[j + nbasis*b + offset];
|
---|
1214 | if (dograd) waj_ptr = &Waj[j*nvir]; // order as j*nvir+a to make loops more efficient
|
---|
1215 | laj_ptr = &Laj[j*nvir];
|
---|
1216 | for (a=0; a<nvir_act; a++) {
|
---|
1217 | tmpval = 2**ibka_ptr * *ijkb_ptr;
|
---|
1218 | ibka_ptr += nbasis;
|
---|
1219 | if (dograd) *waj_ptr++ += tmpval;
|
---|
1220 | *laj_ptr++ -= tmpval; // This term had the wrong sign in Frisch's paper
|
---|
1221 | } // exit a loop
|
---|
1222 | } // exit b loop
|
---|
1223 | } // exit j loop
|
---|
1224 | } // endif
|
---|
1225 | ik_index++;
|
---|
1226 | } // endif
|
---|
1227 | } // exit k loop
|
---|
1228 | } // exit i loop
|
---|
1229 |
|
---|
1230 | // (ov|oo) contribution
|
---|
1231 | index = 0;
|
---|
1232 | ik_index = 0;
|
---|
1233 | for (i=0; i<ni; i++) {
|
---|
1234 | for (k=0; k<nocc; k++) {
|
---|
1235 | if (index++ % nproc == me) {
|
---|
1236 | if (k>=nfzc) {
|
---|
1237 | offset = nocc + nbasis*nbasis*ik_index;
|
---|
1238 | for (b=0; b<nvir_act; b++) {
|
---|
1239 | for (j=0; j<nocc; j++) {
|
---|
1240 | ibkj_ptr = &mo_int[offset + b + j*nbasis];
|
---|
1241 | ibka_ptr = &mo_int[offset + b + nocc*nbasis];
|
---|
1242 | if (dograd) waj_ptr = &Waj[j*nvir];
|
---|
1243 | laj_ptr = &Laj[j*nvir];
|
---|
1244 | for (a=0; a<nvir_act; a++) {
|
---|
1245 | tmpval = 4 * *ibka_ptr * *ibkj_ptr;
|
---|
1246 | ibka_ptr += nbasis;
|
---|
1247 | if (dograd) *waj_ptr++ -= tmpval;
|
---|
1248 | *laj_ptr++ += tmpval; // This term had the wrong sign in Frisch's paper
|
---|
1249 | } // exit a loop
|
---|
1250 | } // exit j loop
|
---|
1251 | } // exit b loop
|
---|
1252 | } // endif
|
---|
1253 | ik_index++;
|
---|
1254 | } // endif
|
---|
1255 | } // exit k loop
|
---|
1256 | } // exit i loop
|
---|
1257 |
|
---|
1258 | tim_exit("Waj and Laj");
|
---|
1259 | /////////////////////////////
|
---|
1260 | // End of Waj and Laj update
|
---|
1261 | /////////////////////////////
|
---|
1262 |
|
---|
1263 | // debug print
|
---|
1264 | if (debug_ && me == 0) {
|
---|
1265 | ExEnv::out0() << indent << "End of Paj and Waj" << endl;
|
---|
1266 | }
|
---|
1267 | // end of debug print
|
---|
1268 |
|
---|
1269 | mo_int = 0;
|
---|
1270 |
|
---|
1271 | mem->sync(); // Need to synchronize before deleting mo_intbuf
|
---|
1272 |
|
---|
1273 | mo_int = (double*) mem->localdata();
|
---|
1274 |
|
---|
1275 | gamma_iajs_tmp = new double[nbasis*nvir_act];
|
---|
1276 | if (!gamma_iajs_tmp) {
|
---|
1277 | ExEnv::outn() << indent << "Could not allocate gamma_iajs_tmp" << endl;
|
---|
1278 | abort();
|
---|
1279 | }
|
---|
1280 |
|
---|
1281 | // debug print
|
---|
1282 | if (debug_ && me == 0) {
|
---|
1283 | ExEnv::out0() << indent << "Begin first and second q.b.t." << endl;
|
---|
1284 | }
|
---|
1285 | // end of debug print
|
---|
1286 |
|
---|
1287 | ///////////////////////////////////////////////////////////
|
---|
1288 | // Perform first and second quarter back-transformation.
|
---|
1289 | // Each node produces gamma_iajs, and gamma_iqjs
|
---|
1290 | // for a subset of i and j, all a and all s;
|
---|
1291 | // the back-transf. is done only for active i, j, a, and b
|
---|
1292 | ///////////////////////////////////////////////////////////
|
---|
1293 |
|
---|
1294 | // Begin first quarter back-transformation
|
---|
1295 | tim_enter("1. q.b.t.");
|
---|
1296 | index = 0;
|
---|
1297 | ij_index = 0;
|
---|
1298 | for (i=0; i<ni; i++) {
|
---|
1299 | for (j=0; j<nocc; j++) {
|
---|
1300 | if (index++ % nproc == me) {
|
---|
1301 | if (j>=nfzc) {
|
---|
1302 | bzerofast(gamma_iajs_tmp,nbasis*nvir_act);
|
---|
1303 | offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
|
---|
1304 |
|
---|
1305 | for (a=0; a<nvir_act; a++) {
|
---|
1306 | for (s=0; s<nbasis; s++) {
|
---|
1307 | c_sb = &scf_vector[s][nocc];
|
---|
1308 | gamma_iajs_ptr = &gamma_iajs_tmp[s*nvir_act + a];
|
---|
1309 | ibja_ptr = &mo_int[a*nbasis + offset];
|
---|
1310 | iajb_ptr = &mo_int[a + offset];
|
---|
1311 |
|
---|
1312 | tmpval = 0.0;
|
---|
1313 | for (b=0; b<nvir_act; b++) {
|
---|
1314 | tmpval += 2**c_sb++ * (2**iajb_ptr - *ibja_ptr++);
|
---|
1315 | iajb_ptr += nbasis;
|
---|
1316 | } // exit b loop
|
---|
1317 | *gamma_iajs_ptr += tmpval;
|
---|
1318 | } // exit s loop
|
---|
1319 | } // exit a loop
|
---|
1320 | // Put gamma_iajs_tmp into mo_int for one i,j
|
---|
1321 | // while overwriting mo_int
|
---|
1322 | gamma_iajs_ptr = gamma_iajs_tmp;
|
---|
1323 | for (y=0; y<nbasis; y++) {
|
---|
1324 | iajy_ptr = &mo_int[nocc + nbasis*(y + nbasis*ij_index)];
|
---|
1325 | for (a=0; a<nvir_act; a++) {
|
---|
1326 | *iajy_ptr++ = *gamma_iajs_ptr++;
|
---|
1327 | }
|
---|
1328 | }
|
---|
1329 |
|
---|
1330 | } // endif
|
---|
1331 | ij_index++;
|
---|
1332 | } // endif
|
---|
1333 | } // exit j loop
|
---|
1334 | } // exit i loop
|
---|
1335 | // end of first quarter back-transformation
|
---|
1336 | tim_exit("1. q.b.t.");
|
---|
1337 |
|
---|
1338 | // debug print
|
---|
1339 | if (debug_ && me == 0) {
|
---|
1340 | ExEnv::out0() << indent << "End of first q.b.t." << endl;
|
---|
1341 | }
|
---|
1342 | // end of debug print
|
---|
1343 |
|
---|
1344 | mo_int = 0;
|
---|
1345 |
|
---|
1346 | mem->sync(); // Make sure all nodes are done with gamma_iajs_tmp before renaming
|
---|
1347 |
|
---|
1348 | delete[] gamma_iajs_tmp;
|
---|
1349 |
|
---|
1350 | // The array mo_int has now been overwritten by the quarter
|
---|
1351 | // back-transformed non-sep 2PDM gamma_iajs, so rename
|
---|
1352 | gamma_iajs = (double*) mem->localdata();
|
---|
1353 |
|
---|
1354 | gamma_iqjs_tmp = new double[nbasis];
|
---|
1355 | if (!gamma_iqjs_tmp) {
|
---|
1356 | ExEnv::errn() << "Could not allocate gamma_iqjs_tmp" << endl;
|
---|
1357 | abort();
|
---|
1358 | }
|
---|
1359 |
|
---|
1360 | if (debug_ && me == 0) {
|
---|
1361 | ExEnv::out0() << indent << "Begin second q.b.t." << endl;
|
---|
1362 | }
|
---|
1363 |
|
---|
1364 | // Begin second quarter back-transformation
|
---|
1365 | // (gamma_iqjs elements ordered as i,j,s,q,
|
---|
1366 | // i.e., q varies fastest)
|
---|
1367 | tim_enter("2. q.b.t.");
|
---|
1368 | index = 0;
|
---|
1369 | ij_index = 0;
|
---|
1370 | for (i=0; i<ni; i++) {
|
---|
1371 | for (j=0; j<nocc; j++) {
|
---|
1372 | if (index++ % nproc == me) {
|
---|
1373 | if (j>=nfzc) {
|
---|
1374 | offset = nbasis*nbasis*ij_index;
|
---|
1375 |
|
---|
1376 | for (s=0; s<nbasis; s++) {
|
---|
1377 | bzerofast(gamma_iqjs_tmp,nbasis);
|
---|
1378 | for (q=0; q<nbasis; q++) {
|
---|
1379 | gamma_iqjs_ptr = &gamma_iqjs_tmp[q];
|
---|
1380 | gamma_iajs_ptr = &gamma_iajs[nocc + s*nbasis + offset];
|
---|
1381 | c_qa = &scf_vector[q][nocc];
|
---|
1382 |
|
---|
1383 | tmpval = 0.0;
|
---|
1384 | for (a=0; a<nvir_act; a++) {
|
---|
1385 | tmpval += *c_qa++ * *gamma_iajs_ptr++;
|
---|
1386 | } // exit a loop
|
---|
1387 | *gamma_iqjs_ptr += tmpval;
|
---|
1388 | } // exit q loop
|
---|
1389 | // Put gamma_iqjs_tmp into gamma_iajs for one i,j,s
|
---|
1390 | // while overwriting gamma_iajs
|
---|
1391 | gamma_iajs_ptr = &gamma_iajs[s*nbasis + offset];
|
---|
1392 | gamma_iqjs_ptr = gamma_iqjs_tmp;
|
---|
1393 | for (q=0; q<nbasis; q++) {
|
---|
1394 | *gamma_iajs_ptr++ = *gamma_iqjs_ptr++;
|
---|
1395 | }
|
---|
1396 | } // exit s loop
|
---|
1397 |
|
---|
1398 | } // endif
|
---|
1399 | ij_index++;
|
---|
1400 | } // endif
|
---|
1401 | } // exit j loop
|
---|
1402 | } // exit i loop
|
---|
1403 | tim_exit("2. q.b.t.");
|
---|
1404 | // end of second quarter back-transformation
|
---|
1405 |
|
---|
1406 | if (debug_ && me == 0) {
|
---|
1407 | ExEnv::out0() << indent << "End of second q.b.t." << endl;
|
---|
1408 | }
|
---|
1409 |
|
---|
1410 | gamma_iajs = 0;
|
---|
1411 |
|
---|
1412 | mem->sync(); // Keep this here to make sure all nodes have gamma_iqjs
|
---|
1413 | // before it is needed below, and that gamma_iajs is not
|
---|
1414 | // deleted prematurely
|
---|
1415 |
|
---|
1416 | // The quarter back-transformed elements gamma_iajs have now been
|
---|
1417 | // overwritten by the half back-transformed elements gamma_iqjs
|
---|
1418 |
|
---|
1419 | delete[] gamma_iqjs_tmp;
|
---|
1420 |
|
---|
1421 | /////////////////////////////////////////////////
|
---|
1422 | // End of 1. and 2. quarter back-transformation
|
---|
1423 | /////////////////////////////////////////////////
|
---|
1424 |
|
---|
1425 | Lpi = new double[nbasis*ni];
|
---|
1426 | bzerofast(Lpi,nbasis*ni);
|
---|
1427 |
|
---|
1428 | if (me == 0) {
|
---|
1429 | ExEnv::out0() << indent << "Begin third and fourth q.b.t." << endl;
|
---|
1430 | }
|
---|
1431 |
|
---|
1432 | //////////////////////////////////////////////////////////
|
---|
1433 | // Perform third and fourth quarter back-transformation
|
---|
1434 | // and compute contribution to gradient from non-sep 2PDM
|
---|
1435 | //////////////////////////////////////////////////////////
|
---|
1436 |
|
---|
1437 | tim_enter("3.qbt+4.qbt+non-sep contrib.");
|
---|
1438 | sp_g_data.init();
|
---|
1439 | for (i=0; i<thr_->nthread(); i++) {
|
---|
1440 | qbt34thread[i]->set_i_offset(i_offset);
|
---|
1441 | qbt34thread[i]->set_ni(ni);
|
---|
1442 | thr_->add_thread(i,qbt34thread[i]);
|
---|
1443 | # if SINGLE_THREAD_QBT34
|
---|
1444 | qbt34thread[i]->run();
|
---|
1445 | # endif
|
---|
1446 | }
|
---|
1447 | # if !SINGLE_THREAD_QBT34
|
---|
1448 | thr_->start_threads();
|
---|
1449 | thr_->wait_threads();
|
---|
1450 | # endif
|
---|
1451 | tim_exit("3.qbt+4.qbt+non-sep contrib.");
|
---|
1452 | // Add thread contributions to Lpi and ginter
|
---|
1453 | for (i=0; i<thr_->nthread(); i++) {
|
---|
1454 | double *Lpi_thread = qbt34thread[i]->get_Lpi();
|
---|
1455 | double **ginter_thread = qbt34thread[i]->get_ginter();
|
---|
1456 | for (j=0; j<nbasis*ni; j++) Lpi[j] += Lpi_thread[j];
|
---|
1457 | for (j=0; j<natom; j++) {
|
---|
1458 | for (k=0; k<3; k++) {
|
---|
1459 | ginter[j][k] += ginter_thread[j][k];
|
---|
1460 | }
|
---|
1461 | }
|
---|
1462 | aointder_computed += qbt34thread[i]->get_aointder_computed();
|
---|
1463 | }
|
---|
1464 |
|
---|
1465 | if (me == 0) {
|
---|
1466 | ExEnv::out0() << indent << "End of third and fourth q.b.t." << endl;
|
---|
1467 | }
|
---|
1468 |
|
---|
1469 | mem->sync(); // Make sure all nodes are done before deleting arrays
|
---|
1470 |
|
---|
1471 | if (debug_ > 1) {
|
---|
1472 | RefSCDimension ni_dim(new SCDimension(ni,1));
|
---|
1473 | ni_dim->blocks()->set_subdim(0, new SCDimension(ni));
|
---|
1474 | RefSCDimension nbasis_dim(new SCDimension(nbasis,1));
|
---|
1475 | nbasis_dim->blocks()->set_subdim(0, new SCDimension(nbasis));
|
---|
1476 | RefSCMatrix Lpi_mat(nbasis_dim, ni_dim, kit);
|
---|
1477 | Lpi_mat->assign(Lpi);
|
---|
1478 | Lpi_mat.print("Lpi");
|
---|
1479 | }
|
---|
1480 |
|
---|
1481 | if (debug_ && me == 0) {
|
---|
1482 | ExEnv::out0() << indent << "Back-transform Lpi" << endl;
|
---|
1483 | }
|
---|
1484 |
|
---|
1485 | // Back-transform Lpi to MO basis
|
---|
1486 | lpi_ptr = Lpi;
|
---|
1487 | for (p=0; p<nbasis; p++) {
|
---|
1488 | for (i=0; i<ni; i++) {
|
---|
1489 | c_pa = &scf_vector[p][nocc];
|
---|
1490 | laj_ptr = &Laj[nvir*(i_offset + i)];
|
---|
1491 | for (a=0; a<nvir; a++) {
|
---|
1492 | *laj_ptr++ += *c_pa++ * *lpi_ptr;
|
---|
1493 | } // exit a loop
|
---|
1494 | lpi_ptr++;
|
---|
1495 | } // exit i loop
|
---|
1496 | } // exit p loop
|
---|
1497 |
|
---|
1498 | // malloc_chain_check(1);
|
---|
1499 |
|
---|
1500 | delete[] Lpi;
|
---|
1501 |
|
---|
1502 | if (me == 0) {
|
---|
1503 | ExEnv::out0() << indent << "Done with pass " << pass+1 << endl;
|
---|
1504 | }
|
---|
1505 | } // exit loop over i-batches (pass)
|
---|
1506 | tim_exit("mp2 passes");
|
---|
1507 |
|
---|
1508 | if (dograd || do_d1_) {
|
---|
1509 | for (i=0; i<thr_->nthread(); i++) {
|
---|
1510 | delete qbt34thread[i];
|
---|
1511 | }
|
---|
1512 | delete[] qbt34thread;
|
---|
1513 | }
|
---|
1514 |
|
---|
1515 | mem->set_localsize(0);
|
---|
1516 |
|
---|
1517 | // debug print
|
---|
1518 | if (debug_ && me == 0) {
|
---|
1519 | ExEnv::out0() << indent << "Exited loop over i-batches" << endl;
|
---|
1520 | }
|
---|
1521 | // end of debug print
|
---|
1522 |
|
---|
1523 | ///////////////////////////////////////////////////////////////
|
---|
1524 | // The computation of the MP2 energy is now complete on each
|
---|
1525 | // node; add the nodes' contributions and print out the energy
|
---|
1526 | ///////////////////////////////////////////////////////////////
|
---|
1527 | msg_->sum(ecorr_mp2);
|
---|
1528 | msg_->sum(aoint_computed);
|
---|
1529 | msg_->sum(aointder_computed);
|
---|
1530 |
|
---|
1531 | biggest_coefs.combine(msg_);
|
---|
1532 | #if PRINT_BIGGEST_INTS
|
---|
1533 | biggest_ints_1.combine(msg_);
|
---|
1534 | biggest_ints_2.combine(msg_);
|
---|
1535 | biggest_ints_2s.combine(msg_);
|
---|
1536 | biggest_ints_3a.combine(msg_);
|
---|
1537 | biggest_ints_3.combine(msg_);
|
---|
1538 | #endif
|
---|
1539 |
|
---|
1540 | if (me == 0) {
|
---|
1541 | emp2 = escf + ecorr_mp2;
|
---|
1542 |
|
---|
1543 | #if PRINT_BIGGEST_INTS
|
---|
1544 | ExEnv::out0() << "biggest 1/4 transformed ints" << endl;
|
---|
1545 | for (i=0; i<biggest_ints_1.ncontrib(); i++) {
|
---|
1546 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
1547 | biggest_ints_1.indices(i)[0],
|
---|
1548 | biggest_ints_1.indices(i)[1],
|
---|
1549 | biggest_ints_1.indices(i)[2],
|
---|
1550 | biggest_ints_1.indices(i)[3],
|
---|
1551 | biggest_ints_1.val(i)
|
---|
1552 | )
|
---|
1553 | << endl;
|
---|
1554 | }
|
---|
1555 | ExEnv::outn() << "biggest 2/4 transformed ints" << endl;
|
---|
1556 | for (i=0; i<biggest_ints_2.ncontrib(); i++) {
|
---|
1557 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
1558 | biggest_ints_2.indices(i)[0],
|
---|
1559 | biggest_ints_2.indices(i)[1],
|
---|
1560 | biggest_ints_2.indices(i)[2],
|
---|
1561 | biggest_ints_2.indices(i)[3],
|
---|
1562 | biggest_ints_2.val(i)
|
---|
1563 | )
|
---|
1564 | << endl;
|
---|
1565 | }
|
---|
1566 | ExEnv::outn() << "restricted 2/4 transformed ints" << endl;
|
---|
1567 | for (i=0; i<biggest_ints_2s.ncontrib(); i++) {
|
---|
1568 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
1569 | biggest_ints_2s.indices(i)[0],
|
---|
1570 | biggest_ints_2s.indices(i)[1],
|
---|
1571 | biggest_ints_2s.indices(i)[2],
|
---|
1572 | biggest_ints_2s.indices(i)[3],
|
---|
1573 | biggest_ints_2s.val(i)
|
---|
1574 | )
|
---|
1575 | << endl;
|
---|
1576 | }
|
---|
1577 | ExEnv::outn() << "biggest 3/4 transformed ints (in 3.)" << endl;
|
---|
1578 | for (i=0; i<biggest_ints_3a.ncontrib(); i++) {
|
---|
1579 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
1580 | biggest_ints_3a.indices(i)[0],
|
---|
1581 | biggest_ints_3a.indices(i)[1],
|
---|
1582 | biggest_ints_3a.indices(i)[2],
|
---|
1583 | biggest_ints_3a.indices(i)[3],
|
---|
1584 | biggest_ints_3a.val(i)
|
---|
1585 | )
|
---|
1586 | << endl;
|
---|
1587 | }
|
---|
1588 | ExEnv::outn() << "biggest 3/4 transformed ints (in 4.)" << endl;
|
---|
1589 | for (i=0; i<biggest_ints_3.ncontrib(); i++) {
|
---|
1590 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
1591 | biggest_ints_3.indices(i)[0],
|
---|
1592 | biggest_ints_3.indices(i)[1],
|
---|
1593 | biggest_ints_3.indices(i)[2],
|
---|
1594 | biggest_ints_3.indices(i)[3],
|
---|
1595 | biggest_ints_3.val(i)
|
---|
1596 | )
|
---|
1597 | << endl;
|
---|
1598 | }
|
---|
1599 | #endif
|
---|
1600 |
|
---|
1601 | if (restart_orbital_memgrp_ == 0) {
|
---|
1602 | if (biggest_coefs.ncontrib()) {
|
---|
1603 | ExEnv::out0() << endl << indent
|
---|
1604 | << "Largest first order coefficients (unique):"
|
---|
1605 | << endl;
|
---|
1606 | }
|
---|
1607 | for (i=0; i<biggest_coefs.ncontrib(); i++) {
|
---|
1608 | int i0 = biggest_coefs.indices(i)[0];
|
---|
1609 | int i1 = biggest_coefs.indices(i)[1];
|
---|
1610 | int i2 = biggest_coefs.indices(i)[2] + nocc;
|
---|
1611 | int i3 = biggest_coefs.indices(i)[3] + nocc;
|
---|
1612 | int spincase = biggest_coefs.indices(i)[4];
|
---|
1613 | ExEnv::out0() << indent
|
---|
1614 | << scprintf(" %2d %12.8f %2d %3s %2d %3s -> %2d %3s %2d %3s (%s)",
|
---|
1615 | i+1, biggest_coefs.val(i),
|
---|
1616 | symorb_num_[i0]+1,
|
---|
1617 | ct.gamma(symorb_irrep_[i0]).symbol(),
|
---|
1618 | symorb_num_[i1]+1,
|
---|
1619 | ct.gamma(symorb_irrep_[i1]).symbol(),
|
---|
1620 | symorb_num_[i2]+1,
|
---|
1621 | ct.gamma(symorb_irrep_[i2]).symbol(),
|
---|
1622 | symorb_num_[i3]+1,
|
---|
1623 | ct.gamma(symorb_irrep_[i3]).symbol(),
|
---|
1624 | (spincase==1111?"++++":"+-+-")
|
---|
1625 | )
|
---|
1626 | << endl;
|
---|
1627 | }
|
---|
1628 | }
|
---|
1629 |
|
---|
1630 | // Print out various energies etc.
|
---|
1631 |
|
---|
1632 | if (debug_) {
|
---|
1633 | ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
|
---|
1634 | << indent << "(or integral derivatives) would have been computed\n"
|
---|
1635 | << indent << "without bounds checking: "
|
---|
1636 | << npass*nshell*nshell*(nshell+1)*(nshell+1)/2
|
---|
1637 | << endl;
|
---|
1638 |
|
---|
1639 | ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
|
---|
1640 | << indent << "were computed: " << aoint_computed
|
---|
1641 | << endl;
|
---|
1642 |
|
---|
1643 | if (dograd) {
|
---|
1644 | ExEnv::out0() << indent
|
---|
1645 | << "Number of shell quartets for which AO integral derivatives\n"
|
---|
1646 | << indent << "were computed: " << aointder_computed
|
---|
1647 | << endl;
|
---|
1648 | }
|
---|
1649 | }
|
---|
1650 |
|
---|
1651 | ExEnv::out0()<<endl<<indent
|
---|
1652 | <<scprintf("RHF energy [au]: %17.12lf\n", escf);
|
---|
1653 | ExEnv::out0()<<indent
|
---|
1654 | <<scprintf("MP2 correlation energy [au]: %17.12lf\n", ecorr_mp2);
|
---|
1655 | ExEnv::out0()<<indent
|
---|
1656 | <<scprintf("MP2 energy [au]: %17.12lf\n", emp2);
|
---|
1657 | ExEnv::out0().flush();
|
---|
1658 | }
|
---|
1659 | if (method_ && strcmp(method_,"mp")) {
|
---|
1660 | ExEnv::out0() << indent
|
---|
1661 | << "MBPT2: bad method for closed shell case: " << method_
|
---|
1662 | << ", using mp" << endl;
|
---|
1663 | }
|
---|
1664 | set_energy(emp2);
|
---|
1665 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
1666 | *ref_to_mp2_acc);
|
---|
1667 |
|
---|
1668 | RefSCDimension nocc_act_dim(new SCDimension(nocc_act,1));
|
---|
1669 | nocc_act_dim->blocks()->set_subdim(0, new SCDimension(nocc_act));
|
---|
1670 | RefSCDimension nvir_act_dim(new SCDimension(nvir_act,1));
|
---|
1671 | nvir_act_dim->blocks()->set_subdim(0, new SCDimension(nvir_act));
|
---|
1672 | RefSCDimension nocc_dim(new SCDimension(nocc,1));
|
---|
1673 | nocc_dim->blocks()->set_subdim(0, new SCDimension(nocc));
|
---|
1674 | RefSCDimension nvir_dim(new SCDimension(nvir,1));
|
---|
1675 | nvir_dim->blocks()->set_subdim(0, new SCDimension(nvir));
|
---|
1676 | RefSCDimension nbasis_dim = ao_dimension()->blocks()->subdim(0);
|
---|
1677 | RefSCDimension noso_dim(new SCDimension(noso,1));
|
---|
1678 |
|
---|
1679 | if (dograd || do_d1_) {
|
---|
1680 | msg_->sum(Laj,nvir*nocc);
|
---|
1681 |
|
---|
1682 | RefSCMatrix T1_mat(nocc_act_dim, nvir_act_dim, kit);
|
---|
1683 | // the elements of T1_mat are the single-substitution amplitudes
|
---|
1684 | BiggestContribs biggest_t1(2,10);
|
---|
1685 | // compute the S2 norm of Lee et al. (s2_diag)
|
---|
1686 | double s2_diag = 0.0;
|
---|
1687 | for (j=nfzc; j<nocc; j++) {
|
---|
1688 | laj_ptr = &Laj[j*nvir];
|
---|
1689 | for (a=0; a<nvir_act; a++) {
|
---|
1690 | tmpval = 0.5**laj_ptr++/(evals[a+nocc]-evals[j]);
|
---|
1691 | T1_mat.set_element(j-nfzc,a,tmpval);
|
---|
1692 | biggest_t1.insert(tmpval,j,a);
|
---|
1693 | s2_diag += tmpval*tmpval;
|
---|
1694 | }
|
---|
1695 | }
|
---|
1696 | s2_diag = sqrt(s2_diag/(2*nocc_act));
|
---|
1697 | // compute the T1 matrix 1-norm
|
---|
1698 | double t1onenorm = 0.0;
|
---|
1699 | Ref<SCElementSumAbs> sumabs = new SCElementSumAbs;
|
---|
1700 | Ref<SCElementOp> genop = sumabs.pointer();
|
---|
1701 | for (a=0; a < nvir_act; a++) {
|
---|
1702 | sumabs->init();
|
---|
1703 | T1_mat.get_column(a).element_op(genop);
|
---|
1704 | if (t1onenorm < sumabs->result()) t1onenorm = sumabs->result();
|
---|
1705 | }
|
---|
1706 | // compute the T1 matrix inf-norm
|
---|
1707 | double t1infnorm = 0.0;
|
---|
1708 | for (j=0; j < nocc_act; j++) {
|
---|
1709 | sumabs->init();
|
---|
1710 | T1_mat.get_row(j).element_op(genop);
|
---|
1711 | if (t1infnorm < sumabs->result()) t1infnorm = sumabs->result();
|
---|
1712 | }
|
---|
1713 | // compute the T1 matrix 2-norm ( = D1(MP2) )
|
---|
1714 | RefSymmSCMatrix D1_mat(nocc_act_dim,kit);
|
---|
1715 | D1_mat.assign(0.0);
|
---|
1716 | D1_mat.accumulate_symmetric_product(T1_mat);
|
---|
1717 | T1_mat = 0;
|
---|
1718 | double d1_diag = sqrt(D1_mat.eigvals().get_element(nocc_act-1));
|
---|
1719 | D1_mat = 0;
|
---|
1720 | // print the norms
|
---|
1721 | ExEnv::out0()<<endl;
|
---|
1722 | ExEnv::out0()
|
---|
1723 | <<indent<<scprintf("D1(MP2) = %12.8f", d1_diag)
|
---|
1724 | <<endl
|
---|
1725 | <<indent<<scprintf("S2 matrix 1-norm = %12.8f", t1onenorm)
|
---|
1726 | <<endl
|
---|
1727 | <<indent<<scprintf("S2 matrix inf-norm = %12.8f", t1infnorm)
|
---|
1728 | <<endl
|
---|
1729 | <<indent<<scprintf("S2 diagnostic = %12.8f", s2_diag)
|
---|
1730 | <<endl;
|
---|
1731 | if (biggest_t1.ncontrib()) {
|
---|
1732 | ExEnv::out0() << endl
|
---|
1733 | << indent << "Largest S2 values (unique determinants):" << endl;
|
---|
1734 | }
|
---|
1735 | for (i=0; i<biggest_t1.ncontrib(); i++) {
|
---|
1736 | int i0 = biggest_t1.indices(i)[0];
|
---|
1737 | int i1 = biggest_t1.indices(i)[1] + nocc;
|
---|
1738 | ExEnv::out0()
|
---|
1739 | << indent << scprintf(" %2d %12.8f %3d %3s -> %4d %3s",
|
---|
1740 | i+1, biggest_t1.val(i),
|
---|
1741 | symorb_num_[i0]+1,
|
---|
1742 | ct.gamma(symorb_irrep_[i0]).symbol(),
|
---|
1743 | symorb_num_[i1]+1,
|
---|
1744 | ct.gamma(symorb_irrep_[i1]).symbol())
|
---|
1745 | << endl;
|
---|
1746 | }
|
---|
1747 |
|
---|
1748 | } // if (dograd || do_d1_)
|
---|
1749 |
|
---|
1750 | for (i=0; i<thr_->nthread(); i++) {
|
---|
1751 | delete e12thread[i];
|
---|
1752 | }
|
---|
1753 | delete[] e12thread;
|
---|
1754 |
|
---|
1755 | // quit here if only the energy is needed
|
---|
1756 | if (!dograd) {
|
---|
1757 | delete[] tbints_; tbints_ = 0;
|
---|
1758 | if (do_d1_) delete[] Laj;
|
---|
1759 | delete[] scf_vector;
|
---|
1760 | delete[] scf_vector_dat;
|
---|
1761 | delete[] evals;
|
---|
1762 | tim_exit("mp2-mem");
|
---|
1763 | return;
|
---|
1764 | }
|
---|
1765 |
|
---|
1766 | // Accumulate intermediate gradients on node 0
|
---|
1767 | sum_gradients(msg_, ginter, natom, 3);
|
---|
1768 |
|
---|
1769 | // Add intermediate gradients to the gradient on node 0
|
---|
1770 | if (me==0)
|
---|
1771 | accum_gradients(gradient, ginter, natom, 3);
|
---|
1772 |
|
---|
1773 | // Print out contribution to the gradient from non-sep. 2PDM
|
---|
1774 | if (debug_) {
|
---|
1775 | print_natom_3(ginter,
|
---|
1776 | "Contribution to MP2 gradient from non-separable 2PDM [au]:");
|
---|
1777 | }
|
---|
1778 |
|
---|
1779 | ////////////////////////////////////////////////////////
|
---|
1780 | // Add contributions from all nodes to various matrices
|
---|
1781 | ////////////////////////////////////////////////////////
|
---|
1782 | tmpint = (nvir > nocc ? nvir:nocc);
|
---|
1783 | double *tmpmat = new double[tmpint*tmpint];
|
---|
1784 | msg_->sum(Pkj,nocc*(nocc+1)/2,tmpmat); // Pkj is now complete
|
---|
1785 | msg_->sum(Pab,nvir*(nvir+1)/2,tmpmat); // Pab is now complete
|
---|
1786 | msg_->sum(Wab,nvir*nvir,tmpmat);
|
---|
1787 | msg_->sum(Wkj,nocc*nocc,tmpmat);
|
---|
1788 | msg_->sum(Waj,nvir*nocc,tmpmat);
|
---|
1789 | if (do_d2_) {
|
---|
1790 | msg_->sum(d2occ_mat,nocc_act*(nocc_act+1)/2,tmpmat);
|
---|
1791 | msg_->sum(d2vir_mat,nvir_act*(nvir_act+1)/2,tmpmat);
|
---|
1792 | }
|
---|
1793 | delete[] tmpmat;
|
---|
1794 |
|
---|
1795 | // Compute D2 diagnostic (d2_diag) from matrices d2_occ_mat and d2_vir_mat
|
---|
1796 | if (do_d2_) {
|
---|
1797 | RefSymmSCMatrix D2occ_mat(nocc_act_dim, kit);
|
---|
1798 | RefSymmSCMatrix D2vir_mat(nvir_act_dim,kit);
|
---|
1799 | D2occ_mat->assign(d2occ_mat);
|
---|
1800 | D2vir_mat->assign(d2vir_mat);
|
---|
1801 | d2o = sqrt(D2occ_mat.eigvals().get_element(nocc_act-1));
|
---|
1802 | d2v = sqrt(D2vir_mat.eigvals().get_element(nvir_act-1));
|
---|
1803 | d2_diag = (d2o > d2v ? d2o:d2v);
|
---|
1804 | ExEnv::out0() << endl
|
---|
1805 | << indent << scprintf("D2(MP1) = %12.8f", d2_diag) << endl << endl;
|
---|
1806 | delete[] d2occ_mat;
|
---|
1807 | delete[] d2vir_mat;
|
---|
1808 | }
|
---|
1809 |
|
---|
1810 | // Finish computation of Wab
|
---|
1811 | tim_enter("Pab and Wab");
|
---|
1812 | pab_ptr = Pab;
|
---|
1813 | for (a=0; a<nvir_act; a++) { // active-active part of Wab
|
---|
1814 | wba_ptr = &Wab[a];
|
---|
1815 | wab_ptr = &Wab[a*nvir];
|
---|
1816 | for (b=0; b<=a; b++) {
|
---|
1817 | if (a==b) {
|
---|
1818 | *wab_ptr -= evals[nocc+a]**pab_ptr;
|
---|
1819 | }
|
---|
1820 | else {
|
---|
1821 | *wab_ptr -= evals[nocc+a]**pab_ptr;
|
---|
1822 | *wba_ptr -= evals[nocc+b]**pab_ptr;
|
---|
1823 | }
|
---|
1824 | pab_ptr++;
|
---|
1825 | wab_ptr++;
|
---|
1826 | wba_ptr += nvir;
|
---|
1827 | } // exit b loop
|
---|
1828 | } // exit a loop
|
---|
1829 | for (a=0; a<nfzv; a++) { // active-frozen part of Wab
|
---|
1830 | wba_ptr = &Wab[nvir_act+a];
|
---|
1831 | wab_ptr = &Wab[(nvir_act+a)*nvir];
|
---|
1832 | pab_ptr = &Pab[(nvir_act+a)*(nvir_act+a+1)/2];
|
---|
1833 | for (b=0; b<nvir_act; b++) {
|
---|
1834 | *wab_ptr -= evals[nocc+b]**pab_ptr;
|
---|
1835 | *wba_ptr -= evals[nocc+b]**pab_ptr;
|
---|
1836 | pab_ptr++;
|
---|
1837 | wab_ptr++;
|
---|
1838 | wba_ptr += nvir;
|
---|
1839 | } // exit b loop
|
---|
1840 | } // exit a loop
|
---|
1841 | // Wab is now complete
|
---|
1842 | tim_exit("Pab and Wab");
|
---|
1843 | RefSCMatrix Wab_matrix(nvir_dim, nvir_dim, kit);
|
---|
1844 | Wab_matrix->assign(Wab); // Put elements of Wab into Wab_matrix
|
---|
1845 | free(Wab);
|
---|
1846 |
|
---|
1847 | // Update Wkj with contribution from Pkj
|
---|
1848 | tim_enter("Pkj and Wkj");
|
---|
1849 | pkj_ptr = Pkj;
|
---|
1850 | for (k=0; k<nocc; k++) {
|
---|
1851 | wjk_ptr = &Wkj[k];
|
---|
1852 | wkj_ptr = &Wkj[k*nocc];
|
---|
1853 | for (j=0; j<=k; j++) {
|
---|
1854 | if (k<nfzc && j<nfzc) { // don't want both j and k frozen
|
---|
1855 | wkj_ptr++;
|
---|
1856 | wjk_ptr += nocc;
|
---|
1857 | pkj_ptr++;
|
---|
1858 | continue;
|
---|
1859 | }
|
---|
1860 | if (j==k) {
|
---|
1861 | *wkj_ptr++ -= evals[k]**pkj_ptr++;
|
---|
1862 | }
|
---|
1863 | else if (j<nfzc) {
|
---|
1864 | *wkj_ptr++ -= evals[k]**pkj_ptr;
|
---|
1865 | *wjk_ptr -= evals[k]**pkj_ptr;
|
---|
1866 | pkj_ptr++;
|
---|
1867 | }
|
---|
1868 | else {
|
---|
1869 | *wkj_ptr++ -= evals[k]**pkj_ptr;
|
---|
1870 | *wjk_ptr -= evals[j]**pkj_ptr;
|
---|
1871 | pkj_ptr++;
|
---|
1872 | }
|
---|
1873 | wjk_ptr += nocc;
|
---|
1874 | } // exit j loop
|
---|
1875 | } // exit k loop
|
---|
1876 | tim_exit("Pkj and Wkj");
|
---|
1877 |
|
---|
1878 | /////////////////////////////////
|
---|
1879 | // Finish the computation of Laj
|
---|
1880 | /////////////////////////////////
|
---|
1881 |
|
---|
1882 | tim_enter("Laj");
|
---|
1883 |
|
---|
1884 | RefSCMatrix Cv(nbasis_dim, nvir_dim, kit); // virtual block of scf_vector
|
---|
1885 | RefSCMatrix Co(nbasis_dim, nocc_dim, kit); // occupied block of scf_vector
|
---|
1886 | for (p=0; p<nbasis; p++) {
|
---|
1887 | c_pq = scf_vector[p];
|
---|
1888 | for (q=0; q<noso; q++) {
|
---|
1889 | if (q<nocc) Co->set_element(p, q, *c_pq++);
|
---|
1890 | else Cv->set_element(p, q-nocc, *c_pq++);
|
---|
1891 | }
|
---|
1892 | }
|
---|
1893 |
|
---|
1894 | // Compute the density-like Dmat_matrix
|
---|
1895 | // (Cv*Pab_matrix*Cv.t() + Co*Pkj_matrix*Co.t())
|
---|
1896 | RefSymmSCMatrix Pab_matrix(nvir_dim,kit);
|
---|
1897 | RefSymmSCMatrix Pkj_matrix(nocc_dim,kit);
|
---|
1898 | RefSymmSCMatrix Dmat_matrix(nbasis_dim,kit);
|
---|
1899 | Pab_matrix->assign(Pab); // fill in elements of Pab_matrix from Pab
|
---|
1900 | free(Pab);
|
---|
1901 | Pkj_matrix->assign(Pkj); // fill in elements of Pkj_matrix from Pkj
|
---|
1902 | free(Pkj);
|
---|
1903 | Dmat_matrix.assign(0.0);
|
---|
1904 | Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
|
---|
1905 | Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
|
---|
1906 | // We now have the density-like matrix Dmat_matrix
|
---|
1907 |
|
---|
1908 | // Compute the G matrix
|
---|
1909 |
|
---|
1910 | RefSymmSCMatrix Gmat(nbasis_dim,kit);
|
---|
1911 | init_cs_gmat();
|
---|
1912 | tim_enter("make_gmat for Laj");
|
---|
1913 | make_cs_gmat_new(Gmat, Dmat_matrix);
|
---|
1914 | if (debug_ > 1) {
|
---|
1915 | Dmat_matrix.print("Dmat");
|
---|
1916 | Gmat.print("Gmat");
|
---|
1917 | }
|
---|
1918 | tim_exit("make_gmat for Laj");
|
---|
1919 |
|
---|
1920 | // Finish computation of Laj
|
---|
1921 | RefSCMatrix Laj_matrix(nocc_dim,nvir_dim,kit); // elements are ordered as j*nvir+a
|
---|
1922 | Laj_matrix->assign(Laj);
|
---|
1923 | if (debug_ > 1) Laj_matrix->print("Laj (first bit)");
|
---|
1924 | Laj_matrix = Laj_matrix - 2*Co.t()*Gmat*Cv;
|
---|
1925 | if (debug_ > 1) Laj_matrix->print("Laj (all of it)");
|
---|
1926 | Laj_matrix->convert(Laj); // Put new Laj_matrix elements into Laj
|
---|
1927 |
|
---|
1928 | tim_exit("Laj");
|
---|
1929 |
|
---|
1930 | //////////////////////////////////////
|
---|
1931 | // Computation of Laj is now complete
|
---|
1932 | //////////////////////////////////////
|
---|
1933 |
|
---|
1934 | ////////////////////////////
|
---|
1935 | // Solve the CPHF equations
|
---|
1936 | ////////////////////////////
|
---|
1937 | RefSCMatrix Paj_matrix(nvir_dim, nocc_dim, kit);
|
---|
1938 | tim_enter("cphf");
|
---|
1939 | cs_cphf(scf_vector, Laj, evals, Paj_matrix);
|
---|
1940 | tim_exit("cphf");
|
---|
1941 |
|
---|
1942 | free(Laj);
|
---|
1943 |
|
---|
1944 | // Finish computation of Waj
|
---|
1945 | for (a=0; a<nvir; a++) {
|
---|
1946 | waj_ptr = &Waj[a];
|
---|
1947 | for (j=0; j<nocc; j++) {
|
---|
1948 | *waj_ptr -= evals[j]*Paj_matrix->get_element(a,j);
|
---|
1949 | waj_ptr += nvir;
|
---|
1950 | }
|
---|
1951 | }
|
---|
1952 | // Waj is now complete
|
---|
1953 | RefSCMatrix Waj_matrix(nocc_dim, nvir_dim, kit);
|
---|
1954 | Waj_matrix->assign(Waj); // Put elements of Waj into Waj_matrix
|
---|
1955 | // NB. Waj_matrix elements are ordered as j*nvir+a
|
---|
1956 | free(Waj);
|
---|
1957 |
|
---|
1958 |
|
---|
1959 | // Finish computation of Wkj
|
---|
1960 | tim_enter("Pkj and Wkj");
|
---|
1961 | // Compute Dmat_matrix =
|
---|
1962 | // Co*Pkj_matrix*Co.t() + Co*Paj_matrix.t()*Cv.t()
|
---|
1963 | // + Cv*Paj_matrix*Co.t() + Cv*Pab_matrix*Cv.t();
|
---|
1964 | Dmat_matrix.assign(0.0);
|
---|
1965 | Dmat_matrix.accumulate_symmetric_sum(Cv*Paj_matrix*Co.t());
|
---|
1966 | Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
|
---|
1967 | Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
|
---|
1968 | tim_enter("make_gmat for Wkj");
|
---|
1969 | make_cs_gmat_new(Gmat, Dmat_matrix);
|
---|
1970 | tim_exit("make_gmat for Wkj");
|
---|
1971 | done_cs_gmat();
|
---|
1972 | for (i=0; i<thr_->nthread(); i++) tbints_[i] = 0;
|
---|
1973 | delete[] tbints_; tbints_ = 0;
|
---|
1974 | RefSCMatrix Wkj_matrix(nocc_dim, nocc_dim, kit);
|
---|
1975 | Wkj_matrix->assign(Wkj);
|
---|
1976 | Wkj_matrix = Wkj_matrix - 2*Co.t()*Gmat*Co;
|
---|
1977 | free(Wkj);
|
---|
1978 | // Wkj is now complete - not as Wkj but as Wkj_matrix
|
---|
1979 | tim_exit("Pkj and Wkj");
|
---|
1980 |
|
---|
1981 | ////////////////////////////////////////////////////////////////
|
---|
1982 | // We now have the matrices Pkj_matrix, Paj_matrix, Pab_matrix,
|
---|
1983 | // Wkj_matrix, Waj_matrix, Wab_matrix and can compute the
|
---|
1984 | // remaining contributions to the gradient
|
---|
1985 | ///////////////////////////////////////////////////////////////
|
---|
1986 |
|
---|
1987 | // Compute the second order correction to
|
---|
1988 | // the density matrix and energy weighted
|
---|
1989 | // density matrix in the AO basis
|
---|
1990 | RefSCMatrix P2AO_matrix(nbasis_dim, nbasis_dim, kit);
|
---|
1991 | RefSCMatrix P2MO_matrix(noso_dim, noso_dim, kit);
|
---|
1992 | RefSCMatrix W2AO_matrix(nbasis_dim, nbasis_dim, kit);
|
---|
1993 | RefSCMatrix W2MO_matrix(noso_dim, noso_dim, kit);
|
---|
1994 | RefSCMatrix SCF_matrix(nbasis_dim, noso_dim, kit);
|
---|
1995 | for (i=0; i<nocc; i++) {
|
---|
1996 | for (j=0; j<nocc; j++) {
|
---|
1997 | P2MO_matrix->set_element(i,j,Pkj_matrix->get_element(i,j));
|
---|
1998 | W2MO_matrix->set_element(i,j,Wkj_matrix->get_element(i,j));
|
---|
1999 | }
|
---|
2000 | for (j=nocc; j<noso; j++) {
|
---|
2001 | P2MO_matrix->set_element(i,j,Paj_matrix->get_element(j-nocc,i));
|
---|
2002 | W2MO_matrix->set_element(i,j,Waj_matrix->get_element(i,j-nocc));
|
---|
2003 | }
|
---|
2004 | }
|
---|
2005 | for (i=nocc; i<noso; i++) {
|
---|
2006 | for (j=0; j<nocc; j++) {
|
---|
2007 | P2MO_matrix->set_element(i,j,Paj_matrix->get_element(i-nocc,j));
|
---|
2008 | W2MO_matrix->set_element(i,j,Waj_matrix->get_element(j,i-nocc));
|
---|
2009 | }
|
---|
2010 | for (j=nocc; j<noso; j++) {
|
---|
2011 | P2MO_matrix->set_element(i,j,Pab_matrix->get_element(i-nocc,j-nocc));
|
---|
2012 | W2MO_matrix->set_element(i,j,Wab_matrix->get_element(i-nocc,j-nocc));
|
---|
2013 | }
|
---|
2014 | }
|
---|
2015 | for (i=0; i<nbasis; i++) {
|
---|
2016 | for (j=0; j<nocc; j++) {
|
---|
2017 | SCF_matrix->set_element(i,j,Co->get_element(i,j));
|
---|
2018 | }
|
---|
2019 | for (j=nocc; j<noso; j++) {
|
---|
2020 | SCF_matrix->set_element(i,j,Cv->get_element(i,j-nocc));
|
---|
2021 | }
|
---|
2022 | }
|
---|
2023 | P2AO_matrix = SCF_matrix * P2MO_matrix * SCF_matrix.t();
|
---|
2024 | W2AO_matrix = SCF_matrix * W2MO_matrix * SCF_matrix.t();
|
---|
2025 | // P2AO_matrix = Co*(Pkj_matrix*Co.t() + Paj_matrix.t()*Cv.t()) +
|
---|
2026 | // Cv*(Paj_matrix*Co.t() + Pab_matrix*Cv.t());
|
---|
2027 | // W2AO_matrix = Co*(Wkj_matrix*Co.t() + Waj_matrix*Cv.t()) +
|
---|
2028 | // Cv*(Waj_matrix.t()*Co.t() + Wab_matrix*Cv.t());
|
---|
2029 |
|
---|
2030 | if (debug_ > 1) {
|
---|
2031 | SCF_matrix.print("SCF_matrix");
|
---|
2032 | P2MO_matrix.print("P2MO_matrix");
|
---|
2033 | W2MO_matrix.print("W2MO_matrix");
|
---|
2034 | P2AO_matrix.print("P2AO_matrix");
|
---|
2035 | W2AO_matrix.print("W2AO_matrix");
|
---|
2036 | }
|
---|
2037 |
|
---|
2038 | // Convert P2AO_matrix and W2AO_matrix to double*
|
---|
2039 | P2AO = new double[nbasis*nbasis];
|
---|
2040 | W2AO = new double[nbasis*nbasis];
|
---|
2041 | P2AO_matrix->convert(P2AO);
|
---|
2042 | W2AO_matrix->convert(W2AO);
|
---|
2043 |
|
---|
2044 | // Compute the HF density matrix and
|
---|
2045 | // energy weighted density matrix
|
---|
2046 | PHF = new double[nbasis*nbasis];
|
---|
2047 | WHF = new double[nbasis*nbasis];
|
---|
2048 | phf_ptr = PHF;
|
---|
2049 | whf_ptr = WHF;
|
---|
2050 | for (p=0; p<nbasis; p++) {
|
---|
2051 | for (q=0; q<nbasis; q++) {
|
---|
2052 | *phf_ptr++ = 0.0;
|
---|
2053 | *whf_ptr++ = 0.0;
|
---|
2054 | }
|
---|
2055 | }
|
---|
2056 | phf_ptr = PHF;
|
---|
2057 | whf_ptr = WHF;
|
---|
2058 | for (p=0; p<nbasis; p++) {
|
---|
2059 | for (q=0; q<nbasis; q++) {
|
---|
2060 | c_pi = scf_vector[p];
|
---|
2061 | c_qi = scf_vector[q];
|
---|
2062 | for (i=0; i<nocc; i++) {
|
---|
2063 | tmpval = 2* *c_pi++ * *c_qi++;
|
---|
2064 | *phf_ptr += tmpval;
|
---|
2065 | *whf_ptr -= evals[i] * tmpval;
|
---|
2066 | } // exit i loop
|
---|
2067 | phf_ptr++;
|
---|
2068 | whf_ptr++;
|
---|
2069 | } // exit q loop
|
---|
2070 | } // exit p loop
|
---|
2071 |
|
---|
2072 | // Compute the MP2 density and energy weighted density
|
---|
2073 |
|
---|
2074 | // PMP2 = PHF + P2AO; WMP2 = WHF + W2AO
|
---|
2075 | PMP2 = new double[nbasis*nbasis];
|
---|
2076 | WMP2 = new double[nbasis*nbasis];
|
---|
2077 | // Initialize PMP2 and WMP2
|
---|
2078 | pmp2_ptr = PMP2;
|
---|
2079 | wmp2_ptr = WMP2;
|
---|
2080 | for (p=0; p<nbasis; p++) {
|
---|
2081 | for (q=0; q<nbasis; q++) {
|
---|
2082 | *pmp2_ptr++ = 0.0;
|
---|
2083 | *wmp2_ptr++ = 0.0;
|
---|
2084 | }
|
---|
2085 | }
|
---|
2086 | pmp2_ptr = PMP2;
|
---|
2087 | wmp2_ptr = WMP2;
|
---|
2088 | p2ao_ptr = P2AO;
|
---|
2089 | w2ao_ptr = W2AO;
|
---|
2090 | phf_ptr = PHF;
|
---|
2091 | whf_ptr = WHF;
|
---|
2092 | for (p=0; p<nbasis; p++) {
|
---|
2093 | for (q=0; q<nbasis; q++) {
|
---|
2094 | *pmp2_ptr++ = *phf_ptr++ + *p2ao_ptr++;
|
---|
2095 | *wmp2_ptr++ = *whf_ptr++ + *w2ao_ptr++;
|
---|
2096 | }
|
---|
2097 | }
|
---|
2098 | delete[] W2AO;
|
---|
2099 |
|
---|
2100 | if (debug_ > 1) {
|
---|
2101 | RefSCMatrix tmpmat(ao_dimension(), ao_dimension(), kit);
|
---|
2102 | tmpmat->assign(PMP2);
|
---|
2103 | tmpmat.print("PMP2");
|
---|
2104 | tmpmat->assign(P2AO);
|
---|
2105 | tmpmat.print("P2AO");
|
---|
2106 | tmpmat->assign(PHF);
|
---|
2107 | tmpmat.print("PHF");
|
---|
2108 | tmpmat->assign(WMP2);
|
---|
2109 | tmpmat.print("WMP2");
|
---|
2110 | }
|
---|
2111 |
|
---|
2112 | ////////////////////////////////////////////////
|
---|
2113 | // Compute the contribution to the MP2 gradient
|
---|
2114 | // from the separable part of the 2PDM
|
---|
2115 | ////////////////////////////////////////////////
|
---|
2116 |
|
---|
2117 | zero_gradients(ginter, natom, 3);
|
---|
2118 | zero_gradients(hf_ginter, natom, 3);
|
---|
2119 | tim_enter("sep 2PDM contrib.");
|
---|
2120 |
|
---|
2121 | CSGradS2PDM** s2pdmthread = new CSGradS2PDM*[thr_->nthread()];
|
---|
2122 | for (i=0; i<thr_->nthread(); i++) {
|
---|
2123 | s2pdmthread[i] = new CSGradS2PDM(i, thr_->nthread(), me, nproc,
|
---|
2124 | lock, basis(), tbintder_[i],
|
---|
2125 | PHF, P2AO, tol, debug_, dynamic_);
|
---|
2126 | thr_->add_thread(i,s2pdmthread[i]);
|
---|
2127 | # if SINGLE_THREAD_S2PDM
|
---|
2128 | s2pdmthread[i]->run();
|
---|
2129 | # endif
|
---|
2130 | }
|
---|
2131 | # if !SINGLE_THREAD_S2PDM
|
---|
2132 | thr_->start_threads();
|
---|
2133 | thr_->wait_threads();
|
---|
2134 | # endif
|
---|
2135 | for (i=0; i<thr_->nthread(); i++) {
|
---|
2136 | s2pdmthread[i]->accum_mp2_contrib(ginter);
|
---|
2137 | s2pdmthread[i]->accum_hf_contrib(hf_ginter);
|
---|
2138 | delete s2pdmthread[i];
|
---|
2139 | }
|
---|
2140 | sum_gradients(msg_, ginter, molecule()->natom(), 3);
|
---|
2141 | sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
|
---|
2142 | delete[] s2pdmthread;
|
---|
2143 |
|
---|
2144 | tim_exit("sep 2PDM contrib.");
|
---|
2145 | delete[] P2AO;
|
---|
2146 |
|
---|
2147 | // The separable 2PDM contribution to the gradient has now been
|
---|
2148 | // accumulated in ginter on node 0; add it to the total gradients
|
---|
2149 | if (me == 0) {
|
---|
2150 | accum_gradients(gradient, ginter, natom, 3);
|
---|
2151 | accum_gradients(hf_gradient, hf_ginter, natom, 3);
|
---|
2152 | }
|
---|
2153 | // Print out the contribution to the gradient from sep. 2PDM
|
---|
2154 | if (debug_) {
|
---|
2155 | print_natom_3(hf_ginter,
|
---|
2156 | "Contribution from separable 2PDM to HF gradient [au]:");
|
---|
2157 | print_natom_3(ginter,
|
---|
2158 | "Contribution from separable 2PDM to MP2 gradient [au]:");
|
---|
2159 | }
|
---|
2160 |
|
---|
2161 | // Done with two-electron integrals
|
---|
2162 | tbint_ = 0;
|
---|
2163 | if (dograd || do_d1_) {
|
---|
2164 | delete[] tbintder_;
|
---|
2165 | tbintder_ = 0;
|
---|
2166 | }
|
---|
2167 |
|
---|
2168 | /////////////////////////////////////////////////////////////
|
---|
2169 | // Compute the one-electron contribution to the MP2 gradient
|
---|
2170 | /////////////////////////////////////////////////////////////
|
---|
2171 |
|
---|
2172 | zero_gradients(ginter, natom, 3);
|
---|
2173 | zero_gradients(hf_ginter, natom, 3);
|
---|
2174 | tim_enter("hcore contrib.");
|
---|
2175 | hcore_cs_grad(PHF, PMP2, hf_ginter, ginter);
|
---|
2176 | tim_exit("hcore contrib.");
|
---|
2177 | delete[] PHF;
|
---|
2178 | delete[] PMP2;
|
---|
2179 | // The hcore contribution to the gradient has now been accumulated
|
---|
2180 | // in ginter on node 0; add it to the total gradients
|
---|
2181 | if (me == 0) {
|
---|
2182 | accum_gradients(gradient, ginter, natom, 3);
|
---|
2183 | accum_gradients(hf_gradient, hf_ginter, natom, 3);
|
---|
2184 | }
|
---|
2185 | // Print out the contribution to the gradient from hcore
|
---|
2186 | if (debug_) {
|
---|
2187 | print_natom_3(hf_ginter, "Contribution to HF gradient from hcore [au]:");
|
---|
2188 | print_natom_3(ginter, "Contribution to MP2 gradient from hcore [au]:");
|
---|
2189 | }
|
---|
2190 |
|
---|
2191 | zero_gradients(ginter, natom, 3);
|
---|
2192 | zero_gradients(hf_ginter, natom, 3);
|
---|
2193 | tim_enter("overlap contrib.");
|
---|
2194 | overlap_cs_grad(WHF, WMP2, hf_ginter, ginter);
|
---|
2195 | delete[] WHF;
|
---|
2196 | tim_exit("overlap contrib.");
|
---|
2197 | delete[] WMP2;
|
---|
2198 | // The overlap contribution to the gradient has now been accumulated
|
---|
2199 | // in ginter on node 0; add it to the total gradients
|
---|
2200 | if (me == 0) {
|
---|
2201 | accum_gradients(gradient, ginter, natom, 3);
|
---|
2202 | accum_gradients(hf_gradient, hf_ginter, natom, 3);
|
---|
2203 | }
|
---|
2204 |
|
---|
2205 | // Print out the overlap contribution to the gradient
|
---|
2206 | if (debug_) {
|
---|
2207 | print_natom_3(hf_ginter, "Overlap contribution to HF gradient [au]:");
|
---|
2208 | print_natom_3(ginter,"Overlap contribution to MP2 gradient [au]:");
|
---|
2209 | }
|
---|
2210 |
|
---|
2211 | ////////////////////////////////////////////////////////
|
---|
2212 | // Compute the nuclear contribution to the MP2 gradient
|
---|
2213 | ////////////////////////////////////////////////////////
|
---|
2214 |
|
---|
2215 | if (me == 0) {
|
---|
2216 | nuclear_repulsion_energy_gradient(ginter);
|
---|
2217 | accum_gradients(gradient, ginter, natom, 3);
|
---|
2218 | accum_gradients(hf_gradient, ginter, natom, 3);
|
---|
2219 |
|
---|
2220 | }
|
---|
2221 |
|
---|
2222 | // Print out the nuclear contribution to the gradient
|
---|
2223 | if (debug_)
|
---|
2224 | print_natom_3(ginter,"Nuclear contribution to MP2 gradient [au]:");
|
---|
2225 |
|
---|
2226 |
|
---|
2227 | ////////////////////////////////////////////////////////
|
---|
2228 | // The computation of the MP2 gradient is now complete;
|
---|
2229 | // print out the gradient
|
---|
2230 | ////////////////////////////////////////////////////////
|
---|
2231 | if (debug_) {
|
---|
2232 | ExEnv::out0() << "Obtaining HF gradient" << endl;
|
---|
2233 | print_natom_3(ref()->gradient(),"HF gradient from HF");
|
---|
2234 | print_natom_3(hf_gradient,"Total HF gradient from MP2 [au]:");
|
---|
2235 | }
|
---|
2236 | print_natom_3(gradient,"Total MP2 gradient [au]:");
|
---|
2237 |
|
---|
2238 | msg_->bcast(gradient_dat, natom*3);
|
---|
2239 | RefSCVector gradientvec = matrixkit()->vector(moldim());
|
---|
2240 | gradientvec->assign(gradient_dat);
|
---|
2241 | set_gradient(gradientvec);
|
---|
2242 |
|
---|
2243 | msg_->bcast(hf_gradient_dat, natom*3);
|
---|
2244 | hf_gradient_ = matrixkit()->vector(moldim());
|
---|
2245 | hf_gradient_->assign(hf_gradient_dat);
|
---|
2246 |
|
---|
2247 | delete[] gradient;
|
---|
2248 | delete[] gradient_dat;
|
---|
2249 |
|
---|
2250 | delete[] hf_gradient;
|
---|
2251 | delete[] hf_gradient_dat;
|
---|
2252 |
|
---|
2253 | for (i=0; i<natom; i++) {
|
---|
2254 | delete[] ginter[i];
|
---|
2255 | delete[] hf_ginter[i];
|
---|
2256 | }
|
---|
2257 | delete[] ginter;
|
---|
2258 | delete[] hf_ginter;
|
---|
2259 |
|
---|
2260 | delete[] scf_vector;
|
---|
2261 | delete[] scf_vector_dat;
|
---|
2262 | delete[] evals;
|
---|
2263 |
|
---|
2264 | tim_exit("mp2-mem");
|
---|
2265 | }
|
---|
2266 |
|
---|
2267 | ///////////////////////////////////////////////////////////
|
---|
2268 | // Compute the contribution to the MP2 gradient from hcore
|
---|
2269 | ///////////////////////////////////////////////////////////
|
---|
2270 | void
|
---|
2271 | MBPT2::hcore_cs_grad(double *PHF, double *PMP2,
|
---|
2272 | double **hf_ginter, double **ginter)
|
---|
2273 | {
|
---|
2274 |
|
---|
2275 | int i, j, k, l, m;
|
---|
2276 | int jj, kk;
|
---|
2277 | int jsize, ksize;
|
---|
2278 | int j_offset, k_offset;
|
---|
2279 | int jk_index;
|
---|
2280 | int index;
|
---|
2281 | int nshell;
|
---|
2282 | int nbasis;
|
---|
2283 | int nproc = msg_->n();
|
---|
2284 | int me = msg_->me();
|
---|
2285 |
|
---|
2286 | const double *oneebuf; // 1-electron buffer
|
---|
2287 | double tmpval1, tmpval2;
|
---|
2288 | double gxyz[3];
|
---|
2289 | double hf_gxyz[3];
|
---|
2290 |
|
---|
2291 | // Initialize 1e object
|
---|
2292 | Ref<OneBodyDerivInt> obintder_ = integral()->hcore_deriv();
|
---|
2293 | oneebuf = obintder_->buffer();
|
---|
2294 |
|
---|
2295 | nshell = basis()->nshell();
|
---|
2296 | nbasis = basis()->nbasis();
|
---|
2297 |
|
---|
2298 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2299 | // Compute the kinetic and nuclear-electron energy contribution to the gradient
|
---|
2300 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2301 |
|
---|
2302 | jk_index = 0;
|
---|
2303 |
|
---|
2304 | for (i=0; i<molecule()->natom(); i++) {
|
---|
2305 | for (j=0; j<nshell; j++) {
|
---|
2306 | jsize = basis()->shell(j).nfunction();
|
---|
2307 | j_offset = basis()->shell_to_function(j);
|
---|
2308 |
|
---|
2309 | for (k=0; k<=j; k++) {
|
---|
2310 | ksize = basis()->shell(k).nfunction();
|
---|
2311 | k_offset = basis()->shell_to_function(k);
|
---|
2312 |
|
---|
2313 | if (jk_index++%nproc == me) {
|
---|
2314 | obintder_->compute_shell(j,k,i);
|
---|
2315 |
|
---|
2316 | for (l=0; l<3; l++) { gxyz[l] = 0.0; hf_gxyz[l] = 0.0; }
|
---|
2317 |
|
---|
2318 | index = 0;
|
---|
2319 |
|
---|
2320 | for (jj=0; jj<jsize; jj++) {
|
---|
2321 | for (kk=0; kk<ksize; kk++) {
|
---|
2322 | tmpval1 = PMP2[(j_offset + jj)*nbasis + k_offset + kk];
|
---|
2323 | tmpval2 = PHF[(j_offset + jj)*nbasis + k_offset + kk];
|
---|
2324 | for (m=0; m<3; m++) {
|
---|
2325 | gxyz[m] += oneebuf[index] * tmpval1;
|
---|
2326 | hf_gxyz[m] += oneebuf[index] * tmpval2;
|
---|
2327 | index++;
|
---|
2328 | } // exit m loop
|
---|
2329 | } // exit kk loop
|
---|
2330 | } // exit jj loop
|
---|
2331 |
|
---|
2332 | if (j != k) {
|
---|
2333 | // off-diagonal blocks
|
---|
2334 | for (l=0; l<3; l++) {
|
---|
2335 | gxyz[l] *= 2.0;
|
---|
2336 | hf_gxyz[l] *= 2.0;
|
---|
2337 | }
|
---|
2338 | }
|
---|
2339 |
|
---|
2340 | for (l=0; l<3; l++) {
|
---|
2341 | ginter[i][l] += gxyz[l];
|
---|
2342 | hf_ginter[i][l] += hf_gxyz[l];
|
---|
2343 | }
|
---|
2344 |
|
---|
2345 | } // exit "if"
|
---|
2346 | } // exit k loop
|
---|
2347 | } // exit j loop
|
---|
2348 | } // exit i loop
|
---|
2349 |
|
---|
2350 | /* Accumulate the nodes' intermediate gradients on node 0 */
|
---|
2351 | sum_gradients(msg_, ginter, molecule()->natom(), 3);
|
---|
2352 | sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
|
---|
2353 | }
|
---|
2354 |
|
---|
2355 |
|
---|
2356 | ////////////////////////////////////////////////////
|
---|
2357 | // Compute the overlap contribution to the gradient
|
---|
2358 | ////////////////////////////////////////////////////
|
---|
2359 | void
|
---|
2360 | MBPT2::overlap_cs_grad(double *WHF, double *WMP2,
|
---|
2361 | double **hf_ginter, double **ginter)
|
---|
2362 | {
|
---|
2363 | int i, j, k, l, m;
|
---|
2364 | int jj, kk;
|
---|
2365 | int jj_index, kk_index;
|
---|
2366 | int jsize, ksize;
|
---|
2367 | int j_offset, k_offset;
|
---|
2368 | int jk_index;
|
---|
2369 | int index;
|
---|
2370 | int nshell;
|
---|
2371 | int nbasis;
|
---|
2372 |
|
---|
2373 | const double *oneebuf; // 1-electron buffer
|
---|
2374 | double tmpval1, tmpval2;
|
---|
2375 | double hf_tmpval1, hf_tmpval2;
|
---|
2376 | double gxyz[3];
|
---|
2377 | double hf_gxyz[3];
|
---|
2378 |
|
---|
2379 | // Initialize 1e object
|
---|
2380 | Ref<OneBodyDerivInt> obintder_ = integral()->overlap_deriv();
|
---|
2381 | oneebuf = obintder_->buffer();
|
---|
2382 |
|
---|
2383 | nshell = basis()->nshell();
|
---|
2384 | nbasis = basis()->nbasis();
|
---|
2385 | int nproc = msg_->n();
|
---|
2386 | int me = msg_->me();
|
---|
2387 |
|
---|
2388 | for (i=0; i<molecule()->natom(); i++) {
|
---|
2389 | jk_index = 0;
|
---|
2390 |
|
---|
2391 | for (j=0; j<nshell; j++) {
|
---|
2392 | j_offset = basis()->shell_to_function(j);
|
---|
2393 | jsize = basis()->shell(j).nfunction();
|
---|
2394 |
|
---|
2395 | for (k=0; k<=j; k++) {
|
---|
2396 | k_offset = basis()->shell_to_function(k);
|
---|
2397 | ksize = basis()->shell(k).nfunction();
|
---|
2398 |
|
---|
2399 | if (jk_index++%nproc == me) {
|
---|
2400 | obintder_->compute_shell(j,k,i);
|
---|
2401 |
|
---|
2402 | for (l=0; l<3; l++) {
|
---|
2403 | hf_gxyz[l] = 0.0;
|
---|
2404 | gxyz[l] = 0.0;
|
---|
2405 | }
|
---|
2406 | index = 0;
|
---|
2407 |
|
---|
2408 | for (jj=0; jj<jsize; jj++) {
|
---|
2409 | jj_index = j_offset + jj;
|
---|
2410 | for (kk=0; kk<ksize; kk++) {
|
---|
2411 | kk_index = k_offset + kk;
|
---|
2412 | if (kk_index > jj_index) {
|
---|
2413 | index += 3; // increment index since m-loop will be skipped
|
---|
2414 | break; // skip to next jj value
|
---|
2415 | }
|
---|
2416 | // NB. WMP2 is not a symmetrix matrix
|
---|
2417 | tmpval1 = WMP2[jj_index*nbasis + kk_index];
|
---|
2418 | tmpval2 = WMP2[kk_index*nbasis + jj_index];
|
---|
2419 | hf_tmpval1 = WHF[jj_index*nbasis + kk_index];
|
---|
2420 | hf_tmpval2 = WHF[kk_index*nbasis + jj_index];
|
---|
2421 |
|
---|
2422 | for (m=0; m<3; m++) {
|
---|
2423 | if (jj_index != kk_index) {
|
---|
2424 | gxyz[m] += oneebuf[index] * (tmpval1 + tmpval2);
|
---|
2425 | hf_gxyz[m] += oneebuf[index] * (hf_tmpval1 + hf_tmpval2);
|
---|
2426 | }
|
---|
2427 | else {
|
---|
2428 | gxyz[m] += oneebuf[index] * tmpval1;
|
---|
2429 | hf_gxyz[m] += oneebuf[index] * hf_tmpval1;
|
---|
2430 | }
|
---|
2431 | index++;
|
---|
2432 | } // exit m loop
|
---|
2433 | } // exit kk loop
|
---|
2434 | } // exit jj loop
|
---|
2435 |
|
---|
2436 | for (l=0; l<3; l++) {
|
---|
2437 | ginter[i][l] += gxyz[l];
|
---|
2438 | hf_ginter[i][l] += hf_gxyz[l];
|
---|
2439 | }
|
---|
2440 | } // exit "if"
|
---|
2441 | } // exit k loop
|
---|
2442 | } // exit j loop
|
---|
2443 | } // exit i loop
|
---|
2444 |
|
---|
2445 | /* Accumulate the nodes' intermediate gradients on node 0 */
|
---|
2446 | sum_gradients(msg_, ginter, molecule()->natom(), 3);
|
---|
2447 | sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
|
---|
2448 | }
|
---|
2449 |
|
---|
2450 | static void
|
---|
2451 | sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2)
|
---|
2452 | {
|
---|
2453 | int i;
|
---|
2454 |
|
---|
2455 | if (msg->n() == 1) return;
|
---|
2456 |
|
---|
2457 | for (i=0; i<n1; i++) {
|
---|
2458 | msg->sum(f[i],n2);
|
---|
2459 | }
|
---|
2460 | }
|
---|
2461 |
|
---|
2462 | static void
|
---|
2463 | zero_gradients(double **f, int n1, int n2)
|
---|
2464 | {
|
---|
2465 | for (int i=0; i<n1; i++) {
|
---|
2466 | for (int j=0; j<3; j++) f[i][j] = 0.0;
|
---|
2467 | }
|
---|
2468 | }
|
---|
2469 |
|
---|
2470 | static void
|
---|
2471 | accum_gradients(double **g, double **f, int n1, int n2)
|
---|
2472 | {
|
---|
2473 | for (int i=0; i<n1; i++) {
|
---|
2474 | for (int j=0; j<3; j++) g[i][j] += f[i][j];
|
---|
2475 | }
|
---|
2476 | }
|
---|
2477 |
|
---|
2478 | /////////////////////////////////////
|
---|
2479 | // Compute the batchsize
|
---|
2480 | //
|
---|
2481 | // Only arrays allocated before exiting the loop over
|
---|
2482 | // i-batches are included here - only these arrays
|
---|
2483 | // affect the batch size.
|
---|
2484 | /////////////////////////////////////
|
---|
2485 | int
|
---|
2486 | MBPT2::compute_cs_batchsize(size_t mem_static, int nocc_act)
|
---|
2487 | {
|
---|
2488 | size_t mem_dyn; // dynamic memory available
|
---|
2489 | distsize_t maxdyn;
|
---|
2490 | int ni;
|
---|
2491 |
|
---|
2492 | ///////////////////////////////////////
|
---|
2493 | // the largest memory requirement will
|
---|
2494 | // either occur just before the end of
|
---|
2495 | // the 1. q.b.t. (mem1) or just before
|
---|
2496 | // the end of the i-batch loop (mem2)
|
---|
2497 | ///////////////////////////////////////
|
---|
2498 |
|
---|
2499 | if (mem_alloc > mem_static) {
|
---|
2500 | mem_dyn = mem_alloc - mem_static;
|
---|
2501 | }
|
---|
2502 | else {
|
---|
2503 | mem_dyn = 0;
|
---|
2504 | }
|
---|
2505 |
|
---|
2506 | // First determine if calculation is possible at all (i.e., if ni=1 possible)
|
---|
2507 |
|
---|
2508 | ni = 1;
|
---|
2509 | maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
|
---|
2510 |
|
---|
2511 | if (maxdyn > mem_dyn) {
|
---|
2512 | return 0;
|
---|
2513 | }
|
---|
2514 |
|
---|
2515 | ni = 2;
|
---|
2516 | while (ni<=nocc_act) {
|
---|
2517 | maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
|
---|
2518 | if (maxdyn >= mem_dyn) {
|
---|
2519 | ni--;
|
---|
2520 | break;
|
---|
2521 | }
|
---|
2522 | ni++;
|
---|
2523 | }
|
---|
2524 | if (ni > nocc_act) ni = nocc_act;
|
---|
2525 |
|
---|
2526 | return ni;
|
---|
2527 | }
|
---|
2528 |
|
---|
2529 | /////////////////////////////////////
|
---|
2530 | // Compute required (dynamic) memory
|
---|
2531 | // for a given batch size
|
---|
2532 | //
|
---|
2533 | // Only arrays allocated before exiting the loop over
|
---|
2534 | // i-batches are included here - only these arrays
|
---|
2535 | // affect the batch size.
|
---|
2536 | /////////////////////////////////////
|
---|
2537 | distsize_t
|
---|
2538 | MBPT2::compute_cs_dynamic_memory(int ni, int nocc_act)
|
---|
2539 | {
|
---|
2540 | int index;
|
---|
2541 | distsize_t mem1, mem2, mem3;
|
---|
2542 | distsize_t maxdyn;
|
---|
2543 | distsize_t tmp;
|
---|
2544 | int i, j;
|
---|
2545 | int nij;
|
---|
2546 | int nproc = msg_->n();
|
---|
2547 |
|
---|
2548 | ///////////////////////////////////////
|
---|
2549 | // the largest memory requirement will
|
---|
2550 | // either occur just before the end of
|
---|
2551 | // the 1. q.b.t. (mem1) or just before
|
---|
2552 | // the end of the i-batch loop (mem2)
|
---|
2553 | ///////////////////////////////////////
|
---|
2554 |
|
---|
2555 | // compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes
|
---|
2556 | index = 0;
|
---|
2557 | nij = 0;
|
---|
2558 | for (i=0; i<ni; i++) {
|
---|
2559 | for (j=0; j<nocc; j++) {
|
---|
2560 | if (index++ % nproc == 0) nij++;
|
---|
2561 | }
|
---|
2562 | }
|
---|
2563 | mem1 = sizeof(double)*((distsize_t)nij*nbasis*nbasis
|
---|
2564 | + nbasis*nvir);
|
---|
2565 | mem2 = sizeof(double)*((distsize_t)thr_->nthread()*ni*nbasis*nfuncmax*nfuncmax
|
---|
2566 | + (distsize_t)nij*nbasis*nbasis
|
---|
2567 | + ni*nbasis + nbasis*nfuncmax
|
---|
2568 | + 2*nfuncmax*nfuncmax*nfuncmax*nfuncmax);
|
---|
2569 | mem3 = sizeof(double)*((distsize_t)ni*nbasis*nfuncmax*nfuncmax
|
---|
2570 | + (distsize_t)nij*nbasis*nbasis
|
---|
2571 | + 2*(2 + nbasis*nfuncmax));
|
---|
2572 | tmp = (mem2>mem3 ? mem2:mem3);
|
---|
2573 | maxdyn = (mem1>tmp ? mem1:tmp);
|
---|
2574 |
|
---|
2575 | return maxdyn;
|
---|
2576 | }
|
---|
2577 |
|
---|
2578 | ////////////////////////////////////////////////////////////////////////////
|
---|
2579 |
|
---|
2580 | // Local Variables:
|
---|
2581 | // mode: c++
|
---|
2582 | // c-file-style: "CLJ-CONDENSED"
|
---|
2583 | // End:
|
---|