source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/csgrad.cc@ aae63a

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since aae63a was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 82.1 KB
Line 
1//
2// csgrad.cc
3//
4// Copyright (C) 1996 Limit Point Systems, Inc.
5//
6// Author: Ida Nielsen <ida@kemi.aau.dk>
7// Maintainer: LPS
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28#include <stdlib.h>
29#include <math.h>
30#include <limits.h>
31
32#include <util/misc/formio.h>
33#include <util/misc/timer.h>
34#include <util/group/memory.h>
35#include <util/group/message.h>
36#include <util/class/class.h>
37#include <util/state/state.h>
38#include <math/scmat/matrix.h>
39#include <math/scmat/blocked.h>
40#include <chemistry/molecule/molecule.h>
41#include <chemistry/qc/basis/integral.h>
42#include <chemistry/qc/mbpt/bzerofast.h>
43#include <chemistry/qc/mbpt/mbpt.h>
44#include <chemistry/qc/mbpt/util.h>
45#include <chemistry/qc/mbpt/csgrade12.h>
46#include <chemistry/qc/mbpt/csgrad34qb.h>
47#include <chemistry/qc/mbpt/csgrads2pdm.h>
48
49using namespace std;
50using namespace sc;
51
52#define SINGLE_THREAD_E12 0
53#define SINGLE_THREAD_QBT34 0
54#define SINGLE_THREAD_S2PDM 0
55
56#define PRINT2Q 0
57#define PRINT3Q 0
58#define PRINT4Q 0
59#if PRINT_BIGGEST_INTS
60BiggestContribs biggest_ints_1(4,40);
61#endif
62
63#define WRITE_DOUBLES 0
64
65static void sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2);
66static void zero_gradients(double **f, int n1, int n2);
67static void accum_gradients(double **g, double **f, int n1, int n2);
68
69#define PRINT1Q 0
70
71#if PRINT_CONTRIB
72static void
73sw(int&i,int&j)
74{
75 int tmp = i;
76 i = j;
77 j = tmp;
78}
79
80static void
81print_contrib(double tmpval, int num, int onum,
82 int P,int Q,int R,int S, int p,int q,int r,int s)
83{
84
85 printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
86 num, P, Q, R, S, p, q, r, s, tmpval);
87 printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
88 onum, P, Q, R, S, p, q, r, s, -tmpval);
89
90 if (p < q) {
91 sw(p,q); sw(P,Q);
92 }
93 if (r < s) {
94 sw(r,s); sw(R,S);
95 }
96 if (p < r || (p == r && q < s)) {
97 sw(P,R); sw(p,r);
98 sw(Q,S); sw(q,s);
99 }
100
101 printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
102 num, P, Q, R, S, p, q, r, s, tmpval);
103 printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
104 onum, P, Q, R, S, p, q, r, s, -tmpval);
105}
106#endif
107
108void
109MBPT2::compute_cs_grad()
110{
111
112 // New version of MP2 gradient program which uses the full
113 // permutational symmetry of the two-electron integral derivatives
114
115 Ref<SCMatrixKit> kit = basis()->matrixkit();
116
117 int do_d2_ = 1; // if true, compute d2 diagnostic
118
119 int nij; // number of i,j pairs on a node (for e.g., mo_int)
120 double *mo_int; // MO integrals of type (ov|ov)
121 // (and these integrals divided by
122 // orbital energy denominators)
123 double *integral_iqjs; // half-transformed integrals
124
125 int nocc_act, nvir_act;
126 int i, j, k;
127 int ii, bb;
128 int x, y;
129 int a, b, c;
130 int nshell;
131 int offset;
132 int ik_offset;
133 int i_offset;
134 int npass, pass;
135 int tmpint;
136 int np, nq, nr, ns;
137 int P, Q, R, S;
138 int p, q, r, s;
139 int bf1, bf2, bf3, bf4;
140 int index;
141 int me;
142 int nproc;
143 int rest;
144 int p_offset, q_offset, r_offset, s_offset;
145
146 int aoint_computed = 0;
147 int aointder_computed = 0;
148 int xyz;
149 int natom = molecule()->natom(); // the number of atoms
150 int int_index;
151 size_t mem_static; // static memory in bytes
152 int ij_proc; // the processor which has ij pair
153 int ij_index; // of the ij pairs on a proc, this ij pair is number ij_index
154 // (i.e., ij_index < nij)
155 int ik_proc; // the processor which has ik pair
156 int ik_index;
157 int jloop, kloop;
158
159 int ni;
160
161 double *evals; // scf eigenvalues
162 double *iajb_ptr, *ibja_ptr, *iakb_ptr, *ibka_ptr;
163 double *iajc_ptr, *ibjc_ptr, *icjb_ptr, *icja_ptr;
164 double *ijkb_ptr, *ibkj_ptr;
165 double pqrs;
166 double *c_sa, c_rj;
167 double *c_pi, *c_qi, *c_sj;
168 double *c_qx, *c_qa, *c_sb, *c_pa, *c_pq, *c_sy;
169 double delta_ijab, delta_ijbc, delta_ijac;
170 double ecorr_mp2 = 0.0;
171 double escf;
172 double emp2=0.0;
173 int tol; // log2 of the erep tolerance
174 // (erep < 2^tol => discard)
175 double *Wkj=0,*Wab=0,*Waj=0;// occ-occ, vir-vir and vir-occ parts of
176 // second order correction to MP2
177 // energy weighted density matrix
178 double *Pkj=0,*Pab=0; // occ-occ and vir-vir parts of second order
179 // correction to MP2 density matrix
180 double *d2occ_mat, *d2vir_mat; // matrices for computation of D2 diagnostic
181 double *Laj=0; // MP2 Lagrangian
182 double *Lpi; // contrib to MP2 Lagrangian partially in AO basis
183 double *pkj_ptr=0, *pab_ptr;
184 double *d2occ_mat_ptr;
185 double *d2vir_mat_ptr;
186 double *wkj_ptr, *wjk_ptr, *wab_ptr, *wba_ptr, *waj_ptr=0;
187 double *laj_ptr, *lpi_ptr, *lqi_ptr;
188 double *gamma_iajs, *gamma_iajs_tmp;
189 // partially back-transformed non-sep 2PDM's
190 double *gamma_iqjs_tmp;
191 double *gamma_iajs_ptr;
192 double *gamma_iqjs_ptr;
193 double *gammabuf; // buffer used for sending elements of gamma_iqjs
194 double *mo_intbuf; // buffer used for sending mo integrals
195 double tmpval, tmpval1;
196 double *P2AO, *W2AO;
197 double *p2ao_ptr, *w2ao_ptr;
198 double *PHF, *WHF;
199 double *phf_ptr, *whf_ptr;
200 double *PMP2, *WMP2;
201 double *pmp2_ptr, *wmp2_ptr;
202
203 double *ixjs_tmp; // three-quarter transformed two-el integrals
204 double *integral_ixjs; // all three-quarter transformed two-el integrals
205 double *integral_iajy; // mo integrals (y = any MO)
206 double *integral_ikja; // mo integrals
207 double *integral_iqjs_ptr;
208 double *iajy_ptr;
209 double *ixjs_ptr;
210 double *ikja_ptr;
211 double *iajs_ptr, *ikjs_ptr;
212
213 double **gradient=0, *gradient_dat=0; // The MP2 gradient
214 double **hf_gradient=0, *hf_gradient_dat=0; // The HF gradient
215 double **ginter=0; // Intermediates for the MP2 gradient
216 double **hf_ginter=0; // Intermediates for the HF gradient
217 double d2o, d2v, d2_diag;
218
219 BiggestContribs biggest_coefs(5,10);
220 CharacterTable ct = molecule()->point_group()->char_table();
221
222#if PRINT_BIGGEST_INTS
223 BiggestContribs biggest_ints_2(4,40);
224 BiggestContribs biggest_ints_2s(4,40);
225 BiggestContribs biggest_ints_3a(4,40);
226 BiggestContribs biggest_ints_3(4,40);
227#endif
228
229 int dograd = gradient_needed();
230
231 tim_enter("mp2-mem");
232
233 nfuncmax = basis()->max_nfunction_in_shell();
234
235 nshell = basis()->nshell();
236
237 me = msg_->me();
238
239 if (me == 0) {
240 ExEnv::out0() << endl << indent
241 << "Entered memgrp based MP2 routine" << endl;
242 }
243
244 nproc = msg_->n();
245 if (me == 0)
246 ExEnv::out0() << indent << scprintf("nproc = %i", nproc) << endl;
247
248 tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
249
250 nocc = 0;
251 for (i=0; i<oso_dimension()->n(); i++) {
252 if (reference_->occupation(i) == 2.0) nocc++;
253 }
254
255 nocc_act = nocc - nfzc;
256 nvir = noso - nocc;
257 nvir_act = nvir - nfzv;
258
259 // Do a few preliminary tests to make sure the desired calculation
260 // can be done (and appears to be meaningful!)
261
262 if (nocc_act <= 0) {
263 if (me == 0) {
264 ExEnv::err0() << "There are no active occupied orbitals; program exiting" << endl;
265 }
266 abort();
267 }
268
269 if (nvir_act <= 0) {
270 if (me == 0) {
271 ExEnv::err0() << "There are no active virtual orbitals; program exiting" << endl;
272 }
273 abort();
274 }
275
276 if (restart_orbital_memgrp_) {
277 if (!dograd && !do_d1_ && !do_d2_) {
278 ExEnv::out0() << indent
279 << scprintf("Restarting at orbital %d with partial energy %18.14f",
280 restart_orbital_memgrp_, restart_ecorr_)
281 << endl;
282 ecorr_mp2 = restart_ecorr_;
283 }
284 else {
285 ExEnv::out0() << indent
286 << "Restart requested but not possible with gradients, D1, or D2"
287 << endl;
288 restart_ecorr_ = 0.0;
289 restart_orbital_memgrp_ = 0;
290 }
291 }
292 else {
293 restart_ecorr_ = 0.0;
294 }
295
296 ////////////////////////////////////////////////////////
297 // Compute batch size ni for mp2 loops;
298 //
299 // The following arrays are kept throughout (all of type double):
300 // scf_vector, gradient, ginter, Pkj, Pab, Wkj, Wab, Waj, Laj
301 // and memory allocated for these arrays and integral evaluators
302 // is called mem_static
303 //
304 ////////////////////////////////////////////////////////
305 if (me == 0) {
306 mem_static = nbasis*noso; // scf vector
307 mem_static += 2*nbasis*nfuncmax; // iqjs & iqjr
308 if (dograd) {
309 mem_static += 9*natom; // gradient & ginter & hf_ginter
310 mem_static += (nocc*(nocc+1))/2; // Pkj
311 mem_static += (nvir*(nvir+1))/2; // Pab
312 mem_static += nocc*nocc; // Wkj
313 mem_static += nvir*nvir; // Wab
314 mem_static += 2*nocc*nvir; // Waj & Laj
315 if (do_d2_) {
316 mem_static += (nocc_act*(nocc_act+1))/2; // d2occ_mat
317 mem_static += (nvir_act*(nvir_act+1))/2; // d2vir_mat
318 }
319 }
320 else if (do_d1_) {
321 mem_static += nocc*nvir; // partial Laj
322 }
323 mem_static *= sizeof(double);
324 int nthreads = thr_->nthread();
325 mem_static += nthreads * integral()->storage_required_eri(basis()); // integral evaluators
326 ni = compute_cs_batchsize(mem_static, nocc_act-restart_orbital_memgrp_);
327 }
328
329 if (max_norb_ > 0 && ni > max_norb_) {
330 ExEnv::out0() << indent
331 << "\"max_norb\" set: could have done "
332 << ni << " orbitals per pass otherwise."
333 << endl;
334 ni = max_norb_;
335 }
336
337 // Send value of ni and mem_static to other nodes
338 msg_->bcast(ni);
339 double dmem_static = mem_static;
340 msg_->bcast(dmem_static);
341 mem_static = size_t(dmem_static);
342
343 // Compute the storage to be used by the integral routines (required plus optional)
344 size_t dyn_mem = distsize_to_size(compute_cs_dynamic_memory(ni,nocc_act));
345 int mem_remaining;
346 if (mem_alloc <= (dyn_mem + mem_static)) mem_remaining = 0;
347 else mem_remaining = mem_alloc - dyn_mem - mem_static;
348 mem_remaining += thr_->nthread() * integral()->storage_required_eri(basis());
349
350 ExEnv::out0() << indent
351 << "Memory available per node: " << mem_alloc << " Bytes"
352 << endl;
353 ExEnv::out0() << indent
354 << "Static memory used per node: " << mem_static << " Bytes"
355 << endl;
356 ExEnv::out0() << indent
357 << "Total memory used per node: " << dyn_mem+mem_static << " Bytes"
358 << endl;
359 ExEnv::out0() << indent
360 << "Memory required for one pass: "
361 << compute_cs_dynamic_memory(nocc_act,nocc_act)+mem_static
362 << " Bytes"
363 << endl;
364 ExEnv::out0() << indent
365 << "Minimum memory required: "
366 << compute_cs_dynamic_memory(1,nocc_act)+mem_static
367 << " Bytes"
368 << endl;
369 ExEnv::out0() << indent
370 << "Batch size: " << ni
371 << endl;
372
373 if (ni == 0) {
374 ExEnv::err0() << "Batch size is 0: more memory or processors are needed"
375 << endl;
376 abort();
377 }
378
379 if (dynamic_) {
380 ExEnv::out0() << indent << "Using dynamic load balancing." << endl;
381 }
382
383 if (ni == nocc_act-restart_orbital_memgrp_) {
384 npass = 1;
385 rest = 0;
386 }
387 else {
388 rest = (nocc_act-restart_orbital_memgrp_)%ni;
389 npass = (nocc_act-restart_orbital_memgrp_ - rest)/ni + 1;
390 if (rest == 0) npass--;
391 }
392
393 if (me == 0) {
394 ExEnv::out0() << indent
395 << scprintf(" npass rest nbasis nshell nfuncmax") << endl;
396 ExEnv::out0() << indent
397 << scprintf(" %-4i %-3i %-5i %-4i %-3i",
398 npass,rest,nbasis,nshell,nfuncmax)
399 << endl;
400 ExEnv::out0() << indent
401 << scprintf(" nocc nvir nfzc nfzv") << endl;
402 ExEnv::out0() << indent
403 << scprintf(" %-4i %-4i %-4i %-4i",
404 nocc,nvir,nfzc,nfzv)
405 << endl;
406 }
407
408 int nijmax = 0;
409 index = 0;
410 for (i=0; i<ni; i++) {
411 for (j=0; j<nocc; j++) {
412 if (index++ % nproc == me) nijmax++;
413 }
414 }
415
416 ////////////////////////////////////////////////
417 // The scf vector is distributed between nodes;
418 // put a copy of the scf vector on each node;
419 ////////////////////////////////////////////////
420
421 escf = reference_->energy();
422 hf_energy_ = escf;
423
424 RefDiagSCMatrix occ;
425 RefSCMatrix Scf_Vec;
426 RefDiagSCMatrix evalmat;
427 eigen(evalmat, Scf_Vec, occ);
428
429 if (debug_ > 1) {
430 evalmat.print("eigenvalues");
431 Scf_Vec.print("eigenvectors");
432 }
433
434 double *scf_vector_dat = new double[nbasis*noso];
435 Scf_Vec.t()->convert(scf_vector_dat);
436
437 evals = new double[noso];
438 double** scf_vector = new double*[nbasis];
439 for (i=0; i<nbasis; i++) {
440 scf_vector[i] = &scf_vector_dat[i*noso];
441 }
442 for (i=0; i<noso; i++) {
443 evals[i] = evalmat(i);
444 }
445
446 Scf_Vec = 0;
447 evalmat = 0;
448
449 //////////////////////////////////////////////////////////////
450 // Allocate storage for various arrays needed for gradients
451 // (Pkj and Pab are symmetric, so store only lower triangle)
452 //////////////////////////////////////////////////////////////
453
454 if (dograd) {
455 Pkj = (double*) malloc((nocc*(nocc+1)/2)*sizeof(double));
456 Pab = (double*) malloc((nvir*(nvir+1)/2)*sizeof(double));
457 Wkj = (double*) malloc(nocc*nocc*sizeof(double));
458 Wab = (double*) malloc(nvir*nvir*sizeof(double));
459 Waj = (double*) malloc(nvir*nocc*sizeof(double));
460 if (do_d2_) {
461 d2occ_mat = new double[nocc_act*(nocc_act+1)/2];
462 d2vir_mat = new double[nvir_act*(nvir_act+1)/2];
463 }
464
465 gradient_dat = new double[natom*3];
466 gradient = new double*[natom];
467 for (i=0; i<natom; i++) {
468 gradient[i] = &gradient_dat[i*3];
469 }
470
471 hf_gradient_dat = new double[natom*3];
472 hf_gradient = new double*[natom];
473 for (i=0; i<natom; i++) {
474 hf_gradient[i] = &hf_gradient_dat[i*3];
475 }
476
477 ginter = new double*[natom];
478 for (i=0; i<natom; i++) {
479 ginter[i] = new double[3];
480 for (xyz=0; xyz<3; xyz++) ginter[i][xyz] = 0;
481 }
482
483 hf_ginter = new double*[natom];
484 for (i=0; i<natom; i++) {
485 hf_ginter[i] = new double[3];
486 for (xyz=0; xyz<3; xyz++) hf_ginter[i][xyz] = 0;
487 }
488
489 //////////////////////////////
490 // Initialize various arrays
491 //////////////////////////////
492
493 bzerofast(Pkj,nocc*(nocc+1)/2);
494 bzerofast(Wkj,nocc*nocc);
495 bzerofast(Pab,nvir*(nvir+1)/2);
496 bzerofast(Wab,nvir*nvir);
497 bzerofast(Waj,nvir*nocc);
498 if (do_d2_) {
499 bzerofast(d2occ_mat,nocc_act*(nocc_act+1)/2);
500 bzerofast(d2vir_mat,nvir_act*(nvir_act+1)/2);
501 }
502
503 if (me == 0) zero_gradients(gradient, natom, 3);
504 if (me == 0) zero_gradients(hf_gradient, natom, 3);
505 }
506
507 if (dograd || do_d1_) {
508 Laj = (double*) malloc(nvir*nocc*sizeof(double));
509 bzerofast(Laj,nvir*nocc);
510 }
511
512 if (debug_ > 2 && me == 0) {
513 for (j=0; j<noso; j++) {
514 ExEnv::out0() << indent
515 << scprintf("eigenvalue[%3d] = %15.10lf", j, evals[j]);
516 if (j < nfzc) ExEnv::out0() << " (frozen docc)";
517 else if (j < nocc_act + nfzc) ExEnv::out0() << " (active docc)";
518 else if (j < nvir_act + nocc_act + nfzc) ExEnv::out0() << " (active uocc)";
519 else ExEnv::out0() << " (frozen uocc)";
520 ExEnv::out0() << endl;
521 }
522 }
523
524 /////////////////////////////////////
525 // Begin MP2 loops
526 /////////////////////////////////////
527
528 // debug print
529 if (debug_ && me == 0) {
530 ExEnv::out0() << indent
531 << scprintf("node %i, begin loop over i-batches",me) << endl;
532 }
533 // end of debug print
534
535 // Initialize the integrals
536 integral()->set_storage(mem_remaining);
537 tbints_ = new Ref<TwoBodyInt>[thr_->nthread()];
538 for (i=0; i<thr_->nthread(); i++) {
539 tbints_[i] = integral()->electron_repulsion();
540 }
541 if (dograd || do_d1_) {
542 tbintder_ = new Ref<TwoBodyDerivInt>[thr_->nthread()];
543 for (i=0; i<thr_->nthread(); i++) {
544 tbintder_[i] = integral()->electron_repulsion_deriv();
545 }
546 }
547
548 int mem_integral_intermediates = integral()->storage_used();
549 int mem_integral_storage = (mem_remaining - mem_integral_intermediates) / thr_->nthread();
550 if (mem_integral_storage<0) mem_integral_storage = 0;
551 for (i=0; i<thr_->nthread(); i++) {
552 tbints_[i]->set_integral_storage(mem_integral_storage);
553 }
554
555 ExEnv::out0() << endl << indent
556 << scprintf("Memory used for integral intermediates: %i Bytes",
557 mem_integral_intermediates)
558 << endl;
559 ExEnv::out0() << indent
560 << scprintf("Memory used for integral storage: %i Bytes",
561 mem_integral_storage)
562 << endl;
563
564 if (mem.null()) {
565 ExEnv::errn() << "MBPT2: memory group not initialized" << endl;
566 abort();
567 }
568
569 mem->set_localsize(size_t(nijmax)*nbasis*nbasis*sizeof(double));
570 ExEnv::out0() << indent
571 << "Size of global distributed array: "
572 << mem->totalsize()
573 << " Bytes"
574 << endl;
575
576 MemoryGrpBuf<double> membuf_remote(mem);
577
578 int usep4 = !dograd;
579
580 Ref<ThreadLock> lock = thr_->new_lock();
581 CSGradErep12Qtr** e12thread = new CSGradErep12Qtr*[thr_->nthread()];
582 DistShellPair::SharedData sp_e_data, sp_g_data;
583 for (i=0; i<thr_->nthread(); i++) {
584 e12thread[i] = new CSGradErep12Qtr(i, thr_->nthread(), me, nproc,
585 mem, msg_, lock, basis(), tbints_[i],
586 nocc, scf_vector, tol, debug_,
587 dynamic_, print_percent_,
588 &sp_e_data, usep4);
589 }
590
591 CSGrad34Qbtr** qbt34thread;
592 if (dograd || do_d1_) {
593 qbt34thread = new CSGrad34Qbtr*[thr_->nthread()];
594 for (i=0; i<thr_->nthread(); i++) {
595 qbt34thread[i] = new CSGrad34Qbtr(i, thr_->nthread(), me, nproc,
596 mem, msg_, lock, basis(), tbints_[i],
597 tbintder_[i], nocc, nfzc, scf_vector,
598 tol, debug_, dynamic_, print_percent_,
599 &sp_g_data, dograd, natom);
600 }
601 }
602
603 tim_enter("mp2 passes");
604 for (pass=0; pass<npass; pass++) {
605
606 if (me == 0) {
607 ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
608 }
609
610 i_offset = restart_orbital_memgrp_ + pass*ni + nfzc;
611 if ((pass == npass - 1) && (rest != 0)) ni = rest;
612
613 // Compute number of of i,j pairs on each node for
614 // two-el integrals and non-sep 2PDM elements
615 index = 0;
616 nij = 0;
617 for (i=0; i<ni; i++) {
618 for (j=0; j<nocc; j++) {
619 if (index++ % nproc == me) nij++;
620 }
621 }
622
623 // debug print
624 if (debug_)
625 ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
626 // end of debug print
627
628 mem->sync(); // This must be here or gamma non-sep will be wrong when running
629 // on multiple processors with more than one pass
630
631 r_offset = 0;
632
633 // Allocate and initialize some arrays
634 // (done here to avoid having these arrays
635 // overlap with arrays allocated later)
636
637 // Allocate (and initialize) some arrays
638
639 integral_iqjs = (double*) mem->localdata();
640
641 bzerofast(integral_iqjs, nij*nbasis*nbasis);
642
643 integral_iqjs = 0;
644
645 mem->sync();
646
647 index = 0;
648
649 if (me == 0) {
650 ExEnv::out0() << indent
651 << scprintf("Begin loop over shells (erep, 1.+2. q.t.)") << endl;
652 }
653
654 // Do the two eletron integrals and the first two quarter transformations
655 tim_enter("erep+1.qt+2.qt");
656 sp_e_data.init();
657 for (i=0; i<thr_->nthread(); i++) {
658 e12thread[i]->set_i_offset(i_offset);
659 e12thread[i]->set_ni(ni);
660 thr_->add_thread(i,e12thread[i]);
661# if SINGLE_THREAD_E12
662 e12thread[i]->run();
663# endif
664 }
665# if !SINGLE_THREAD_E12
666 thr_->start_threads();
667 thr_->wait_threads();
668# endif
669 tim_exit("erep+1.qt+2.qt");
670
671 if (me == 0) {
672 ExEnv::out0() << indent << "End of loop over shells" << endl;
673 }
674
675 mem->sync(); // Make sure iqjs is complete on each node before continuing
676
677 integral_iqjs = (double*) mem->localdata();
678
679#if PRINT2Q
680 if (me == 0) {
681 int index = 0;
682 int ij_index = 0;
683 for (int i = 0; i<ni; i++) {
684 for (int j = 0; j<nocc; j++) {
685 if (index++ % nproc == me) {
686 if (j >= nfzc) {
687 double *integral_ij_offset = integral_iqjs + nbasis*nbasis*ij_index;
688 for (int s = 0; s<nbasis; s++) {
689 double *integral_ijsq_ptr = integral_ij_offset + s*nbasis;
690 for (int q = 0; q<nbasis; q++) {
691 printf("2Q: (%d %d|%d %d) = %12.8f\n",
692 i,q,j,s,*integral_ijsq_ptr);
693 *integral_ijsq_ptr++;
694 }
695 }
696 }
697 ij_index++;
698 }
699 }
700 }
701 }
702#endif
703
704 // Allocate and initialize some arrays
705 ixjs_tmp = new double[nbasis];
706
707 if (me == 0) {
708 ExEnv::out0() << indent << "Begin third q.t." << endl;
709 }
710
711 tim_enter("3. q.t.");
712 // Begin third quarter transformation;
713 // generate (ix|js) for i act, j act or frz, and x any MO (act or frz)
714 index = 0;
715 ij_index = 0;
716 for (i=0; i<ni; i++) {
717 for (j=0; j<nocc; j++) {
718 if (index++ % nproc == me) {
719
720 for (s=0; s<nbasis; s++) {
721
722 bzerofast(ixjs_tmp, nbasis);
723 for (q=0; q<nbasis; q++) {
724 integral_iqjs_ptr = &integral_iqjs[q + nbasis*(s + nbasis*ij_index)];
725 ixjs_ptr = ixjs_tmp;
726 c_qx = scf_vector[q];
727 tmpval = *integral_iqjs_ptr;
728#if PRINT_BIGGEST_INTS
729 biggest_ints_2.insert(tmpval,i+i_offset,j,s,q);
730 if ((i+i_offset==104 && j == 1)
731 ||(i+i_offset==104 && j == 2)) {
732 biggest_ints_2s.insert(tmpval,i+i_offset,j,s,q);
733 }
734#endif
735 for (x=0; x<noso; x++) {
736 *ixjs_ptr++ += *c_qx++ * tmpval;
737 }
738 } // exit q loop
739
740 // Put ixjs into integral_iqjs, while overwriting what was there;
741 // i.e., integral_iqjs will now contain three-quarter transformed
742 // integrals ixjs
743 integral_iqjs_ptr = &integral_iqjs[nbasis*(s + nbasis*ij_index)];
744 ixjs_ptr = ixjs_tmp;
745 for (x=0; x<noso; x++) {
746#if PRINT_BIGGEST_INTS
747 if (x>=nocc) {
748 biggest_ints_3a.insert(*ixjs_ptr,i+i_offset,j,s,x-nocc);
749 }
750#endif
751 *integral_iqjs_ptr++ = *ixjs_ptr++;
752 }
753 } // exit s loop
754 ij_index++;
755 } // endif
756 } // exit j loop
757 } // exit i loop
758 // end of third quarter transformation
759 tim_exit("3. q.t.");
760
761 if (me == 0) {
762 ExEnv::out0() << indent << "End of third q.t." << endl;
763 }
764
765 delete[] ixjs_tmp;
766
767 // The array of half-transformed integrals integral_iqjs has now
768 // been overwritten by three-quarter transformed integrals ixjs;
769 // rename the array integral_ixjs, where x = any MO
770 integral_ixjs = integral_iqjs;
771
772#if PRINT3Q
773 if (me == 0) {
774 int index = 0;
775 int ij_index = 0;
776 for (int i = 0; i<ni; i++) {
777 for (int j = 0; j<nocc; j++) {
778 if (index++ % nproc == me) {
779 if (j >= nfzc) {
780 double *integral_ij_offset = integral_ixjs + nbasis*nbasis*ij_index;
781 for (int s = 0; s<nbasis; s++) {
782 double *integral_ijsx_ptr = integral_ij_offset + s*nbasis;
783 for (int x = 0; x<noso; x++) {
784 printf("3Q: (%d %d|%d %d) = %12.8f\n",
785 i,x,j,s,*integral_ijsx_ptr);
786 *integral_ijsx_ptr++;
787 }
788 }
789 }
790 ij_index++;
791 }
792 }
793 }
794 }
795#endif
796
797 integral_iajy = new double[noso];
798 // in iajy: i act; a,j act or frz; y act or frz occ or virt.
799 integral_ikja = new double[nvir_act];
800 // in ikja: i,j act; k act or frz; a act.
801
802 if (me == 0) {
803 ExEnv::out0() << indent << "Begin fourth q.t." << endl;
804 }
805
806 // Begin fourth quarter transformation
807 // generating MO integrals (ov|ov), (ov|oo) and (oo|ov)
808 tim_enter("4. q.t.");
809 index = 0;
810 ij_index = 0;
811 for (i=0; i<ni; i++) {
812 for (j=0; j<nocc; j++) {
813 if (index++ % nproc == me) {
814
815 for (a=0; a<nvir; a++) {
816 bzerofast(integral_iajy, noso);
817 iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
818 for (s=0; s<nbasis; s++) {
819 c_sy = scf_vector[s];
820 iajy_ptr = integral_iajy;
821 tmpval = *iajs_ptr;
822#if PRINT_BIGGEST_INTS
823 biggest_ints_3.insert(tmpval,i+i_offset,j,s,a);
824 if ((i+i_offset==105 && j == 2 && s == 170 && a == 3)
825 ||(i+i_offset==102 && j == 2 && s == 170 && a == 2)) {
826 ExEnv::outn() << scprintf("3/4: %3d %3d %3d %3d: %16.10f",
827 i+i_offset, j, s, x-nocc)
828 << endl;
829 }
830#endif
831 for (y=0; y<noso; y++) {
832 *iajy_ptr++ += *c_sy++ * tmpval;
833 } // exit y loop
834 iajs_ptr += nbasis;
835 } // exit s loop
836 // Put integral_iajy into ixjs for one i,a,j while
837 // overwriting elements of ixjs
838 iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
839 iajy_ptr = integral_iajy;
840 for (y=0; y<noso; y++) {
841 *iajs_ptr = *iajy_ptr++;
842 iajs_ptr += nbasis;
843 } // exit y loop
844 } // exit a loop
845
846 // this is only needed for gradients
847 if ((dograd || do_d1_) && j >= nfzc) {
848 for (k=0; k<nocc; k++) {
849 bzerofast(integral_ikja, nvir_act);
850 ikjs_ptr = &integral_ixjs[k + nbasis*nbasis*ij_index];
851 for (s=0; s<nbasis; s++) {
852 c_sa = &scf_vector[s][nocc];
853 ikja_ptr = integral_ikja;
854 tmpval = *ikjs_ptr;
855 for (a=0; a<nvir_act; a++) {
856 *ikja_ptr++ += *c_sa++ * tmpval;
857 } // exit a loop
858 ikjs_ptr += nbasis;
859 } // exit s loop
860 // Put integral_ikja into ixjs for one i,k,j while
861 // overwriting elements of ixjs
862 ikjs_ptr = &integral_ixjs[k + nbasis*(nocc + nbasis*ij_index)];
863 ikja_ptr = integral_ikja;
864 for (a=0; a<nvir_act; a++) {
865 *ikjs_ptr = *ikja_ptr++;
866 ikjs_ptr += nbasis;
867 } // exit a loop
868 } // exit k loop
869 } //endif
870
871 ij_index++;
872 } // endif
873 } // exit j loop
874 } // exit i loop
875 // end of fourth quarter transformation
876 tim_exit("4. q.t.");
877
878 if (me == 0) {
879 ExEnv::out0() << indent << "End of fourth q.t." << endl;
880 }
881
882 // The array integral_ixjs has now been overwritten by MO integrals
883 // iajy and ikja, so rename the array mo_int
884 mo_int = integral_ixjs;
885
886 delete[] integral_iajy;
887 delete[] integral_ikja;
888
889 // Divide the (ia|jb) MO integrals by the term
890 // evals[i]+evals[j]-evals[a]-evals[b]
891 // and keep these integrals in mo_int;
892 // do this only for active i, j, a, and b
893 tim_enter("divide (ia|jb)'s");
894
895 index = 0;
896 ij_index = 0;
897 for (i=0; i<ni; i++) {
898 ii = i+i_offset;
899 for (j=0; j<nocc; j++) {
900 if (index++ % nproc == me) {
901 if (j>=nfzc) {
902 for (b=0; b<nvir_act; b++) {
903 iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
904 bb = b+nocc;
905 for (a=0; a<nvir_act; a++) {
906#if PRINT4Q
907 printf("4Q: (%d %d|%d %d) = %12.8f\n",
908 i,a+nocc,j,b+nocc,*iajb_ptr);
909#endif
910 // Zero out nonsymmetric integral, else divide by denominators
911 if (usep4 && ( symorb_irrep_[ii] ^
912 symorb_irrep_[j] ^
913 symorb_irrep_[a+nocc] ^
914 symorb_irrep_[bb]) ) {
915 *iajb_ptr++ = 0.0;
916 }
917 else
918 *iajb_ptr++ /= evals[ii]+evals[j]-evals[a+nocc]-evals[bb];
919 } // exit a loop
920 } // exit b loop
921 } // endif
922 ij_index++;
923 } // endif
924 } // exit j loop
925 } // exit i loop
926 tim_exit("divide (ia|jb)'s");
927
928 // We now have the fully transformed integrals (ia|jb)
929 // divided by the proper orbital energy denominators
930 // for one batch of i, all j<nocc, and all a<nvir and b<nvir,
931 // where i, j, a, and b are all active;
932 // compute contribution to the MP2 correlation energy
933 // from these integrals
934
935#if WRITE_DOUBLES
936 if (nproc > 1 || npass > 1) {
937 ExEnv::outn() << "csgrad.cc: WRITE_DOUBLES set but case not allowed" << endl;
938 abort();
939 }
940 ExEnv::outn() << "csgrad.cc: WRITING DOUBLES: CHECK ORDER" << endl;
941 char *doutname = SCFormIO::fileext_to_filename(".mp2");
942 FILE *dout = fopen(doutname,"w");
943 delete[] doutname;
944 fwrite(&nocc_act, sizeof(int), 1, dout);
945 fwrite(&nvir_act, sizeof(int), 1, dout);
946 for (j=nfzc; j<nocc; j++) {
947 for (b=0; b<nvir_act; b++) {
948 for (i=0; i<ni; i++) {
949 ij_index = nocc*i + j;
950 iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
951 fwrite(iajb_ptr, sizeof(double), nvir_act, dout);
952 for (a=0; a<nvir_act; a++) {
953 if (fabs(iajb_ptr[a])>1.0e-8) {
954 ExEnv::outn() << scprintf(" Djbia(%2d %2d %2d %2d) = %12.8f",
955 j+1-nfzc,b+1,i+1,a+1,iajb_ptr[a])
956 << endl;
957 }
958 }
959 }
960 }
961 }
962 fclose(dout);
963#endif
964
965 tim_enter("compute ecorr");
966
967 index = 0;
968 ij_index = 0;
969 for (i=0; i<ni; i++) {
970 double ecorr_i = 0.0;
971 for (j=0; j<nocc; j++) {
972 double ecorr_ij = 0.0;
973 if (index++ % nproc == me) {
974
975 if (j>=nfzc) {
976 for (b=0; b<nvir_act; b++) {
977 iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
978 ibja_ptr = &mo_int[b+nocc + nbasis*(nocc + nbasis*ij_index)];
979 for (a=0; a<nvir_act; a++) {
980 delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
981 // only include determinants with unique coefficients
982 if (a>=b && i_offset+i>=j) {
983 if (a>b && i_offset+i>j) {
984 // aaaa or bbbb
985 biggest_coefs.insert(*iajb_ptr - *ibja_ptr,
986 i_offset+i,j,a,b,1111);
987 // aabb or bbaa or abba or baab
988 biggest_coefs.insert(*ibja_ptr,i_offset+i,j,b,a,1212);
989 } // endif
990 // aabb or bbaa or abba or baab
991 biggest_coefs.insert(*iajb_ptr,i_offset+i,j,a,b,1212);
992 } // endif
993
994 tmpval = *iajb_ptr*(2**iajb_ptr - *ibja_ptr)*delta_ijab;
995 ecorr_mp2 += tmpval;
996 if (debug_) ecorr_ij += tmpval;
997 iajb_ptr++;
998 ibja_ptr += nbasis;;
999 } // exit a loop
1000 } // exit b loop
1001 } // endif
1002 ij_index++;
1003 } // endif
1004 if (debug_) {
1005 msg_->sum(ecorr_ij);
1006 ecorr_i += ecorr_ij;
1007 ExEnv::out0() << indent
1008 << scprintf("correlation energy for pair %3d %3d = %16.12f",
1009 i+i_offset, j, ecorr_ij)
1010 << endl;
1011 }
1012 } // exit j loop
1013 if (debug_) {
1014 ExEnv::out0() << indent
1015 << scprintf("correlation energy for orbital %3d = %16.12f",
1016 i+i_offset, ecorr_i)
1017 << endl;
1018 }
1019 } // exit i loop
1020 tim_exit("compute ecorr");
1021
1022 // debug print
1023 if (debug_ && me == 0) {
1024 ExEnv::out0() << indent << "End of ecorr" << endl;
1025 }
1026 // end of debug print
1027
1028 if (npass > 1 && pass < npass - 1) {
1029 double passe = ecorr_mp2;
1030 msg_->sum(passe);
1031 ExEnv::out0() << indent
1032 << "Partial correlation energy for pass " << pass << ":" << endl;
1033 ExEnv::out0() << indent
1034 << scprintf(" restart_ecorr = %18.14f", passe)
1035 << endl;
1036 ExEnv::out0() << indent
1037 << scprintf(" restart_orbital_memgrp = %d", ((pass+1) * ni))
1038 << endl;
1039 }
1040
1041 integral_iqjs = 0;
1042 mem->sync(); // Make sure MO integrals are complete on all nodes before continuing
1043
1044 // don't go beyond this point if only the energy is needed
1045 if (!dograd && !do_d1_) continue;
1046
1047 mo_int = (double*) mem->localdata();
1048
1049 if (!dograd) goto compute_L;
1050
1051 // Update the matrices Pkj and Wkj with
1052 // contributions from (occ vir|occ vir) integrals
1053 index = 0;
1054 ij_index = 0;
1055 tim_enter("Pkj and Wkj");
1056 for (i=0; i<ni; i++) {
1057 for (j=0; j<nocc; j++) {
1058 if (index++ % nproc == me) {
1059
1060 if (j>=nfzc) {
1061 for (kloop=me; kloop<me+nocc; kloop++) {
1062 // stagger k's to minimize contention
1063 k = kloop%nocc;
1064 if (k<=j) pkj_ptr = &Pkj[j*(j+1)/2 + k];
1065 if (do_d2_) {
1066 if (k<=j && k>=nfzc) {
1067 d2occ_mat_ptr = &d2occ_mat[(j-nfzc)*(j-nfzc+1)/2 + k-nfzc];
1068 }
1069 }
1070 wjk_ptr = &Wkj[j*nocc + k];
1071 // Send for iakb, if necessary
1072 ik_index = (i*nocc + k)/nproc;
1073 ik_proc = (i*nocc + k)%nproc;
1074 ik_offset = nocc + nocc*nbasis + nbasis*nbasis*ik_index;
1075 mo_intbuf = (double*) membuf_remote.readonly_on_node(ik_offset,
1076 nbasis*nvir-nocc,
1077 ik_proc);
1078 for (a=0; a<nvir_act; a++) {
1079 ibja_ptr = &mo_int[nocc + nbasis*(a+nocc + nbasis*ij_index)];
1080 iajb_ptr = &mo_int[a+nocc + nbasis*(nocc + nbasis*ij_index)];
1081 iakb_ptr = &mo_intbuf[a];
1082 for (b=0; b<nvir_act; b++) {
1083 tmpval1 = *iakb_ptr * *iajb_ptr;
1084 tmpval = 2**iakb_ptr * *ibja_ptr++ - 4*tmpval1;
1085 // tmpval = 2**iakb_ptr * (*ibja_ptr++ - 2 * *iajb_ptr);
1086 iakb_ptr += nbasis;
1087 iajb_ptr += nbasis;
1088 if (k<nfzc && k<=j) *pkj_ptr += tmpval/(evals[k]-evals[j]);
1089 else if (k<=j) {
1090 *pkj_ptr += tmpval;
1091 if (do_d2_) *d2occ_mat_ptr += tmpval1;
1092 }
1093 if (k>=nfzc) {
1094 delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
1095 *wjk_ptr += tmpval*delta_ijab;
1096 }
1097 } // exit b loop
1098 } // exit a loop
1099 mo_intbuf = 0;
1100 membuf_remote.release();
1101 } // end kloop loop
1102 } // endif
1103
1104 ij_index++;
1105 } // endif
1106 } // exit j loop
1107 } // exit i loop
1108 tim_exit("Pkj and Wkj");
1109
1110 // debug print
1111 if (debug_ && me == 0) {
1112 ExEnv::out0() << indent << "End of Pkj and Wkj" << endl;
1113 }
1114 // end of debug print
1115
1116 // Update the matrices Pab and Wab with
1117 // contributions from (occ vir|occ vir) integrals
1118 tim_enter("Pab and Wab");
1119 index = 0;
1120 ij_index = 0;
1121 for (i=0; i<ni; i++) {
1122 for (j=0; j<nocc; j++) {
1123 if (index++ % nproc == me) {
1124 if (j>=nfzc) {
1125
1126 offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
1127 for (a=0; a<nvir_act; a++) {
1128 pab_ptr = &Pab[a*(a+1)/2];
1129 if (do_d2_) d2vir_mat_ptr = &d2vir_mat[a*(a+1)/2];
1130 for (b=0; b<=a; b++) { // active-active part of Pab and Wab
1131 wab_ptr = &Wab[a*nvir + b];
1132 wba_ptr = &Wab[b*nvir + a];
1133 ibjc_ptr = &mo_int[offset + b];
1134 icjb_ptr = &mo_int[offset + b*nbasis];
1135 iajc_ptr = &mo_int[offset + a];
1136 icja_ptr = &mo_int[offset + a*nbasis];
1137 for (c=0; c<nvir_act; c++) {
1138 tmpval = *iajc_ptr**ibjc_ptr;
1139 if (do_d2_) *d2vir_mat_ptr += tmpval;
1140 *pab_ptr += 4*tmpval - 2**iajc_ptr * *icjb_ptr;
1141 // *pab_ptr += 2**iajc_ptr * (2 * *ibjc_ptr - *icjb_ptr);
1142
1143 if (a == b) {
1144 delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
1145 *wab_ptr += 2**ibjc_ptr*(*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
1146 }
1147 else {
1148 delta_ijbc = evals[i_offset+i]+evals[j]-evals[nocc+b]-evals[nocc+c];
1149 delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
1150 *wab_ptr += 2**ibjc_ptr * (*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
1151 *wba_ptr += 2**iajc_ptr * (*icjb_ptr - 2 * *ibjc_ptr)*delta_ijbc;
1152 }
1153 iajc_ptr += nbasis;
1154 ibjc_ptr += nbasis;
1155 icja_ptr++;
1156 icjb_ptr++;
1157 } // exit c loop
1158 pab_ptr++;
1159 d2vir_mat_ptr++;
1160 } // exit b loop
1161 } // exit a loop
1162
1163 for (a=0; a<nfzv; a++) { // active-frozen part of Pab
1164 pab_ptr = &Pab[(a+nvir_act)*(a+nvir_act+1)/2];
1165 for (b=0; b<nvir_act; b++) {
1166 tmpval = evals[nocc+b] - evals[nocc+nvir_act+a];
1167 ibjc_ptr = &mo_int[offset+b];
1168 iajc_ptr = &mo_int[offset+a+nvir_act];
1169 icja_ptr = &mo_int[offset+(a+nvir_act)*nbasis];
1170 for (c=0; c<nvir_act; c++) {
1171 *pab_ptr += 2**ibjc_ptr*(2**iajc_ptr - *icja_ptr++)/tmpval;
1172 ibjc_ptr += nbasis;
1173 iajc_ptr += nbasis;
1174 } // exit c loop
1175 pab_ptr++;
1176 } // exit b loop
1177 } // exit a loop
1178
1179 } // endif
1180 ij_index++;
1181 } // endif
1182 } // exit j loop
1183 } // exit i loop
1184 tim_exit("Pab and Wab");
1185
1186 // debug print
1187 if (debug_ && me == 0) {
1188 ExEnv::out0() << indent << "End of Pab and Wab" << endl;
1189 }
1190 // end of debug print
1191
1192 compute_L:
1193
1194 ///////////////////////////////////////
1195 // Update Waj and Laj with contrib.
1196 // from (oo|ov) and (ov|oo) integrals;
1197 // here a is active and j is active or
1198 // frozen
1199 ///////////////////////////////////////
1200 tim_enter("Waj and Laj");
1201
1202 // (oo|ov) contribution
1203 index = 0;
1204 ik_index = 0;
1205 for (i=0; i<ni; i++) {
1206 for (k=0; k<nocc; k++) {
1207 if (index++ % nproc == me) {
1208 if (k>=nfzc) {
1209 offset = nbasis*nocc + nbasis*nbasis*ik_index;
1210 for (j=0; j<nocc; j++) {
1211 for (b=0; b<nvir_act; b++) {
1212 ibka_ptr = &mo_int[b+nocc + offset];
1213 ijkb_ptr = &mo_int[j + nbasis*b + offset];
1214 if (dograd) waj_ptr = &Waj[j*nvir]; // order as j*nvir+a to make loops more efficient
1215 laj_ptr = &Laj[j*nvir];
1216 for (a=0; a<nvir_act; a++) {
1217 tmpval = 2**ibka_ptr * *ijkb_ptr;
1218 ibka_ptr += nbasis;
1219 if (dograd) *waj_ptr++ += tmpval;
1220 *laj_ptr++ -= tmpval; // This term had the wrong sign in Frisch's paper
1221 } // exit a loop
1222 } // exit b loop
1223 } // exit j loop
1224 } // endif
1225 ik_index++;
1226 } // endif
1227 } // exit k loop
1228 } // exit i loop
1229
1230 // (ov|oo) contribution
1231 index = 0;
1232 ik_index = 0;
1233 for (i=0; i<ni; i++) {
1234 for (k=0; k<nocc; k++) {
1235 if (index++ % nproc == me) {
1236 if (k>=nfzc) {
1237 offset = nocc + nbasis*nbasis*ik_index;
1238 for (b=0; b<nvir_act; b++) {
1239 for (j=0; j<nocc; j++) {
1240 ibkj_ptr = &mo_int[offset + b + j*nbasis];
1241 ibka_ptr = &mo_int[offset + b + nocc*nbasis];
1242 if (dograd) waj_ptr = &Waj[j*nvir];
1243 laj_ptr = &Laj[j*nvir];
1244 for (a=0; a<nvir_act; a++) {
1245 tmpval = 4 * *ibka_ptr * *ibkj_ptr;
1246 ibka_ptr += nbasis;
1247 if (dograd) *waj_ptr++ -= tmpval;
1248 *laj_ptr++ += tmpval; // This term had the wrong sign in Frisch's paper
1249 } // exit a loop
1250 } // exit j loop
1251 } // exit b loop
1252 } // endif
1253 ik_index++;
1254 } // endif
1255 } // exit k loop
1256 } // exit i loop
1257
1258 tim_exit("Waj and Laj");
1259 /////////////////////////////
1260 // End of Waj and Laj update
1261 /////////////////////////////
1262
1263 // debug print
1264 if (debug_ && me == 0) {
1265 ExEnv::out0() << indent << "End of Paj and Waj" << endl;
1266 }
1267 // end of debug print
1268
1269 mo_int = 0;
1270
1271 mem->sync(); // Need to synchronize before deleting mo_intbuf
1272
1273 mo_int = (double*) mem->localdata();
1274
1275 gamma_iajs_tmp = new double[nbasis*nvir_act];
1276 if (!gamma_iajs_tmp) {
1277 ExEnv::outn() << indent << "Could not allocate gamma_iajs_tmp" << endl;
1278 abort();
1279 }
1280
1281 // debug print
1282 if (debug_ && me == 0) {
1283 ExEnv::out0() << indent << "Begin first and second q.b.t." << endl;
1284 }
1285 // end of debug print
1286
1287 ///////////////////////////////////////////////////////////
1288 // Perform first and second quarter back-transformation.
1289 // Each node produces gamma_iajs, and gamma_iqjs
1290 // for a subset of i and j, all a and all s;
1291 // the back-transf. is done only for active i, j, a, and b
1292 ///////////////////////////////////////////////////////////
1293
1294 // Begin first quarter back-transformation
1295 tim_enter("1. q.b.t.");
1296 index = 0;
1297 ij_index = 0;
1298 for (i=0; i<ni; i++) {
1299 for (j=0; j<nocc; j++) {
1300 if (index++ % nproc == me) {
1301 if (j>=nfzc) {
1302 bzerofast(gamma_iajs_tmp,nbasis*nvir_act);
1303 offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
1304
1305 for (a=0; a<nvir_act; a++) {
1306 for (s=0; s<nbasis; s++) {
1307 c_sb = &scf_vector[s][nocc];
1308 gamma_iajs_ptr = &gamma_iajs_tmp[s*nvir_act + a];
1309 ibja_ptr = &mo_int[a*nbasis + offset];
1310 iajb_ptr = &mo_int[a + offset];
1311
1312 tmpval = 0.0;
1313 for (b=0; b<nvir_act; b++) {
1314 tmpval += 2**c_sb++ * (2**iajb_ptr - *ibja_ptr++);
1315 iajb_ptr += nbasis;
1316 } // exit b loop
1317 *gamma_iajs_ptr += tmpval;
1318 } // exit s loop
1319 } // exit a loop
1320 // Put gamma_iajs_tmp into mo_int for one i,j
1321 // while overwriting mo_int
1322 gamma_iajs_ptr = gamma_iajs_tmp;
1323 for (y=0; y<nbasis; y++) {
1324 iajy_ptr = &mo_int[nocc + nbasis*(y + nbasis*ij_index)];
1325 for (a=0; a<nvir_act; a++) {
1326 *iajy_ptr++ = *gamma_iajs_ptr++;
1327 }
1328 }
1329
1330 } // endif
1331 ij_index++;
1332 } // endif
1333 } // exit j loop
1334 } // exit i loop
1335 // end of first quarter back-transformation
1336 tim_exit("1. q.b.t.");
1337
1338 // debug print
1339 if (debug_ && me == 0) {
1340 ExEnv::out0() << indent << "End of first q.b.t." << endl;
1341 }
1342 // end of debug print
1343
1344 mo_int = 0;
1345
1346 mem->sync(); // Make sure all nodes are done with gamma_iajs_tmp before renaming
1347
1348 delete[] gamma_iajs_tmp;
1349
1350 // The array mo_int has now been overwritten by the quarter
1351 // back-transformed non-sep 2PDM gamma_iajs, so rename
1352 gamma_iajs = (double*) mem->localdata();
1353
1354 gamma_iqjs_tmp = new double[nbasis];
1355 if (!gamma_iqjs_tmp) {
1356 ExEnv::errn() << "Could not allocate gamma_iqjs_tmp" << endl;
1357 abort();
1358 }
1359
1360 if (debug_ && me == 0) {
1361 ExEnv::out0() << indent << "Begin second q.b.t." << endl;
1362 }
1363
1364 // Begin second quarter back-transformation
1365 // (gamma_iqjs elements ordered as i,j,s,q,
1366 // i.e., q varies fastest)
1367 tim_enter("2. q.b.t.");
1368 index = 0;
1369 ij_index = 0;
1370 for (i=0; i<ni; i++) {
1371 for (j=0; j<nocc; j++) {
1372 if (index++ % nproc == me) {
1373 if (j>=nfzc) {
1374 offset = nbasis*nbasis*ij_index;
1375
1376 for (s=0; s<nbasis; s++) {
1377 bzerofast(gamma_iqjs_tmp,nbasis);
1378 for (q=0; q<nbasis; q++) {
1379 gamma_iqjs_ptr = &gamma_iqjs_tmp[q];
1380 gamma_iajs_ptr = &gamma_iajs[nocc + s*nbasis + offset];
1381 c_qa = &scf_vector[q][nocc];
1382
1383 tmpval = 0.0;
1384 for (a=0; a<nvir_act; a++) {
1385 tmpval += *c_qa++ * *gamma_iajs_ptr++;
1386 } // exit a loop
1387 *gamma_iqjs_ptr += tmpval;
1388 } // exit q loop
1389 // Put gamma_iqjs_tmp into gamma_iajs for one i,j,s
1390 // while overwriting gamma_iajs
1391 gamma_iajs_ptr = &gamma_iajs[s*nbasis + offset];
1392 gamma_iqjs_ptr = gamma_iqjs_tmp;
1393 for (q=0; q<nbasis; q++) {
1394 *gamma_iajs_ptr++ = *gamma_iqjs_ptr++;
1395 }
1396 } // exit s loop
1397
1398 } // endif
1399 ij_index++;
1400 } // endif
1401 } // exit j loop
1402 } // exit i loop
1403 tim_exit("2. q.b.t.");
1404 // end of second quarter back-transformation
1405
1406 if (debug_ && me == 0) {
1407 ExEnv::out0() << indent << "End of second q.b.t." << endl;
1408 }
1409
1410 gamma_iajs = 0;
1411
1412 mem->sync(); // Keep this here to make sure all nodes have gamma_iqjs
1413 // before it is needed below, and that gamma_iajs is not
1414 // deleted prematurely
1415
1416 // The quarter back-transformed elements gamma_iajs have now been
1417 // overwritten by the half back-transformed elements gamma_iqjs
1418
1419 delete[] gamma_iqjs_tmp;
1420
1421 /////////////////////////////////////////////////
1422 // End of 1. and 2. quarter back-transformation
1423 /////////////////////////////////////////////////
1424
1425 Lpi = new double[nbasis*ni];
1426 bzerofast(Lpi,nbasis*ni);
1427
1428 if (me == 0) {
1429 ExEnv::out0() << indent << "Begin third and fourth q.b.t." << endl;
1430 }
1431
1432 //////////////////////////////////////////////////////////
1433 // Perform third and fourth quarter back-transformation
1434 // and compute contribution to gradient from non-sep 2PDM
1435 //////////////////////////////////////////////////////////
1436
1437 tim_enter("3.qbt+4.qbt+non-sep contrib.");
1438 sp_g_data.init();
1439 for (i=0; i<thr_->nthread(); i++) {
1440 qbt34thread[i]->set_i_offset(i_offset);
1441 qbt34thread[i]->set_ni(ni);
1442 thr_->add_thread(i,qbt34thread[i]);
1443# if SINGLE_THREAD_QBT34
1444 qbt34thread[i]->run();
1445# endif
1446 }
1447# if !SINGLE_THREAD_QBT34
1448 thr_->start_threads();
1449 thr_->wait_threads();
1450# endif
1451 tim_exit("3.qbt+4.qbt+non-sep contrib.");
1452 // Add thread contributions to Lpi and ginter
1453 for (i=0; i<thr_->nthread(); i++) {
1454 double *Lpi_thread = qbt34thread[i]->get_Lpi();
1455 double **ginter_thread = qbt34thread[i]->get_ginter();
1456 for (j=0; j<nbasis*ni; j++) Lpi[j] += Lpi_thread[j];
1457 for (j=0; j<natom; j++) {
1458 for (k=0; k<3; k++) {
1459 ginter[j][k] += ginter_thread[j][k];
1460 }
1461 }
1462 aointder_computed += qbt34thread[i]->get_aointder_computed();
1463 }
1464
1465 if (me == 0) {
1466 ExEnv::out0() << indent << "End of third and fourth q.b.t." << endl;
1467 }
1468
1469 mem->sync(); // Make sure all nodes are done before deleting arrays
1470
1471 if (debug_ > 1) {
1472 RefSCDimension ni_dim(new SCDimension(ni,1));
1473 ni_dim->blocks()->set_subdim(0, new SCDimension(ni));
1474 RefSCDimension nbasis_dim(new SCDimension(nbasis,1));
1475 nbasis_dim->blocks()->set_subdim(0, new SCDimension(nbasis));
1476 RefSCMatrix Lpi_mat(nbasis_dim, ni_dim, kit);
1477 Lpi_mat->assign(Lpi);
1478 Lpi_mat.print("Lpi");
1479 }
1480
1481 if (debug_ && me == 0) {
1482 ExEnv::out0() << indent << "Back-transform Lpi" << endl;
1483 }
1484
1485 // Back-transform Lpi to MO basis
1486 lpi_ptr = Lpi;
1487 for (p=0; p<nbasis; p++) {
1488 for (i=0; i<ni; i++) {
1489 c_pa = &scf_vector[p][nocc];
1490 laj_ptr = &Laj[nvir*(i_offset + i)];
1491 for (a=0; a<nvir; a++) {
1492 *laj_ptr++ += *c_pa++ * *lpi_ptr;
1493 } // exit a loop
1494 lpi_ptr++;
1495 } // exit i loop
1496 } // exit p loop
1497
1498// malloc_chain_check(1);
1499
1500 delete[] Lpi;
1501
1502 if (me == 0) {
1503 ExEnv::out0() << indent << "Done with pass " << pass+1 << endl;
1504 }
1505 } // exit loop over i-batches (pass)
1506 tim_exit("mp2 passes");
1507
1508 if (dograd || do_d1_) {
1509 for (i=0; i<thr_->nthread(); i++) {
1510 delete qbt34thread[i];
1511 }
1512 delete[] qbt34thread;
1513 }
1514
1515 mem->set_localsize(0);
1516
1517 // debug print
1518 if (debug_ && me == 0) {
1519 ExEnv::out0() << indent << "Exited loop over i-batches" << endl;
1520 }
1521 // end of debug print
1522
1523 ///////////////////////////////////////////////////////////////
1524 // The computation of the MP2 energy is now complete on each
1525 // node; add the nodes' contributions and print out the energy
1526 ///////////////////////////////////////////////////////////////
1527 msg_->sum(ecorr_mp2);
1528 msg_->sum(aoint_computed);
1529 msg_->sum(aointder_computed);
1530
1531 biggest_coefs.combine(msg_);
1532#if PRINT_BIGGEST_INTS
1533 biggest_ints_1.combine(msg_);
1534 biggest_ints_2.combine(msg_);
1535 biggest_ints_2s.combine(msg_);
1536 biggest_ints_3a.combine(msg_);
1537 biggest_ints_3.combine(msg_);
1538#endif
1539
1540 if (me == 0) {
1541 emp2 = escf + ecorr_mp2;
1542
1543#if PRINT_BIGGEST_INTS
1544 ExEnv::out0() << "biggest 1/4 transformed ints" << endl;
1545 for (i=0; i<biggest_ints_1.ncontrib(); i++) {
1546 ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1547 biggest_ints_1.indices(i)[0],
1548 biggest_ints_1.indices(i)[1],
1549 biggest_ints_1.indices(i)[2],
1550 biggest_ints_1.indices(i)[3],
1551 biggest_ints_1.val(i)
1552 )
1553 << endl;
1554 }
1555 ExEnv::outn() << "biggest 2/4 transformed ints" << endl;
1556 for (i=0; i<biggest_ints_2.ncontrib(); i++) {
1557 ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1558 biggest_ints_2.indices(i)[0],
1559 biggest_ints_2.indices(i)[1],
1560 biggest_ints_2.indices(i)[2],
1561 biggest_ints_2.indices(i)[3],
1562 biggest_ints_2.val(i)
1563 )
1564 << endl;
1565 }
1566 ExEnv::outn() << "restricted 2/4 transformed ints" << endl;
1567 for (i=0; i<biggest_ints_2s.ncontrib(); i++) {
1568 ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1569 biggest_ints_2s.indices(i)[0],
1570 biggest_ints_2s.indices(i)[1],
1571 biggest_ints_2s.indices(i)[2],
1572 biggest_ints_2s.indices(i)[3],
1573 biggest_ints_2s.val(i)
1574 )
1575 << endl;
1576 }
1577 ExEnv::outn() << "biggest 3/4 transformed ints (in 3.)" << endl;
1578 for (i=0; i<biggest_ints_3a.ncontrib(); i++) {
1579 ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1580 biggest_ints_3a.indices(i)[0],
1581 biggest_ints_3a.indices(i)[1],
1582 biggest_ints_3a.indices(i)[2],
1583 biggest_ints_3a.indices(i)[3],
1584 biggest_ints_3a.val(i)
1585 )
1586 << endl;
1587 }
1588 ExEnv::outn() << "biggest 3/4 transformed ints (in 4.)" << endl;
1589 for (i=0; i<biggest_ints_3.ncontrib(); i++) {
1590 ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1591 biggest_ints_3.indices(i)[0],
1592 biggest_ints_3.indices(i)[1],
1593 biggest_ints_3.indices(i)[2],
1594 biggest_ints_3.indices(i)[3],
1595 biggest_ints_3.val(i)
1596 )
1597 << endl;
1598 }
1599#endif
1600
1601 if (restart_orbital_memgrp_ == 0) {
1602 if (biggest_coefs.ncontrib()) {
1603 ExEnv::out0() << endl << indent
1604 << "Largest first order coefficients (unique):"
1605 << endl;
1606 }
1607 for (i=0; i<biggest_coefs.ncontrib(); i++) {
1608 int i0 = biggest_coefs.indices(i)[0];
1609 int i1 = biggest_coefs.indices(i)[1];
1610 int i2 = biggest_coefs.indices(i)[2] + nocc;
1611 int i3 = biggest_coefs.indices(i)[3] + nocc;
1612 int spincase = biggest_coefs.indices(i)[4];
1613 ExEnv::out0() << indent
1614 << scprintf(" %2d %12.8f %2d %3s %2d %3s -> %2d %3s %2d %3s (%s)",
1615 i+1, biggest_coefs.val(i),
1616 symorb_num_[i0]+1,
1617 ct.gamma(symorb_irrep_[i0]).symbol(),
1618 symorb_num_[i1]+1,
1619 ct.gamma(symorb_irrep_[i1]).symbol(),
1620 symorb_num_[i2]+1,
1621 ct.gamma(symorb_irrep_[i2]).symbol(),
1622 symorb_num_[i3]+1,
1623 ct.gamma(symorb_irrep_[i3]).symbol(),
1624 (spincase==1111?"++++":"+-+-")
1625 )
1626 << endl;
1627 }
1628 }
1629
1630 // Print out various energies etc.
1631
1632 if (debug_) {
1633 ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
1634 << indent << "(or integral derivatives) would have been computed\n"
1635 << indent << "without bounds checking: "
1636 << npass*nshell*nshell*(nshell+1)*(nshell+1)/2
1637 << endl;
1638
1639 ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
1640 << indent << "were computed: " << aoint_computed
1641 << endl;
1642
1643 if (dograd) {
1644 ExEnv::out0() << indent
1645 << "Number of shell quartets for which AO integral derivatives\n"
1646 << indent << "were computed: " << aointder_computed
1647 << endl;
1648 }
1649 }
1650
1651 ExEnv::out0()<<endl<<indent
1652 <<scprintf("RHF energy [au]: %17.12lf\n", escf);
1653 ExEnv::out0()<<indent
1654 <<scprintf("MP2 correlation energy [au]: %17.12lf\n", ecorr_mp2);
1655 ExEnv::out0()<<indent
1656 <<scprintf("MP2 energy [au]: %17.12lf\n", emp2);
1657 ExEnv::out0().flush();
1658 }
1659 if (method_ && strcmp(method_,"mp")) {
1660 ExEnv::out0() << indent
1661 << "MBPT2: bad method for closed shell case: " << method_
1662 << ", using mp" << endl;
1663 }
1664 set_energy(emp2);
1665 set_actual_value_accuracy(reference_->actual_value_accuracy()
1666 *ref_to_mp2_acc);
1667
1668 RefSCDimension nocc_act_dim(new SCDimension(nocc_act,1));
1669 nocc_act_dim->blocks()->set_subdim(0, new SCDimension(nocc_act));
1670 RefSCDimension nvir_act_dim(new SCDimension(nvir_act,1));
1671 nvir_act_dim->blocks()->set_subdim(0, new SCDimension(nvir_act));
1672 RefSCDimension nocc_dim(new SCDimension(nocc,1));
1673 nocc_dim->blocks()->set_subdim(0, new SCDimension(nocc));
1674 RefSCDimension nvir_dim(new SCDimension(nvir,1));
1675 nvir_dim->blocks()->set_subdim(0, new SCDimension(nvir));
1676 RefSCDimension nbasis_dim = ao_dimension()->blocks()->subdim(0);
1677 RefSCDimension noso_dim(new SCDimension(noso,1));
1678
1679 if (dograd || do_d1_) {
1680 msg_->sum(Laj,nvir*nocc);
1681
1682 RefSCMatrix T1_mat(nocc_act_dim, nvir_act_dim, kit);
1683 // the elements of T1_mat are the single-substitution amplitudes
1684 BiggestContribs biggest_t1(2,10);
1685 // compute the S2 norm of Lee et al. (s2_diag)
1686 double s2_diag = 0.0;
1687 for (j=nfzc; j<nocc; j++) {
1688 laj_ptr = &Laj[j*nvir];
1689 for (a=0; a<nvir_act; a++) {
1690 tmpval = 0.5**laj_ptr++/(evals[a+nocc]-evals[j]);
1691 T1_mat.set_element(j-nfzc,a,tmpval);
1692 biggest_t1.insert(tmpval,j,a);
1693 s2_diag += tmpval*tmpval;
1694 }
1695 }
1696 s2_diag = sqrt(s2_diag/(2*nocc_act));
1697 // compute the T1 matrix 1-norm
1698 double t1onenorm = 0.0;
1699 Ref<SCElementSumAbs> sumabs = new SCElementSumAbs;
1700 Ref<SCElementOp> genop = sumabs.pointer();
1701 for (a=0; a < nvir_act; a++) {
1702 sumabs->init();
1703 T1_mat.get_column(a).element_op(genop);
1704 if (t1onenorm < sumabs->result()) t1onenorm = sumabs->result();
1705 }
1706 // compute the T1 matrix inf-norm
1707 double t1infnorm = 0.0;
1708 for (j=0; j < nocc_act; j++) {
1709 sumabs->init();
1710 T1_mat.get_row(j).element_op(genop);
1711 if (t1infnorm < sumabs->result()) t1infnorm = sumabs->result();
1712 }
1713 // compute the T1 matrix 2-norm ( = D1(MP2) )
1714 RefSymmSCMatrix D1_mat(nocc_act_dim,kit);
1715 D1_mat.assign(0.0);
1716 D1_mat.accumulate_symmetric_product(T1_mat);
1717 T1_mat = 0;
1718 double d1_diag = sqrt(D1_mat.eigvals().get_element(nocc_act-1));
1719 D1_mat = 0;
1720 // print the norms
1721 ExEnv::out0()<<endl;
1722 ExEnv::out0()
1723 <<indent<<scprintf("D1(MP2) = %12.8f", d1_diag)
1724 <<endl
1725 <<indent<<scprintf("S2 matrix 1-norm = %12.8f", t1onenorm)
1726 <<endl
1727 <<indent<<scprintf("S2 matrix inf-norm = %12.8f", t1infnorm)
1728 <<endl
1729 <<indent<<scprintf("S2 diagnostic = %12.8f", s2_diag)
1730 <<endl;
1731 if (biggest_t1.ncontrib()) {
1732 ExEnv::out0() << endl
1733 << indent << "Largest S2 values (unique determinants):" << endl;
1734 }
1735 for (i=0; i<biggest_t1.ncontrib(); i++) {
1736 int i0 = biggest_t1.indices(i)[0];
1737 int i1 = biggest_t1.indices(i)[1] + nocc;
1738 ExEnv::out0()
1739 << indent << scprintf(" %2d %12.8f %3d %3s -> %4d %3s",
1740 i+1, biggest_t1.val(i),
1741 symorb_num_[i0]+1,
1742 ct.gamma(symorb_irrep_[i0]).symbol(),
1743 symorb_num_[i1]+1,
1744 ct.gamma(symorb_irrep_[i1]).symbol())
1745 << endl;
1746 }
1747
1748 } // if (dograd || do_d1_)
1749
1750 for (i=0; i<thr_->nthread(); i++) {
1751 delete e12thread[i];
1752 }
1753 delete[] e12thread;
1754
1755 // quit here if only the energy is needed
1756 if (!dograd) {
1757 delete[] tbints_; tbints_ = 0;
1758 if (do_d1_) delete[] Laj;
1759 delete[] scf_vector;
1760 delete[] scf_vector_dat;
1761 delete[] evals;
1762 tim_exit("mp2-mem");
1763 return;
1764 }
1765
1766 // Accumulate intermediate gradients on node 0
1767 sum_gradients(msg_, ginter, natom, 3);
1768
1769 // Add intermediate gradients to the gradient on node 0
1770 if (me==0)
1771 accum_gradients(gradient, ginter, natom, 3);
1772
1773 // Print out contribution to the gradient from non-sep. 2PDM
1774 if (debug_) {
1775 print_natom_3(ginter,
1776 "Contribution to MP2 gradient from non-separable 2PDM [au]:");
1777 }
1778
1779 ////////////////////////////////////////////////////////
1780 // Add contributions from all nodes to various matrices
1781 ////////////////////////////////////////////////////////
1782 tmpint = (nvir > nocc ? nvir:nocc);
1783 double *tmpmat = new double[tmpint*tmpint];
1784 msg_->sum(Pkj,nocc*(nocc+1)/2,tmpmat); // Pkj is now complete
1785 msg_->sum(Pab,nvir*(nvir+1)/2,tmpmat); // Pab is now complete
1786 msg_->sum(Wab,nvir*nvir,tmpmat);
1787 msg_->sum(Wkj,nocc*nocc,tmpmat);
1788 msg_->sum(Waj,nvir*nocc,tmpmat);
1789 if (do_d2_) {
1790 msg_->sum(d2occ_mat,nocc_act*(nocc_act+1)/2,tmpmat);
1791 msg_->sum(d2vir_mat,nvir_act*(nvir_act+1)/2,tmpmat);
1792 }
1793 delete[] tmpmat;
1794
1795 // Compute D2 diagnostic (d2_diag) from matrices d2_occ_mat and d2_vir_mat
1796 if (do_d2_) {
1797 RefSymmSCMatrix D2occ_mat(nocc_act_dim, kit);
1798 RefSymmSCMatrix D2vir_mat(nvir_act_dim,kit);
1799 D2occ_mat->assign(d2occ_mat);
1800 D2vir_mat->assign(d2vir_mat);
1801 d2o = sqrt(D2occ_mat.eigvals().get_element(nocc_act-1));
1802 d2v = sqrt(D2vir_mat.eigvals().get_element(nvir_act-1));
1803 d2_diag = (d2o > d2v ? d2o:d2v);
1804 ExEnv::out0() << endl
1805 << indent << scprintf("D2(MP1) = %12.8f", d2_diag) << endl << endl;
1806 delete[] d2occ_mat;
1807 delete[] d2vir_mat;
1808 }
1809
1810 // Finish computation of Wab
1811 tim_enter("Pab and Wab");
1812 pab_ptr = Pab;
1813 for (a=0; a<nvir_act; a++) { // active-active part of Wab
1814 wba_ptr = &Wab[a];
1815 wab_ptr = &Wab[a*nvir];
1816 for (b=0; b<=a; b++) {
1817 if (a==b) {
1818 *wab_ptr -= evals[nocc+a]**pab_ptr;
1819 }
1820 else {
1821 *wab_ptr -= evals[nocc+a]**pab_ptr;
1822 *wba_ptr -= evals[nocc+b]**pab_ptr;
1823 }
1824 pab_ptr++;
1825 wab_ptr++;
1826 wba_ptr += nvir;
1827 } // exit b loop
1828 } // exit a loop
1829 for (a=0; a<nfzv; a++) { // active-frozen part of Wab
1830 wba_ptr = &Wab[nvir_act+a];
1831 wab_ptr = &Wab[(nvir_act+a)*nvir];
1832 pab_ptr = &Pab[(nvir_act+a)*(nvir_act+a+1)/2];
1833 for (b=0; b<nvir_act; b++) {
1834 *wab_ptr -= evals[nocc+b]**pab_ptr;
1835 *wba_ptr -= evals[nocc+b]**pab_ptr;
1836 pab_ptr++;
1837 wab_ptr++;
1838 wba_ptr += nvir;
1839 } // exit b loop
1840 } // exit a loop
1841 // Wab is now complete
1842 tim_exit("Pab and Wab");
1843 RefSCMatrix Wab_matrix(nvir_dim, nvir_dim, kit);
1844 Wab_matrix->assign(Wab); // Put elements of Wab into Wab_matrix
1845 free(Wab);
1846
1847 // Update Wkj with contribution from Pkj
1848 tim_enter("Pkj and Wkj");
1849 pkj_ptr = Pkj;
1850 for (k=0; k<nocc; k++) {
1851 wjk_ptr = &Wkj[k];
1852 wkj_ptr = &Wkj[k*nocc];
1853 for (j=0; j<=k; j++) {
1854 if (k<nfzc && j<nfzc) { // don't want both j and k frozen
1855 wkj_ptr++;
1856 wjk_ptr += nocc;
1857 pkj_ptr++;
1858 continue;
1859 }
1860 if (j==k) {
1861 *wkj_ptr++ -= evals[k]**pkj_ptr++;
1862 }
1863 else if (j<nfzc) {
1864 *wkj_ptr++ -= evals[k]**pkj_ptr;
1865 *wjk_ptr -= evals[k]**pkj_ptr;
1866 pkj_ptr++;
1867 }
1868 else {
1869 *wkj_ptr++ -= evals[k]**pkj_ptr;
1870 *wjk_ptr -= evals[j]**pkj_ptr;
1871 pkj_ptr++;
1872 }
1873 wjk_ptr += nocc;
1874 } // exit j loop
1875 } // exit k loop
1876 tim_exit("Pkj and Wkj");
1877
1878 /////////////////////////////////
1879 // Finish the computation of Laj
1880 /////////////////////////////////
1881
1882 tim_enter("Laj");
1883
1884 RefSCMatrix Cv(nbasis_dim, nvir_dim, kit); // virtual block of scf_vector
1885 RefSCMatrix Co(nbasis_dim, nocc_dim, kit); // occupied block of scf_vector
1886 for (p=0; p<nbasis; p++) {
1887 c_pq = scf_vector[p];
1888 for (q=0; q<noso; q++) {
1889 if (q<nocc) Co->set_element(p, q, *c_pq++);
1890 else Cv->set_element(p, q-nocc, *c_pq++);
1891 }
1892 }
1893
1894 // Compute the density-like Dmat_matrix
1895 // (Cv*Pab_matrix*Cv.t() + Co*Pkj_matrix*Co.t())
1896 RefSymmSCMatrix Pab_matrix(nvir_dim,kit);
1897 RefSymmSCMatrix Pkj_matrix(nocc_dim,kit);
1898 RefSymmSCMatrix Dmat_matrix(nbasis_dim,kit);
1899 Pab_matrix->assign(Pab); // fill in elements of Pab_matrix from Pab
1900 free(Pab);
1901 Pkj_matrix->assign(Pkj); // fill in elements of Pkj_matrix from Pkj
1902 free(Pkj);
1903 Dmat_matrix.assign(0.0);
1904 Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
1905 Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
1906 // We now have the density-like matrix Dmat_matrix
1907
1908 // Compute the G matrix
1909
1910 RefSymmSCMatrix Gmat(nbasis_dim,kit);
1911 init_cs_gmat();
1912 tim_enter("make_gmat for Laj");
1913 make_cs_gmat_new(Gmat, Dmat_matrix);
1914 if (debug_ > 1) {
1915 Dmat_matrix.print("Dmat");
1916 Gmat.print("Gmat");
1917 }
1918 tim_exit("make_gmat for Laj");
1919
1920 // Finish computation of Laj
1921 RefSCMatrix Laj_matrix(nocc_dim,nvir_dim,kit); // elements are ordered as j*nvir+a
1922 Laj_matrix->assign(Laj);
1923 if (debug_ > 1) Laj_matrix->print("Laj (first bit)");
1924 Laj_matrix = Laj_matrix - 2*Co.t()*Gmat*Cv;
1925 if (debug_ > 1) Laj_matrix->print("Laj (all of it)");
1926 Laj_matrix->convert(Laj); // Put new Laj_matrix elements into Laj
1927
1928 tim_exit("Laj");
1929
1930 //////////////////////////////////////
1931 // Computation of Laj is now complete
1932 //////////////////////////////////////
1933
1934 ////////////////////////////
1935 // Solve the CPHF equations
1936 ////////////////////////////
1937 RefSCMatrix Paj_matrix(nvir_dim, nocc_dim, kit);
1938 tim_enter("cphf");
1939 cs_cphf(scf_vector, Laj, evals, Paj_matrix);
1940 tim_exit("cphf");
1941
1942 free(Laj);
1943
1944 // Finish computation of Waj
1945 for (a=0; a<nvir; a++) {
1946 waj_ptr = &Waj[a];
1947 for (j=0; j<nocc; j++) {
1948 *waj_ptr -= evals[j]*Paj_matrix->get_element(a,j);
1949 waj_ptr += nvir;
1950 }
1951 }
1952 // Waj is now complete
1953 RefSCMatrix Waj_matrix(nocc_dim, nvir_dim, kit);
1954 Waj_matrix->assign(Waj); // Put elements of Waj into Waj_matrix
1955 // NB. Waj_matrix elements are ordered as j*nvir+a
1956 free(Waj);
1957
1958
1959 // Finish computation of Wkj
1960 tim_enter("Pkj and Wkj");
1961 // Compute Dmat_matrix =
1962 // Co*Pkj_matrix*Co.t() + Co*Paj_matrix.t()*Cv.t()
1963 // + Cv*Paj_matrix*Co.t() + Cv*Pab_matrix*Cv.t();
1964 Dmat_matrix.assign(0.0);
1965 Dmat_matrix.accumulate_symmetric_sum(Cv*Paj_matrix*Co.t());
1966 Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
1967 Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
1968 tim_enter("make_gmat for Wkj");
1969 make_cs_gmat_new(Gmat, Dmat_matrix);
1970 tim_exit("make_gmat for Wkj");
1971 done_cs_gmat();
1972 for (i=0; i<thr_->nthread(); i++) tbints_[i] = 0;
1973 delete[] tbints_; tbints_ = 0;
1974 RefSCMatrix Wkj_matrix(nocc_dim, nocc_dim, kit);
1975 Wkj_matrix->assign(Wkj);
1976 Wkj_matrix = Wkj_matrix - 2*Co.t()*Gmat*Co;
1977 free(Wkj);
1978 // Wkj is now complete - not as Wkj but as Wkj_matrix
1979 tim_exit("Pkj and Wkj");
1980
1981 ////////////////////////////////////////////////////////////////
1982 // We now have the matrices Pkj_matrix, Paj_matrix, Pab_matrix,
1983 // Wkj_matrix, Waj_matrix, Wab_matrix and can compute the
1984 // remaining contributions to the gradient
1985 ///////////////////////////////////////////////////////////////
1986
1987 // Compute the second order correction to
1988 // the density matrix and energy weighted
1989 // density matrix in the AO basis
1990 RefSCMatrix P2AO_matrix(nbasis_dim, nbasis_dim, kit);
1991 RefSCMatrix P2MO_matrix(noso_dim, noso_dim, kit);
1992 RefSCMatrix W2AO_matrix(nbasis_dim, nbasis_dim, kit);
1993 RefSCMatrix W2MO_matrix(noso_dim, noso_dim, kit);
1994 RefSCMatrix SCF_matrix(nbasis_dim, noso_dim, kit);
1995 for (i=0; i<nocc; i++) {
1996 for (j=0; j<nocc; j++) {
1997 P2MO_matrix->set_element(i,j,Pkj_matrix->get_element(i,j));
1998 W2MO_matrix->set_element(i,j,Wkj_matrix->get_element(i,j));
1999 }
2000 for (j=nocc; j<noso; j++) {
2001 P2MO_matrix->set_element(i,j,Paj_matrix->get_element(j-nocc,i));
2002 W2MO_matrix->set_element(i,j,Waj_matrix->get_element(i,j-nocc));
2003 }
2004 }
2005 for (i=nocc; i<noso; i++) {
2006 for (j=0; j<nocc; j++) {
2007 P2MO_matrix->set_element(i,j,Paj_matrix->get_element(i-nocc,j));
2008 W2MO_matrix->set_element(i,j,Waj_matrix->get_element(j,i-nocc));
2009 }
2010 for (j=nocc; j<noso; j++) {
2011 P2MO_matrix->set_element(i,j,Pab_matrix->get_element(i-nocc,j-nocc));
2012 W2MO_matrix->set_element(i,j,Wab_matrix->get_element(i-nocc,j-nocc));
2013 }
2014 }
2015 for (i=0; i<nbasis; i++) {
2016 for (j=0; j<nocc; j++) {
2017 SCF_matrix->set_element(i,j,Co->get_element(i,j));
2018 }
2019 for (j=nocc; j<noso; j++) {
2020 SCF_matrix->set_element(i,j,Cv->get_element(i,j-nocc));
2021 }
2022 }
2023 P2AO_matrix = SCF_matrix * P2MO_matrix * SCF_matrix.t();
2024 W2AO_matrix = SCF_matrix * W2MO_matrix * SCF_matrix.t();
2025// P2AO_matrix = Co*(Pkj_matrix*Co.t() + Paj_matrix.t()*Cv.t()) +
2026// Cv*(Paj_matrix*Co.t() + Pab_matrix*Cv.t());
2027// W2AO_matrix = Co*(Wkj_matrix*Co.t() + Waj_matrix*Cv.t()) +
2028// Cv*(Waj_matrix.t()*Co.t() + Wab_matrix*Cv.t());
2029
2030 if (debug_ > 1) {
2031 SCF_matrix.print("SCF_matrix");
2032 P2MO_matrix.print("P2MO_matrix");
2033 W2MO_matrix.print("W2MO_matrix");
2034 P2AO_matrix.print("P2AO_matrix");
2035 W2AO_matrix.print("W2AO_matrix");
2036 }
2037
2038 // Convert P2AO_matrix and W2AO_matrix to double*
2039 P2AO = new double[nbasis*nbasis];
2040 W2AO = new double[nbasis*nbasis];
2041 P2AO_matrix->convert(P2AO);
2042 W2AO_matrix->convert(W2AO);
2043
2044 // Compute the HF density matrix and
2045 // energy weighted density matrix
2046 PHF = new double[nbasis*nbasis];
2047 WHF = new double[nbasis*nbasis];
2048 phf_ptr = PHF;
2049 whf_ptr = WHF;
2050 for (p=0; p<nbasis; p++) {
2051 for (q=0; q<nbasis; q++) {
2052 *phf_ptr++ = 0.0;
2053 *whf_ptr++ = 0.0;
2054 }
2055 }
2056 phf_ptr = PHF;
2057 whf_ptr = WHF;
2058 for (p=0; p<nbasis; p++) {
2059 for (q=0; q<nbasis; q++) {
2060 c_pi = scf_vector[p];
2061 c_qi = scf_vector[q];
2062 for (i=0; i<nocc; i++) {
2063 tmpval = 2* *c_pi++ * *c_qi++;
2064 *phf_ptr += tmpval;
2065 *whf_ptr -= evals[i] * tmpval;
2066 } // exit i loop
2067 phf_ptr++;
2068 whf_ptr++;
2069 } // exit q loop
2070 } // exit p loop
2071
2072 // Compute the MP2 density and energy weighted density
2073
2074 // PMP2 = PHF + P2AO; WMP2 = WHF + W2AO
2075 PMP2 = new double[nbasis*nbasis];
2076 WMP2 = new double[nbasis*nbasis];
2077 // Initialize PMP2 and WMP2
2078 pmp2_ptr = PMP2;
2079 wmp2_ptr = WMP2;
2080 for (p=0; p<nbasis; p++) {
2081 for (q=0; q<nbasis; q++) {
2082 *pmp2_ptr++ = 0.0;
2083 *wmp2_ptr++ = 0.0;
2084 }
2085 }
2086 pmp2_ptr = PMP2;
2087 wmp2_ptr = WMP2;
2088 p2ao_ptr = P2AO;
2089 w2ao_ptr = W2AO;
2090 phf_ptr = PHF;
2091 whf_ptr = WHF;
2092 for (p=0; p<nbasis; p++) {
2093 for (q=0; q<nbasis; q++) {
2094 *pmp2_ptr++ = *phf_ptr++ + *p2ao_ptr++;
2095 *wmp2_ptr++ = *whf_ptr++ + *w2ao_ptr++;
2096 }
2097 }
2098 delete[] W2AO;
2099
2100 if (debug_ > 1) {
2101 RefSCMatrix tmpmat(ao_dimension(), ao_dimension(), kit);
2102 tmpmat->assign(PMP2);
2103 tmpmat.print("PMP2");
2104 tmpmat->assign(P2AO);
2105 tmpmat.print("P2AO");
2106 tmpmat->assign(PHF);
2107 tmpmat.print("PHF");
2108 tmpmat->assign(WMP2);
2109 tmpmat.print("WMP2");
2110 }
2111
2112 ////////////////////////////////////////////////
2113 // Compute the contribution to the MP2 gradient
2114 // from the separable part of the 2PDM
2115 ////////////////////////////////////////////////
2116
2117 zero_gradients(ginter, natom, 3);
2118 zero_gradients(hf_ginter, natom, 3);
2119 tim_enter("sep 2PDM contrib.");
2120
2121 CSGradS2PDM** s2pdmthread = new CSGradS2PDM*[thr_->nthread()];
2122 for (i=0; i<thr_->nthread(); i++) {
2123 s2pdmthread[i] = new CSGradS2PDM(i, thr_->nthread(), me, nproc,
2124 lock, basis(), tbintder_[i],
2125 PHF, P2AO, tol, debug_, dynamic_);
2126 thr_->add_thread(i,s2pdmthread[i]);
2127# if SINGLE_THREAD_S2PDM
2128 s2pdmthread[i]->run();
2129# endif
2130 }
2131# if !SINGLE_THREAD_S2PDM
2132 thr_->start_threads();
2133 thr_->wait_threads();
2134# endif
2135 for (i=0; i<thr_->nthread(); i++) {
2136 s2pdmthread[i]->accum_mp2_contrib(ginter);
2137 s2pdmthread[i]->accum_hf_contrib(hf_ginter);
2138 delete s2pdmthread[i];
2139 }
2140 sum_gradients(msg_, ginter, molecule()->natom(), 3);
2141 sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
2142 delete[] s2pdmthread;
2143
2144 tim_exit("sep 2PDM contrib.");
2145 delete[] P2AO;
2146
2147 // The separable 2PDM contribution to the gradient has now been
2148 // accumulated in ginter on node 0; add it to the total gradients
2149 if (me == 0) {
2150 accum_gradients(gradient, ginter, natom, 3);
2151 accum_gradients(hf_gradient, hf_ginter, natom, 3);
2152 }
2153 // Print out the contribution to the gradient from sep. 2PDM
2154 if (debug_) {
2155 print_natom_3(hf_ginter,
2156 "Contribution from separable 2PDM to HF gradient [au]:");
2157 print_natom_3(ginter,
2158 "Contribution from separable 2PDM to MP2 gradient [au]:");
2159 }
2160
2161 // Done with two-electron integrals
2162 tbint_ = 0;
2163 if (dograd || do_d1_) {
2164 delete[] tbintder_;
2165 tbintder_ = 0;
2166 }
2167
2168 /////////////////////////////////////////////////////////////
2169 // Compute the one-electron contribution to the MP2 gradient
2170 /////////////////////////////////////////////////////////////
2171
2172 zero_gradients(ginter, natom, 3);
2173 zero_gradients(hf_ginter, natom, 3);
2174 tim_enter("hcore contrib.");
2175 hcore_cs_grad(PHF, PMP2, hf_ginter, ginter);
2176 tim_exit("hcore contrib.");
2177 delete[] PHF;
2178 delete[] PMP2;
2179 // The hcore contribution to the gradient has now been accumulated
2180 // in ginter on node 0; add it to the total gradients
2181 if (me == 0) {
2182 accum_gradients(gradient, ginter, natom, 3);
2183 accum_gradients(hf_gradient, hf_ginter, natom, 3);
2184 }
2185 // Print out the contribution to the gradient from hcore
2186 if (debug_) {
2187 print_natom_3(hf_ginter, "Contribution to HF gradient from hcore [au]:");
2188 print_natom_3(ginter, "Contribution to MP2 gradient from hcore [au]:");
2189 }
2190
2191 zero_gradients(ginter, natom, 3);
2192 zero_gradients(hf_ginter, natom, 3);
2193 tim_enter("overlap contrib.");
2194 overlap_cs_grad(WHF, WMP2, hf_ginter, ginter);
2195 delete[] WHF;
2196 tim_exit("overlap contrib.");
2197 delete[] WMP2;
2198 // The overlap contribution to the gradient has now been accumulated
2199 // in ginter on node 0; add it to the total gradients
2200 if (me == 0) {
2201 accum_gradients(gradient, ginter, natom, 3);
2202 accum_gradients(hf_gradient, hf_ginter, natom, 3);
2203 }
2204
2205 // Print out the overlap contribution to the gradient
2206 if (debug_) {
2207 print_natom_3(hf_ginter, "Overlap contribution to HF gradient [au]:");
2208 print_natom_3(ginter,"Overlap contribution to MP2 gradient [au]:");
2209 }
2210
2211 ////////////////////////////////////////////////////////
2212 // Compute the nuclear contribution to the MP2 gradient
2213 ////////////////////////////////////////////////////////
2214
2215 if (me == 0) {
2216 nuclear_repulsion_energy_gradient(ginter);
2217 accum_gradients(gradient, ginter, natom, 3);
2218 accum_gradients(hf_gradient, ginter, natom, 3);
2219
2220 }
2221
2222 // Print out the nuclear contribution to the gradient
2223 if (debug_)
2224 print_natom_3(ginter,"Nuclear contribution to MP2 gradient [au]:");
2225
2226
2227 ////////////////////////////////////////////////////////
2228 // The computation of the MP2 gradient is now complete;
2229 // print out the gradient
2230 ////////////////////////////////////////////////////////
2231 if (debug_) {
2232 ExEnv::out0() << "Obtaining HF gradient" << endl;
2233 print_natom_3(ref()->gradient(),"HF gradient from HF");
2234 print_natom_3(hf_gradient,"Total HF gradient from MP2 [au]:");
2235 }
2236 print_natom_3(gradient,"Total MP2 gradient [au]:");
2237
2238 msg_->bcast(gradient_dat, natom*3);
2239 RefSCVector gradientvec = matrixkit()->vector(moldim());
2240 gradientvec->assign(gradient_dat);
2241 set_gradient(gradientvec);
2242
2243 msg_->bcast(hf_gradient_dat, natom*3);
2244 hf_gradient_ = matrixkit()->vector(moldim());
2245 hf_gradient_->assign(hf_gradient_dat);
2246
2247 delete[] gradient;
2248 delete[] gradient_dat;
2249
2250 delete[] hf_gradient;
2251 delete[] hf_gradient_dat;
2252
2253 for (i=0; i<natom; i++) {
2254 delete[] ginter[i];
2255 delete[] hf_ginter[i];
2256 }
2257 delete[] ginter;
2258 delete[] hf_ginter;
2259
2260 delete[] scf_vector;
2261 delete[] scf_vector_dat;
2262 delete[] evals;
2263
2264 tim_exit("mp2-mem");
2265 }
2266
2267///////////////////////////////////////////////////////////
2268// Compute the contribution to the MP2 gradient from hcore
2269///////////////////////////////////////////////////////////
2270void
2271MBPT2::hcore_cs_grad(double *PHF, double *PMP2,
2272 double **hf_ginter, double **ginter)
2273{
2274
2275 int i, j, k, l, m;
2276 int jj, kk;
2277 int jsize, ksize;
2278 int j_offset, k_offset;
2279 int jk_index;
2280 int index;
2281 int nshell;
2282 int nbasis;
2283 int nproc = msg_->n();
2284 int me = msg_->me();
2285
2286 const double *oneebuf; // 1-electron buffer
2287 double tmpval1, tmpval2;
2288 double gxyz[3];
2289 double hf_gxyz[3];
2290
2291 // Initialize 1e object
2292 Ref<OneBodyDerivInt> obintder_ = integral()->hcore_deriv();
2293 oneebuf = obintder_->buffer();
2294
2295 nshell = basis()->nshell();
2296 nbasis = basis()->nbasis();
2297
2298 ///////////////////////////////////////////////////////////////////////////////
2299 // Compute the kinetic and nuclear-electron energy contribution to the gradient
2300 ///////////////////////////////////////////////////////////////////////////////
2301
2302 jk_index = 0;
2303
2304 for (i=0; i<molecule()->natom(); i++) {
2305 for (j=0; j<nshell; j++) {
2306 jsize = basis()->shell(j).nfunction();
2307 j_offset = basis()->shell_to_function(j);
2308
2309 for (k=0; k<=j; k++) {
2310 ksize = basis()->shell(k).nfunction();
2311 k_offset = basis()->shell_to_function(k);
2312
2313 if (jk_index++%nproc == me) {
2314 obintder_->compute_shell(j,k,i);
2315
2316 for (l=0; l<3; l++) { gxyz[l] = 0.0; hf_gxyz[l] = 0.0; }
2317
2318 index = 0;
2319
2320 for (jj=0; jj<jsize; jj++) {
2321 for (kk=0; kk<ksize; kk++) {
2322 tmpval1 = PMP2[(j_offset + jj)*nbasis + k_offset + kk];
2323 tmpval2 = PHF[(j_offset + jj)*nbasis + k_offset + kk];
2324 for (m=0; m<3; m++) {
2325 gxyz[m] += oneebuf[index] * tmpval1;
2326 hf_gxyz[m] += oneebuf[index] * tmpval2;
2327 index++;
2328 } // exit m loop
2329 } // exit kk loop
2330 } // exit jj loop
2331
2332 if (j != k) {
2333 // off-diagonal blocks
2334 for (l=0; l<3; l++) {
2335 gxyz[l] *= 2.0;
2336 hf_gxyz[l] *= 2.0;
2337 }
2338 }
2339
2340 for (l=0; l<3; l++) {
2341 ginter[i][l] += gxyz[l];
2342 hf_ginter[i][l] += hf_gxyz[l];
2343 }
2344
2345 } // exit "if"
2346 } // exit k loop
2347 } // exit j loop
2348 } // exit i loop
2349
2350 /* Accumulate the nodes' intermediate gradients on node 0 */
2351 sum_gradients(msg_, ginter, molecule()->natom(), 3);
2352 sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
2353}
2354
2355
2356////////////////////////////////////////////////////
2357// Compute the overlap contribution to the gradient
2358////////////////////////////////////////////////////
2359void
2360MBPT2::overlap_cs_grad(double *WHF, double *WMP2,
2361 double **hf_ginter, double **ginter)
2362{
2363 int i, j, k, l, m;
2364 int jj, kk;
2365 int jj_index, kk_index;
2366 int jsize, ksize;
2367 int j_offset, k_offset;
2368 int jk_index;
2369 int index;
2370 int nshell;
2371 int nbasis;
2372
2373 const double *oneebuf; // 1-electron buffer
2374 double tmpval1, tmpval2;
2375 double hf_tmpval1, hf_tmpval2;
2376 double gxyz[3];
2377 double hf_gxyz[3];
2378
2379 // Initialize 1e object
2380 Ref<OneBodyDerivInt> obintder_ = integral()->overlap_deriv();
2381 oneebuf = obintder_->buffer();
2382
2383 nshell = basis()->nshell();
2384 nbasis = basis()->nbasis();
2385 int nproc = msg_->n();
2386 int me = msg_->me();
2387
2388 for (i=0; i<molecule()->natom(); i++) {
2389 jk_index = 0;
2390
2391 for (j=0; j<nshell; j++) {
2392 j_offset = basis()->shell_to_function(j);
2393 jsize = basis()->shell(j).nfunction();
2394
2395 for (k=0; k<=j; k++) {
2396 k_offset = basis()->shell_to_function(k);
2397 ksize = basis()->shell(k).nfunction();
2398
2399 if (jk_index++%nproc == me) {
2400 obintder_->compute_shell(j,k,i);
2401
2402 for (l=0; l<3; l++) {
2403 hf_gxyz[l] = 0.0;
2404 gxyz[l] = 0.0;
2405 }
2406 index = 0;
2407
2408 for (jj=0; jj<jsize; jj++) {
2409 jj_index = j_offset + jj;
2410 for (kk=0; kk<ksize; kk++) {
2411 kk_index = k_offset + kk;
2412 if (kk_index > jj_index) {
2413 index += 3; // increment index since m-loop will be skipped
2414 break; // skip to next jj value
2415 }
2416 // NB. WMP2 is not a symmetrix matrix
2417 tmpval1 = WMP2[jj_index*nbasis + kk_index];
2418 tmpval2 = WMP2[kk_index*nbasis + jj_index];
2419 hf_tmpval1 = WHF[jj_index*nbasis + kk_index];
2420 hf_tmpval2 = WHF[kk_index*nbasis + jj_index];
2421
2422 for (m=0; m<3; m++) {
2423 if (jj_index != kk_index) {
2424 gxyz[m] += oneebuf[index] * (tmpval1 + tmpval2);
2425 hf_gxyz[m] += oneebuf[index] * (hf_tmpval1 + hf_tmpval2);
2426 }
2427 else {
2428 gxyz[m] += oneebuf[index] * tmpval1;
2429 hf_gxyz[m] += oneebuf[index] * hf_tmpval1;
2430 }
2431 index++;
2432 } // exit m loop
2433 } // exit kk loop
2434 } // exit jj loop
2435
2436 for (l=0; l<3; l++) {
2437 ginter[i][l] += gxyz[l];
2438 hf_ginter[i][l] += hf_gxyz[l];
2439 }
2440 } // exit "if"
2441 } // exit k loop
2442 } // exit j loop
2443 } // exit i loop
2444
2445 /* Accumulate the nodes' intermediate gradients on node 0 */
2446 sum_gradients(msg_, ginter, molecule()->natom(), 3);
2447 sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
2448}
2449
2450static void
2451sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2)
2452{
2453 int i;
2454
2455 if (msg->n() == 1) return;
2456
2457 for (i=0; i<n1; i++) {
2458 msg->sum(f[i],n2);
2459 }
2460}
2461
2462static void
2463zero_gradients(double **f, int n1, int n2)
2464{
2465 for (int i=0; i<n1; i++) {
2466 for (int j=0; j<3; j++) f[i][j] = 0.0;
2467 }
2468}
2469
2470static void
2471accum_gradients(double **g, double **f, int n1, int n2)
2472{
2473 for (int i=0; i<n1; i++) {
2474 for (int j=0; j<3; j++) g[i][j] += f[i][j];
2475 }
2476}
2477
2478/////////////////////////////////////
2479// Compute the batchsize
2480//
2481// Only arrays allocated before exiting the loop over
2482// i-batches are included here - only these arrays
2483// affect the batch size.
2484/////////////////////////////////////
2485int
2486MBPT2::compute_cs_batchsize(size_t mem_static, int nocc_act)
2487{
2488 size_t mem_dyn; // dynamic memory available
2489 distsize_t maxdyn;
2490 int ni;
2491
2492 ///////////////////////////////////////
2493 // the largest memory requirement will
2494 // either occur just before the end of
2495 // the 1. q.b.t. (mem1) or just before
2496 // the end of the i-batch loop (mem2)
2497 ///////////////////////////////////////
2498
2499 if (mem_alloc > mem_static) {
2500 mem_dyn = mem_alloc - mem_static;
2501 }
2502 else {
2503 mem_dyn = 0;
2504 }
2505
2506 // First determine if calculation is possible at all (i.e., if ni=1 possible)
2507
2508 ni = 1;
2509 maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
2510
2511 if (maxdyn > mem_dyn) {
2512 return 0;
2513 }
2514
2515 ni = 2;
2516 while (ni<=nocc_act) {
2517 maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
2518 if (maxdyn >= mem_dyn) {
2519 ni--;
2520 break;
2521 }
2522 ni++;
2523 }
2524 if (ni > nocc_act) ni = nocc_act;
2525
2526 return ni;
2527}
2528
2529/////////////////////////////////////
2530// Compute required (dynamic) memory
2531// for a given batch size
2532//
2533// Only arrays allocated before exiting the loop over
2534// i-batches are included here - only these arrays
2535// affect the batch size.
2536/////////////////////////////////////
2537distsize_t
2538MBPT2::compute_cs_dynamic_memory(int ni, int nocc_act)
2539{
2540 int index;
2541 distsize_t mem1, mem2, mem3;
2542 distsize_t maxdyn;
2543 distsize_t tmp;
2544 int i, j;
2545 int nij;
2546 int nproc = msg_->n();
2547
2548 ///////////////////////////////////////
2549 // the largest memory requirement will
2550 // either occur just before the end of
2551 // the 1. q.b.t. (mem1) or just before
2552 // the end of the i-batch loop (mem2)
2553 ///////////////////////////////////////
2554
2555 // compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes
2556 index = 0;
2557 nij = 0;
2558 for (i=0; i<ni; i++) {
2559 for (j=0; j<nocc; j++) {
2560 if (index++ % nproc == 0) nij++;
2561 }
2562 }
2563 mem1 = sizeof(double)*((distsize_t)nij*nbasis*nbasis
2564 + nbasis*nvir);
2565 mem2 = sizeof(double)*((distsize_t)thr_->nthread()*ni*nbasis*nfuncmax*nfuncmax
2566 + (distsize_t)nij*nbasis*nbasis
2567 + ni*nbasis + nbasis*nfuncmax
2568 + 2*nfuncmax*nfuncmax*nfuncmax*nfuncmax);
2569 mem3 = sizeof(double)*((distsize_t)ni*nbasis*nfuncmax*nfuncmax
2570 + (distsize_t)nij*nbasis*nbasis
2571 + 2*(2 + nbasis*nfuncmax));
2572 tmp = (mem2>mem3 ? mem2:mem3);
2573 maxdyn = (mem1>tmp ? mem1:tmp);
2574
2575 return maxdyn;
2576}
2577
2578////////////////////////////////////////////////////////////////////////////
2579
2580// Local Variables:
2581// mode: c++
2582// c-file-style: "CLJ-CONDENSED"
2583// End:
Note: See TracBrowser for help on using the repository browser.