Context Navigation

csgrad.cc@ aae63a

Visit:

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable

Last change on this file since aae63a was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 82.1 KB

Line
1	//
2	// csgrad.cc
3	//
4	// Copyright (C) 1996 Limit Point Systems, Inc.
5	//
6	// Author: Ida Nielsen <ida@kemi.aau.dk>
7	// Maintainer: LPS
8	//
9	// This file is part of the SC Toolkit.
10	//
11	// The SC Toolkit is free software; you can redistribute it and/or modify
12	// it under the terms of the GNU Library General Public License as published by
13	// the Free Software Foundation; either version 2, or (at your option)
14	// any later version.
15	//
16	// The SC Toolkit is distributed in the hope that it will be useful,
17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	// GNU Library General Public License for more details.
20	//
21	// You should have received a copy of the GNU Library General Public License
22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24	//
25	// The U.S. Government is granted a limited license as per AL 91-7.
26	//
27
28	#include <stdlib.h>
29	#include <math.h>
30	#include <limits.h>
31
32	#include <util/misc/formio.h>
33	#include <util/misc/timer.h>
34	#include <util/group/memory.h>
35	#include <util/group/message.h>
36	#include <util/class/class.h>
37	#include <util/state/state.h>
38	#include <math/scmat/matrix.h>
39	#include <math/scmat/blocked.h>
40	#include <chemistry/molecule/molecule.h>
41	#include <chemistry/qc/basis/integral.h>
42	#include <chemistry/qc/mbpt/bzerofast.h>
43	#include <chemistry/qc/mbpt/mbpt.h>
44	#include <chemistry/qc/mbpt/util.h>
45	#include <chemistry/qc/mbpt/csgrade12.h>
46	#include <chemistry/qc/mbpt/csgrad34qb.h>
47	#include <chemistry/qc/mbpt/csgrads2pdm.h>
48
49	using namespace std;
50	using namespace sc;
51
52	#define SINGLE_THREAD_E12 0
53	#define SINGLE_THREAD_QBT34 0
54	#define SINGLE_THREAD_S2PDM 0
55
56	#define PRINT2Q 0
57	#define PRINT3Q 0
58	#define PRINT4Q 0
59	#if PRINT_BIGGEST_INTS
60	BiggestContribs biggest_ints_1(4,40);
61	#endif
62
63	#define WRITE_DOUBLES 0
64
65	static void sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2);
66	static void zero_gradients(double **f, int n1, int n2);
67	static void accum_gradients(double g, double f, int n1, int n2);
68
69	#define PRINT1Q 0
70
71	#if PRINT_CONTRIB
72	static void
73	sw(int&i,int&j)
74	{
75	int tmp = i;
76	i = j;
77	j = tmp;
78	}
79
80	static void
81	print_contrib(double tmpval, int num, int onum,
82	int P,int Q,int R,int S, int p,int q,int r,int s)
83	{
84
85	printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
86	num, P, Q, R, S, p, q, r, s, tmpval);
87	printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
88	onum, P, Q, R, S, p, q, r, s, -tmpval);
89
90	if (p < q) {
91	sw(p,q); sw(P,Q);
92	}
93	if (r < s) {
94	sw(r,s); sw(R,S);
95	}
96	if (p < r \|\| (p == r && q < s)) {
97	sw(P,R); sw(p,r);
98	sw(Q,S); sw(q,s);
99	}
100
101	printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
102	num, P, Q, R, S, p, q, r, s, tmpval);
103	printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
104	onum, P, Q, R, S, p, q, r, s, -tmpval);
105	}
106	#endif
107
108	void
109	MBPT2::compute_cs_grad()
110	{
111
112	// New version of MP2 gradient program which uses the full
113	// permutational symmetry of the two-electron integral derivatives
114
115	Ref<SCMatrixKit> kit = basis()->matrixkit();
116
117	int do_d2_ = 1; // if true, compute d2 diagnostic
118
119	int nij; // number of i,j pairs on a node (for e.g., mo_int)
120	double *mo_int; // MO integrals of type (ov\|ov)
121	// (and these integrals divided by
122	// orbital energy denominators)
123	double *integral_iqjs; // half-transformed integrals
124
125	int nocc_act, nvir_act;
126	int i, j, k;
127	int ii, bb;
128	int x, y;
129	int a, b, c;
130	int nshell;
131	int offset;
132	int ik_offset;
133	int i_offset;
134	int npass, pass;
135	int tmpint;
136	int np, nq, nr, ns;
137	int P, Q, R, S;
138	int p, q, r, s;
139	int bf1, bf2, bf3, bf4;
140	int index;
141	int me;
142	int nproc;
143	int rest;
144	int p_offset, q_offset, r_offset, s_offset;
145
146	int aoint_computed = 0;
147	int aointder_computed = 0;
148	int xyz;
149	int natom = molecule()->natom(); // the number of atoms
150	int int_index;
151	size_t mem_static; // static memory in bytes
152	int ij_proc; // the processor which has ij pair
153	int ij_index; // of the ij pairs on a proc, this ij pair is number ij_index
154	// (i.e., ij_index < nij)
155	int ik_proc; // the processor which has ik pair
156	int ik_index;
157	int jloop, kloop;
158
159	int ni;
160
161	double *evals; // scf eigenvalues
162	double iajb_ptr, ibja_ptr, iakb_ptr, ibka_ptr;
163	double iajc_ptr, ibjc_ptr, icjb_ptr, icja_ptr;
164	double ijkb_ptr, ibkj_ptr;
165	double pqrs;
166	double *c_sa, c_rj;
167	double c_pi, c_qi, *c_sj;
168	double c_qx, c_qa, c_sb, c_pa, c_pq, c_sy;
169	double delta_ijab, delta_ijbc, delta_ijac;
170	double ecorr_mp2 = 0.0;
171	double escf;
172	double emp2=0.0;
173	int tol; // log2 of the erep tolerance
174	// (erep < 2^tol => discard)
175	double Wkj=0,Wab=0,*Waj=0;// occ-occ, vir-vir and vir-occ parts of
176	// second order correction to MP2
177	// energy weighted density matrix
178	double Pkj=0,Pab=0; // occ-occ and vir-vir parts of second order
179	// correction to MP2 density matrix
180	double d2occ_mat, d2vir_mat; // matrices for computation of D2 diagnostic
181	double *Laj=0; // MP2 Lagrangian
182	double *Lpi; // contrib to MP2 Lagrangian partially in AO basis
183	double pkj_ptr=0, pab_ptr;
184	double *d2occ_mat_ptr;
185	double *d2vir_mat_ptr;
186	double wkj_ptr, wjk_ptr, wab_ptr, wba_ptr, *waj_ptr=0;
187	double laj_ptr, lpi_ptr, *lqi_ptr;
188	double gamma_iajs, gamma_iajs_tmp;
189	// partially back-transformed non-sep 2PDM's
190	double *gamma_iqjs_tmp;
191	double *gamma_iajs_ptr;
192	double *gamma_iqjs_ptr;
193	double *gammabuf; // buffer used for sending elements of gamma_iqjs
194	double *mo_intbuf; // buffer used for sending mo integrals
195	double tmpval, tmpval1;
196	double P2AO, W2AO;
197	double p2ao_ptr, w2ao_ptr;
198	double PHF, WHF;
199	double phf_ptr, whf_ptr;
200	double PMP2, WMP2;
201	double pmp2_ptr, wmp2_ptr;
202
203	double *ixjs_tmp; // three-quarter transformed two-el integrals
204	double *integral_ixjs; // all three-quarter transformed two-el integrals
205	double *integral_iajy; // mo integrals (y = any MO)
206	double *integral_ikja; // mo integrals
207	double *integral_iqjs_ptr;
208	double *iajy_ptr;
209	double *ixjs_ptr;
210	double *ikja_ptr;
211	double iajs_ptr, ikjs_ptr;
212
213	double *gradient=0, gradient_dat=0; // The MP2 gradient
214	double *hf_gradient=0, hf_gradient_dat=0; // The HF gradient
215	double **ginter=0; // Intermediates for the MP2 gradient
216	double **hf_ginter=0; // Intermediates for the HF gradient
217	double d2o, d2v, d2_diag;
218
219	BiggestContribs biggest_coefs(5,10);
220	CharacterTable ct = molecule()->point_group()->char_table();
221
222	#if PRINT_BIGGEST_INTS
223	BiggestContribs biggest_ints_2(4,40);
224	BiggestContribs biggest_ints_2s(4,40);
225	BiggestContribs biggest_ints_3a(4,40);
226	BiggestContribs biggest_ints_3(4,40);
227	#endif
228
229	int dograd = gradient_needed();
230
231	tim_enter("mp2-mem");
232
233	nfuncmax = basis()->max_nfunction_in_shell();
234
235	nshell = basis()->nshell();
236
237	me = msg_->me();
238
239	if (me == 0) {
240	ExEnv::out0() << endl << indent
241	<< "Entered memgrp based MP2 routine" << endl;
242	}
243
244	nproc = msg_->n();
245	if (me == 0)
246	ExEnv::out0() << indent << scprintf("nproc = %i", nproc) << endl;
247
248	tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
249
250	nocc = 0;
251	for (i=0; i<oso_dimension()->n(); i++) {
252	if (reference_->occupation(i) == 2.0) nocc++;
253	}
254
255	nocc_act = nocc - nfzc;
256	nvir = noso - nocc;
257	nvir_act = nvir - nfzv;
258
259	// Do a few preliminary tests to make sure the desired calculation
260	// can be done (and appears to be meaningful!)
261
262	if (nocc_act <= 0) {
263	if (me == 0) {
264	ExEnv::err0() << "There are no active occupied orbitals; program exiting" << endl;
265	}
266	abort();
267	}
268
269	if (nvir_act <= 0) {
270	if (me == 0) {
271	ExEnv::err0() << "There are no active virtual orbitals; program exiting" << endl;
272	}
273	abort();
274	}
275
276	if (restart_orbital_memgrp_) {
277	if (!dograd && !do_d1_ && !do_d2_) {
278	ExEnv::out0() << indent
279	<< scprintf("Restarting at orbital %d with partial energy %18.14f",
280	restart_orbital_memgrp_, restart_ecorr_)
281	<< endl;
282	ecorr_mp2 = restart_ecorr_;
283	}
284	else {
285	ExEnv::out0() << indent
286	<< "Restart requested but not possible with gradients, D1, or D2"
287	<< endl;
288	restart_ecorr_ = 0.0;
289	restart_orbital_memgrp_ = 0;
290	}
291	}
292	else {
293	restart_ecorr_ = 0.0;
294	}
295
296	////////////////////////////////////////////////////////
297	// Compute batch size ni for mp2 loops;
298	//
299	// The following arrays are kept throughout (all of type double):
300	// scf_vector, gradient, ginter, Pkj, Pab, Wkj, Wab, Waj, Laj
301	// and memory allocated for these arrays and integral evaluators
302	// is called mem_static
303	//
304	////////////////////////////////////////////////////////
305	if (me == 0) {
306	mem_static = nbasis*noso; // scf vector
307	mem_static += 2nbasisnfuncmax; // iqjs & iqjr
308	if (dograd) {
309	mem_static += 9*natom; // gradient & ginter & hf_ginter
310	mem_static += (nocc*(nocc+1))/2; // Pkj
311	mem_static += (nvir*(nvir+1))/2; // Pab
312	mem_static += nocc*nocc; // Wkj
313	mem_static += nvir*nvir; // Wab
314	mem_static += 2noccnvir; // Waj & Laj
315	if (do_d2_) {
316	mem_static += (nocc_act*(nocc_act+1))/2; // d2occ_mat
317	mem_static += (nvir_act*(nvir_act+1))/2; // d2vir_mat
318	}
319	}
320	else if (do_d1_) {
321	mem_static += nocc*nvir; // partial Laj
322	}
323	mem_static *= sizeof(double);
324	int nthreads = thr_->nthread();
325	mem_static += nthreads * integral()->storage_required_eri(basis()); // integral evaluators
326	ni = compute_cs_batchsize(mem_static, nocc_act-restart_orbital_memgrp_);
327	}
328
329	if (max_norb_ > 0 && ni > max_norb_) {
330	ExEnv::out0() << indent
331	<< "\"max_norb\" set: could have done "
332	<< ni << " orbitals per pass otherwise."
333	<< endl;
334	ni = max_norb_;
335	}
336
337	// Send value of ni and mem_static to other nodes
338	msg_->bcast(ni);
339	double dmem_static = mem_static;
340	msg_->bcast(dmem_static);
341	mem_static = size_t(dmem_static);
342
343	// Compute the storage to be used by the integral routines (required plus optional)
344	size_t dyn_mem = distsize_to_size(compute_cs_dynamic_memory(ni,nocc_act));
345	int mem_remaining;
346	if (mem_alloc <= (dyn_mem + mem_static)) mem_remaining = 0;
347	else mem_remaining = mem_alloc - dyn_mem - mem_static;
348	mem_remaining += thr_->nthread() * integral()->storage_required_eri(basis());
349
350	ExEnv::out0() << indent
351	<< "Memory available per node: " << mem_alloc << " Bytes"
352	<< endl;
353	ExEnv::out0() << indent
354	<< "Static memory used per node: " << mem_static << " Bytes"
355	<< endl;
356	ExEnv::out0() << indent
357	<< "Total memory used per node: " << dyn_mem+mem_static << " Bytes"
358	<< endl;
359	ExEnv::out0() << indent
360	<< "Memory required for one pass: "
361	<< compute_cs_dynamic_memory(nocc_act,nocc_act)+mem_static
362	<< " Bytes"
363	<< endl;
364	ExEnv::out0() << indent
365	<< "Minimum memory required: "
366	<< compute_cs_dynamic_memory(1,nocc_act)+mem_static
367	<< " Bytes"
368	<< endl;
369	ExEnv::out0() << indent
370	<< "Batch size: " << ni
371	<< endl;
372
373	if (ni == 0) {
374	ExEnv::err0() << "Batch size is 0: more memory or processors are needed"
375	<< endl;
376	abort();
377	}
378
379	if (dynamic_) {
380	ExEnv::out0() << indent << "Using dynamic load balancing." << endl;
381	}
382
383	if (ni == nocc_act-restart_orbital_memgrp_) {
384	npass = 1;
385	rest = 0;
386	}
387	else {
388	rest = (nocc_act-restart_orbital_memgrp_)%ni;
389	npass = (nocc_act-restart_orbital_memgrp_ - rest)/ni + 1;
390	if (rest == 0) npass--;
391	}
392
393	if (me == 0) {
394	ExEnv::out0() << indent
395	<< scprintf(" npass rest nbasis nshell nfuncmax") << endl;
396	ExEnv::out0() << indent
397	<< scprintf(" %-4i %-3i %-5i %-4i %-3i",
398	npass,rest,nbasis,nshell,nfuncmax)
399	<< endl;
400	ExEnv::out0() << indent
401	<< scprintf(" nocc nvir nfzc nfzv") << endl;
402	ExEnv::out0() << indent
403	<< scprintf(" %-4i %-4i %-4i %-4i",
404	nocc,nvir,nfzc,nfzv)
405	<< endl;
406	}
407
408	int nijmax = 0;
409	index = 0;
410	for (i=0; i<ni; i++) {
411	for (j=0; j<nocc; j++) {
412	if (index++ % nproc == me) nijmax++;
413	}
414	}
415
416	////////////////////////////////////////////////
417	// The scf vector is distributed between nodes;
418	// put a copy of the scf vector on each node;
419	////////////////////////////////////////////////
420
421	escf = reference_->energy();
422	hf_energy_ = escf;
423
424	RefDiagSCMatrix occ;
425	RefSCMatrix Scf_Vec;
426	RefDiagSCMatrix evalmat;
427	eigen(evalmat, Scf_Vec, occ);
428
429	if (debug_ > 1) {
430	evalmat.print("eigenvalues");
431	Scf_Vec.print("eigenvectors");
432	}
433
434	double scf_vector_dat = new double[nbasisnoso];
435	Scf_Vec.t()->convert(scf_vector_dat);
436
437	evals = new double[noso];
438	double** scf_vector = new double*[nbasis];
439	for (i=0; i<nbasis; i++) {
440	scf_vector[i] = &scf_vector_dat[i*noso];
441	}
442	for (i=0; i<noso; i++) {
443	evals[i] = evalmat(i);
444	}
445
446	Scf_Vec = 0;
447	evalmat = 0;
448
449	//////////////////////////////////////////////////////////////
450	// Allocate storage for various arrays needed for gradients
451	// (Pkj and Pab are symmetric, so store only lower triangle)
452	//////////////////////////////////////////////////////////////
453
454	if (dograd) {
455	Pkj = (double) malloc((nocc(nocc+1)/2)*sizeof(double));
456	Pab = (double) malloc((nvir(nvir+1)/2)*sizeof(double));
457	Wkj = (double) malloc(noccnocc*sizeof(double));
458	Wab = (double) malloc(nvirnvir*sizeof(double));
459	Waj = (double) malloc(nvirnocc*sizeof(double));
460	if (do_d2_) {
461	d2occ_mat = new double[nocc_act*(nocc_act+1)/2];
462	d2vir_mat = new double[nvir_act*(nvir_act+1)/2];
463	}
464
465	gradient_dat = new double[natom*3];
466	gradient = new double*[natom];
467	for (i=0; i<natom; i++) {
468	gradient[i] = &gradient_dat[i*3];
469	}
470
471	hf_gradient_dat = new double[natom*3];
472	hf_gradient = new double*[natom];
473	for (i=0; i<natom; i++) {
474	hf_gradient[i] = &hf_gradient_dat[i*3];
475	}
476
477	ginter = new double*[natom];
478	for (i=0; i<natom; i++) {
479	ginter[i] = new double[3];
480	for (xyz=0; xyz<3; xyz++) ginter[i][xyz] = 0;
481	}
482
483	hf_ginter = new double*[natom];
484	for (i=0; i<natom; i++) {
485	hf_ginter[i] = new double[3];
486	for (xyz=0; xyz<3; xyz++) hf_ginter[i][xyz] = 0;
487	}
488
489	//////////////////////////////
490	// Initialize various arrays
491	//////////////////////////////
492
493	bzerofast(Pkj,nocc*(nocc+1)/2);
494	bzerofast(Wkj,nocc*nocc);
495	bzerofast(Pab,nvir*(nvir+1)/2);
496	bzerofast(Wab,nvir*nvir);
497	bzerofast(Waj,nvir*nocc);
498	if (do_d2_) {
499	bzerofast(d2occ_mat,nocc_act*(nocc_act+1)/2);
500	bzerofast(d2vir_mat,nvir_act*(nvir_act+1)/2);
501	}
502
503	if (me == 0) zero_gradients(gradient, natom, 3);
504	if (me == 0) zero_gradients(hf_gradient, natom, 3);
505	}
506
507	if (dograd \|\| do_d1_) {
508	Laj = (double) malloc(nvirnocc*sizeof(double));
509	bzerofast(Laj,nvir*nocc);
510	}
511
512	if (debug_ > 2 && me == 0) {
513	for (j=0; j<noso; j++) {
514	ExEnv::out0() << indent
515	<< scprintf("eigenvalue[%3d] = %15.10lf", j, evals[j]);
516	if (j < nfzc) ExEnv::out0() << " (frozen docc)";
517	else if (j < nocc_act + nfzc) ExEnv::out0() << " (active docc)";
518	else if (j < nvir_act + nocc_act + nfzc) ExEnv::out0() << " (active uocc)";
519	else ExEnv::out0() << " (frozen uocc)";
520	ExEnv::out0() << endl;
521	}
522	}
523
524	/////////////////////////////////////
525	// Begin MP2 loops
526	/////////////////////////////////////
527
528	// debug print
529	if (debug_ && me == 0) {
530	ExEnv::out0() << indent
531	<< scprintf("node %i, begin loop over i-batches",me) << endl;
532	}
533	// end of debug print
534
535	// Initialize the integrals
536	integral()->set_storage(mem_remaining);
537	tbints_ = new Ref<TwoBodyInt>[thr_->nthread()];
538	for (i=0; i<thr_->nthread(); i++) {
539	tbints_[i] = integral()->electron_repulsion();
540	}
541	if (dograd \|\| do_d1_) {
542	tbintder_ = new Ref<TwoBodyDerivInt>[thr_->nthread()];
543	for (i=0; i<thr_->nthread(); i++) {
544	tbintder_[i] = integral()->electron_repulsion_deriv();
545	}
546	}
547
548	int mem_integral_intermediates = integral()->storage_used();
549	int mem_integral_storage = (mem_remaining - mem_integral_intermediates) / thr_->nthread();
550	if (mem_integral_storage<0) mem_integral_storage = 0;
551	for (i=0; i<thr_->nthread(); i++) {
552	tbints_[i]->set_integral_storage(mem_integral_storage);
553	}
554
555	ExEnv::out0() << endl << indent
556	<< scprintf("Memory used for integral intermediates: %i Bytes",
557	mem_integral_intermediates)
558	<< endl;
559	ExEnv::out0() << indent
560	<< scprintf("Memory used for integral storage: %i Bytes",
561	mem_integral_storage)
562	<< endl;
563
564	if (mem.null()) {
565	ExEnv::errn() << "MBPT2: memory group not initialized" << endl;
566	abort();
567	}
568
569	mem->set_localsize(size_t(nijmax)nbasisnbasis*sizeof(double));
570	ExEnv::out0() << indent
571	<< "Size of global distributed array: "
572	<< mem->totalsize()
573	<< " Bytes"
574	<< endl;
575
576	MemoryGrpBuf<double> membuf_remote(mem);
577
578	int usep4 = !dograd;
579
580	Ref<ThreadLock> lock = thr_->new_lock();
581	CSGradErep12Qtr** e12thread = new CSGradErep12Qtr*[thr_->nthread()];
582	DistShellPair::SharedData sp_e_data, sp_g_data;
583	for (i=0; i<thr_->nthread(); i++) {
584	e12thread[i] = new CSGradErep12Qtr(i, thr_->nthread(), me, nproc,
585	mem, msg_, lock, basis(), tbints_[i],
586	nocc, scf_vector, tol, debug_,
587	dynamic_, print_percent_,
588	&sp_e_data, usep4);
589	}
590
591	CSGrad34Qbtr** qbt34thread;
592	if (dograd \|\| do_d1_) {
593	qbt34thread = new CSGrad34Qbtr*[thr_->nthread()];
594	for (i=0; i<thr_->nthread(); i++) {
595	qbt34thread[i] = new CSGrad34Qbtr(i, thr_->nthread(), me, nproc,
596	mem, msg_, lock, basis(), tbints_[i],
597	tbintder_[i], nocc, nfzc, scf_vector,
598	tol, debug_, dynamic_, print_percent_,
599	&sp_g_data, dograd, natom);
600	}
601	}
602
603	tim_enter("mp2 passes");
604	for (pass=0; pass<npass; pass++) {
605
606	if (me == 0) {
607	ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
608	}
609
610	i_offset = restart_orbital_memgrp_ + pass*ni + nfzc;
611	if ((pass == npass - 1) && (rest != 0)) ni = rest;
612
613	// Compute number of of i,j pairs on each node for
614	// two-el integrals and non-sep 2PDM elements
615	index = 0;
616	nij = 0;
617	for (i=0; i<ni; i++) {
618	for (j=0; j<nocc; j++) {
619	if (index++ % nproc == me) nij++;
620	}
621	}
622
623	// debug print
624	if (debug_)
625	ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
626	// end of debug print
627
628	mem->sync(); // This must be here or gamma non-sep will be wrong when running
629	// on multiple processors with more than one pass
630
631	r_offset = 0;
632
633	// Allocate and initialize some arrays
634	// (done here to avoid having these arrays
635	// overlap with arrays allocated later)
636
637	// Allocate (and initialize) some arrays
638
639	integral_iqjs = (double*) mem->localdata();
640
641	bzerofast(integral_iqjs, nijnbasisnbasis);
642
643	integral_iqjs = 0;
644
645	mem->sync();
646
647	index = 0;
648
649	if (me == 0) {
650	ExEnv::out0() << indent
651	<< scprintf("Begin loop over shells (erep, 1.+2. q.t.)") << endl;
652	}
653
654	// Do the two eletron integrals and the first two quarter transformations
655	tim_enter("erep+1.qt+2.qt");
656	sp_e_data.init();
657	for (i=0; i<thr_->nthread(); i++) {
658	e12thread[i]->set_i_offset(i_offset);
659	e12thread[i]->set_ni(ni);
660	thr_->add_thread(i,e12thread[i]);
661	# if SINGLE_THREAD_E12
662	e12thread[i]->run();
663	# endif
664	}
665	# if !SINGLE_THREAD_E12
666	thr_->start_threads();
667	thr_->wait_threads();
668	# endif
669	tim_exit("erep+1.qt+2.qt");
670
671	if (me == 0) {
672	ExEnv::out0() << indent << "End of loop over shells" << endl;
673	}
674
675	mem->sync(); // Make sure iqjs is complete on each node before continuing
676
677	integral_iqjs = (double*) mem->localdata();
678
679	#if PRINT2Q
680	if (me == 0) {
681	int index = 0;
682	int ij_index = 0;
683	for (int i = 0; i<ni; i++) {
684	for (int j = 0; j<nocc; j++) {
685	if (index++ % nproc == me) {
686	if (j >= nfzc) {
687	double integral_ij_offset = integral_iqjs + nbasisnbasis*ij_index;
688	for (int s = 0; s<nbasis; s++) {
689	double integral_ijsq_ptr = integral_ij_offset + snbasis;
690	for (int q = 0; q<nbasis; q++) {
691	printf("2Q: (%d %d\|%d %d) = %12.8f\n",
692	i,q,j,s,*integral_ijsq_ptr);
693	*integral_ijsq_ptr++;
694	}
695	}
696	}
697	ij_index++;
698	}
699	}
700	}
701	}
702	#endif
703
704	// Allocate and initialize some arrays
705	ixjs_tmp = new double[nbasis];
706
707	if (me == 0) {
708	ExEnv::out0() << indent << "Begin third q.t." << endl;
709	}
710
711	tim_enter("3. q.t.");
712	// Begin third quarter transformation;
713	// generate (ix\|js) for i act, j act or frz, and x any MO (act or frz)
714	index = 0;
715	ij_index = 0;
716	for (i=0; i<ni; i++) {
717	for (j=0; j<nocc; j++) {
718	if (index++ % nproc == me) {
719
720	for (s=0; s<nbasis; s++) {
721
722	bzerofast(ixjs_tmp, nbasis);
723	for (q=0; q<nbasis; q++) {
724	integral_iqjs_ptr = &integral_iqjs[q + nbasis(s + nbasisij_index)];
725	ixjs_ptr = ixjs_tmp;
726	c_qx = scf_vector[q];
727	tmpval = *integral_iqjs_ptr;
728	#if PRINT_BIGGEST_INTS
729	biggest_ints_2.insert(tmpval,i+i_offset,j,s,q);
730	if ((i+i_offset==104 && j == 1)
731	\|\|(i+i_offset==104 && j == 2)) {
732	biggest_ints_2s.insert(tmpval,i+i_offset,j,s,q);
733	}
734	#endif
735	for (x=0; x<noso; x++) {
736	ixjs_ptr++ += c_qx++ * tmpval;
737	}
738	} // exit q loop
739
740	// Put ixjs into integral_iqjs, while overwriting what was there;
741	// i.e., integral_iqjs will now contain three-quarter transformed
742	// integrals ixjs
743	integral_iqjs_ptr = &integral_iqjs[nbasis(s + nbasisij_index)];
744	ixjs_ptr = ixjs_tmp;
745	for (x=0; x<noso; x++) {
746	#if PRINT_BIGGEST_INTS
747	if (x>=nocc) {
748	biggest_ints_3a.insert(*ixjs_ptr,i+i_offset,j,s,x-nocc);
749	}
750	#endif
751	integral_iqjs_ptr++ = ixjs_ptr++;
752	}
753	} // exit s loop
754	ij_index++;
755	} // endif
756	} // exit j loop
757	} // exit i loop
758	// end of third quarter transformation
759	tim_exit("3. q.t.");
760
761	if (me == 0) {
762	ExEnv::out0() << indent << "End of third q.t." << endl;
763	}
764
765	delete[] ixjs_tmp;
766
767	// The array of half-transformed integrals integral_iqjs has now
768	// been overwritten by three-quarter transformed integrals ixjs;
769	// rename the array integral_ixjs, where x = any MO
770	integral_ixjs = integral_iqjs;
771
772	#if PRINT3Q
773	if (me == 0) {
774	int index = 0;
775	int ij_index = 0;
776	for (int i = 0; i<ni; i++) {
777	for (int j = 0; j<nocc; j++) {
778	if (index++ % nproc == me) {
779	if (j >= nfzc) {
780	double integral_ij_offset = integral_ixjs + nbasisnbasis*ij_index;
781	for (int s = 0; s<nbasis; s++) {
782	double integral_ijsx_ptr = integral_ij_offset + snbasis;
783	for (int x = 0; x<noso; x++) {
784	printf("3Q: (%d %d\|%d %d) = %12.8f\n",
785	i,x,j,s,*integral_ijsx_ptr);
786	*integral_ijsx_ptr++;
787	}
788	}
789	}
790	ij_index++;
791	}
792	}
793	}
794	}
795	#endif
796
797	integral_iajy = new double[noso];
798	// in iajy: i act; a,j act or frz; y act or frz occ or virt.
799	integral_ikja = new double[nvir_act];
800	// in ikja: i,j act; k act or frz; a act.
801
802	if (me == 0) {
803	ExEnv::out0() << indent << "Begin fourth q.t." << endl;
804	}
805
806	// Begin fourth quarter transformation
807	// generating MO integrals (ov\|ov), (ov\|oo) and (oo\|ov)
808	tim_enter("4. q.t.");
809	index = 0;
810	ij_index = 0;
811	for (i=0; i<ni; i++) {
812	for (j=0; j<nocc; j++) {
813	if (index++ % nproc == me) {
814
815	for (a=0; a<nvir; a++) {
816	bzerofast(integral_iajy, noso);
817	iajs_ptr = &integral_ixjs[a+nocc + nbasisnbasisij_index];
818	for (s=0; s<nbasis; s++) {
819	c_sy = scf_vector[s];
820	iajy_ptr = integral_iajy;
821	tmpval = *iajs_ptr;
822	#if PRINT_BIGGEST_INTS
823	biggest_ints_3.insert(tmpval,i+i_offset,j,s,a);
824	if ((i+i_offset==105 && j == 2 && s == 170 && a == 3)
825	\|\|(i+i_offset==102 && j == 2 && s == 170 && a == 2)) {
826	ExEnv::outn() << scprintf("3/4: %3d %3d %3d %3d: %16.10f",
827	i+i_offset, j, s, x-nocc)
828	<< endl;
829	}
830	#endif
831	for (y=0; y<noso; y++) {
832	iajy_ptr++ += c_sy++ * tmpval;
833	} // exit y loop
834	iajs_ptr += nbasis;
835	} // exit s loop
836	// Put integral_iajy into ixjs for one i,a,j while
837	// overwriting elements of ixjs
838	iajs_ptr = &integral_ixjs[a+nocc + nbasisnbasisij_index];
839	iajy_ptr = integral_iajy;
840	for (y=0; y<noso; y++) {
841	iajs_ptr = iajy_ptr++;
842	iajs_ptr += nbasis;
843	} // exit y loop
844	} // exit a loop
845
846	// this is only needed for gradients
847	if ((dograd \|\| do_d1_) && j >= nfzc) {
848	for (k=0; k<nocc; k++) {
849	bzerofast(integral_ikja, nvir_act);
850	ikjs_ptr = &integral_ixjs[k + nbasisnbasisij_index];
851	for (s=0; s<nbasis; s++) {
852	c_sa = &scf_vector[s][nocc];
853	ikja_ptr = integral_ikja;
854	tmpval = *ikjs_ptr;
855	for (a=0; a<nvir_act; a++) {
856	ikja_ptr++ += c_sa++ * tmpval;
857	} // exit a loop
858	ikjs_ptr += nbasis;
859	} // exit s loop
860	// Put integral_ikja into ixjs for one i,k,j while
861	// overwriting elements of ixjs
862	ikjs_ptr = &integral_ixjs[k + nbasis(nocc + nbasisij_index)];
863	ikja_ptr = integral_ikja;
864	for (a=0; a<nvir_act; a++) {
865	ikjs_ptr = ikja_ptr++;
866	ikjs_ptr += nbasis;
867	} // exit a loop
868	} // exit k loop
869	} //endif
870
871	ij_index++;
872	} // endif
873	} // exit j loop
874	} // exit i loop
875	// end of fourth quarter transformation
876	tim_exit("4. q.t.");
877
878	if (me == 0) {
879	ExEnv::out0() << indent << "End of fourth q.t." << endl;
880	}
881
882	// The array integral_ixjs has now been overwritten by MO integrals
883	// iajy and ikja, so rename the array mo_int
884	mo_int = integral_ixjs;
885
886	delete[] integral_iajy;
887	delete[] integral_ikja;
888
889	// Divide the (ia\|jb) MO integrals by the term
890	// evals[i]+evals[j]-evals[a]-evals[b]
891	// and keep these integrals in mo_int;
892	// do this only for active i, j, a, and b
893	tim_enter("divide (ia\|jb)'s");
894
895	index = 0;
896	ij_index = 0;
897	for (i=0; i<ni; i++) {
898	ii = i+i_offset;
899	for (j=0; j<nocc; j++) {
900	if (index++ % nproc == me) {
901	if (j>=nfzc) {
902	for (b=0; b<nvir_act; b++) {
903	iajb_ptr = &mo_int[nocc + nbasis(b+nocc + nbasisij_index)];
904	bb = b+nocc;
905	for (a=0; a<nvir_act; a++) {
906	#if PRINT4Q
907	printf("4Q: (%d %d\|%d %d) = %12.8f\n",
908	i,a+nocc,j,b+nocc,*iajb_ptr);
909	#endif
910	// Zero out nonsymmetric integral, else divide by denominators
911	if (usep4 && ( symorb_irrep_[ii] ^
912	symorb_irrep_[j] ^
913	symorb_irrep_[a+nocc] ^
914	symorb_irrep_[bb]) ) {
915	*iajb_ptr++ = 0.0;
916	}
917	else
918	*iajb_ptr++ /= evals[ii]+evals[j]-evals[a+nocc]-evals[bb];
919	} // exit a loop
920	} // exit b loop
921	} // endif
922	ij_index++;
923	} // endif
924	} // exit j loop
925	} // exit i loop
926	tim_exit("divide (ia\|jb)'s");
927
928	// We now have the fully transformed integrals (ia\|jb)
929	// divided by the proper orbital energy denominators
930	// for one batch of i, all j<nocc, and all a<nvir and b<nvir,
931	// where i, j, a, and b are all active;
932	// compute contribution to the MP2 correlation energy
933	// from these integrals
934
935	#if WRITE_DOUBLES
936	if (nproc > 1 \|\| npass > 1) {
937	ExEnv::outn() << "csgrad.cc: WRITE_DOUBLES set but case not allowed" << endl;
938	abort();
939	}
940	ExEnv::outn() << "csgrad.cc: WRITING DOUBLES: CHECK ORDER" << endl;
941	char *doutname = SCFormIO::fileext_to_filename(".mp2");
942	FILE *dout = fopen(doutname,"w");
943	delete[] doutname;
944	fwrite(&nocc_act, sizeof(int), 1, dout);
945	fwrite(&nvir_act, sizeof(int), 1, dout);
946	for (j=nfzc; j<nocc; j++) {
947	for (b=0; b<nvir_act; b++) {
948	for (i=0; i<ni; i++) {
949	ij_index = nocc*i + j;
950	iajb_ptr = &mo_int[nocc + nbasis(b+nocc + nbasisij_index)];
951	fwrite(iajb_ptr, sizeof(double), nvir_act, dout);
952	for (a=0; a<nvir_act; a++) {
953	if (fabs(iajb_ptr[a])>1.0e-8) {
954	ExEnv::outn() << scprintf(" Djbia(%2d %2d %2d %2d) = %12.8f",
955	j+1-nfzc,b+1,i+1,a+1,iajb_ptr[a])
956	<< endl;
957	}
958	}
959	}
960	}
961	}
962	fclose(dout);
963	#endif
964
965	tim_enter("compute ecorr");
966
967	index = 0;
968	ij_index = 0;
969	for (i=0; i<ni; i++) {
970	double ecorr_i = 0.0;
971	for (j=0; j<nocc; j++) {
972	double ecorr_ij = 0.0;
973	if (index++ % nproc == me) {
974
975	if (j>=nfzc) {
976	for (b=0; b<nvir_act; b++) {
977	iajb_ptr = &mo_int[nocc + nbasis(b+nocc + nbasisij_index)];
978	ibja_ptr = &mo_int[b+nocc + nbasis(nocc + nbasisij_index)];
979	for (a=0; a<nvir_act; a++) {
980	delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
981	// only include determinants with unique coefficients
982	if (a>=b && i_offset+i>=j) {
983	if (a>b && i_offset+i>j) {
984	// aaaa or bbbb
985	biggest_coefs.insert(iajb_ptr - ibja_ptr,
986	i_offset+i,j,a,b,1111);
987	// aabb or bbaa or abba or baab
988	biggest_coefs.insert(*ibja_ptr,i_offset+i,j,b,a,1212);
989	} // endif
990	// aabb or bbaa or abba or baab
991	biggest_coefs.insert(*iajb_ptr,i_offset+i,j,a,b,1212);
992	} // endif
993
994	tmpval = iajb_ptr(2*iajb_ptr - ibja_ptr)*delta_ijab;
995	ecorr_mp2 += tmpval;
996	if (debug_) ecorr_ij += tmpval;
997	iajb_ptr++;
998	ibja_ptr += nbasis;;
999	} // exit a loop
1000	} // exit b loop
1001	} // endif
1002	ij_index++;
1003	} // endif
1004	if (debug_) {
1005	msg_->sum(ecorr_ij);
1006	ecorr_i += ecorr_ij;
1007	ExEnv::out0() << indent
1008	<< scprintf("correlation energy for pair %3d %3d = %16.12f",
1009	i+i_offset, j, ecorr_ij)
1010	<< endl;
1011	}
1012	} // exit j loop
1013	if (debug_) {
1014	ExEnv::out0() << indent
1015	<< scprintf("correlation energy for orbital %3d = %16.12f",
1016	i+i_offset, ecorr_i)
1017	<< endl;
1018	}
1019	} // exit i loop
1020	tim_exit("compute ecorr");
1021
1022	// debug print
1023	if (debug_ && me == 0) {
1024	ExEnv::out0() << indent << "End of ecorr" << endl;
1025	}
1026	// end of debug print
1027
1028	if (npass > 1 && pass < npass - 1) {
1029	double passe = ecorr_mp2;
1030	msg_->sum(passe);
1031	ExEnv::out0() << indent
1032	<< "Partial correlation energy for pass " << pass << ":" << endl;
1033	ExEnv::out0() << indent
1034	<< scprintf(" restart_ecorr = %18.14f", passe)
1035	<< endl;
1036	ExEnv::out0() << indent
1037	<< scprintf(" restart_orbital_memgrp = %d", ((pass+1) * ni))
1038	<< endl;
1039	}
1040
1041	integral_iqjs = 0;
1042	mem->sync(); // Make sure MO integrals are complete on all nodes before continuing
1043
1044	// don't go beyond this point if only the energy is needed
1045	if (!dograd && !do_d1_) continue;
1046
1047	mo_int = (double*) mem->localdata();
1048
1049	if (!dograd) goto compute_L;
1050
1051	// Update the matrices Pkj and Wkj with
1052	// contributions from (occ vir\|occ vir) integrals
1053	index = 0;
1054	ij_index = 0;
1055	tim_enter("Pkj and Wkj");
1056	for (i=0; i<ni; i++) {
1057	for (j=0; j<nocc; j++) {
1058	if (index++ % nproc == me) {
1059
1060	if (j>=nfzc) {
1061	for (kloop=me; kloop<me+nocc; kloop++) {
1062	// stagger k's to minimize contention
1063	k = kloop%nocc;
1064	if (k<=j) pkj_ptr = &Pkj[j*(j+1)/2 + k];
1065	if (do_d2_) {
1066	if (k<=j && k>=nfzc) {
1067	d2occ_mat_ptr = &d2occ_mat[(j-nfzc)*(j-nfzc+1)/2 + k-nfzc];
1068	}
1069	}
1070	wjk_ptr = &Wkj[j*nocc + k];
1071	// Send for iakb, if necessary
1072	ik_index = (i*nocc + k)/nproc;
1073	ik_proc = (i*nocc + k)%nproc;
1074	ik_offset = nocc + noccnbasis + nbasisnbasis*ik_index;
1075	mo_intbuf = (double*) membuf_remote.readonly_on_node(ik_offset,
1076	nbasis*nvir-nocc,
1077	ik_proc);
1078	for (a=0; a<nvir_act; a++) {
1079	ibja_ptr = &mo_int[nocc + nbasis(a+nocc + nbasisij_index)];
1080	iajb_ptr = &mo_int[a+nocc + nbasis(nocc + nbasisij_index)];
1081	iakb_ptr = &mo_intbuf[a];
1082	for (b=0; b<nvir_act; b++) {
1083	tmpval1 = iakb_ptr *iajb_ptr;
1084	tmpval = 2*iakb_ptr ibja_ptr++ - 4tmpval1;
1085	// tmpval = 2*iakb_ptr (ibja_ptr++ - 2 *iajb_ptr);
1086	iakb_ptr += nbasis;
1087	iajb_ptr += nbasis;
1088	if (k<nfzc && k<=j) *pkj_ptr += tmpval/(evals[k]-evals[j]);
1089	else if (k<=j) {
1090	*pkj_ptr += tmpval;
1091	if (do_d2_) *d2occ_mat_ptr += tmpval1;
1092	}
1093	if (k>=nfzc) {
1094	delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
1095	wjk_ptr += tmpvaldelta_ijab;
1096	}
1097	} // exit b loop
1098	} // exit a loop
1099	mo_intbuf = 0;
1100	membuf_remote.release();
1101	} // end kloop loop
1102	} // endif
1103
1104	ij_index++;
1105	} // endif
1106	} // exit j loop
1107	} // exit i loop
1108	tim_exit("Pkj and Wkj");
1109
1110	// debug print
1111	if (debug_ && me == 0) {
1112	ExEnv::out0() << indent << "End of Pkj and Wkj" << endl;
1113	}
1114	// end of debug print
1115
1116	// Update the matrices Pab and Wab with
1117	// contributions from (occ vir\|occ vir) integrals
1118	tim_enter("Pab and Wab");
1119	index = 0;
1120	ij_index = 0;
1121	for (i=0; i<ni; i++) {
1122	for (j=0; j<nocc; j++) {
1123	if (index++ % nproc == me) {
1124	if (j>=nfzc) {
1125
1126	offset = nocc + noccnbasis + nbasisnbasis*ij_index;
1127	for (a=0; a<nvir_act; a++) {
1128	pab_ptr = &Pab[a*(a+1)/2];
1129	if (do_d2_) d2vir_mat_ptr = &d2vir_mat[a*(a+1)/2];
1130	for (b=0; b<=a; b++) { // active-active part of Pab and Wab
1131	wab_ptr = &Wab[a*nvir + b];
1132	wba_ptr = &Wab[b*nvir + a];
1133	ibjc_ptr = &mo_int[offset + b];
1134	icjb_ptr = &mo_int[offset + b*nbasis];
1135	iajc_ptr = &mo_int[offset + a];
1136	icja_ptr = &mo_int[offset + a*nbasis];
1137	for (c=0; c<nvir_act; c++) {
1138	tmpval = iajc_ptr*ibjc_ptr;
1139	if (do_d2_) *d2vir_mat_ptr += tmpval;
1140	pab_ptr += 4tmpval - 2*iajc_ptr *icjb_ptr;
1141	// pab_ptr += 2iajc_ptr (2 * ibjc_ptr - icjb_ptr);
1142
1143	if (a == b) {
1144	delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
1145	wab_ptr += 2ibjc_ptr(icja_ptr - 2 iajc_ptr)delta_ijac;
1146	}
1147	else {
1148	delta_ijbc = evals[i_offset+i]+evals[j]-evals[nocc+b]-evals[nocc+c];
1149	delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
1150	wab_ptr += 2ibjc_ptr (icja_ptr - 2 iajc_ptr)delta_ijac;
1151	wba_ptr += 2iajc_ptr (icjb_ptr - 2 ibjc_ptr)delta_ijbc;
1152	}
1153	iajc_ptr += nbasis;
1154	ibjc_ptr += nbasis;
1155	icja_ptr++;
1156	icjb_ptr++;
1157	} // exit c loop
1158	pab_ptr++;
1159	d2vir_mat_ptr++;
1160	} // exit b loop
1161	} // exit a loop
1162
1163	for (a=0; a<nfzv; a++) { // active-frozen part of Pab
1164	pab_ptr = &Pab[(a+nvir_act)*(a+nvir_act+1)/2];
1165	for (b=0; b<nvir_act; b++) {
1166	tmpval = evals[nocc+b] - evals[nocc+nvir_act+a];
1167	ibjc_ptr = &mo_int[offset+b];
1168	iajc_ptr = &mo_int[offset+a+nvir_act];
1169	icja_ptr = &mo_int[offset+(a+nvir_act)*nbasis];
1170	for (c=0; c<nvir_act; c++) {
1171	pab_ptr += 2ibjc_ptr(2*iajc_ptr - icja_ptr++)/tmpval;
1172	ibjc_ptr += nbasis;
1173	iajc_ptr += nbasis;
1174	} // exit c loop
1175	pab_ptr++;
1176	} // exit b loop
1177	} // exit a loop
1178
1179	} // endif
1180	ij_index++;
1181	} // endif
1182	} // exit j loop
1183	} // exit i loop
1184	tim_exit("Pab and Wab");
1185
1186	// debug print
1187	if (debug_ && me == 0) {
1188	ExEnv::out0() << indent << "End of Pab and Wab" << endl;
1189	}
1190	// end of debug print
1191
1192	compute_L:
1193
1194	///////////////////////////////////////
1195	// Update Waj and Laj with contrib.
1196	// from (oo\|ov) and (ov\|oo) integrals;
1197	// here a is active and j is active or
1198	// frozen
1199	///////////////////////////////////////
1200	tim_enter("Waj and Laj");
1201
1202	// (oo\|ov) contribution
1203	index = 0;
1204	ik_index = 0;
1205	for (i=0; i<ni; i++) {
1206	for (k=0; k<nocc; k++) {
1207	if (index++ % nproc == me) {
1208	if (k>=nfzc) {
1209	offset = nbasisnocc + nbasisnbasis*ik_index;
1210	for (j=0; j<nocc; j++) {
1211	for (b=0; b<nvir_act; b++) {
1212	ibka_ptr = &mo_int[b+nocc + offset];
1213	ijkb_ptr = &mo_int[j + nbasis*b + offset];
1214	if (dograd) waj_ptr = &Waj[jnvir]; // order as jnvir+a to make loops more efficient
1215	laj_ptr = &Laj[j*nvir];
1216	for (a=0; a<nvir_act; a++) {
1217	tmpval = 2*ibka_ptr *ijkb_ptr;
1218	ibka_ptr += nbasis;
1219	if (dograd) *waj_ptr++ += tmpval;
1220	*laj_ptr++ -= tmpval; // This term had the wrong sign in Frisch's paper
1221	} // exit a loop
1222	} // exit b loop
1223	} // exit j loop
1224	} // endif
1225	ik_index++;
1226	} // endif
1227	} // exit k loop
1228	} // exit i loop
1229
1230	// (ov\|oo) contribution
1231	index = 0;
1232	ik_index = 0;
1233	for (i=0; i<ni; i++) {
1234	for (k=0; k<nocc; k++) {
1235	if (index++ % nproc == me) {
1236	if (k>=nfzc) {
1237	offset = nocc + nbasisnbasisik_index;
1238	for (b=0; b<nvir_act; b++) {
1239	for (j=0; j<nocc; j++) {
1240	ibkj_ptr = &mo_int[offset + b + j*nbasis];
1241	ibka_ptr = &mo_int[offset + b + nocc*nbasis];
1242	if (dograd) waj_ptr = &Waj[j*nvir];
1243	laj_ptr = &Laj[j*nvir];
1244	for (a=0; a<nvir_act; a++) {
1245	tmpval = 4 * ibka_ptr *ibkj_ptr;
1246	ibka_ptr += nbasis;
1247	if (dograd) *waj_ptr++ -= tmpval;
1248	*laj_ptr++ += tmpval; // This term had the wrong sign in Frisch's paper
1249	} // exit a loop
1250	} // exit j loop
1251	} // exit b loop
1252	} // endif
1253	ik_index++;
1254	} // endif
1255	} // exit k loop
1256	} // exit i loop
1257
1258	tim_exit("Waj and Laj");
1259	/////////////////////////////
1260	// End of Waj and Laj update
1261	/////////////////////////////
1262
1263	// debug print
1264	if (debug_ && me == 0) {
1265	ExEnv::out0() << indent << "End of Paj and Waj" << endl;
1266	}
1267	// end of debug print
1268
1269	mo_int = 0;
1270
1271	mem->sync(); // Need to synchronize before deleting mo_intbuf
1272
1273	mo_int = (double*) mem->localdata();
1274
1275	gamma_iajs_tmp = new double[nbasis*nvir_act];
1276	if (!gamma_iajs_tmp) {
1277	ExEnv::outn() << indent << "Could not allocate gamma_iajs_tmp" << endl;
1278	abort();
1279	}
1280
1281	// debug print
1282	if (debug_ && me == 0) {
1283	ExEnv::out0() << indent << "Begin first and second q.b.t." << endl;
1284	}
1285	// end of debug print
1286
1287	///////////////////////////////////////////////////////////
1288	// Perform first and second quarter back-transformation.
1289	// Each node produces gamma_iajs, and gamma_iqjs
1290	// for a subset of i and j, all a and all s;
1291	// the back-transf. is done only for active i, j, a, and b
1292	///////////////////////////////////////////////////////////
1293
1294	// Begin first quarter back-transformation
1295	tim_enter("1. q.b.t.");
1296	index = 0;
1297	ij_index = 0;
1298	for (i=0; i<ni; i++) {
1299	for (j=0; j<nocc; j++) {
1300	if (index++ % nproc == me) {
1301	if (j>=nfzc) {
1302	bzerofast(gamma_iajs_tmp,nbasis*nvir_act);
1303	offset = nocc + noccnbasis + nbasisnbasis*ij_index;
1304
1305	for (a=0; a<nvir_act; a++) {
1306	for (s=0; s<nbasis; s++) {
1307	c_sb = &scf_vector[s][nocc];
1308	gamma_iajs_ptr = &gamma_iajs_tmp[s*nvir_act + a];
1309	ibja_ptr = &mo_int[a*nbasis + offset];
1310	iajb_ptr = &mo_int[a + offset];
1311
1312	tmpval = 0.0;
1313	for (b=0; b<nvir_act; b++) {
1314	tmpval += 2*c_sb++ (2*iajb_ptr - ibja_ptr++);
1315	iajb_ptr += nbasis;
1316	} // exit b loop
1317	*gamma_iajs_ptr += tmpval;
1318	} // exit s loop
1319	} // exit a loop
1320	// Put gamma_iajs_tmp into mo_int for one i,j
1321	// while overwriting mo_int
1322	gamma_iajs_ptr = gamma_iajs_tmp;
1323	for (y=0; y<nbasis; y++) {
1324	iajy_ptr = &mo_int[nocc + nbasis(y + nbasisij_index)];
1325	for (a=0; a<nvir_act; a++) {
1326	iajy_ptr++ = gamma_iajs_ptr++;
1327	}
1328	}
1329
1330	} // endif
1331	ij_index++;
1332	} // endif
1333	} // exit j loop
1334	} // exit i loop
1335	// end of first quarter back-transformation
1336	tim_exit("1. q.b.t.");
1337
1338	// debug print
1339	if (debug_ && me == 0) {
1340	ExEnv::out0() << indent << "End of first q.b.t." << endl;
1341	}
1342	// end of debug print
1343
1344	mo_int = 0;
1345
1346	mem->sync(); // Make sure all nodes are done with gamma_iajs_tmp before renaming
1347
1348	delete[] gamma_iajs_tmp;
1349
1350	// The array mo_int has now been overwritten by the quarter
1351	// back-transformed non-sep 2PDM gamma_iajs, so rename
1352	gamma_iajs = (double*) mem->localdata();
1353
1354	gamma_iqjs_tmp = new double[nbasis];
1355	if (!gamma_iqjs_tmp) {
1356	ExEnv::errn() << "Could not allocate gamma_iqjs_tmp" << endl;
1357	abort();
1358	}
1359
1360	if (debug_ && me == 0) {
1361	ExEnv::out0() << indent << "Begin second q.b.t." << endl;
1362	}
1363
1364	// Begin second quarter back-transformation
1365	// (gamma_iqjs elements ordered as i,j,s,q,
1366	// i.e., q varies fastest)
1367	tim_enter("2. q.b.t.");
1368	index = 0;
1369	ij_index = 0;
1370	for (i=0; i<ni; i++) {
1371	for (j=0; j<nocc; j++) {
1372	if (index++ % nproc == me) {
1373	if (j>=nfzc) {
1374	offset = nbasisnbasisij_index;
1375
1376	for (s=0; s<nbasis; s++) {
1377	bzerofast(gamma_iqjs_tmp,nbasis);
1378	for (q=0; q<nbasis; q++) {
1379	gamma_iqjs_ptr = &gamma_iqjs_tmp[q];
1380	gamma_iajs_ptr = &gamma_iajs[nocc + s*nbasis + offset];
1381	c_qa = &scf_vector[q][nocc];
1382
1383	tmpval = 0.0;
1384	for (a=0; a<nvir_act; a++) {
1385	tmpval += c_qa++ *gamma_iajs_ptr++;
1386	} // exit a loop
1387	*gamma_iqjs_ptr += tmpval;
1388	} // exit q loop
1389	// Put gamma_iqjs_tmp into gamma_iajs for one i,j,s
1390	// while overwriting gamma_iajs
1391	gamma_iajs_ptr = &gamma_iajs[s*nbasis + offset];
1392	gamma_iqjs_ptr = gamma_iqjs_tmp;
1393	for (q=0; q<nbasis; q++) {
1394	gamma_iajs_ptr++ = gamma_iqjs_ptr++;
1395	}
1396	} // exit s loop
1397
1398	} // endif
1399	ij_index++;
1400	} // endif
1401	} // exit j loop
1402	} // exit i loop
1403	tim_exit("2. q.b.t.");
1404	// end of second quarter back-transformation
1405
1406	if (debug_ && me == 0) {
1407	ExEnv::out0() << indent << "End of second q.b.t." << endl;
1408	}
1409
1410	gamma_iajs = 0;
1411
1412	mem->sync(); // Keep this here to make sure all nodes have gamma_iqjs
1413	// before it is needed below, and that gamma_iajs is not
1414	// deleted prematurely
1415
1416	// The quarter back-transformed elements gamma_iajs have now been
1417	// overwritten by the half back-transformed elements gamma_iqjs
1418
1419	delete[] gamma_iqjs_tmp;
1420
1421	/////////////////////////////////////////////////
1422	// End of 1. and 2. quarter back-transformation
1423	/////////////////////////////////////////////////
1424
1425	Lpi = new double[nbasis*ni];
1426	bzerofast(Lpi,nbasis*ni);
1427
1428	if (me == 0) {
1429	ExEnv::out0() << indent << "Begin third and fourth q.b.t." << endl;
1430	}
1431
1432	//////////////////////////////////////////////////////////
1433	// Perform third and fourth quarter back-transformation
1434	// and compute contribution to gradient from non-sep 2PDM
1435	//////////////////////////////////////////////////////////
1436
1437	tim_enter("3.qbt+4.qbt+non-sep contrib.");
1438	sp_g_data.init();
1439	for (i=0; i<thr_->nthread(); i++) {
1440	qbt34thread[i]->set_i_offset(i_offset);
1441	qbt34thread[i]->set_ni(ni);
1442	thr_->add_thread(i,qbt34thread[i]);
1443	# if SINGLE_THREAD_QBT34
1444	qbt34thread[i]->run();
1445	# endif
1446	}
1447	# if !SINGLE_THREAD_QBT34
1448	thr_->start_threads();
1449	thr_->wait_threads();
1450	# endif
1451	tim_exit("3.qbt+4.qbt+non-sep contrib.");
1452	// Add thread contributions to Lpi and ginter
1453	for (i=0; i<thr_->nthread(); i++) {
1454	double *Lpi_thread = qbt34thread[i]->get_Lpi();
1455	double **ginter_thread = qbt34thread[i]->get_ginter();
1456	for (j=0; j<nbasis*ni; j++) Lpi[j] += Lpi_thread[j];
1457	for (j=0; j<natom; j++) {
1458	for (k=0; k<3; k++) {
1459	ginter[j][k] += ginter_thread[j][k];
1460	}
1461	}
1462	aointder_computed += qbt34thread[i]->get_aointder_computed();
1463	}
1464
1465	if (me == 0) {
1466	ExEnv::out0() << indent << "End of third and fourth q.b.t." << endl;
1467	}
1468
1469	mem->sync(); // Make sure all nodes are done before deleting arrays
1470
1471	if (debug_ > 1) {
1472	RefSCDimension ni_dim(new SCDimension(ni,1));
1473	ni_dim->blocks()->set_subdim(0, new SCDimension(ni));
1474	RefSCDimension nbasis_dim(new SCDimension(nbasis,1));
1475	nbasis_dim->blocks()->set_subdim(0, new SCDimension(nbasis));
1476	RefSCMatrix Lpi_mat(nbasis_dim, ni_dim, kit);
1477	Lpi_mat->assign(Lpi);
1478	Lpi_mat.print("Lpi");
1479	}
1480
1481	if (debug_ && me == 0) {
1482	ExEnv::out0() << indent << "Back-transform Lpi" << endl;
1483	}
1484
1485	// Back-transform Lpi to MO basis
1486	lpi_ptr = Lpi;
1487	for (p=0; p<nbasis; p++) {
1488	for (i=0; i<ni; i++) {
1489	c_pa = &scf_vector[p][nocc];
1490	laj_ptr = &Laj[nvir*(i_offset + i)];
1491	for (a=0; a<nvir; a++) {
1492	laj_ptr++ += c_pa++ * *lpi_ptr;
1493	} // exit a loop
1494	lpi_ptr++;
1495	} // exit i loop
1496	} // exit p loop
1497
1498	// malloc_chain_check(1);
1499
1500	delete[] Lpi;
1501
1502	if (me == 0) {
1503	ExEnv::out0() << indent << "Done with pass " << pass+1 << endl;
1504	}
1505	} // exit loop over i-batches (pass)
1506	tim_exit("mp2 passes");
1507
1508	if (dograd \|\| do_d1_) {
1509	for (i=0; i<thr_->nthread(); i++) {
1510	delete qbt34thread[i];
1511	}
1512	delete[] qbt34thread;
1513	}
1514
1515	mem->set_localsize(0);
1516
1517	// debug print
1518	if (debug_ && me == 0) {
1519	ExEnv::out0() << indent << "Exited loop over i-batches" << endl;
1520	}
1521	// end of debug print
1522
1523	///////////////////////////////////////////////////////////////
1524	// The computation of the MP2 energy is now complete on each
1525	// node; add the nodes' contributions and print out the energy
1526	///////////////////////////////////////////////////////////////
1527	msg_->sum(ecorr_mp2);
1528	msg_->sum(aoint_computed);
1529	msg_->sum(aointder_computed);
1530
1531	biggest_coefs.combine(msg_);
1532	#if PRINT_BIGGEST_INTS
1533	biggest_ints_1.combine(msg_);
1534	biggest_ints_2.combine(msg_);
1535	biggest_ints_2s.combine(msg_);
1536	biggest_ints_3a.combine(msg_);
1537	biggest_ints_3.combine(msg_);
1538	#endif
1539
1540	if (me == 0) {
1541	emp2 = escf + ecorr_mp2;
1542
1543	#if PRINT_BIGGEST_INTS
1544	ExEnv::out0() << "biggest 1/4 transformed ints" << endl;
1545	for (i=0; i<biggest_ints_1.ncontrib(); i++) {
1546	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1547	biggest_ints_1.indices(i)[0],
1548	biggest_ints_1.indices(i)[1],
1549	biggest_ints_1.indices(i)[2],
1550	biggest_ints_1.indices(i)[3],
1551	biggest_ints_1.val(i)
1552	)
1553	<< endl;
1554	}
1555	ExEnv::outn() << "biggest 2/4 transformed ints" << endl;
1556	for (i=0; i<biggest_ints_2.ncontrib(); i++) {
1557	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1558	biggest_ints_2.indices(i)[0],
1559	biggest_ints_2.indices(i)[1],
1560	biggest_ints_2.indices(i)[2],
1561	biggest_ints_2.indices(i)[3],
1562	biggest_ints_2.val(i)
1563	)
1564	<< endl;
1565	}
1566	ExEnv::outn() << "restricted 2/4 transformed ints" << endl;
1567	for (i=0; i<biggest_ints_2s.ncontrib(); i++) {
1568	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1569	biggest_ints_2s.indices(i)[0],
1570	biggest_ints_2s.indices(i)[1],
1571	biggest_ints_2s.indices(i)[2],
1572	biggest_ints_2s.indices(i)[3],
1573	biggest_ints_2s.val(i)
1574	)
1575	<< endl;
1576	}
1577	ExEnv::outn() << "biggest 3/4 transformed ints (in 3.)" << endl;
1578	for (i=0; i<biggest_ints_3a.ncontrib(); i++) {
1579	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1580	biggest_ints_3a.indices(i)[0],
1581	biggest_ints_3a.indices(i)[1],
1582	biggest_ints_3a.indices(i)[2],
1583	biggest_ints_3a.indices(i)[3],
1584	biggest_ints_3a.val(i)
1585	)
1586	<< endl;
1587	}
1588	ExEnv::outn() << "biggest 3/4 transformed ints (in 4.)" << endl;
1589	for (i=0; i<biggest_ints_3.ncontrib(); i++) {
1590	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
1591	biggest_ints_3.indices(i)[0],
1592	biggest_ints_3.indices(i)[1],
1593	biggest_ints_3.indices(i)[2],
1594	biggest_ints_3.indices(i)[3],
1595	biggest_ints_3.val(i)
1596	)
1597	<< endl;
1598	}
1599	#endif
1600
1601	if (restart_orbital_memgrp_ == 0) {
1602	if (biggest_coefs.ncontrib()) {
1603	ExEnv::out0() << endl << indent
1604	<< "Largest first order coefficients (unique):"
1605	<< endl;
1606	}
1607	for (i=0; i<biggest_coefs.ncontrib(); i++) {
1608	int i0 = biggest_coefs.indices(i)[0];
1609	int i1 = biggest_coefs.indices(i)[1];
1610	int i2 = biggest_coefs.indices(i)[2] + nocc;
1611	int i3 = biggest_coefs.indices(i)[3] + nocc;
1612	int spincase = biggest_coefs.indices(i)[4];
1613	ExEnv::out0() << indent
1614	<< scprintf(" %2d %12.8f %2d %3s %2d %3s -> %2d %3s %2d %3s (%s)",
1615	i+1, biggest_coefs.val(i),
1616	symorb_num_[i0]+1,
1617	ct.gamma(symorb_irrep_[i0]).symbol(),
1618	symorb_num_[i1]+1,
1619	ct.gamma(symorb_irrep_[i1]).symbol(),
1620	symorb_num_[i2]+1,
1621	ct.gamma(symorb_irrep_[i2]).symbol(),
1622	symorb_num_[i3]+1,
1623	ct.gamma(symorb_irrep_[i3]).symbol(),
1624	(spincase==1111?"++++":"+-+-")
1625	)
1626	<< endl;
1627	}
1628	}
1629
1630	// Print out various energies etc.
1631
1632	if (debug_) {
1633	ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
1634	<< indent << "(or integral derivatives) would have been computed\n"
1635	<< indent << "without bounds checking: "
1636	<< npassnshellnshell(nshell+1)(nshell+1)/2
1637	<< endl;
1638
1639	ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
1640	<< indent << "were computed: " << aoint_computed
1641	<< endl;
1642
1643	if (dograd) {
1644	ExEnv::out0() << indent
1645	<< "Number of shell quartets for which AO integral derivatives\n"
1646	<< indent << "were computed: " << aointder_computed
1647	<< endl;
1648	}
1649	}
1650
1651	ExEnv::out0()<<endl<<indent
1652	<<scprintf("RHF energy [au]: %17.12lf\n", escf);
1653	ExEnv::out0()<<indent
1654	<<scprintf("MP2 correlation energy [au]: %17.12lf\n", ecorr_mp2);
1655	ExEnv::out0()<<indent
1656	<<scprintf("MP2 energy [au]: %17.12lf\n", emp2);
1657	ExEnv::out0().flush();
1658	}
1659	if (method_ && strcmp(method_,"mp")) {
1660	ExEnv::out0() << indent
1661	<< "MBPT2: bad method for closed shell case: " << method_
1662	<< ", using mp" << endl;
1663	}
1664	set_energy(emp2);
1665	set_actual_value_accuracy(reference_->actual_value_accuracy()
1666	*ref_to_mp2_acc);
1667
1668	RefSCDimension nocc_act_dim(new SCDimension(nocc_act,1));
1669	nocc_act_dim->blocks()->set_subdim(0, new SCDimension(nocc_act));
1670	RefSCDimension nvir_act_dim(new SCDimension(nvir_act,1));
1671	nvir_act_dim->blocks()->set_subdim(0, new SCDimension(nvir_act));
1672	RefSCDimension nocc_dim(new SCDimension(nocc,1));
1673	nocc_dim->blocks()->set_subdim(0, new SCDimension(nocc));
1674	RefSCDimension nvir_dim(new SCDimension(nvir,1));
1675	nvir_dim->blocks()->set_subdim(0, new SCDimension(nvir));
1676	RefSCDimension nbasis_dim = ao_dimension()->blocks()->subdim(0);
1677	RefSCDimension noso_dim(new SCDimension(noso,1));
1678
1679	if (dograd \|\| do_d1_) {
1680	msg_->sum(Laj,nvir*nocc);
1681
1682	RefSCMatrix T1_mat(nocc_act_dim, nvir_act_dim, kit);
1683	// the elements of T1_mat are the single-substitution amplitudes
1684	BiggestContribs biggest_t1(2,10);
1685	// compute the S2 norm of Lee et al. (s2_diag)
1686	double s2_diag = 0.0;
1687	for (j=nfzc; j<nocc; j++) {
1688	laj_ptr = &Laj[j*nvir];
1689	for (a=0; a<nvir_act; a++) {
1690	tmpval = 0.5**laj_ptr++/(evals[a+nocc]-evals[j]);
1691	T1_mat.set_element(j-nfzc,a,tmpval);
1692	biggest_t1.insert(tmpval,j,a);
1693	s2_diag += tmpval*tmpval;
1694	}
1695	}
1696	s2_diag = sqrt(s2_diag/(2*nocc_act));
1697	// compute the T1 matrix 1-norm
1698	double t1onenorm = 0.0;
1699	Ref<SCElementSumAbs> sumabs = new SCElementSumAbs;
1700	Ref<SCElementOp> genop = sumabs.pointer();
1701	for (a=0; a < nvir_act; a++) {
1702	sumabs->init();
1703	T1_mat.get_column(a).element_op(genop);
1704	if (t1onenorm < sumabs->result()) t1onenorm = sumabs->result();
1705	}
1706	// compute the T1 matrix inf-norm
1707	double t1infnorm = 0.0;
1708	for (j=0; j < nocc_act; j++) {
1709	sumabs->init();
1710	T1_mat.get_row(j).element_op(genop);
1711	if (t1infnorm < sumabs->result()) t1infnorm = sumabs->result();
1712	}
1713	// compute the T1 matrix 2-norm ( = D1(MP2) )
1714	RefSymmSCMatrix D1_mat(nocc_act_dim,kit);
1715	D1_mat.assign(0.0);
1716	D1_mat.accumulate_symmetric_product(T1_mat);
1717	T1_mat = 0;
1718	double d1_diag = sqrt(D1_mat.eigvals().get_element(nocc_act-1));
1719	D1_mat = 0;
1720	// print the norms
1721	ExEnv::out0()<<endl;
1722	ExEnv::out0()
1723	<<indent<<scprintf("D1(MP2) = %12.8f", d1_diag)
1724	<<endl
1725	<<indent<<scprintf("S2 matrix 1-norm = %12.8f", t1onenorm)
1726	<<endl
1727	<<indent<<scprintf("S2 matrix inf-norm = %12.8f", t1infnorm)
1728	<<endl
1729	<<indent<<scprintf("S2 diagnostic = %12.8f", s2_diag)
1730	<<endl;
1731	if (biggest_t1.ncontrib()) {
1732	ExEnv::out0() << endl
1733	<< indent << "Largest S2 values (unique determinants):" << endl;
1734	}
1735	for (i=0; i<biggest_t1.ncontrib(); i++) {
1736	int i0 = biggest_t1.indices(i)[0];
1737	int i1 = biggest_t1.indices(i)[1] + nocc;
1738	ExEnv::out0()
1739	<< indent << scprintf(" %2d %12.8f %3d %3s -> %4d %3s",
1740	i+1, biggest_t1.val(i),
1741	symorb_num_[i0]+1,
1742	ct.gamma(symorb_irrep_[i0]).symbol(),
1743	symorb_num_[i1]+1,
1744	ct.gamma(symorb_irrep_[i1]).symbol())
1745	<< endl;
1746	}
1747
1748	} // if (dograd \|\| do_d1_)
1749
1750	for (i=0; i<thr_->nthread(); i++) {
1751	delete e12thread[i];
1752	}
1753	delete[] e12thread;
1754
1755	// quit here if only the energy is needed
1756	if (!dograd) {
1757	delete[] tbints_; tbints_ = 0;
1758	if (do_d1_) delete[] Laj;
1759	delete[] scf_vector;
1760	delete[] scf_vector_dat;
1761	delete[] evals;
1762	tim_exit("mp2-mem");
1763	return;
1764	}
1765
1766	// Accumulate intermediate gradients on node 0
1767	sum_gradients(msg_, ginter, natom, 3);
1768
1769	// Add intermediate gradients to the gradient on node 0
1770	if (me==0)
1771	accum_gradients(gradient, ginter, natom, 3);
1772
1773	// Print out contribution to the gradient from non-sep. 2PDM
1774	if (debug_) {
1775	print_natom_3(ginter,
1776	"Contribution to MP2 gradient from non-separable 2PDM [au]:");
1777	}
1778
1779	////////////////////////////////////////////////////////
1780	// Add contributions from all nodes to various matrices
1781	////////////////////////////////////////////////////////
1782	tmpint = (nvir > nocc ? nvir:nocc);
1783	double tmpmat = new double[tmpinttmpint];
1784	msg_->sum(Pkj,nocc*(nocc+1)/2,tmpmat); // Pkj is now complete
1785	msg_->sum(Pab,nvir*(nvir+1)/2,tmpmat); // Pab is now complete
1786	msg_->sum(Wab,nvir*nvir,tmpmat);
1787	msg_->sum(Wkj,nocc*nocc,tmpmat);
1788	msg_->sum(Waj,nvir*nocc,tmpmat);
1789	if (do_d2_) {
1790	msg_->sum(d2occ_mat,nocc_act*(nocc_act+1)/2,tmpmat);
1791	msg_->sum(d2vir_mat,nvir_act*(nvir_act+1)/2,tmpmat);
1792	}
1793	delete[] tmpmat;
1794
1795	// Compute D2 diagnostic (d2_diag) from matrices d2_occ_mat and d2_vir_mat
1796	if (do_d2_) {
1797	RefSymmSCMatrix D2occ_mat(nocc_act_dim, kit);
1798	RefSymmSCMatrix D2vir_mat(nvir_act_dim,kit);
1799	D2occ_mat->assign(d2occ_mat);
1800	D2vir_mat->assign(d2vir_mat);
1801	d2o = sqrt(D2occ_mat.eigvals().get_element(nocc_act-1));
1802	d2v = sqrt(D2vir_mat.eigvals().get_element(nvir_act-1));
1803	d2_diag = (d2o > d2v ? d2o:d2v);
1804	ExEnv::out0() << endl
1805	<< indent << scprintf("D2(MP1) = %12.8f", d2_diag) << endl << endl;
1806	delete[] d2occ_mat;
1807	delete[] d2vir_mat;
1808	}
1809
1810	// Finish computation of Wab
1811	tim_enter("Pab and Wab");
1812	pab_ptr = Pab;
1813	for (a=0; a<nvir_act; a++) { // active-active part of Wab
1814	wba_ptr = &Wab[a];
1815	wab_ptr = &Wab[a*nvir];
1816	for (b=0; b<=a; b++) {
1817	if (a==b) {
1818	wab_ptr -= evals[nocc+a]*pab_ptr;
1819	}
1820	else {
1821	wab_ptr -= evals[nocc+a]*pab_ptr;
1822	wba_ptr -= evals[nocc+b]*pab_ptr;
1823	}
1824	pab_ptr++;
1825	wab_ptr++;
1826	wba_ptr += nvir;
1827	} // exit b loop
1828	} // exit a loop
1829	for (a=0; a<nfzv; a++) { // active-frozen part of Wab
1830	wba_ptr = &Wab[nvir_act+a];
1831	wab_ptr = &Wab[(nvir_act+a)*nvir];
1832	pab_ptr = &Pab[(nvir_act+a)*(nvir_act+a+1)/2];
1833	for (b=0; b<nvir_act; b++) {
1834	wab_ptr -= evals[nocc+b]*pab_ptr;
1835	wba_ptr -= evals[nocc+b]*pab_ptr;
1836	pab_ptr++;
1837	wab_ptr++;
1838	wba_ptr += nvir;
1839	} // exit b loop
1840	} // exit a loop
1841	// Wab is now complete
1842	tim_exit("Pab and Wab");
1843	RefSCMatrix Wab_matrix(nvir_dim, nvir_dim, kit);
1844	Wab_matrix->assign(Wab); // Put elements of Wab into Wab_matrix
1845	free(Wab);
1846
1847	// Update Wkj with contribution from Pkj
1848	tim_enter("Pkj and Wkj");
1849	pkj_ptr = Pkj;
1850	for (k=0; k<nocc; k++) {
1851	wjk_ptr = &Wkj[k];
1852	wkj_ptr = &Wkj[k*nocc];
1853	for (j=0; j<=k; j++) {
1854	if (k<nfzc && j<nfzc) { // don't want both j and k frozen
1855	wkj_ptr++;
1856	wjk_ptr += nocc;
1857	pkj_ptr++;
1858	continue;
1859	}
1860	if (j==k) {
1861	wkj_ptr++ -= evals[k]*pkj_ptr++;
1862	}
1863	else if (j<nfzc) {
1864	wkj_ptr++ -= evals[k]*pkj_ptr;
1865	wjk_ptr -= evals[k]*pkj_ptr;
1866	pkj_ptr++;
1867	}
1868	else {
1869	wkj_ptr++ -= evals[k]*pkj_ptr;
1870	wjk_ptr -= evals[j]*pkj_ptr;
1871	pkj_ptr++;
1872	}
1873	wjk_ptr += nocc;
1874	} // exit j loop
1875	} // exit k loop
1876	tim_exit("Pkj and Wkj");
1877
1878	/////////////////////////////////
1879	// Finish the computation of Laj
1880	/////////////////////////////////
1881
1882	tim_enter("Laj");
1883
1884	RefSCMatrix Cv(nbasis_dim, nvir_dim, kit); // virtual block of scf_vector
1885	RefSCMatrix Co(nbasis_dim, nocc_dim, kit); // occupied block of scf_vector
1886	for (p=0; p<nbasis; p++) {
1887	c_pq = scf_vector[p];
1888	for (q=0; q<noso; q++) {
1889	if (q<nocc) Co->set_element(p, q, *c_pq++);
1890	else Cv->set_element(p, q-nocc, *c_pq++);
1891	}
1892	}
1893
1894	// Compute the density-like Dmat_matrix
1895	// (CvPab_matrixCv.t() + CoPkj_matrixCo.t())
1896	RefSymmSCMatrix Pab_matrix(nvir_dim,kit);
1897	RefSymmSCMatrix Pkj_matrix(nocc_dim,kit);
1898	RefSymmSCMatrix Dmat_matrix(nbasis_dim,kit);
1899	Pab_matrix->assign(Pab); // fill in elements of Pab_matrix from Pab
1900	free(Pab);
1901	Pkj_matrix->assign(Pkj); // fill in elements of Pkj_matrix from Pkj
1902	free(Pkj);
1903	Dmat_matrix.assign(0.0);
1904	Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
1905	Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
1906	// We now have the density-like matrix Dmat_matrix
1907
1908	// Compute the G matrix
1909
1910	RefSymmSCMatrix Gmat(nbasis_dim,kit);
1911	init_cs_gmat();
1912	tim_enter("make_gmat for Laj");
1913	make_cs_gmat_new(Gmat, Dmat_matrix);
1914	if (debug_ > 1) {
1915	Dmat_matrix.print("Dmat");
1916	Gmat.print("Gmat");
1917	}
1918	tim_exit("make_gmat for Laj");
1919
1920	// Finish computation of Laj
1921	RefSCMatrix Laj_matrix(nocc_dim,nvir_dim,kit); // elements are ordered as j*nvir+a
1922	Laj_matrix->assign(Laj);
1923	if (debug_ > 1) Laj_matrix->print("Laj (first bit)");
1924	Laj_matrix = Laj_matrix - 2Co.t()Gmat*Cv;
1925	if (debug_ > 1) Laj_matrix->print("Laj (all of it)");
1926	Laj_matrix->convert(Laj); // Put new Laj_matrix elements into Laj
1927
1928	tim_exit("Laj");
1929
1930	//////////////////////////////////////
1931	// Computation of Laj is now complete
1932	//////////////////////////////////////
1933
1934	////////////////////////////
1935	// Solve the CPHF equations
1936	////////////////////////////
1937	RefSCMatrix Paj_matrix(nvir_dim, nocc_dim, kit);
1938	tim_enter("cphf");
1939	cs_cphf(scf_vector, Laj, evals, Paj_matrix);
1940	tim_exit("cphf");
1941
1942	free(Laj);
1943
1944	// Finish computation of Waj
1945	for (a=0; a<nvir; a++) {
1946	waj_ptr = &Waj[a];
1947	for (j=0; j<nocc; j++) {
1948	waj_ptr -= evals[j]Paj_matrix->get_element(a,j);
1949	waj_ptr += nvir;
1950	}
1951	}
1952	// Waj is now complete
1953	RefSCMatrix Waj_matrix(nocc_dim, nvir_dim, kit);
1954	Waj_matrix->assign(Waj); // Put elements of Waj into Waj_matrix
1955	// NB. Waj_matrix elements are ordered as j*nvir+a
1956	free(Waj);
1957
1958
1959	// Finish computation of Wkj
1960	tim_enter("Pkj and Wkj");
1961	// Compute Dmat_matrix =
1962	// CoPkj_matrixCo.t() + CoPaj_matrix.t()Cv.t()
1963	// + CvPaj_matrixCo.t() + CvPab_matrixCv.t();
1964	Dmat_matrix.assign(0.0);
1965	Dmat_matrix.accumulate_symmetric_sum(CvPaj_matrixCo.t());
1966	Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
1967	Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
1968	tim_enter("make_gmat for Wkj");
1969	make_cs_gmat_new(Gmat, Dmat_matrix);
1970	tim_exit("make_gmat for Wkj");
1971	done_cs_gmat();
1972	for (i=0; i<thr_->nthread(); i++) tbints_[i] = 0;
1973	delete[] tbints_; tbints_ = 0;
1974	RefSCMatrix Wkj_matrix(nocc_dim, nocc_dim, kit);
1975	Wkj_matrix->assign(Wkj);
1976	Wkj_matrix = Wkj_matrix - 2Co.t()Gmat*Co;
1977	free(Wkj);
1978	// Wkj is now complete - not as Wkj but as Wkj_matrix
1979	tim_exit("Pkj and Wkj");
1980
1981	////////////////////////////////////////////////////////////////
1982	// We now have the matrices Pkj_matrix, Paj_matrix, Pab_matrix,
1983	// Wkj_matrix, Waj_matrix, Wab_matrix and can compute the
1984	// remaining contributions to the gradient
1985	///////////////////////////////////////////////////////////////
1986
1987	// Compute the second order correction to
1988	// the density matrix and energy weighted
1989	// density matrix in the AO basis
1990	RefSCMatrix P2AO_matrix(nbasis_dim, nbasis_dim, kit);
1991	RefSCMatrix P2MO_matrix(noso_dim, noso_dim, kit);
1992	RefSCMatrix W2AO_matrix(nbasis_dim, nbasis_dim, kit);
1993	RefSCMatrix W2MO_matrix(noso_dim, noso_dim, kit);
1994	RefSCMatrix SCF_matrix(nbasis_dim, noso_dim, kit);
1995	for (i=0; i<nocc; i++) {
1996	for (j=0; j<nocc; j++) {
1997	P2MO_matrix->set_element(i,j,Pkj_matrix->get_element(i,j));
1998	W2MO_matrix->set_element(i,j,Wkj_matrix->get_element(i,j));
1999	}
2000	for (j=nocc; j<noso; j++) {
2001	P2MO_matrix->set_element(i,j,Paj_matrix->get_element(j-nocc,i));
2002	W2MO_matrix->set_element(i,j,Waj_matrix->get_element(i,j-nocc));
2003	}
2004	}
2005	for (i=nocc; i<noso; i++) {
2006	for (j=0; j<nocc; j++) {
2007	P2MO_matrix->set_element(i,j,Paj_matrix->get_element(i-nocc,j));
2008	W2MO_matrix->set_element(i,j,Waj_matrix->get_element(j,i-nocc));
2009	}
2010	for (j=nocc; j<noso; j++) {
2011	P2MO_matrix->set_element(i,j,Pab_matrix->get_element(i-nocc,j-nocc));
2012	W2MO_matrix->set_element(i,j,Wab_matrix->get_element(i-nocc,j-nocc));
2013	}
2014	}
2015	for (i=0; i<nbasis; i++) {
2016	for (j=0; j<nocc; j++) {
2017	SCF_matrix->set_element(i,j,Co->get_element(i,j));
2018	}
2019	for (j=nocc; j<noso; j++) {
2020	SCF_matrix->set_element(i,j,Cv->get_element(i,j-nocc));
2021	}
2022	}
2023	P2AO_matrix = SCF_matrix * P2MO_matrix * SCF_matrix.t();
2024	W2AO_matrix = SCF_matrix * W2MO_matrix * SCF_matrix.t();
2025	// P2AO_matrix = Co(Pkj_matrixCo.t() + Paj_matrix.t()*Cv.t()) +
2026	// Cv(Paj_matrixCo.t() + Pab_matrix*Cv.t());
2027	// W2AO_matrix = Co(Wkj_matrixCo.t() + Waj_matrix*Cv.t()) +
2028	// Cv(Waj_matrix.t()Co.t() + Wab_matrix*Cv.t());
2029
2030	if (debug_ > 1) {
2031	SCF_matrix.print("SCF_matrix");
2032	P2MO_matrix.print("P2MO_matrix");
2033	W2MO_matrix.print("W2MO_matrix");
2034	P2AO_matrix.print("P2AO_matrix");
2035	W2AO_matrix.print("W2AO_matrix");
2036	}
2037
2038	// Convert P2AO_matrix and W2AO_matrix to double*
2039	P2AO = new double[nbasis*nbasis];
2040	W2AO = new double[nbasis*nbasis];
2041	P2AO_matrix->convert(P2AO);
2042	W2AO_matrix->convert(W2AO);
2043
2044	// Compute the HF density matrix and
2045	// energy weighted density matrix
2046	PHF = new double[nbasis*nbasis];
2047	WHF = new double[nbasis*nbasis];
2048	phf_ptr = PHF;
2049	whf_ptr = WHF;
2050	for (p=0; p<nbasis; p++) {
2051	for (q=0; q<nbasis; q++) {
2052	*phf_ptr++ = 0.0;
2053	*whf_ptr++ = 0.0;
2054	}
2055	}
2056	phf_ptr = PHF;
2057	whf_ptr = WHF;
2058	for (p=0; p<nbasis; p++) {
2059	for (q=0; q<nbasis; q++) {
2060	c_pi = scf_vector[p];
2061	c_qi = scf_vector[q];
2062	for (i=0; i<nocc; i++) {
2063	tmpval = 2* c_pi++ *c_qi++;
2064	*phf_ptr += tmpval;
2065	whf_ptr -= evals[i] tmpval;
2066	} // exit i loop
2067	phf_ptr++;
2068	whf_ptr++;
2069	} // exit q loop
2070	} // exit p loop
2071
2072	// Compute the MP2 density and energy weighted density
2073
2074	// PMP2 = PHF + P2AO; WMP2 = WHF + W2AO
2075	PMP2 = new double[nbasis*nbasis];
2076	WMP2 = new double[nbasis*nbasis];
2077	// Initialize PMP2 and WMP2
2078	pmp2_ptr = PMP2;
2079	wmp2_ptr = WMP2;
2080	for (p=0; p<nbasis; p++) {
2081	for (q=0; q<nbasis; q++) {
2082	*pmp2_ptr++ = 0.0;
2083	*wmp2_ptr++ = 0.0;
2084	}
2085	}
2086	pmp2_ptr = PMP2;
2087	wmp2_ptr = WMP2;
2088	p2ao_ptr = P2AO;
2089	w2ao_ptr = W2AO;
2090	phf_ptr = PHF;
2091	whf_ptr = WHF;
2092	for (p=0; p<nbasis; p++) {
2093	for (q=0; q<nbasis; q++) {
2094	pmp2_ptr++ = phf_ptr++ + *p2ao_ptr++;
2095	wmp2_ptr++ = whf_ptr++ + *w2ao_ptr++;
2096	}
2097	}
2098	delete[] W2AO;
2099
2100	if (debug_ > 1) {
2101	RefSCMatrix tmpmat(ao_dimension(), ao_dimension(), kit);
2102	tmpmat->assign(PMP2);
2103	tmpmat.print("PMP2");
2104	tmpmat->assign(P2AO);
2105	tmpmat.print("P2AO");
2106	tmpmat->assign(PHF);
2107	tmpmat.print("PHF");
2108	tmpmat->assign(WMP2);
2109	tmpmat.print("WMP2");
2110	}
2111
2112	////////////////////////////////////////////////
2113	// Compute the contribution to the MP2 gradient
2114	// from the separable part of the 2PDM
2115	////////////////////////////////////////////////
2116
2117	zero_gradients(ginter, natom, 3);
2118	zero_gradients(hf_ginter, natom, 3);
2119	tim_enter("sep 2PDM contrib.");
2120
2121	CSGradS2PDM** s2pdmthread = new CSGradS2PDM*[thr_->nthread()];
2122	for (i=0; i<thr_->nthread(); i++) {
2123	s2pdmthread[i] = new CSGradS2PDM(i, thr_->nthread(), me, nproc,
2124	lock, basis(), tbintder_[i],
2125	PHF, P2AO, tol, debug_, dynamic_);
2126	thr_->add_thread(i,s2pdmthread[i]);
2127	# if SINGLE_THREAD_S2PDM
2128	s2pdmthread[i]->run();
2129	# endif
2130	}
2131	# if !SINGLE_THREAD_S2PDM
2132	thr_->start_threads();
2133	thr_->wait_threads();
2134	# endif
2135	for (i=0; i<thr_->nthread(); i++) {
2136	s2pdmthread[i]->accum_mp2_contrib(ginter);
2137	s2pdmthread[i]->accum_hf_contrib(hf_ginter);
2138	delete s2pdmthread[i];
2139	}
2140	sum_gradients(msg_, ginter, molecule()->natom(), 3);
2141	sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
2142	delete[] s2pdmthread;
2143
2144	tim_exit("sep 2PDM contrib.");
2145	delete[] P2AO;
2146
2147	// The separable 2PDM contribution to the gradient has now been
2148	// accumulated in ginter on node 0; add it to the total gradients
2149	if (me == 0) {
2150	accum_gradients(gradient, ginter, natom, 3);
2151	accum_gradients(hf_gradient, hf_ginter, natom, 3);
2152	}
2153	// Print out the contribution to the gradient from sep. 2PDM
2154	if (debug_) {
2155	print_natom_3(hf_ginter,
2156	"Contribution from separable 2PDM to HF gradient [au]:");
2157	print_natom_3(ginter,
2158	"Contribution from separable 2PDM to MP2 gradient [au]:");
2159	}
2160
2161	// Done with two-electron integrals
2162	tbint_ = 0;
2163	if (dograd \|\| do_d1_) {
2164	delete[] tbintder_;
2165	tbintder_ = 0;
2166	}
2167
2168	/////////////////////////////////////////////////////////////
2169	// Compute the one-electron contribution to the MP2 gradient
2170	/////////////////////////////////////////////////////////////
2171
2172	zero_gradients(ginter, natom, 3);
2173	zero_gradients(hf_ginter, natom, 3);
2174	tim_enter("hcore contrib.");
2175	hcore_cs_grad(PHF, PMP2, hf_ginter, ginter);
2176	tim_exit("hcore contrib.");
2177	delete[] PHF;
2178	delete[] PMP2;
2179	// The hcore contribution to the gradient has now been accumulated
2180	// in ginter on node 0; add it to the total gradients
2181	if (me == 0) {
2182	accum_gradients(gradient, ginter, natom, 3);
2183	accum_gradients(hf_gradient, hf_ginter, natom, 3);
2184	}
2185	// Print out the contribution to the gradient from hcore
2186	if (debug_) {
2187	print_natom_3(hf_ginter, "Contribution to HF gradient from hcore [au]:");
2188	print_natom_3(ginter, "Contribution to MP2 gradient from hcore [au]:");
2189	}
2190
2191	zero_gradients(ginter, natom, 3);
2192	zero_gradients(hf_ginter, natom, 3);
2193	tim_enter("overlap contrib.");
2194	overlap_cs_grad(WHF, WMP2, hf_ginter, ginter);
2195	delete[] WHF;
2196	tim_exit("overlap contrib.");
2197	delete[] WMP2;
2198	// The overlap contribution to the gradient has now been accumulated
2199	// in ginter on node 0; add it to the total gradients
2200	if (me == 0) {
2201	accum_gradients(gradient, ginter, natom, 3);
2202	accum_gradients(hf_gradient, hf_ginter, natom, 3);
2203	}
2204
2205	// Print out the overlap contribution to the gradient
2206	if (debug_) {
2207	print_natom_3(hf_ginter, "Overlap contribution to HF gradient [au]:");
2208	print_natom_3(ginter,"Overlap contribution to MP2 gradient [au]:");
2209	}
2210
2211	////////////////////////////////////////////////////////
2212	// Compute the nuclear contribution to the MP2 gradient
2213	////////////////////////////////////////////////////////
2214
2215	if (me == 0) {
2216	nuclear_repulsion_energy_gradient(ginter);
2217	accum_gradients(gradient, ginter, natom, 3);
2218	accum_gradients(hf_gradient, ginter, natom, 3);
2219
2220	}
2221
2222	// Print out the nuclear contribution to the gradient
2223	if (debug_)
2224	print_natom_3(ginter,"Nuclear contribution to MP2 gradient [au]:");
2225
2226
2227	////////////////////////////////////////////////////////
2228	// The computation of the MP2 gradient is now complete;
2229	// print out the gradient
2230	////////////////////////////////////////////////////////
2231	if (debug_) {
2232	ExEnv::out0() << "Obtaining HF gradient" << endl;
2233	print_natom_3(ref()->gradient(),"HF gradient from HF");
2234	print_natom_3(hf_gradient,"Total HF gradient from MP2 [au]:");
2235	}
2236	print_natom_3(gradient,"Total MP2 gradient [au]:");
2237
2238	msg_->bcast(gradient_dat, natom*3);
2239	RefSCVector gradientvec = matrixkit()->vector(moldim());
2240	gradientvec->assign(gradient_dat);
2241	set_gradient(gradientvec);
2242
2243	msg_->bcast(hf_gradient_dat, natom*3);
2244	hf_gradient_ = matrixkit()->vector(moldim());
2245	hf_gradient_->assign(hf_gradient_dat);
2246
2247	delete[] gradient;
2248	delete[] gradient_dat;
2249
2250	delete[] hf_gradient;
2251	delete[] hf_gradient_dat;
2252
2253	for (i=0; i<natom; i++) {
2254	delete[] ginter[i];
2255	delete[] hf_ginter[i];
2256	}
2257	delete[] ginter;
2258	delete[] hf_ginter;
2259
2260	delete[] scf_vector;
2261	delete[] scf_vector_dat;
2262	delete[] evals;
2263
2264	tim_exit("mp2-mem");
2265	}
2266
2267	///////////////////////////////////////////////////////////
2268	// Compute the contribution to the MP2 gradient from hcore
2269	///////////////////////////////////////////////////////////
2270	void
2271	MBPT2::hcore_cs_grad(double PHF, double PMP2,
2272	double hf_ginter, double ginter)
2273	{
2274
2275	int i, j, k, l, m;
2276	int jj, kk;
2277	int jsize, ksize;
2278	int j_offset, k_offset;
2279	int jk_index;
2280	int index;
2281	int nshell;
2282	int nbasis;
2283	int nproc = msg_->n();
2284	int me = msg_->me();
2285
2286	const double *oneebuf; // 1-electron buffer
2287	double tmpval1, tmpval2;
2288	double gxyz[3];
2289	double hf_gxyz[3];
2290
2291	// Initialize 1e object
2292	Ref<OneBodyDerivInt> obintder_ = integral()->hcore_deriv();
2293	oneebuf = obintder_->buffer();
2294
2295	nshell = basis()->nshell();
2296	nbasis = basis()->nbasis();
2297
2298	///////////////////////////////////////////////////////////////////////////////
2299	// Compute the kinetic and nuclear-electron energy contribution to the gradient
2300	///////////////////////////////////////////////////////////////////////////////
2301
2302	jk_index = 0;
2303
2304	for (i=0; i<molecule()->natom(); i++) {
2305	for (j=0; j<nshell; j++) {
2306	jsize = basis()->shell(j).nfunction();
2307	j_offset = basis()->shell_to_function(j);
2308
2309	for (k=0; k<=j; k++) {
2310	ksize = basis()->shell(k).nfunction();
2311	k_offset = basis()->shell_to_function(k);
2312
2313	if (jk_index++%nproc == me) {
2314	obintder_->compute_shell(j,k,i);
2315
2316	for (l=0; l<3; l++) { gxyz[l] = 0.0; hf_gxyz[l] = 0.0; }
2317
2318	index = 0;
2319
2320	for (jj=0; jj<jsize; jj++) {
2321	for (kk=0; kk<ksize; kk++) {
2322	tmpval1 = PMP2[(j_offset + jj)*nbasis + k_offset + kk];
2323	tmpval2 = PHF[(j_offset + jj)*nbasis + k_offset + kk];
2324	for (m=0; m<3; m++) {
2325	gxyz[m] += oneebuf[index] * tmpval1;
2326	hf_gxyz[m] += oneebuf[index] * tmpval2;
2327	index++;
2328	} // exit m loop
2329	} // exit kk loop
2330	} // exit jj loop
2331
2332	if (j != k) {
2333	// off-diagonal blocks
2334	for (l=0; l<3; l++) {
2335	gxyz[l] *= 2.0;
2336	hf_gxyz[l] *= 2.0;
2337	}
2338	}
2339
2340	for (l=0; l<3; l++) {
2341	ginter[i][l] += gxyz[l];
2342	hf_ginter[i][l] += hf_gxyz[l];
2343	}
2344
2345	} // exit "if"
2346	} // exit k loop
2347	} // exit j loop
2348	} // exit i loop
2349
2350	/* Accumulate the nodes' intermediate gradients on node 0 */
2351	sum_gradients(msg_, ginter, molecule()->natom(), 3);
2352	sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
2353	}
2354
2355
2356	////////////////////////////////////////////////////
2357	// Compute the overlap contribution to the gradient
2358	////////////////////////////////////////////////////
2359	void
2360	MBPT2::overlap_cs_grad(double WHF, double WMP2,
2361	double hf_ginter, double ginter)
2362	{
2363	int i, j, k, l, m;
2364	int jj, kk;
2365	int jj_index, kk_index;
2366	int jsize, ksize;
2367	int j_offset, k_offset;
2368	int jk_index;
2369	int index;
2370	int nshell;
2371	int nbasis;
2372
2373	const double *oneebuf; // 1-electron buffer
2374	double tmpval1, tmpval2;
2375	double hf_tmpval1, hf_tmpval2;
2376	double gxyz[3];
2377	double hf_gxyz[3];
2378
2379	// Initialize 1e object
2380	Ref<OneBodyDerivInt> obintder_ = integral()->overlap_deriv();
2381	oneebuf = obintder_->buffer();
2382
2383	nshell = basis()->nshell();
2384	nbasis = basis()->nbasis();
2385	int nproc = msg_->n();
2386	int me = msg_->me();
2387
2388	for (i=0; i<molecule()->natom(); i++) {
2389	jk_index = 0;
2390
2391	for (j=0; j<nshell; j++) {
2392	j_offset = basis()->shell_to_function(j);
2393	jsize = basis()->shell(j).nfunction();
2394
2395	for (k=0; k<=j; k++) {
2396	k_offset = basis()->shell_to_function(k);
2397	ksize = basis()->shell(k).nfunction();
2398
2399	if (jk_index++%nproc == me) {
2400	obintder_->compute_shell(j,k,i);
2401
2402	for (l=0; l<3; l++) {
2403	hf_gxyz[l] = 0.0;
2404	gxyz[l] = 0.0;
2405	}
2406	index = 0;
2407
2408	for (jj=0; jj<jsize; jj++) {
2409	jj_index = j_offset + jj;
2410	for (kk=0; kk<ksize; kk++) {
2411	kk_index = k_offset + kk;
2412	if (kk_index > jj_index) {
2413	index += 3; // increment index since m-loop will be skipped
2414	break; // skip to next jj value
2415	}
2416	// NB. WMP2 is not a symmetrix matrix
2417	tmpval1 = WMP2[jj_index*nbasis + kk_index];
2418	tmpval2 = WMP2[kk_index*nbasis + jj_index];
2419	hf_tmpval1 = WHF[jj_index*nbasis + kk_index];
2420	hf_tmpval2 = WHF[kk_index*nbasis + jj_index];
2421
2422	for (m=0; m<3; m++) {
2423	if (jj_index != kk_index) {
2424	gxyz[m] += oneebuf[index] * (tmpval1 + tmpval2);
2425	hf_gxyz[m] += oneebuf[index] * (hf_tmpval1 + hf_tmpval2);
2426	}
2427	else {
2428	gxyz[m] += oneebuf[index] * tmpval1;
2429	hf_gxyz[m] += oneebuf[index] * hf_tmpval1;
2430	}
2431	index++;
2432	} // exit m loop
2433	} // exit kk loop
2434	} // exit jj loop
2435
2436	for (l=0; l<3; l++) {
2437	ginter[i][l] += gxyz[l];
2438	hf_ginter[i][l] += hf_gxyz[l];
2439	}
2440	} // exit "if"
2441	} // exit k loop
2442	} // exit j loop
2443	} // exit i loop
2444
2445	/* Accumulate the nodes' intermediate gradients on node 0 */
2446	sum_gradients(msg_, ginter, molecule()->natom(), 3);
2447	sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
2448	}
2449
2450	static void
2451	sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2)
2452	{
2453	int i;
2454
2455	if (msg->n() == 1) return;
2456
2457	for (i=0; i<n1; i++) {
2458	msg->sum(f[i],n2);
2459	}
2460	}
2461
2462	static void
2463	zero_gradients(double **f, int n1, int n2)
2464	{
2465	for (int i=0; i<n1; i++) {
2466	for (int j=0; j<3; j++) f[i][j] = 0.0;
2467	}
2468	}
2469
2470	static void
2471	accum_gradients(double g, double f, int n1, int n2)
2472	{
2473	for (int i=0; i<n1; i++) {
2474	for (int j=0; j<3; j++) g[i][j] += f[i][j];
2475	}
2476	}
2477
2478	/////////////////////////////////////
2479	// Compute the batchsize
2480	//
2481	// Only arrays allocated before exiting the loop over
2482	// i-batches are included here - only these arrays
2483	// affect the batch size.
2484	/////////////////////////////////////
2485	int
2486	MBPT2::compute_cs_batchsize(size_t mem_static, int nocc_act)
2487	{
2488	size_t mem_dyn; // dynamic memory available
2489	distsize_t maxdyn;
2490	int ni;
2491
2492	///////////////////////////////////////
2493	// the largest memory requirement will
2494	// either occur just before the end of
2495	// the 1. q.b.t. (mem1) or just before
2496	// the end of the i-batch loop (mem2)
2497	///////////////////////////////////////
2498
2499	if (mem_alloc > mem_static) {
2500	mem_dyn = mem_alloc - mem_static;
2501	}
2502	else {
2503	mem_dyn = 0;
2504	}
2505
2506	// First determine if calculation is possible at all (i.e., if ni=1 possible)
2507
2508	ni = 1;
2509	maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
2510
2511	if (maxdyn > mem_dyn) {
2512	return 0;
2513	}
2514
2515	ni = 2;
2516	while (ni<=nocc_act) {
2517	maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
2518	if (maxdyn >= mem_dyn) {
2519	ni--;
2520	break;
2521	}
2522	ni++;
2523	}
2524	if (ni > nocc_act) ni = nocc_act;
2525
2526	return ni;
2527	}
2528
2529	/////////////////////////////////////
2530	// Compute required (dynamic) memory
2531	// for a given batch size
2532	//
2533	// Only arrays allocated before exiting the loop over
2534	// i-batches are included here - only these arrays
2535	// affect the batch size.
2536	/////////////////////////////////////
2537	distsize_t
2538	MBPT2::compute_cs_dynamic_memory(int ni, int nocc_act)
2539	{
2540	int index;
2541	distsize_t mem1, mem2, mem3;
2542	distsize_t maxdyn;
2543	distsize_t tmp;
2544	int i, j;
2545	int nij;
2546	int nproc = msg_->n();
2547
2548	///////////////////////////////////////
2549	// the largest memory requirement will
2550	// either occur just before the end of
2551	// the 1. q.b.t. (mem1) or just before
2552	// the end of the i-batch loop (mem2)
2553	///////////////////////////////////////
2554
2555	// compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes
2556	index = 0;
2557	nij = 0;
2558	for (i=0; i<ni; i++) {
2559	for (j=0; j<nocc; j++) {
2560	if (index++ % nproc == 0) nij++;
2561	}
2562	}
2563	mem1 = sizeof(double)((distsize_t)nijnbasis*nbasis
2564	+ nbasis*nvir);
2565	mem2 = sizeof(double)((distsize_t)thr_->nthread()ninbasisnfuncmax*nfuncmax
2566	+ (distsize_t)nijnbasisnbasis
2567	+ ninbasis + nbasisnfuncmax
2568	+ 2nfuncmaxnfuncmaxnfuncmaxnfuncmax);
2569	mem3 = sizeof(double)((distsize_t)ninbasisnfuncmaxnfuncmax
2570	+ (distsize_t)nijnbasisnbasis
2571	+ 2(2 + nbasisnfuncmax));
2572	tmp = (mem2>mem3 ? mem2:mem3);
2573	maxdyn = (mem1>tmp ? mem1:tmp);
2574
2575	return maxdyn;
2576	}
2577
2578	////////////////////////////////////////////////////////////////////////////
2579
2580	// Local Variables:
2581	// mode: c++
2582	// c-file-style: "CLJ-CONDENSED"
2583	// End:

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/csgrad.cc@ aae63a

Download in other formats: