Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/csgrad.cc

Visit:

Candidate_v1.6.1

Last change on this file was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 82.1 KB

Rev	Line
[0b990d]	1	//
	2	// csgrad.cc
	3	//
	4	// Copyright (C) 1996 Limit Point Systems, Inc.
	5	//
	6	// Author: Ida Nielsen <ida@kemi.aau.dk>
	7	// Maintainer: LPS
	8	//
	9	// This file is part of the SC Toolkit.
	10	//
	11	// The SC Toolkit is free software; you can redistribute it and/or modify
	12	// it under the terms of the GNU Library General Public License as published by
	13	// the Free Software Foundation; either version 2, or (at your option)
	14	// any later version.
	15	//
	16	// The SC Toolkit is distributed in the hope that it will be useful,
	17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	// GNU Library General Public License for more details.
	20	//
	21	// You should have received a copy of the GNU Library General Public License
	22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
	23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	24	//
	25	// The U.S. Government is granted a limited license as per AL 91-7.
	26	//
	27
	28	#include <stdlib.h>
	29	#include <math.h>
	30	#include <limits.h>
	31
	32	#include <util/misc/formio.h>
	33	#include <util/misc/timer.h>
	34	#include <util/group/memory.h>
	35	#include <util/group/message.h>
	36	#include <util/class/class.h>
	37	#include <util/state/state.h>
	38	#include <math/scmat/matrix.h>
	39	#include <math/scmat/blocked.h>
	40	#include <chemistry/molecule/molecule.h>
	41	#include <chemistry/qc/basis/integral.h>
	42	#include <chemistry/qc/mbpt/bzerofast.h>
	43	#include <chemistry/qc/mbpt/mbpt.h>
	44	#include <chemistry/qc/mbpt/util.h>
	45	#include <chemistry/qc/mbpt/csgrade12.h>
	46	#include <chemistry/qc/mbpt/csgrad34qb.h>
	47	#include <chemistry/qc/mbpt/csgrads2pdm.h>
	48
	49	using namespace std;
	50	using namespace sc;
	51
	52	#define SINGLE_THREAD_E12 0
	53	#define SINGLE_THREAD_QBT34 0
	54	#define SINGLE_THREAD_S2PDM 0
	55
	56	#define PRINT2Q 0
	57	#define PRINT3Q 0
	58	#define PRINT4Q 0
	59	#if PRINT_BIGGEST_INTS
	60	BiggestContribs biggest_ints_1(4,40);
	61	#endif
	62
	63	#define WRITE_DOUBLES 0
	64
	65	static void sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2);
	66	static void zero_gradients(double **f, int n1, int n2);
	67	static void accum_gradients(double g, double f, int n1, int n2);
	68
	69	#define PRINT1Q 0
	70
	71	#if PRINT_CONTRIB
	72	static void
	73	sw(int&i,int&j)
	74	{
	75	int tmp = i;
	76	i = j;
	77	j = tmp;
	78	}
	79
	80	static void
	81	print_contrib(double tmpval, int num, int onum,
	82	int P,int Q,int R,int S, int p,int q,int r,int s)
	83	{
	84
	85	printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
	86	num, P, Q, R, S, p, q, r, s, tmpval);
	87	printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
	88	onum, P, Q, R, S, p, q, r, s, -tmpval);
	89
	90	if (p < q) {
	91	sw(p,q); sw(P,Q);
	92	}
	93	if (r < s) {
	94	sw(r,s); sw(R,S);
	95	}
	96	if (p < r \|\| (p == r && q < s)) {
	97	sw(P,R); sw(p,r);
	98	sw(Q,S); sw(q,s);
	99	}
	100
	101	printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
	102	num, P, Q, R, S, p, q, r, s, tmpval);
	103	printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
	104	onum, P, Q, R, S, p, q, r, s, -tmpval);
	105	}
	106	#endif
	107
	108	void
	109	MBPT2::compute_cs_grad()
	110	{
	111
	112	// New version of MP2 gradient program which uses the full
	113	// permutational symmetry of the two-electron integral derivatives
	114
	115	Ref<SCMatrixKit> kit = basis()->matrixkit();
	116
	117	int do_d2_ = 1; // if true, compute d2 diagnostic
	118
	119	int nij; // number of i,j pairs on a node (for e.g., mo_int)
	120	double *mo_int; // MO integrals of type (ov\|ov)
	121	// (and these integrals divided by
	122	// orbital energy denominators)
	123	double *integral_iqjs; // half-transformed integrals
	124
	125	int nocc_act, nvir_act;
	126	int i, j, k;
	127	int ii, bb;
	128	int x, y;
	129	int a, b, c;
	130	int nshell;
	131	int offset;
	132	int ik_offset;
	133	int i_offset;
	134	int npass, pass;
	135	int tmpint;
	136	int np, nq, nr, ns;
	137	int P, Q, R, S;
	138	int p, q, r, s;
	139	int bf1, bf2, bf3, bf4;
	140	int index;
	141	int me;
	142	int nproc;
	143	int rest;
	144	int p_offset, q_offset, r_offset, s_offset;
	145
	146	int aoint_computed = 0;
	147	int aointder_computed = 0;
	148	int xyz;
	149	int natom = molecule()->natom(); // the number of atoms
	150	int int_index;
	151	size_t mem_static; // static memory in bytes
	152	int ij_proc; // the processor which has ij pair
	153	int ij_index; // of the ij pairs on a proc, this ij pair is number ij_index
	154	// (i.e., ij_index < nij)
	155	int ik_proc; // the processor which has ik pair
	156	int ik_index;
	157	int jloop, kloop;
	158
	159	int ni;
	160
	161	double *evals; // scf eigenvalues
	162	double iajb_ptr, ibja_ptr, iakb_ptr, ibka_ptr;
	163	double iajc_ptr, ibjc_ptr, icjb_ptr, icja_ptr;
	164	double ijkb_ptr, ibkj_ptr;
	165	double pqrs;
	166	double *c_sa, c_rj;
	167	double c_pi, c_qi, *c_sj;
	168	double c_qx, c_qa, c_sb, c_pa, c_pq, c_sy;
	169	double delta_ijab, delta_ijbc, delta_ijac;
	170	double ecorr_mp2 = 0.0;
	171	double escf;
	172	double emp2=0.0;
	173	int tol; // log2 of the erep tolerance
	174	// (erep < 2^tol => discard)
	175	double Wkj=0,Wab=0,*Waj=0;// occ-occ, vir-vir and vir-occ parts of
	176	// second order correction to MP2
	177	// energy weighted density matrix
	178	double Pkj=0,Pab=0; // occ-occ and vir-vir parts of second order
	179	// correction to MP2 density matrix
	180	double d2occ_mat, d2vir_mat; // matrices for computation of D2 diagnostic
	181	double *Laj=0; // MP2 Lagrangian
	182	double *Lpi; // contrib to MP2 Lagrangian partially in AO basis
	183	double pkj_ptr=0, pab_ptr;
	184	double *d2occ_mat_ptr;
	185	double *d2vir_mat_ptr;
	186	double wkj_ptr, wjk_ptr, wab_ptr, wba_ptr, *waj_ptr=0;
	187	double laj_ptr, lpi_ptr, *lqi_ptr;
	188	double gamma_iajs, gamma_iajs_tmp;
	189	// partially back-transformed non-sep 2PDM's
	190	double *gamma_iqjs_tmp;
	191	double *gamma_iajs_ptr;
	192	double *gamma_iqjs_ptr;
	193	double *gammabuf; // buffer used for sending elements of gamma_iqjs
	194	double *mo_intbuf; // buffer used for sending mo integrals
	195	double tmpval, tmpval1;
	196	double P2AO, W2AO;
	197	double p2ao_ptr, w2ao_ptr;
	198	double PHF, WHF;
	199	double phf_ptr, whf_ptr;
	200	double PMP2, WMP2;
	201	double pmp2_ptr, wmp2_ptr;
	202
	203	double *ixjs_tmp; // three-quarter transformed two-el integrals
	204	double *integral_ixjs; // all three-quarter transformed two-el integrals
	205	double *integral_iajy; // mo integrals (y = any MO)
	206	double *integral_ikja; // mo integrals
	207	double *integral_iqjs_ptr;
	208	double *iajy_ptr;
	209	double *ixjs_ptr;
	210	double *ikja_ptr;
	211	double iajs_ptr, ikjs_ptr;
	212
	213	double *gradient=0, gradient_dat=0; // The MP2 gradient
	214	double *hf_gradient=0, hf_gradient_dat=0; // The HF gradient
	215	double **ginter=0; // Intermediates for the MP2 gradient
	216	double **hf_ginter=0; // Intermediates for the HF gradient
	217	double d2o, d2v, d2_diag;
	218
	219	BiggestContribs biggest_coefs(5,10);
	220	CharacterTable ct = molecule()->point_group()->char_table();
	221
	222	#if PRINT_BIGGEST_INTS
	223	BiggestContribs biggest_ints_2(4,40);
	224	BiggestContribs biggest_ints_2s(4,40);
	225	BiggestContribs biggest_ints_3a(4,40);
	226	BiggestContribs biggest_ints_3(4,40);
	227	#endif
	228
	229	int dograd = gradient_needed();
	230
	231	tim_enter("mp2-mem");
	232
	233	nfuncmax = basis()->max_nfunction_in_shell();
	234
	235	nshell = basis()->nshell();
	236
	237	me = msg_->me();
	238
	239	if (me == 0) {
	240	ExEnv::out0() << endl << indent
	241	<< "Entered memgrp based MP2 routine" << endl;
	242	}
	243
	244	nproc = msg_->n();
	245	if (me == 0)
	246	ExEnv::out0() << indent << scprintf("nproc = %i", nproc) << endl;
	247
	248	tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
	249
	250	nocc = 0;
	251	for (i=0; i<oso_dimension()->n(); i++) {
	252	if (reference_->occupation(i) == 2.0) nocc++;
	253	}
	254
	255	nocc_act = nocc - nfzc;
	256	nvir = noso - nocc;
	257	nvir_act = nvir - nfzv;
	258
	259	// Do a few preliminary tests to make sure the desired calculation
	260	// can be done (and appears to be meaningful!)
	261
	262	if (nocc_act <= 0) {
	263	if (me == 0) {
	264	ExEnv::err0() << "There are no active occupied orbitals; program exiting" << endl;
	265	}
	266	abort();
	267	}
	268
	269	if (nvir_act <= 0) {
	270	if (me == 0) {
	271	ExEnv::err0() << "There are no active virtual orbitals; program exiting" << endl;
	272	}
	273	abort();
	274	}
	275
	276	if (restart_orbital_memgrp_) {
	277	if (!dograd && !do_d1_ && !do_d2_) {
	278	ExEnv::out0() << indent
	279	<< scprintf("Restarting at orbital %d with partial energy %18.14f",
	280	restart_orbital_memgrp_, restart_ecorr_)
	281	<< endl;
	282	ecorr_mp2 = restart_ecorr_;
	283	}
	284	else {
	285	ExEnv::out0() << indent
	286	<< "Restart requested but not possible with gradients, D1, or D2"
	287	<< endl;
	288	restart_ecorr_ = 0.0;
	289	restart_orbital_memgrp_ = 0;
	290	}
	291	}
	292	else {
	293	restart_ecorr_ = 0.0;
	294	}
	295
	296	////////////////////////////////////////////////////////
	297	// Compute batch size ni for mp2 loops;
	298	//
	299	// The following arrays are kept throughout (all of type double):
	300	// scf_vector, gradient, ginter, Pkj, Pab, Wkj, Wab, Waj, Laj
	301	// and memory allocated for these arrays and integral evaluators
	302	// is called mem_static
	303	//
	304	////////////////////////////////////////////////////////
	305	if (me == 0) {
	306	mem_static = nbasis*noso; // scf vector
	307	mem_static += 2nbasisnfuncmax; // iqjs & iqjr
	308	if (dograd) {
	309	mem_static += 9*natom; // gradient & ginter & hf_ginter
	310	mem_static += (nocc*(nocc+1))/2; // Pkj
	311	mem_static += (nvir*(nvir+1))/2; // Pab
	312	mem_static += nocc*nocc; // Wkj
	313	mem_static += nvir*nvir; // Wab
	314	mem_static += 2noccnvir; // Waj & Laj
	315	if (do_d2_) {
	316	mem_static += (nocc_act*(nocc_act+1))/2; // d2occ_mat
	317	mem_static += (nvir_act*(nvir_act+1))/2; // d2vir_mat
	318	}
	319	}
	320	else if (do_d1_) {
	321	mem_static += nocc*nvir; // partial Laj
	322	}
	323	mem_static *= sizeof(double);
	324	int nthreads = thr_->nthread();
	325	mem_static += nthreads * integral()->storage_required_eri(basis()); // integral evaluators
	326	ni = compute_cs_batchsize(mem_static, nocc_act-restart_orbital_memgrp_);
	327	}
	328
	329	if (max_norb_ > 0 && ni > max_norb_) {
	330	ExEnv::out0() << indent
	331	<< "\"max_norb\" set: could have done "
	332	<< ni << " orbitals per pass otherwise."
	333	<< endl;
	334	ni = max_norb_;
	335	}
	336
	337	// Send value of ni and mem_static to other nodes
	338	msg_->bcast(ni);
	339	double dmem_static = mem_static;
	340	msg_->bcast(dmem_static);
	341	mem_static = size_t(dmem_static);
	342
	343	// Compute the storage to be used by the integral routines (required plus optional)
	344	size_t dyn_mem = distsize_to_size(compute_cs_dynamic_memory(ni,nocc_act));
	345	int mem_remaining;
	346	if (mem_alloc <= (dyn_mem + mem_static)) mem_remaining = 0;
	347	else mem_remaining = mem_alloc - dyn_mem - mem_static;
	348	mem_remaining += thr_->nthread() * integral()->storage_required_eri(basis());
	349
	350	ExEnv::out0() << indent
	351	<< "Memory available per node: " << mem_alloc << " Bytes"
	352	<< endl;
	353	ExEnv::out0() << indent
	354	<< "Static memory used per node: " << mem_static << " Bytes"
	355	<< endl;
	356	ExEnv::out0() << indent
	357	<< "Total memory used per node: " << dyn_mem+mem_static << " Bytes"
	358	<< endl;
	359	ExEnv::out0() << indent
	360	<< "Memory required for one pass: "
	361	<< compute_cs_dynamic_memory(nocc_act,nocc_act)+mem_static
	362	<< " Bytes"
	363	<< endl;
	364	ExEnv::out0() << indent
	365	<< "Minimum memory required: "
	366	<< compute_cs_dynamic_memory(1,nocc_act)+mem_static
	367	<< " Bytes"
	368	<< endl;
	369	ExEnv::out0() << indent
	370	<< "Batch size: " << ni
	371	<< endl;
	372
	373	if (ni == 0) {
	374	ExEnv::err0() << "Batch size is 0: more memory or processors are needed"
	375	<< endl;
	376	abort();
	377	}
	378
	379	if (dynamic_) {
	380	ExEnv::out0() << indent << "Using dynamic load balancing." << endl;
	381	}
	382
	383	if (ni == nocc_act-restart_orbital_memgrp_) {
	384	npass = 1;
	385	rest = 0;
	386	}
	387	else {
	388	rest = (nocc_act-restart_orbital_memgrp_)%ni;
	389	npass = (nocc_act-restart_orbital_memgrp_ - rest)/ni + 1;
	390	if (rest == 0) npass--;
	391	}
	392
	393	if (me == 0) {
	394	ExEnv::out0() << indent
	395	<< scprintf(" npass rest nbasis nshell nfuncmax") << endl;
	396	ExEnv::out0() << indent
	397	<< scprintf(" %-4i %-3i %-5i %-4i %-3i",
	398	npass,rest,nbasis,nshell,nfuncmax)
	399	<< endl;
	400	ExEnv::out0() << indent
	401	<< scprintf(" nocc nvir nfzc nfzv") << endl;
	402	ExEnv::out0() << indent
	403	<< scprintf(" %-4i %-4i %-4i %-4i",
	404	nocc,nvir,nfzc,nfzv)
	405	<< endl;
	406	}
	407
	408	int nijmax = 0;
	409	index = 0;
	410	for (i=0; i<ni; i++) {
	411	for (j=0; j<nocc; j++) {
	412	if (index++ % nproc == me) nijmax++;
	413	}
	414	}
	415
	416	////////////////////////////////////////////////
	417	// The scf vector is distributed between nodes;
	418	// put a copy of the scf vector on each node;
	419	////////////////////////////////////////////////
	420
	421	escf = reference_->energy();
	422	hf_energy_ = escf;
	423
	424	RefDiagSCMatrix occ;
	425	RefSCMatrix Scf_Vec;
	426	RefDiagSCMatrix evalmat;
	427	eigen(evalmat, Scf_Vec, occ);
	428
	429	if (debug_ > 1) {
	430	evalmat.print("eigenvalues");
	431	Scf_Vec.print("eigenvectors");
	432	}
	433
	434	double scf_vector_dat = new double[nbasisnoso];
	435	Scf_Vec.t()->convert(scf_vector_dat);
	436
	437	evals = new double[noso];
	438	double** scf_vector = new double*[nbasis];
	439	for (i=0; i<nbasis; i++) {
	440	scf_vector[i] = &scf_vector_dat[i*noso];
	441	}
	442	for (i=0; i<noso; i++) {
	443	evals[i] = evalmat(i);
	444	}
	445
	446	Scf_Vec = 0;
	447	evalmat = 0;
	448
	449	//////////////////////////////////////////////////////////////
	450	// Allocate storage for various arrays needed for gradients
	451	// (Pkj and Pab are symmetric, so store only lower triangle)
	452	//////////////////////////////////////////////////////////////
	453
	454	if (dograd) {
	455	Pkj = (double) malloc((nocc(nocc+1)/2)*sizeof(double));
	456	Pab = (double) malloc((nvir(nvir+1)/2)*sizeof(double));
	457	Wkj = (double) malloc(noccnocc*sizeof(double));
	458	Wab = (double) malloc(nvirnvir*sizeof(double));
	459	Waj = (double) malloc(nvirnocc*sizeof(double));
	460	if (do_d2_) {
	461	d2occ_mat = new double[nocc_act*(nocc_act+1)/2];
	462	d2vir_mat = new double[nvir_act*(nvir_act+1)/2];
	463	}
	464
	465	gradient_dat = new double[natom*3];
	466	gradient = new double*[natom];
	467	for (i=0; i<natom; i++) {
	468	gradient[i] = &gradient_dat[i*3];
	469	}
	470
	471	hf_gradient_dat = new double[natom*3];
	472	hf_gradient = new double*[natom];
	473	for (i=0; i<natom; i++) {
	474	hf_gradient[i] = &hf_gradient_dat[i*3];
	475	}
	476
	477	ginter = new double*[natom];
	478	for (i=0; i<natom; i++) {
	479	ginter[i] = new double[3];
	480	for (xyz=0; xyz<3; xyz++) ginter[i][xyz] = 0;
	481	}
	482
	483	hf_ginter = new double*[natom];
	484	for (i=0; i<natom; i++) {
	485	hf_ginter[i] = new double[3];
	486	for (xyz=0; xyz<3; xyz++) hf_ginter[i][xyz] = 0;
	487	}
	488
	489	//////////////////////////////
	490	// Initialize various arrays
	491	//////////////////////////////
	492
	493	bzerofast(Pkj,nocc*(nocc+1)/2);
	494	bzerofast(Wkj,nocc*nocc);
	495	bzerofast(Pab,nvir*(nvir+1)/2);
	496	bzerofast(Wab,nvir*nvir);
	497	bzerofast(Waj,nvir*nocc);
	498	if (do_d2_) {
	499	bzerofast(d2occ_mat,nocc_act*(nocc_act+1)/2);
	500	bzerofast(d2vir_mat,nvir_act*(nvir_act+1)/2);
	501	}
	502
	503	if (me == 0) zero_gradients(gradient, natom, 3);
	504	if (me == 0) zero_gradients(hf_gradient, natom, 3);
	505	}
	506
	507	if (dograd \|\| do_d1_) {
	508	Laj = (double) malloc(nvirnocc*sizeof(double));
	509	bzerofast(Laj,nvir*nocc);
	510	}
	511
	512	if (debug_ > 2 && me == 0) {
	513	for (j=0; j<noso; j++) {
	514	ExEnv::out0() << indent
	515	<< scprintf("eigenvalue[%3d] = %15.10lf", j, evals[j]);
	516	if (j < nfzc) ExEnv::out0() << " (frozen docc)";
	517	else if (j < nocc_act + nfzc) ExEnv::out0() << " (active docc)";
	518	else if (j < nvir_act + nocc_act + nfzc) ExEnv::out0() << " (active uocc)";
	519	else ExEnv::out0() << " (frozen uocc)";
	520	ExEnv::out0() << endl;
	521	}
	522	}
	523
	524	/////////////////////////////////////
	525	// Begin MP2 loops
	526	/////////////////////////////////////
	527
	528	// debug print
	529	if (debug_ && me == 0) {
	530	ExEnv::out0() << indent
	531	<< scprintf("node %i, begin loop over i-batches",me) << endl;
	532	}
	533	// end of debug print
	534
	535	// Initialize the integrals
	536	integral()->set_storage(mem_remaining);
	537	tbints_ = new Ref<TwoBodyInt>[thr_->nthread()];
	538	for (i=0; i<thr_->nthread(); i++) {
	539	tbints_[i] = integral()->electron_repulsion();
	540	}
	541	if (dograd \|\| do_d1_) {
	542	tbintder_ = new Ref<TwoBodyDerivInt>[thr_->nthread()];
	543	for (i=0; i<thr_->nthread(); i++) {
	544	tbintder_[i] = integral()->electron_repulsion_deriv();
	545	}
	546	}
	547
	548	int mem_integral_intermediates = integral()->storage_used();
	549	int mem_integral_storage = (mem_remaining - mem_integral_intermediates) / thr_->nthread();
	550	if (mem_integral_storage<0) mem_integral_storage = 0;
	551	for (i=0; i<thr_->nthread(); i++) {
	552	tbints_[i]->set_integral_storage(mem_integral_storage);
	553	}
	554
	555	ExEnv::out0() << endl << indent
	556	<< scprintf("Memory used for integral intermediates: %i Bytes",
	557	mem_integral_intermediates)
	558	<< endl;
	559	ExEnv::out0() << indent
	560	<< scprintf("Memory used for integral storage: %i Bytes",
	561	mem_integral_storage)
	562	<< endl;
	563
	564	if (mem.null()) {
	565	ExEnv::errn() << "MBPT2: memory group not initialized" << endl;
	566	abort();
	567	}
	568
	569	mem->set_localsize(size_t(nijmax)nbasisnbasis*sizeof(double));
	570	ExEnv::out0() << indent
	571	<< "Size of global distributed array: "
	572	<< mem->totalsize()
	573	<< " Bytes"
	574	<< endl;
	575
	576	MemoryGrpBuf<double> membuf_remote(mem);
	577
	578	int usep4 = !dograd;
	579
	580	Ref<ThreadLock> lock = thr_->new_lock();
	581	CSGradErep12Qtr** e12thread = new CSGradErep12Qtr*[thr_->nthread()];
	582	DistShellPair::SharedData sp_e_data, sp_g_data;
	583	for (i=0; i<thr_->nthread(); i++) {
	584	e12thread[i] = new CSGradErep12Qtr(i, thr_->nthread(), me, nproc,
	585	mem, msg_, lock, basis(), tbints_[i],
	586	nocc, scf_vector, tol, debug_,
	587	dynamic_, print_percent_,
	588	&sp_e_data, usep4);
	589	}
	590
	591	CSGrad34Qbtr** qbt34thread;
	592	if (dograd \|\| do_d1_) {
	593	qbt34thread = new CSGrad34Qbtr*[thr_->nthread()];
	594	for (i=0; i<thr_->nthread(); i++) {
	595	qbt34thread[i] = new CSGrad34Qbtr(i, thr_->nthread(), me, nproc,
	596	mem, msg_, lock, basis(), tbints_[i],
	597	tbintder_[i], nocc, nfzc, scf_vector,
	598	tol, debug_, dynamic_, print_percent_,
	599	&sp_g_data, dograd, natom);
	600	}
	601	}
	602
	603	tim_enter("mp2 passes");
	604	for (pass=0; pass<npass; pass++) {
	605
	606	if (me == 0) {
	607	ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
	608	}
	609
	610	i_offset = restart_orbital_memgrp_ + pass*ni + nfzc;
	611	if ((pass == npass - 1) && (rest != 0)) ni = rest;
	612
	613	// Compute number of of i,j pairs on each node for
	614	// two-el integrals and non-sep 2PDM elements
	615	index = 0;
	616	nij = 0;
	617	for (i=0; i<ni; i++) {
	618	for (j=0; j<nocc; j++) {
	619	if (index++ % nproc == me) nij++;
	620	}
	621	}
	622
	623	// debug print
	624	if (debug_)
	625	ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
	626	// end of debug print
	627
	628	mem->sync(); // This must be here or gamma non-sep will be wrong when running
	629	// on multiple processors with more than one pass
	630
	631	r_offset = 0;
	632
	633	// Allocate and initialize some arrays
	634	// (done here to avoid having these arrays
	635	// overlap with arrays allocated later)
	636
	637	// Allocate (and initialize) some arrays
	638
	639	integral_iqjs = (double*) mem->localdata();
	640
	641	bzerofast(integral_iqjs, nijnbasisnbasis);
	642
	643	integral_iqjs = 0;
	644
	645	mem->sync();
	646
	647	index = 0;
	648
	649	if (me == 0) {
	650	ExEnv::out0() << indent
	651	<< scprintf("Begin loop over shells (erep, 1.+2. q.t.)") << endl;
	652	}
	653
	654	// Do the two eletron integrals and the first two quarter transformations
	655	tim_enter("erep+1.qt+2.qt");
	656	sp_e_data.init();
	657	for (i=0; i<thr_->nthread(); i++) {
	658	e12thread[i]->set_i_offset(i_offset);
	659	e12thread[i]->set_ni(ni);
	660	thr_->add_thread(i,e12thread[i]);
	661	# if SINGLE_THREAD_E12
	662	e12thread[i]->run();
	663	# endif
	664	}
	665	# if !SINGLE_THREAD_E12
	666	thr_->start_threads();
	667	thr_->wait_threads();
	668	# endif
	669	tim_exit("erep+1.qt+2.qt");
	670
	671	if (me == 0) {
	672	ExEnv::out0() << indent << "End of loop over shells" << endl;
	673	}
	674
	675	mem->sync(); // Make sure iqjs is complete on each node before continuing
	676
	677	integral_iqjs = (double*) mem->localdata();
	678
	679	#if PRINT2Q
	680	if (me == 0) {
	681	int index = 0;
	682	int ij_index = 0;
	683	for (int i = 0; i<ni; i++) {
	684	for (int j = 0; j<nocc; j++) {
	685	if (index++ % nproc == me) {
	686	if (j >= nfzc) {
	687	double integral_ij_offset = integral_iqjs + nbasisnbasis*ij_index;
	688	for (int s = 0; s<nbasis; s++) {
	689	double integral_ijsq_ptr = integral_ij_offset + snbasis;
	690	for (int q = 0; q<nbasis; q++) {
	691	printf("2Q: (%d %d\|%d %d) = %12.8f\n",
	692	i,q,j,s,*integral_ijsq_ptr);
	693	*integral_ijsq_ptr++;
	694	}
	695	}
	696	}
	697	ij_index++;
	698	}
	699	}
	700	}
	701	}
	702	#endif
	703
	704	// Allocate and initialize some arrays
	705	ixjs_tmp = new double[nbasis];
	706
	707	if (me == 0) {
	708	ExEnv::out0() << indent << "Begin third q.t." << endl;
	709	}
	710
	711	tim_enter("3. q.t.");
	712	// Begin third quarter transformation;
	713	// generate (ix\|js) for i act, j act or frz, and x any MO (act or frz)
	714	index = 0;
	715	ij_index = 0;
	716	for (i=0; i<ni; i++) {
	717	for (j=0; j<nocc; j++) {
	718	if (index++ % nproc == me) {
	719
	720	for (s=0; s<nbasis; s++) {
	721
	722	bzerofast(ixjs_tmp, nbasis);
	723	for (q=0; q<nbasis; q++) {
	724	integral_iqjs_ptr = &integral_iqjs[q + nbasis(s + nbasisij_index)];
	725	ixjs_ptr = ixjs_tmp;
	726	c_qx = scf_vector[q];
	727	tmpval = *integral_iqjs_ptr;
	728	#if PRINT_BIGGEST_INTS
	729	biggest_ints_2.insert(tmpval,i+i_offset,j,s,q);
	730	if ((i+i_offset==104 && j == 1)
	731	\|\|(i+i_offset==104 && j == 2)) {
	732	biggest_ints_2s.insert(tmpval,i+i_offset,j,s,q);
	733	}
	734	#endif
	735	for (x=0; x<noso; x++) {
	736	ixjs_ptr++ += c_qx++ * tmpval;
	737	}
	738	} // exit q loop
	739
	740	// Put ixjs into integral_iqjs, while overwriting what was there;
	741	// i.e., integral_iqjs will now contain three-quarter transformed
	742	// integrals ixjs
	743	integral_iqjs_ptr = &integral_iqjs[nbasis(s + nbasisij_index)];
	744	ixjs_ptr = ixjs_tmp;
	745	for (x=0; x<noso; x++) {
	746	#if PRINT_BIGGEST_INTS
	747	if (x>=nocc) {
	748	biggest_ints_3a.insert(*ixjs_ptr,i+i_offset,j,s,x-nocc);
	749	}
	750	#endif
	751	integral_iqjs_ptr++ = ixjs_ptr++;
	752	}
	753	} // exit s loop
	754	ij_index++;
	755	} // endif
	756	} // exit j loop
	757	} // exit i loop
	758	// end of third quarter transformation
	759	tim_exit("3. q.t.");
	760
	761	if (me == 0) {
	762	ExEnv::out0() << indent << "End of third q.t." << endl;
	763	}
	764
	765	delete[] ixjs_tmp;
	766
	767	// The array of half-transformed integrals integral_iqjs has now
	768	// been overwritten by three-quarter transformed integrals ixjs;
	769	// rename the array integral_ixjs, where x = any MO
	770	integral_ixjs = integral_iqjs;
	771
	772	#if PRINT3Q
	773	if (me == 0) {
	774	int index = 0;
	775	int ij_index = 0;
	776	for (int i = 0; i<ni; i++) {
	777	for (int j = 0; j<nocc; j++) {
	778	if (index++ % nproc == me) {
	779	if (j >= nfzc) {
	780	double integral_ij_offset = integral_ixjs + nbasisnbasis*ij_index;
	781	for (int s = 0; s<nbasis; s++) {
	782	double integral_ijsx_ptr = integral_ij_offset + snbasis;
	783	for (int x = 0; x<noso; x++) {
	784	printf("3Q: (%d %d\|%d %d) = %12.8f\n",
	785	i,x,j,s,*integral_ijsx_ptr);
	786	*integral_ijsx_ptr++;
	787	}
	788	}
	789	}
	790	ij_index++;
	791	}
	792	}
	793	}
	794	}
	795	#endif
	796
	797	integral_iajy = new double[noso];
	798	// in iajy: i act; a,j act or frz; y act or frz occ or virt.
	799	integral_ikja = new double[nvir_act];
	800	// in ikja: i,j act; k act or frz; a act.
	801
	802	if (me == 0) {
	803	ExEnv::out0() << indent << "Begin fourth q.t." << endl;
	804	}
	805
	806	// Begin fourth quarter transformation
	807	// generating MO integrals (ov\|ov), (ov\|oo) and (oo\|ov)
	808	tim_enter("4. q.t.");
	809	index = 0;
	810	ij_index = 0;
	811	for (i=0; i<ni; i++) {
	812	for (j=0; j<nocc; j++) {
	813	if (index++ % nproc == me) {
	814
	815	for (a=0; a<nvir; a++) {
	816	bzerofast(integral_iajy, noso);
	817	iajs_ptr = &integral_ixjs[a+nocc + nbasisnbasisij_index];
	818	for (s=0; s<nbasis; s++) {
	819	c_sy = scf_vector[s];
	820	iajy_ptr = integral_iajy;
	821	tmpval = *iajs_ptr;
	822	#if PRINT_BIGGEST_INTS
	823	biggest_ints_3.insert(tmpval,i+i_offset,j,s,a);
	824	if ((i+i_offset==105 && j == 2 && s == 170 && a == 3)
	825	\|\|(i+i_offset==102 && j == 2 && s == 170 && a == 2)) {
	826	ExEnv::outn() << scprintf("3/4: %3d %3d %3d %3d: %16.10f",
	827	i+i_offset, j, s, x-nocc)
	828	<< endl;
	829	}
	830	#endif
	831	for (y=0; y<noso; y++) {
	832	iajy_ptr++ += c_sy++ * tmpval;
	833	} // exit y loop
	834	iajs_ptr += nbasis;
	835	} // exit s loop
	836	// Put integral_iajy into ixjs for one i,a,j while
	837	// overwriting elements of ixjs
	838	iajs_ptr = &integral_ixjs[a+nocc + nbasisnbasisij_index];
	839	iajy_ptr = integral_iajy;
	840	for (y=0; y<noso; y++) {
	841	iajs_ptr = iajy_ptr++;
	842	iajs_ptr += nbasis;
	843	} // exit y loop
	844	} // exit a loop
	845
	846	// this is only needed for gradients
	847	if ((dograd \|\| do_d1_) && j >= nfzc) {
	848	for (k=0; k<nocc; k++) {
	849	bzerofast(integral_ikja, nvir_act);
	850	ikjs_ptr = &integral_ixjs[k + nbasisnbasisij_index];
	851	for (s=0; s<nbasis; s++) {
	852	c_sa = &scf_vector[s][nocc];
	853	ikja_ptr = integral_ikja;
	854	tmpval = *ikjs_ptr;
	855	for (a=0; a<nvir_act; a++) {
	856	ikja_ptr++ += c_sa++ * tmpval;
	857	} // exit a loop
	858	ikjs_ptr += nbasis;
	859	} // exit s loop
	860	// Put integral_ikja into ixjs for one i,k,j while
	861	// overwriting elements of ixjs
	862	ikjs_ptr = &integral_ixjs[k + nbasis(nocc + nbasisij_index)];
	863	ikja_ptr = integral_ikja;
	864	for (a=0; a<nvir_act; a++) {
	865	ikjs_ptr = ikja_ptr++;
	866	ikjs_ptr += nbasis;
	867	} // exit a loop
	868	} // exit k loop
	869	} //endif
	870
	871	ij_index++;
	872	} // endif
	873	} // exit j loop
	874	} // exit i loop
	875	// end of fourth quarter transformation
	876	tim_exit("4. q.t.");
	877
	878	if (me == 0) {
	879	ExEnv::out0() << indent << "End of fourth q.t." << endl;
	880	}
	881
	882	// The array integral_ixjs has now been overwritten by MO integrals
	883	// iajy and ikja, so rename the array mo_int
	884	mo_int = integral_ixjs;
	885
	886	delete[] integral_iajy;
	887	delete[] integral_ikja;
	888
	889	// Divide the (ia\|jb) MO integrals by the term
	890	// evals[i]+evals[j]-evals[a]-evals[b]
	891	// and keep these integrals in mo_int;
	892	// do this only for active i, j, a, and b
	893	tim_enter("divide (ia\|jb)'s");
	894
	895	index = 0;
	896	ij_index = 0;
	897	for (i=0; i<ni; i++) {
	898	ii = i+i_offset;
	899	for (j=0; j<nocc; j++) {
	900	if (index++ % nproc == me) {
	901	if (j>=nfzc) {
	902	for (b=0; b<nvir_act; b++) {
	903	iajb_ptr = &mo_int[nocc + nbasis(b+nocc + nbasisij_index)];
	904	bb = b+nocc;
	905	for (a=0; a<nvir_act; a++) {
	906	#if PRINT4Q
	907	printf("4Q: (%d %d\|%d %d) = %12.8f\n",
	908	i,a+nocc,j,b+nocc,*iajb_ptr);
	909	#endif
	910	// Zero out nonsymmetric integral, else divide by denominators
	911	if (usep4 && ( symorb_irrep_[ii] ^
	912	symorb_irrep_[j] ^
	913	symorb_irrep_[a+nocc] ^
	914	symorb_irrep_[bb]) ) {
	915	*iajb_ptr++ = 0.0;
	916	}
	917	else
	918	*iajb_ptr++ /= evals[ii]+evals[j]-evals[a+nocc]-evals[bb];
	919	} // exit a loop
	920	} // exit b loop
	921	} // endif
	922	ij_index++;
	923	} // endif
	924	} // exit j loop
	925	} // exit i loop
	926	tim_exit("divide (ia\|jb)'s");
	927
	928	// We now have the fully transformed integrals (ia\|jb)
	929	// divided by the proper orbital energy denominators
	930	// for one batch of i, all j<nocc, and all a<nvir and b<nvir,
	931	// where i, j, a, and b are all active;
	932	// compute contribution to the MP2 correlation energy
	933	// from these integrals
	934
	935	#if WRITE_DOUBLES
	936	if (nproc > 1 \|\| npass > 1) {
	937	ExEnv::outn() << "csgrad.cc: WRITE_DOUBLES set but case not allowed" << endl;
	938	abort();
	939	}
	940	ExEnv::outn() << "csgrad.cc: WRITING DOUBLES: CHECK ORDER" << endl;
	941	char *doutname = SCFormIO::fileext_to_filename(".mp2");
	942	FILE *dout = fopen(doutname,"w");
	943	delete[] doutname;
	944	fwrite(&nocc_act, sizeof(int), 1, dout);
	945	fwrite(&nvir_act, sizeof(int), 1, dout);
	946	for (j=nfzc; j<nocc; j++) {
	947	for (b=0; b<nvir_act; b++) {
	948	for (i=0; i<ni; i++) {
	949	ij_index = nocc*i + j;
	950	iajb_ptr = &mo_int[nocc + nbasis(b+nocc + nbasisij_index)];
	951	fwrite(iajb_ptr, sizeof(double), nvir_act, dout);
	952	for (a=0; a<nvir_act; a++) {
	953	if (fabs(iajb_ptr[a])>1.0e-8) {
	954	ExEnv::outn() << scprintf(" Djbia(%2d %2d %2d %2d) = %12.8f",
	955	j+1-nfzc,b+1,i+1,a+1,iajb_ptr[a])
	956	<< endl;
	957	}
	958	}
	959	}
	960	}
	961	}
	962	fclose(dout);
	963	#endif
	964
	965	tim_enter("compute ecorr");
	966
	967	index = 0;
	968	ij_index = 0;
	969	for (i=0; i<ni; i++) {
	970	double ecorr_i = 0.0;
	971	for (j=0; j<nocc; j++) {
	972	double ecorr_ij = 0.0;
	973	if (index++ % nproc == me) {
	974
	975	if (j>=nfzc) {
	976	for (b=0; b<nvir_act; b++) {
	977	iajb_ptr = &mo_int[nocc + nbasis(b+nocc + nbasisij_index)];
	978	ibja_ptr = &mo_int[b+nocc + nbasis(nocc + nbasisij_index)];
	979	for (a=0; a<nvir_act; a++) {
	980	delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
	981	// only include determinants with unique coefficients
	982	if (a>=b && i_offset+i>=j) {
	983	if (a>b && i_offset+i>j) {
	984	// aaaa or bbbb
	985	biggest_coefs.insert(iajb_ptr - ibja_ptr,
	986	i_offset+i,j,a,b,1111);
	987	// aabb or bbaa or abba or baab
	988	biggest_coefs.insert(*ibja_ptr,i_offset+i,j,b,a,1212);
	989	} // endif
	990	// aabb or bbaa or abba or baab
	991	biggest_coefs.insert(*iajb_ptr,i_offset+i,j,a,b,1212);
	992	} // endif
	993
	994	tmpval = iajb_ptr(2*iajb_ptr - ibja_ptr)*delta_ijab;
	995	ecorr_mp2 += tmpval;
	996	if (debug_) ecorr_ij += tmpval;
	997	iajb_ptr++;
	998	ibja_ptr += nbasis;;
	999	} // exit a loop
	1000	} // exit b loop
	1001	} // endif
	1002	ij_index++;
	1003	} // endif
	1004	if (debug_) {
	1005	msg_->sum(ecorr_ij);
	1006	ecorr_i += ecorr_ij;
	1007	ExEnv::out0() << indent
	1008	<< scprintf("correlation energy for pair %3d %3d = %16.12f",
	1009	i+i_offset, j, ecorr_ij)
	1010	<< endl;
	1011	}
	1012	} // exit j loop
	1013	if (debug_) {
	1014	ExEnv::out0() << indent
	1015	<< scprintf("correlation energy for orbital %3d = %16.12f",
	1016	i+i_offset, ecorr_i)
	1017	<< endl;
	1018	}
	1019	} // exit i loop
	1020	tim_exit("compute ecorr");
	1021
	1022	// debug print
	1023	if (debug_ && me == 0) {
	1024	ExEnv::out0() << indent << "End of ecorr" << endl;
	1025	}
	1026	// end of debug print
	1027
	1028	if (npass > 1 && pass < npass - 1) {
	1029	double passe = ecorr_mp2;
	1030	msg_->sum(passe);
	1031	ExEnv::out0() << indent
	1032	<< "Partial correlation energy for pass " << pass << ":" << endl;
	1033	ExEnv::out0() << indent
	1034	<< scprintf(" restart_ecorr = %18.14f", passe)
	1035	<< endl;
	1036	ExEnv::out0() << indent
	1037	<< scprintf(" restart_orbital_memgrp = %d", ((pass+1) * ni))
	1038	<< endl;
	1039	}
	1040
	1041	integral_iqjs = 0;
	1042	mem->sync(); // Make sure MO integrals are complete on all nodes before continuing
	1043
	1044	// don't go beyond this point if only the energy is needed
	1045	if (!dograd && !do_d1_) continue;
	1046
	1047	mo_int = (double*) mem->localdata();
	1048
	1049	if (!dograd) goto compute_L;
	1050
	1051	// Update the matrices Pkj and Wkj with
	1052	// contributions from (occ vir\|occ vir) integrals
	1053	index = 0;
	1054	ij_index = 0;
	1055	tim_enter("Pkj and Wkj");
	1056	for (i=0; i<ni; i++) {
	1057	for (j=0; j<nocc; j++) {
	1058	if (index++ % nproc == me) {
	1059
	1060	if (j>=nfzc) {
	1061	for (kloop=me; kloop<me+nocc; kloop++) {
	1062	// stagger k's to minimize contention
	1063	k = kloop%nocc;
	1064	if (k<=j) pkj_ptr = &Pkj[j*(j+1)/2 + k];
	1065	if (do_d2_) {
	1066	if (k<=j && k>=nfzc) {
	1067	d2occ_mat_ptr = &d2occ_mat[(j-nfzc)*(j-nfzc+1)/2 + k-nfzc];
	1068	}
	1069	}
	1070	wjk_ptr = &Wkj[j*nocc + k];
	1071	// Send for iakb, if necessary
	1072	ik_index = (i*nocc + k)/nproc;
	1073	ik_proc = (i*nocc + k)%nproc;
	1074	ik_offset = nocc + noccnbasis + nbasisnbasis*ik_index;
	1075	mo_intbuf = (double*) membuf_remote.readonly_on_node(ik_offset,
	1076	nbasis*nvir-nocc,
	1077	ik_proc);
	1078	for (a=0; a<nvir_act; a++) {
	1079	ibja_ptr = &mo_int[nocc + nbasis(a+nocc + nbasisij_index)];
	1080	iajb_ptr = &mo_int[a+nocc + nbasis(nocc + nbasisij_index)];
	1081	iakb_ptr = &mo_intbuf[a];
	1082	for (b=0; b<nvir_act; b++) {
	1083	tmpval1 = iakb_ptr *iajb_ptr;
	1084	tmpval = 2*iakb_ptr ibja_ptr++ - 4tmpval1;
	1085	// tmpval = 2*iakb_ptr (ibja_ptr++ - 2 *iajb_ptr);
	1086	iakb_ptr += nbasis;
	1087	iajb_ptr += nbasis;
	1088	if (k<nfzc && k<=j) *pkj_ptr += tmpval/(evals[k]-evals[j]);
	1089	else if (k<=j) {
	1090	*pkj_ptr += tmpval;
	1091	if (do_d2_) *d2occ_mat_ptr += tmpval1;
	1092	}
	1093	if (k>=nfzc) {
	1094	delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
	1095	wjk_ptr += tmpvaldelta_ijab;
	1096	}
	1097	} // exit b loop
	1098	} // exit a loop
	1099	mo_intbuf = 0;
	1100	membuf_remote.release();
	1101	} // end kloop loop
	1102	} // endif
	1103
	1104	ij_index++;
	1105	} // endif
	1106	} // exit j loop
	1107	} // exit i loop
	1108	tim_exit("Pkj and Wkj");
	1109
	1110	// debug print
	1111	if (debug_ && me == 0) {
	1112	ExEnv::out0() << indent << "End of Pkj and Wkj" << endl;
	1113	}
	1114	// end of debug print
	1115
	1116	// Update the matrices Pab and Wab with
	1117	// contributions from (occ vir\|occ vir) integrals
	1118	tim_enter("Pab and Wab");
	1119	index = 0;
	1120	ij_index = 0;
	1121	for (i=0; i<ni; i++) {
	1122	for (j=0; j<nocc; j++) {
	1123	if (index++ % nproc == me) {
	1124	if (j>=nfzc) {
	1125
	1126	offset = nocc + noccnbasis + nbasisnbasis*ij_index;
	1127	for (a=0; a<nvir_act; a++) {
	1128	pab_ptr = &Pab[a*(a+1)/2];
	1129	if (do_d2_) d2vir_mat_ptr = &d2vir_mat[a*(a+1)/2];
	1130	for (b=0; b<=a; b++) { // active-active part of Pab and Wab
	1131	wab_ptr = &Wab[a*nvir + b];
	1132	wba_ptr = &Wab[b*nvir + a];
	1133	ibjc_ptr = &mo_int[offset + b];
	1134	icjb_ptr = &mo_int[offset + b*nbasis];
	1135	iajc_ptr = &mo_int[offset + a];
	1136	icja_ptr = &mo_int[offset + a*nbasis];
	1137	for (c=0; c<nvir_act; c++) {
	1138	tmpval = iajc_ptr*ibjc_ptr;
	1139	if (do_d2_) *d2vir_mat_ptr += tmpval;
	1140	pab_ptr += 4tmpval - 2*iajc_ptr *icjb_ptr;
	1141	// pab_ptr += 2iajc_ptr (2 * ibjc_ptr - icjb_ptr);
	1142
	1143	if (a == b) {
	1144	delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
	1145	wab_ptr += 2ibjc_ptr(icja_ptr - 2 iajc_ptr)delta_ijac;
	1146	}
	1147	else {
	1148	delta_ijbc = evals[i_offset+i]+evals[j]-evals[nocc+b]-evals[nocc+c];
	1149	delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
	1150	wab_ptr += 2ibjc_ptr (icja_ptr - 2 iajc_ptr)delta_ijac;
	1151	wba_ptr += 2iajc_ptr (icjb_ptr - 2 ibjc_ptr)delta_ijbc;
	1152	}
	1153	iajc_ptr += nbasis;
	1154	ibjc_ptr += nbasis;
	1155	icja_ptr++;
	1156	icjb_ptr++;
	1157	} // exit c loop
	1158	pab_ptr++;
	1159	d2vir_mat_ptr++;
	1160	} // exit b loop
	1161	} // exit a loop
	1162
	1163	for (a=0; a<nfzv; a++) { // active-frozen part of Pab
	1164	pab_ptr = &Pab[(a+nvir_act)*(a+nvir_act+1)/2];
	1165	for (b=0; b<nvir_act; b++) {
	1166	tmpval = evals[nocc+b] - evals[nocc+nvir_act+a];
	1167	ibjc_ptr = &mo_int[offset+b];
	1168	iajc_ptr = &mo_int[offset+a+nvir_act];
	1169	icja_ptr = &mo_int[offset+(a+nvir_act)*nbasis];
	1170	for (c=0; c<nvir_act; c++) {
	1171	pab_ptr += 2ibjc_ptr(2*iajc_ptr - icja_ptr++)/tmpval;
	1172	ibjc_ptr += nbasis;
	1173	iajc_ptr += nbasis;
	1174	} // exit c loop
	1175	pab_ptr++;
	1176	} // exit b loop
	1177	} // exit a loop
	1178
	1179	} // endif
	1180	ij_index++;
	1181	} // endif
	1182	} // exit j loop
	1183	} // exit i loop
	1184	tim_exit("Pab and Wab");
	1185
	1186	// debug print
	1187	if (debug_ && me == 0) {
	1188	ExEnv::out0() << indent << "End of Pab and Wab" << endl;
	1189	}
	1190	// end of debug print
	1191
	1192	compute_L:
	1193
	1194	///////////////////////////////////////
	1195	// Update Waj and Laj with contrib.
	1196	// from (oo\|ov) and (ov\|oo) integrals;
	1197	// here a is active and j is active or
	1198	// frozen
	1199	///////////////////////////////////////
	1200	tim_enter("Waj and Laj");
	1201
	1202	// (oo\|ov) contribution
	1203	index = 0;
	1204	ik_index = 0;
	1205	for (i=0; i<ni; i++) {
	1206	for (k=0; k<nocc; k++) {
	1207	if (index++ % nproc == me) {
	1208	if (k>=nfzc) {
	1209	offset = nbasisnocc + nbasisnbasis*ik_index;
	1210	for (j=0; j<nocc; j++) {
	1211	for (b=0; b<nvir_act; b++) {
	1212	ibka_ptr = &mo_int[b+nocc + offset];
	1213	ijkb_ptr = &mo_int[j + nbasis*b + offset];
	1214	if (dograd) waj_ptr = &Waj[jnvir]; // order as jnvir+a to make loops more efficient
	1215	laj_ptr = &Laj[j*nvir];
	1216	for (a=0; a<nvir_act; a++) {
	1217	tmpval = 2*ibka_ptr *ijkb_ptr;
	1218	ibka_ptr += nbasis;
	1219	if (dograd) *waj_ptr++ += tmpval;
	1220	*laj_ptr++ -= tmpval; // This term had the wrong sign in Frisch's paper
	1221	} // exit a loop
	1222	} // exit b loop
	1223	} // exit j loop
	1224	} // endif
	1225	ik_index++;
	1226	} // endif
	1227	} // exit k loop
	1228	} // exit i loop
	1229
	1230	// (ov\|oo) contribution
	1231	index = 0;
	1232	ik_index = 0;
	1233	for (i=0; i<ni; i++) {
	1234	for (k=0; k<nocc; k++) {
	1235	if (index++ % nproc == me) {
	1236	if (k>=nfzc) {
	1237	offset = nocc + nbasisnbasisik_index;
	1238	for (b=0; b<nvir_act; b++) {
	1239	for (j=0; j<nocc; j++) {
	1240	ibkj_ptr = &mo_int[offset + b + j*nbasis];
	1241	ibka_ptr = &mo_int[offset + b + nocc*nbasis];
	1242	if (dograd) waj_ptr = &Waj[j*nvir];
	1243	laj_ptr = &Laj[j*nvir];
	1244	for (a=0; a<nvir_act; a++) {
	1245	tmpval = 4 * ibka_ptr *ibkj_ptr;
	1246	ibka_ptr += nbasis;
	1247	if (dograd) *waj_ptr++ -= tmpval;
	1248	*laj_ptr++ += tmpval; // This term had the wrong sign in Frisch's paper
	1249	} // exit a loop
	1250	} // exit j loop
	1251	} // exit b loop
	1252	} // endif
	1253	ik_index++;
	1254	} // endif
	1255	} // exit k loop
	1256	} // exit i loop
	1257
	1258	tim_exit("Waj and Laj");
	1259	/////////////////////////////
	1260	// End of Waj and Laj update
	1261	/////////////////////////////
	1262
	1263	// debug print
	1264	if (debug_ && me == 0) {
	1265	ExEnv::out0() << indent << "End of Paj and Waj" << endl;
	1266	}
	1267	// end of debug print
	1268
	1269	mo_int = 0;
	1270
	1271	mem->sync(); // Need to synchronize before deleting mo_intbuf
	1272
	1273	mo_int = (double*) mem->localdata();
	1274
	1275	gamma_iajs_tmp = new double[nbasis*nvir_act];
	1276	if (!gamma_iajs_tmp) {
	1277	ExEnv::outn() << indent << "Could not allocate gamma_iajs_tmp" << endl;
	1278	abort();
	1279	}
	1280
	1281	// debug print
	1282	if (debug_ && me == 0) {
	1283	ExEnv::out0() << indent << "Begin first and second q.b.t." << endl;
	1284	}
	1285	// end of debug print
	1286
	1287	///////////////////////////////////////////////////////////
	1288	// Perform first and second quarter back-transformation.
	1289	// Each node produces gamma_iajs, and gamma_iqjs
	1290	// for a subset of i and j, all a and all s;
	1291	// the back-transf. is done only for active i, j, a, and b
	1292	///////////////////////////////////////////////////////////
	1293
	1294	// Begin first quarter back-transformation
	1295	tim_enter("1. q.b.t.");
	1296	index = 0;
	1297	ij_index = 0;
	1298	for (i=0; i<ni; i++) {
	1299	for (j=0; j<nocc; j++) {
	1300	if (index++ % nproc == me) {
	1301	if (j>=nfzc) {
	1302	bzerofast(gamma_iajs_tmp,nbasis*nvir_act);
	1303	offset = nocc + noccnbasis + nbasisnbasis*ij_index;
	1304
	1305	for (a=0; a<nvir_act; a++) {
	1306	for (s=0; s<nbasis; s++) {
	1307	c_sb = &scf_vector[s][nocc];
	1308	gamma_iajs_ptr = &gamma_iajs_tmp[s*nvir_act + a];
	1309	ibja_ptr = &mo_int[a*nbasis + offset];
	1310	iajb_ptr = &mo_int[a + offset];
	1311
	1312	tmpval = 0.0;
	1313	for (b=0; b<nvir_act; b++) {
	1314	tmpval += 2*c_sb++ (2*iajb_ptr - ibja_ptr++);
	1315	iajb_ptr += nbasis;
	1316	} // exit b loop
	1317	*gamma_iajs_ptr += tmpval;
	1318	} // exit s loop
	1319	} // exit a loop
	1320	// Put gamma_iajs_tmp into mo_int for one i,j
	1321	// while overwriting mo_int
	1322	gamma_iajs_ptr = gamma_iajs_tmp;
	1323	for (y=0; y<nbasis; y++) {
	1324	iajy_ptr = &mo_int[nocc + nbasis(y + nbasisij_index)];
	1325	for (a=0; a<nvir_act; a++) {
	1326	iajy_ptr++ = gamma_iajs_ptr++;
	1327	}
	1328	}
	1329
	1330	} // endif
	1331	ij_index++;
	1332	} // endif
	1333	} // exit j loop
	1334	} // exit i loop
	1335	// end of first quarter back-transformation
	1336	tim_exit("1. q.b.t.");
	1337
	1338	// debug print
	1339	if (debug_ && me == 0) {
	1340	ExEnv::out0() << indent << "End of first q.b.t." << endl;
	1341	}
	1342	// end of debug print
	1343
	1344	mo_int = 0;
	1345
	1346	mem->sync(); // Make sure all nodes are done with gamma_iajs_tmp before renaming
	1347
	1348	delete[] gamma_iajs_tmp;
	1349
	1350	// The array mo_int has now been overwritten by the quarter
	1351	// back-transformed non-sep 2PDM gamma_iajs, so rename
	1352	gamma_iajs = (double*) mem->localdata();
	1353
	1354	gamma_iqjs_tmp = new double[nbasis];
	1355	if (!gamma_iqjs_tmp) {
	1356	ExEnv::errn() << "Could not allocate gamma_iqjs_tmp" << endl;
	1357	abort();
	1358	}
	1359
	1360	if (debug_ && me == 0) {
	1361	ExEnv::out0() << indent << "Begin second q.b.t." << endl;
	1362	}
	1363
	1364	// Begin second quarter back-transformation
	1365	// (gamma_iqjs elements ordered as i,j,s,q,
	1366	// i.e., q varies fastest)
	1367	tim_enter("2. q.b.t.");
	1368	index = 0;
	1369	ij_index = 0;
	1370	for (i=0; i<ni; i++) {
	1371	for (j=0; j<nocc; j++) {
	1372	if (index++ % nproc == me) {
	1373	if (j>=nfzc) {
	1374	offset = nbasisnbasisij_index;
	1375
	1376	for (s=0; s<nbasis; s++) {
	1377	bzerofast(gamma_iqjs_tmp,nbasis);
	1378	for (q=0; q<nbasis; q++) {
	1379	gamma_iqjs_ptr = &gamma_iqjs_tmp[q];
	1380	gamma_iajs_ptr = &gamma_iajs[nocc + s*nbasis + offset];
	1381	c_qa = &scf_vector[q][nocc];
	1382
	1383	tmpval = 0.0;
	1384	for (a=0; a<nvir_act; a++) {
	1385	tmpval += c_qa++ *gamma_iajs_ptr++;
	1386	} // exit a loop
	1387	*gamma_iqjs_ptr += tmpval;
	1388	} // exit q loop
	1389	// Put gamma_iqjs_tmp into gamma_iajs for one i,j,s
	1390	// while overwriting gamma_iajs
	1391	gamma_iajs_ptr = &gamma_iajs[s*nbasis + offset];
	1392	gamma_iqjs_ptr = gamma_iqjs_tmp;
	1393	for (q=0; q<nbasis; q++) {
	1394	gamma_iajs_ptr++ = gamma_iqjs_ptr++;
	1395	}
	1396	} // exit s loop
	1397
	1398	} // endif
	1399	ij_index++;
	1400	} // endif
	1401	} // exit j loop
	1402	} // exit i loop
	1403	tim_exit("2. q.b.t.");
	1404	// end of second quarter back-transformation
	1405
	1406	if (debug_ && me == 0) {
	1407	ExEnv::out0() << indent << "End of second q.b.t." << endl;
	1408	}
	1409
	1410	gamma_iajs = 0;
	1411
	1412	mem->sync(); // Keep this here to make sure all nodes have gamma_iqjs
	1413	// before it is needed below, and that gamma_iajs is not
	1414	// deleted prematurely
	1415
	1416	// The quarter back-transformed elements gamma_iajs have now been
	1417	// overwritten by the half back-transformed elements gamma_iqjs
	1418
	1419	delete[] gamma_iqjs_tmp;
	1420
	1421	/////////////////////////////////////////////////
	1422	// End of 1. and 2. quarter back-transformation
	1423	/////////////////////////////////////////////////
	1424
	1425	Lpi = new double[nbasis*ni];
	1426	bzerofast(Lpi,nbasis*ni);
	1427
	1428	if (me == 0) {
	1429	ExEnv::out0() << indent << "Begin third and fourth q.b.t." << endl;
	1430	}
	1431
	1432	//////////////////////////////////////////////////////////
	1433	// Perform third and fourth quarter back-transformation
	1434	// and compute contribution to gradient from non-sep 2PDM
	1435	//////////////////////////////////////////////////////////
	1436
	1437	tim_enter("3.qbt+4.qbt+non-sep contrib.");
	1438	sp_g_data.init();
	1439	for (i=0; i<thr_->nthread(); i++) {
	1440	qbt34thread[i]->set_i_offset(i_offset);
	1441	qbt34thread[i]->set_ni(ni);
	1442	thr_->add_thread(i,qbt34thread[i]);
	1443	# if SINGLE_THREAD_QBT34
	1444	qbt34thread[i]->run();
	1445	# endif
	1446	}
	1447	# if !SINGLE_THREAD_QBT34
	1448	thr_->start_threads();
	1449	thr_->wait_threads();
	1450	# endif
	1451	tim_exit("3.qbt+4.qbt+non-sep contrib.");
	1452	// Add thread contributions to Lpi and ginter
	1453	for (i=0; i<thr_->nthread(); i++) {
	1454	double *Lpi_thread = qbt34thread[i]->get_Lpi();
	1455	double **ginter_thread = qbt34thread[i]->get_ginter();
	1456	for (j=0; j<nbasis*ni; j++) Lpi[j] += Lpi_thread[j];
	1457	for (j=0; j<natom; j++) {
	1458	for (k=0; k<3; k++) {
	1459	ginter[j][k] += ginter_thread[j][k];
	1460	}
	1461	}
	1462	aointder_computed += qbt34thread[i]->get_aointder_computed();
	1463	}
	1464
	1465	if (me == 0) {
	1466	ExEnv::out0() << indent << "End of third and fourth q.b.t." << endl;
	1467	}
	1468
	1469	mem->sync(); // Make sure all nodes are done before deleting arrays
	1470
	1471	if (debug_ > 1) {
	1472	RefSCDimension ni_dim(new SCDimension(ni,1));
	1473	ni_dim->blocks()->set_subdim(0, new SCDimension(ni));
	1474	RefSCDimension nbasis_dim(new SCDimension(nbasis,1));
	1475	nbasis_dim->blocks()->set_subdim(0, new SCDimension(nbasis));
	1476	RefSCMatrix Lpi_mat(nbasis_dim, ni_dim, kit);
	1477	Lpi_mat->assign(Lpi);
	1478	Lpi_mat.print("Lpi");
	1479	}
	1480
	1481	if (debug_ && me == 0) {
	1482	ExEnv::out0() << indent << "Back-transform Lpi" << endl;
	1483	}
	1484
	1485	// Back-transform Lpi to MO basis
	1486	lpi_ptr = Lpi;
	1487	for (p=0; p<nbasis; p++) {
	1488	for (i=0; i<ni; i++) {
	1489	c_pa = &scf_vector[p][nocc];
	1490	laj_ptr = &Laj[nvir*(i_offset + i)];
	1491	for (a=0; a<nvir; a++) {
	1492	laj_ptr++ += c_pa++ * *lpi_ptr;
	1493	} // exit a loop
	1494	lpi_ptr++;
	1495	} // exit i loop
	1496	} // exit p loop
	1497
	1498	// malloc_chain_check(1);
	1499
	1500	delete[] Lpi;
	1501
	1502	if (me == 0) {
	1503	ExEnv::out0() << indent << "Done with pass " << pass+1 << endl;
	1504	}
	1505	} // exit loop over i-batches (pass)
	1506	tim_exit("mp2 passes");
	1507
	1508	if (dograd \|\| do_d1_) {
	1509	for (i=0; i<thr_->nthread(); i++) {
	1510	delete qbt34thread[i];
	1511	}
	1512	delete[] qbt34thread;
	1513	}
	1514
	1515	mem->set_localsize(0);
	1516
	1517	// debug print
	1518	if (debug_ && me == 0) {
	1519	ExEnv::out0() << indent << "Exited loop over i-batches" << endl;
	1520	}
	1521	// end of debug print
	1522
	1523	///////////////////////////////////////////////////////////////
	1524	// The computation of the MP2 energy is now complete on each
	1525	// node; add the nodes' contributions and print out the energy
	1526	///////////////////////////////////////////////////////////////
	1527	msg_->sum(ecorr_mp2);
	1528	msg_->sum(aoint_computed);
	1529	msg_->sum(aointder_computed);
	1530
	1531	biggest_coefs.combine(msg_);
	1532	#if PRINT_BIGGEST_INTS
	1533	biggest_ints_1.combine(msg_);
	1534	biggest_ints_2.combine(msg_);
	1535	biggest_ints_2s.combine(msg_);
	1536	biggest_ints_3a.combine(msg_);
	1537	biggest_ints_3.combine(msg_);
	1538	#endif
	1539
	1540	if (me == 0) {
	1541	emp2 = escf + ecorr_mp2;
	1542
	1543	#if PRINT_BIGGEST_INTS
	1544	ExEnv::out0() << "biggest 1/4 transformed ints" << endl;
	1545	for (i=0; i<biggest_ints_1.ncontrib(); i++) {
	1546	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
	1547	biggest_ints_1.indices(i)[0],
	1548	biggest_ints_1.indices(i)[1],
	1549	biggest_ints_1.indices(i)[2],
	1550	biggest_ints_1.indices(i)[3],
	1551	biggest_ints_1.val(i)
	1552	)
	1553	<< endl;
	1554	}
	1555	ExEnv::outn() << "biggest 2/4 transformed ints" << endl;
	1556	for (i=0; i<biggest_ints_2.ncontrib(); i++) {
	1557	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
	1558	biggest_ints_2.indices(i)[0],
	1559	biggest_ints_2.indices(i)[1],
	1560	biggest_ints_2.indices(i)[2],
	1561	biggest_ints_2.indices(i)[3],
	1562	biggest_ints_2.val(i)
	1563	)
	1564	<< endl;
	1565	}
	1566	ExEnv::outn() << "restricted 2/4 transformed ints" << endl;
	1567	for (i=0; i<biggest_ints_2s.ncontrib(); i++) {
	1568	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
	1569	biggest_ints_2s.indices(i)[0],
	1570	biggest_ints_2s.indices(i)[1],
	1571	biggest_ints_2s.indices(i)[2],
	1572	biggest_ints_2s.indices(i)[3],
	1573	biggest_ints_2s.val(i)
	1574	)
	1575	<< endl;
	1576	}
	1577	ExEnv::outn() << "biggest 3/4 transformed ints (in 3.)" << endl;
	1578	for (i=0; i<biggest_ints_3a.ncontrib(); i++) {
	1579	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
	1580	biggest_ints_3a.indices(i)[0],
	1581	biggest_ints_3a.indices(i)[1],
	1582	biggest_ints_3a.indices(i)[2],
	1583	biggest_ints_3a.indices(i)[3],
	1584	biggest_ints_3a.val(i)
	1585	)
	1586	<< endl;
	1587	}
	1588	ExEnv::outn() << "biggest 3/4 transformed ints (in 4.)" << endl;
	1589	for (i=0; i<biggest_ints_3.ncontrib(); i++) {
	1590	ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
	1591	biggest_ints_3.indices(i)[0],
	1592	biggest_ints_3.indices(i)[1],
	1593	biggest_ints_3.indices(i)[2],
	1594	biggest_ints_3.indices(i)[3],
	1595	biggest_ints_3.val(i)
	1596	)
	1597	<< endl;
	1598	}
	1599	#endif
	1600
	1601	if (restart_orbital_memgrp_ == 0) {
	1602	if (biggest_coefs.ncontrib()) {
	1603	ExEnv::out0() << endl << indent
	1604	<< "Largest first order coefficients (unique):"
	1605	<< endl;
	1606	}
	1607	for (i=0; i<biggest_coefs.ncontrib(); i++) {
	1608	int i0 = biggest_coefs.indices(i)[0];
	1609	int i1 = biggest_coefs.indices(i)[1];
	1610	int i2 = biggest_coefs.indices(i)[2] + nocc;
	1611	int i3 = biggest_coefs.indices(i)[3] + nocc;
	1612	int spincase = biggest_coefs.indices(i)[4];
	1613	ExEnv::out0() << indent
	1614	<< scprintf(" %2d %12.8f %2d %3s %2d %3s -> %2d %3s %2d %3s (%s)",
	1615	i+1, biggest_coefs.val(i),
	1616	symorb_num_[i0]+1,
	1617	ct.gamma(symorb_irrep_[i0]).symbol(),
	1618	symorb_num_[i1]+1,
	1619	ct.gamma(symorb_irrep_[i1]).symbol(),
	1620	symorb_num_[i2]+1,
	1621	ct.gamma(symorb_irrep_[i2]).symbol(),
	1622	symorb_num_[i3]+1,
	1623	ct.gamma(symorb_irrep_[i3]).symbol(),
	1624	(spincase==1111?"++++":"+-+-")
	1625	)
	1626	<< endl;
	1627	}
	1628	}
	1629
	1630	// Print out various energies etc.
	1631
	1632	if (debug_) {
	1633	ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
	1634	<< indent << "(or integral derivatives) would have been computed\n"
	1635	<< indent << "without bounds checking: "
	1636	<< npassnshellnshell(nshell+1)(nshell+1)/2
	1637	<< endl;
	1638
	1639	ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
	1640	<< indent << "were computed: " << aoint_computed
	1641	<< endl;
	1642
	1643	if (dograd) {
	1644	ExEnv::out0() << indent
	1645	<< "Number of shell quartets for which AO integral derivatives\n"
	1646	<< indent << "were computed: " << aointder_computed
	1647	<< endl;
	1648	}
	1649	}
	1650
	1651	ExEnv::out0()<<endl<<indent
	1652	<<scprintf("RHF energy [au]: %17.12lf\n", escf);
	1653	ExEnv::out0()<<indent
	1654	<<scprintf("MP2 correlation energy [au]: %17.12lf\n", ecorr_mp2);
	1655	ExEnv::out0()<<indent
	1656	<<scprintf("MP2 energy [au]: %17.12lf\n", emp2);
	1657	ExEnv::out0().flush();
	1658	}
	1659	if (method_ && strcmp(method_,"mp")) {
	1660	ExEnv::out0() << indent
	1661	<< "MBPT2: bad method for closed shell case: " << method_
	1662	<< ", using mp" << endl;
	1663	}
	1664	set_energy(emp2);
	1665	set_actual_value_accuracy(reference_->actual_value_accuracy()
	1666	*ref_to_mp2_acc);
	1667
	1668	RefSCDimension nocc_act_dim(new SCDimension(nocc_act,1));
	1669	nocc_act_dim->blocks()->set_subdim(0, new SCDimension(nocc_act));
	1670	RefSCDimension nvir_act_dim(new SCDimension(nvir_act,1));
	1671	nvir_act_dim->blocks()->set_subdim(0, new SCDimension(nvir_act));
	1672	RefSCDimension nocc_dim(new SCDimension(nocc,1));
	1673	nocc_dim->blocks()->set_subdim(0, new SCDimension(nocc));
	1674	RefSCDimension nvir_dim(new SCDimension(nvir,1));
	1675	nvir_dim->blocks()->set_subdim(0, new SCDimension(nvir));
	1676	RefSCDimension nbasis_dim = ao_dimension()->blocks()->subdim(0);
	1677	RefSCDimension noso_dim(new SCDimension(noso,1));
	1678
	1679	if (dograd \|\| do_d1_) {
	1680	msg_->sum(Laj,nvir*nocc);
	1681
	1682	RefSCMatrix T1_mat(nocc_act_dim, nvir_act_dim, kit);
	1683	// the elements of T1_mat are the single-substitution amplitudes
	1684	BiggestContribs biggest_t1(2,10);
	1685	// compute the S2 norm of Lee et al. (s2_diag)
	1686	double s2_diag = 0.0;
	1687	for (j=nfzc; j<nocc; j++) {
	1688	laj_ptr = &Laj[j*nvir];
	1689	for (a=0; a<nvir_act; a++) {
	1690	tmpval = 0.5**laj_ptr++/(evals[a+nocc]-evals[j]);
	1691	T1_mat.set_element(j-nfzc,a,tmpval);
	1692	biggest_t1.insert(tmpval,j,a);
	1693	s2_diag += tmpval*tmpval;
	1694	}
	1695	}
	1696	s2_diag = sqrt(s2_diag/(2*nocc_act));
	1697	// compute the T1 matrix 1-norm
	1698	double t1onenorm = 0.0;
	1699	Ref<SCElementSumAbs> sumabs = new SCElementSumAbs;
	1700	Ref<SCElementOp> genop = sumabs.pointer();
	1701	for (a=0; a < nvir_act; a++) {
	1702	sumabs->init();
	1703	T1_mat.get_column(a).element_op(genop);
	1704	if (t1onenorm < sumabs->result()) t1onenorm = sumabs->result();
	1705	}
	1706	// compute the T1 matrix inf-norm
	1707	double t1infnorm = 0.0;
	1708	for (j=0; j < nocc_act; j++) {
	1709	sumabs->init();
	1710	T1_mat.get_row(j).element_op(genop);
	1711	if (t1infnorm < sumabs->result()) t1infnorm = sumabs->result();
	1712	}
	1713	// compute the T1 matrix 2-norm ( = D1(MP2) )
	1714	RefSymmSCMatrix D1_mat(nocc_act_dim,kit);
	1715	D1_mat.assign(0.0);
	1716	D1_mat.accumulate_symmetric_product(T1_mat);
	1717	T1_mat = 0;
	1718	double d1_diag = sqrt(D1_mat.eigvals().get_element(nocc_act-1));
	1719	D1_mat = 0;
	1720	// print the norms
	1721	ExEnv::out0()<<endl;
	1722	ExEnv::out0()
	1723	<<indent<<scprintf("D1(MP2) = %12.8f", d1_diag)
	1724	<<endl
	1725	<<indent<<scprintf("S2 matrix 1-norm = %12.8f", t1onenorm)
	1726	<<endl
	1727	<<indent<<scprintf("S2 matrix inf-norm = %12.8f", t1infnorm)
	1728	<<endl
	1729	<<indent<<scprintf("S2 diagnostic = %12.8f", s2_diag)
	1730	<<endl;
	1731	if (biggest_t1.ncontrib()) {
	1732	ExEnv::out0() << endl
	1733	<< indent << "Largest S2 values (unique determinants):" << endl;
	1734	}
	1735	for (i=0; i<biggest_t1.ncontrib(); i++) {
	1736	int i0 = biggest_t1.indices(i)[0];
	1737	int i1 = biggest_t1.indices(i)[1] + nocc;
	1738	ExEnv::out0()
	1739	<< indent << scprintf(" %2d %12.8f %3d %3s -> %4d %3s",
	1740	i+1, biggest_t1.val(i),
	1741	symorb_num_[i0]+1,
	1742	ct.gamma(symorb_irrep_[i0]).symbol(),
	1743	symorb_num_[i1]+1,
	1744	ct.gamma(symorb_irrep_[i1]).symbol())
	1745	<< endl;
	1746	}
	1747
	1748	} // if (dograd \|\| do_d1_)
	1749
	1750	for (i=0; i<thr_->nthread(); i++) {
	1751	delete e12thread[i];
	1752	}
	1753	delete[] e12thread;
	1754
	1755	// quit here if only the energy is needed
	1756	if (!dograd) {
	1757	delete[] tbints_; tbints_ = 0;
	1758	if (do_d1_) delete[] Laj;
	1759	delete[] scf_vector;
	1760	delete[] scf_vector_dat;
	1761	delete[] evals;
	1762	tim_exit("mp2-mem");
	1763	return;
	1764	}
	1765
	1766	// Accumulate intermediate gradients on node 0
	1767	sum_gradients(msg_, ginter, natom, 3);
	1768
	1769	// Add intermediate gradients to the gradient on node 0
	1770	if (me==0)
	1771	accum_gradients(gradient, ginter, natom, 3);
	1772
	1773	// Print out contribution to the gradient from non-sep. 2PDM
	1774	if (debug_) {
	1775	print_natom_3(ginter,
	1776	"Contribution to MP2 gradient from non-separable 2PDM [au]:");
	1777	}
	1778
	1779	////////////////////////////////////////////////////////
	1780	// Add contributions from all nodes to various matrices
	1781	////////////////////////////////////////////////////////
	1782	tmpint = (nvir > nocc ? nvir:nocc);
	1783	double tmpmat = new double[tmpinttmpint];
	1784	msg_->sum(Pkj,nocc*(nocc+1)/2,tmpmat); // Pkj is now complete
	1785	msg_->sum(Pab,nvir*(nvir+1)/2,tmpmat); // Pab is now complete
	1786	msg_->sum(Wab,nvir*nvir,tmpmat);
	1787	msg_->sum(Wkj,nocc*nocc,tmpmat);
	1788	msg_->sum(Waj,nvir*nocc,tmpmat);
	1789	if (do_d2_) {
	1790	msg_->sum(d2occ_mat,nocc_act*(nocc_act+1)/2,tmpmat);
	1791	msg_->sum(d2vir_mat,nvir_act*(nvir_act+1)/2,tmpmat);
	1792	}
	1793	delete[] tmpmat;
	1794
	1795	// Compute D2 diagnostic (d2_diag) from matrices d2_occ_mat and d2_vir_mat
	1796	if (do_d2_) {
	1797	RefSymmSCMatrix D2occ_mat(nocc_act_dim, kit);
	1798	RefSymmSCMatrix D2vir_mat(nvir_act_dim,kit);
	1799	D2occ_mat->assign(d2occ_mat);
	1800	D2vir_mat->assign(d2vir_mat);
	1801	d2o = sqrt(D2occ_mat.eigvals().get_element(nocc_act-1));
	1802	d2v = sqrt(D2vir_mat.eigvals().get_element(nvir_act-1));
	1803	d2_diag = (d2o > d2v ? d2o:d2v);
	1804	ExEnv::out0() << endl
	1805	<< indent << scprintf("D2(MP1) = %12.8f", d2_diag) << endl << endl;
	1806	delete[] d2occ_mat;
	1807	delete[] d2vir_mat;
	1808	}
	1809
	1810	// Finish computation of Wab
	1811	tim_enter("Pab and Wab");
	1812	pab_ptr = Pab;
	1813	for (a=0; a<nvir_act; a++) { // active-active part of Wab
	1814	wba_ptr = &Wab[a];
	1815	wab_ptr = &Wab[a*nvir];
	1816	for (b=0; b<=a; b++) {
	1817	if (a==b) {
	1818	wab_ptr -= evals[nocc+a]*pab_ptr;
	1819	}
	1820	else {
	1821	wab_ptr -= evals[nocc+a]*pab_ptr;
	1822	wba_ptr -= evals[nocc+b]*pab_ptr;
	1823	}
	1824	pab_ptr++;
	1825	wab_ptr++;
	1826	wba_ptr += nvir;
	1827	} // exit b loop
	1828	} // exit a loop
	1829	for (a=0; a<nfzv; a++) { // active-frozen part of Wab
	1830	wba_ptr = &Wab[nvir_act+a];
	1831	wab_ptr = &Wab[(nvir_act+a)*nvir];
	1832	pab_ptr = &Pab[(nvir_act+a)*(nvir_act+a+1)/2];
	1833	for (b=0; b<nvir_act; b++) {
	1834	wab_ptr -= evals[nocc+b]*pab_ptr;
	1835	wba_ptr -= evals[nocc+b]*pab_ptr;
	1836	pab_ptr++;
	1837	wab_ptr++;
	1838	wba_ptr += nvir;
	1839	} // exit b loop
	1840	} // exit a loop
	1841	// Wab is now complete
	1842	tim_exit("Pab and Wab");
	1843	RefSCMatrix Wab_matrix(nvir_dim, nvir_dim, kit);
	1844	Wab_matrix->assign(Wab); // Put elements of Wab into Wab_matrix
	1845	free(Wab);
	1846
	1847	// Update Wkj with contribution from Pkj
	1848	tim_enter("Pkj and Wkj");
	1849	pkj_ptr = Pkj;
	1850	for (k=0; k<nocc; k++) {
	1851	wjk_ptr = &Wkj[k];
	1852	wkj_ptr = &Wkj[k*nocc];
	1853	for (j=0; j<=k; j++) {
	1854	if (k<nfzc && j<nfzc) { // don't want both j and k frozen
	1855	wkj_ptr++;
	1856	wjk_ptr += nocc;
	1857	pkj_ptr++;
	1858	continue;
	1859	}
	1860	if (j==k) {
	1861	wkj_ptr++ -= evals[k]*pkj_ptr++;
	1862	}
	1863	else if (j<nfzc) {
	1864	wkj_ptr++ -= evals[k]*pkj_ptr;
	1865	wjk_ptr -= evals[k]*pkj_ptr;
	1866	pkj_ptr++;
	1867	}
	1868	else {
	1869	wkj_ptr++ -= evals[k]*pkj_ptr;
	1870	wjk_ptr -= evals[j]*pkj_ptr;
	1871	pkj_ptr++;
	1872	}
	1873	wjk_ptr += nocc;
	1874	} // exit j loop
	1875	} // exit k loop
	1876	tim_exit("Pkj and Wkj");
	1877
	1878	/////////////////////////////////
	1879	// Finish the computation of Laj
	1880	/////////////////////////////////
	1881
	1882	tim_enter("Laj");
	1883
	1884	RefSCMatrix Cv(nbasis_dim, nvir_dim, kit); // virtual block of scf_vector
	1885	RefSCMatrix Co(nbasis_dim, nocc_dim, kit); // occupied block of scf_vector
	1886	for (p=0; p<nbasis; p++) {
	1887	c_pq = scf_vector[p];
	1888	for (q=0; q<noso; q++) {
	1889	if (q<nocc) Co->set_element(p, q, *c_pq++);
	1890	else Cv->set_element(p, q-nocc, *c_pq++);
	1891	}
	1892	}
	1893
	1894	// Compute the density-like Dmat_matrix
	1895	// (CvPab_matrixCv.t() + CoPkj_matrixCo.t())
	1896	RefSymmSCMatrix Pab_matrix(nvir_dim,kit);
	1897	RefSymmSCMatrix Pkj_matrix(nocc_dim,kit);
	1898	RefSymmSCMatrix Dmat_matrix(nbasis_dim,kit);
	1899	Pab_matrix->assign(Pab); // fill in elements of Pab_matrix from Pab
	1900	free(Pab);
	1901	Pkj_matrix->assign(Pkj); // fill in elements of Pkj_matrix from Pkj
	1902	free(Pkj);
	1903	Dmat_matrix.assign(0.0);
	1904	Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
	1905	Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
	1906	// We now have the density-like matrix Dmat_matrix
	1907
	1908	// Compute the G matrix
	1909
	1910	RefSymmSCMatrix Gmat(nbasis_dim,kit);
	1911	init_cs_gmat();
	1912	tim_enter("make_gmat for Laj");
	1913	make_cs_gmat_new(Gmat, Dmat_matrix);
	1914	if (debug_ > 1) {
	1915	Dmat_matrix.print("Dmat");
	1916	Gmat.print("Gmat");
	1917	}
	1918	tim_exit("make_gmat for Laj");
	1919
	1920	// Finish computation of Laj
	1921	RefSCMatrix Laj_matrix(nocc_dim,nvir_dim,kit); // elements are ordered as j*nvir+a
	1922	Laj_matrix->assign(Laj);
	1923	if (debug_ > 1) Laj_matrix->print("Laj (first bit)");
	1924	Laj_matrix = Laj_matrix - 2Co.t()Gmat*Cv;
	1925	if (debug_ > 1) Laj_matrix->print("Laj (all of it)");
	1926	Laj_matrix->convert(Laj); // Put new Laj_matrix elements into Laj
	1927
	1928	tim_exit("Laj");
	1929
	1930	//////////////////////////////////////
	1931	// Computation of Laj is now complete
	1932	//////////////////////////////////////
	1933
	1934	////////////////////////////
	1935	// Solve the CPHF equations
	1936	////////////////////////////
	1937	RefSCMatrix Paj_matrix(nvir_dim, nocc_dim, kit);
	1938	tim_enter("cphf");
	1939	cs_cphf(scf_vector, Laj, evals, Paj_matrix);
	1940	tim_exit("cphf");
	1941
	1942	free(Laj);
	1943
	1944	// Finish computation of Waj
	1945	for (a=0; a<nvir; a++) {
	1946	waj_ptr = &Waj[a];
	1947	for (j=0; j<nocc; j++) {
	1948	waj_ptr -= evals[j]Paj_matrix->get_element(a,j);
	1949	waj_ptr += nvir;
	1950	}
	1951	}
	1952	// Waj is now complete
	1953	RefSCMatrix Waj_matrix(nocc_dim, nvir_dim, kit);
	1954	Waj_matrix->assign(Waj); // Put elements of Waj into Waj_matrix
	1955	// NB. Waj_matrix elements are ordered as j*nvir+a
	1956	free(Waj);
	1957
	1958
	1959	// Finish computation of Wkj
	1960	tim_enter("Pkj and Wkj");
	1961	// Compute Dmat_matrix =
	1962	// CoPkj_matrixCo.t() + CoPaj_matrix.t()Cv.t()
	1963	// + CvPaj_matrixCo.t() + CvPab_matrixCv.t();
	1964	Dmat_matrix.assign(0.0);
	1965	Dmat_matrix.accumulate_symmetric_sum(CvPaj_matrixCo.t());
	1966	Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
	1967	Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
	1968	tim_enter("make_gmat for Wkj");
	1969	make_cs_gmat_new(Gmat, Dmat_matrix);
	1970	tim_exit("make_gmat for Wkj");
	1971	done_cs_gmat();
	1972	for (i=0; i<thr_->nthread(); i++) tbints_[i] = 0;
	1973	delete[] tbints_; tbints_ = 0;
	1974	RefSCMatrix Wkj_matrix(nocc_dim, nocc_dim, kit);
	1975	Wkj_matrix->assign(Wkj);
	1976	Wkj_matrix = Wkj_matrix - 2Co.t()Gmat*Co;
	1977	free(Wkj);
	1978	// Wkj is now complete - not as Wkj but as Wkj_matrix
	1979	tim_exit("Pkj and Wkj");
	1980
	1981	////////////////////////////////////////////////////////////////
	1982	// We now have the matrices Pkj_matrix, Paj_matrix, Pab_matrix,
	1983	// Wkj_matrix, Waj_matrix, Wab_matrix and can compute the
	1984	// remaining contributions to the gradient
	1985	///////////////////////////////////////////////////////////////
	1986
	1987	// Compute the second order correction to
	1988	// the density matrix and energy weighted
	1989	// density matrix in the AO basis
	1990	RefSCMatrix P2AO_matrix(nbasis_dim, nbasis_dim, kit);
	1991	RefSCMatrix P2MO_matrix(noso_dim, noso_dim, kit);
	1992	RefSCMatrix W2AO_matrix(nbasis_dim, nbasis_dim, kit);
	1993	RefSCMatrix W2MO_matrix(noso_dim, noso_dim, kit);
	1994	RefSCMatrix SCF_matrix(nbasis_dim, noso_dim, kit);
	1995	for (i=0; i<nocc; i++) {
	1996	for (j=0; j<nocc; j++) {
	1997	P2MO_matrix->set_element(i,j,Pkj_matrix->get_element(i,j));
	1998	W2MO_matrix->set_element(i,j,Wkj_matrix->get_element(i,j));
	1999	}
	2000	for (j=nocc; j<noso; j++) {
	2001	P2MO_matrix->set_element(i,j,Paj_matrix->get_element(j-nocc,i));
	2002	W2MO_matrix->set_element(i,j,Waj_matrix->get_element(i,j-nocc));
	2003	}
	2004	}
	2005	for (i=nocc; i<noso; i++) {
	2006	for (j=0; j<nocc; j++) {
	2007	P2MO_matrix->set_element(i,j,Paj_matrix->get_element(i-nocc,j));
	2008	W2MO_matrix->set_element(i,j,Waj_matrix->get_element(j,i-nocc));
	2009	}
	2010	for (j=nocc; j<noso; j++) {
	2011	P2MO_matrix->set_element(i,j,Pab_matrix->get_element(i-nocc,j-nocc));
	2012	W2MO_matrix->set_element(i,j,Wab_matrix->get_element(i-nocc,j-nocc));
	2013	}
	2014	}
	2015	for (i=0; i<nbasis; i++) {
	2016	for (j=0; j<nocc; j++) {
	2017	SCF_matrix->set_element(i,j,Co->get_element(i,j));
	2018	}
	2019	for (j=nocc; j<noso; j++) {
	2020	SCF_matrix->set_element(i,j,Cv->get_element(i,j-nocc));
	2021	}
	2022	}
	2023	P2AO_matrix = SCF_matrix * P2MO_matrix * SCF_matrix.t();
	2024	W2AO_matrix = SCF_matrix * W2MO_matrix * SCF_matrix.t();
	2025	// P2AO_matrix = Co(Pkj_matrixCo.t() + Paj_matrix.t()*Cv.t()) +
	2026	// Cv(Paj_matrixCo.t() + Pab_matrix*Cv.t());
	2027	// W2AO_matrix = Co(Wkj_matrixCo.t() + Waj_matrix*Cv.t()) +
	2028	// Cv(Waj_matrix.t()Co.t() + Wab_matrix*Cv.t());
	2029
	2030	if (debug_ > 1) {
	2031	SCF_matrix.print("SCF_matrix");
	2032	P2MO_matrix.print("P2MO_matrix");
	2033	W2MO_matrix.print("W2MO_matrix");
	2034	P2AO_matrix.print("P2AO_matrix");
	2035	W2AO_matrix.print("W2AO_matrix");
	2036	}
	2037
	2038	// Convert P2AO_matrix and W2AO_matrix to double*
	2039	P2AO = new double[nbasis*nbasis];
	2040	W2AO = new double[nbasis*nbasis];
	2041	P2AO_matrix->convert(P2AO);
	2042	W2AO_matrix->convert(W2AO);
	2043
	2044	// Compute the HF density matrix and
	2045	// energy weighted density matrix
	2046	PHF = new double[nbasis*nbasis];
	2047	WHF = new double[nbasis*nbasis];
	2048	phf_ptr = PHF;
	2049	whf_ptr = WHF;
	2050	for (p=0; p<nbasis; p++) {
	2051	for (q=0; q<nbasis; q++) {
	2052	*phf_ptr++ = 0.0;
	2053	*whf_ptr++ = 0.0;
	2054	}
	2055	}
	2056	phf_ptr = PHF;
	2057	whf_ptr = WHF;
	2058	for (p=0; p<nbasis; p++) {
	2059	for (q=0; q<nbasis; q++) {
	2060	c_pi = scf_vector[p];
	2061	c_qi = scf_vector[q];
	2062	for (i=0; i<nocc; i++) {
	2063	tmpval = 2* c_pi++ *c_qi++;
	2064	*phf_ptr += tmpval;
	2065	whf_ptr -= evals[i] tmpval;
	2066	} // exit i loop
	2067	phf_ptr++;
	2068	whf_ptr++;
	2069	} // exit q loop
	2070	} // exit p loop
	2071
	2072	// Compute the MP2 density and energy weighted density
	2073
	2074	// PMP2 = PHF + P2AO; WMP2 = WHF + W2AO
	2075	PMP2 = new double[nbasis*nbasis];
	2076	WMP2 = new double[nbasis*nbasis];
	2077	// Initialize PMP2 and WMP2
	2078	pmp2_ptr = PMP2;
	2079	wmp2_ptr = WMP2;
	2080	for (p=0; p<nbasis; p++) {
	2081	for (q=0; q<nbasis; q++) {
	2082	*pmp2_ptr++ = 0.0;
	2083	*wmp2_ptr++ = 0.0;
	2084	}
	2085	}
	2086	pmp2_ptr = PMP2;
	2087	wmp2_ptr = WMP2;
	2088	p2ao_ptr = P2AO;
	2089	w2ao_ptr = W2AO;
	2090	phf_ptr = PHF;
	2091	whf_ptr = WHF;
	2092	for (p=0; p<nbasis; p++) {
	2093	for (q=0; q<nbasis; q++) {
	2094	pmp2_ptr++ = phf_ptr++ + *p2ao_ptr++;
	2095	wmp2_ptr++ = whf_ptr++ + *w2ao_ptr++;
	2096	}
	2097	}
	2098	delete[] W2AO;
	2099
	2100	if (debug_ > 1) {
	2101	RefSCMatrix tmpmat(ao_dimension(), ao_dimension(), kit);
	2102	tmpmat->assign(PMP2);
	2103	tmpmat.print("PMP2");
	2104	tmpmat->assign(P2AO);
	2105	tmpmat.print("P2AO");
	2106	tmpmat->assign(PHF);
	2107	tmpmat.print("PHF");
	2108	tmpmat->assign(WMP2);
	2109	tmpmat.print("WMP2");
	2110	}
	2111
	2112	////////////////////////////////////////////////
	2113	// Compute the contribution to the MP2 gradient
	2114	// from the separable part of the 2PDM
	2115	////////////////////////////////////////////////
	2116
	2117	zero_gradients(ginter, natom, 3);
	2118	zero_gradients(hf_ginter, natom, 3);
	2119	tim_enter("sep 2PDM contrib.");
	2120
	2121	CSGradS2PDM** s2pdmthread = new CSGradS2PDM*[thr_->nthread()];
	2122	for (i=0; i<thr_->nthread(); i++) {
	2123	s2pdmthread[i] = new CSGradS2PDM(i, thr_->nthread(), me, nproc,
	2124	lock, basis(), tbintder_[i],
	2125	PHF, P2AO, tol, debug_, dynamic_);
	2126	thr_->add_thread(i,s2pdmthread[i]);
	2127	# if SINGLE_THREAD_S2PDM
	2128	s2pdmthread[i]->run();
	2129	# endif
	2130	}
	2131	# if !SINGLE_THREAD_S2PDM
	2132	thr_->start_threads();
	2133	thr_->wait_threads();
	2134	# endif
	2135	for (i=0; i<thr_->nthread(); i++) {
	2136	s2pdmthread[i]->accum_mp2_contrib(ginter);
	2137	s2pdmthread[i]->accum_hf_contrib(hf_ginter);
	2138	delete s2pdmthread[i];
	2139	}
	2140	sum_gradients(msg_, ginter, molecule()->natom(), 3);
	2141	sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
	2142	delete[] s2pdmthread;
	2143
	2144	tim_exit("sep 2PDM contrib.");
	2145	delete[] P2AO;
	2146
	2147	// The separable 2PDM contribution to the gradient has now been
	2148	// accumulated in ginter on node 0; add it to the total gradients
	2149	if (me == 0) {
	2150	accum_gradients(gradient, ginter, natom, 3);
	2151	accum_gradients(hf_gradient, hf_ginter, natom, 3);
	2152	}
	2153	// Print out the contribution to the gradient from sep. 2PDM
	2154	if (debug_) {
	2155	print_natom_3(hf_ginter,
	2156	"Contribution from separable 2PDM to HF gradient [au]:");
	2157	print_natom_3(ginter,
	2158	"Contribution from separable 2PDM to MP2 gradient [au]:");
	2159	}
	2160
	2161	// Done with two-electron integrals
	2162	tbint_ = 0;
	2163	if (dograd \|\| do_d1_) {
	2164	delete[] tbintder_;
	2165	tbintder_ = 0;
	2166	}
	2167
	2168	/////////////////////////////////////////////////////////////
	2169	// Compute the one-electron contribution to the MP2 gradient
	2170	/////////////////////////////////////////////////////////////
	2171
	2172	zero_gradients(ginter, natom, 3);
	2173	zero_gradients(hf_ginter, natom, 3);
	2174	tim_enter("hcore contrib.");
	2175	hcore_cs_grad(PHF, PMP2, hf_ginter, ginter);
	2176	tim_exit("hcore contrib.");
	2177	delete[] PHF;
	2178	delete[] PMP2;
	2179	// The hcore contribution to the gradient has now been accumulated
	2180	// in ginter on node 0; add it to the total gradients
	2181	if (me == 0) {
	2182	accum_gradients(gradient, ginter, natom, 3);
	2183	accum_gradients(hf_gradient, hf_ginter, natom, 3);
	2184	}
	2185	// Print out the contribution to the gradient from hcore
	2186	if (debug_) {
	2187	print_natom_3(hf_ginter, "Contribution to HF gradient from hcore [au]:");
	2188	print_natom_3(ginter, "Contribution to MP2 gradient from hcore [au]:");
	2189	}
	2190
	2191	zero_gradients(ginter, natom, 3);
	2192	zero_gradients(hf_ginter, natom, 3);
	2193	tim_enter("overlap contrib.");
	2194	overlap_cs_grad(WHF, WMP2, hf_ginter, ginter);
	2195	delete[] WHF;
	2196	tim_exit("overlap contrib.");
	2197	delete[] WMP2;
	2198	// The overlap contribution to the gradient has now been accumulated
	2199	// in ginter on node 0; add it to the total gradients
	2200	if (me == 0) {
	2201	accum_gradients(gradient, ginter, natom, 3);
	2202	accum_gradients(hf_gradient, hf_ginter, natom, 3);
	2203	}
	2204
	2205	// Print out the overlap contribution to the gradient
	2206	if (debug_) {
	2207	print_natom_3(hf_ginter, "Overlap contribution to HF gradient [au]:");
	2208	print_natom_3(ginter,"Overlap contribution to MP2 gradient [au]:");
	2209	}
	2210
	2211	////////////////////////////////////////////////////////
	2212	// Compute the nuclear contribution to the MP2 gradient
	2213	////////////////////////////////////////////////////////
	2214
	2215	if (me == 0) {
	2216	nuclear_repulsion_energy_gradient(ginter);
	2217	accum_gradients(gradient, ginter, natom, 3);
	2218	accum_gradients(hf_gradient, ginter, natom, 3);
	2219
	2220	}
	2221
	2222	// Print out the nuclear contribution to the gradient
	2223	if (debug_)
	2224	print_natom_3(ginter,"Nuclear contribution to MP2 gradient [au]:");
	2225
	2226
	2227	////////////////////////////////////////////////////////
	2228	// The computation of the MP2 gradient is now complete;
	2229	// print out the gradient
	2230	////////////////////////////////////////////////////////
	2231	if (debug_) {
	2232	ExEnv::out0() << "Obtaining HF gradient" << endl;
	2233	print_natom_3(ref()->gradient(),"HF gradient from HF");
	2234	print_natom_3(hf_gradient,"Total HF gradient from MP2 [au]:");
	2235	}
	2236	print_natom_3(gradient,"Total MP2 gradient [au]:");
	2237
	2238	msg_->bcast(gradient_dat, natom*3);
	2239	RefSCVector gradientvec = matrixkit()->vector(moldim());
	2240	gradientvec->assign(gradient_dat);
	2241	set_gradient(gradientvec);
	2242
	2243	msg_->bcast(hf_gradient_dat, natom*3);
	2244	hf_gradient_ = matrixkit()->vector(moldim());
	2245	hf_gradient_->assign(hf_gradient_dat);
	2246
	2247	delete[] gradient;
	2248	delete[] gradient_dat;
	2249
	2250	delete[] hf_gradient;
	2251	delete[] hf_gradient_dat;
	2252
	2253	for (i=0; i<natom; i++) {
	2254	delete[] ginter[i];
	2255	delete[] hf_ginter[i];
	2256	}
	2257	delete[] ginter;
	2258	delete[] hf_ginter;
	2259
	2260	delete[] scf_vector;
	2261	delete[] scf_vector_dat;
	2262	delete[] evals;
	2263
	2264	tim_exit("mp2-mem");
	2265	}
	2266
	2267	///////////////////////////////////////////////////////////
	2268	// Compute the contribution to the MP2 gradient from hcore
	2269	///////////////////////////////////////////////////////////
	2270	void
	2271	MBPT2::hcore_cs_grad(double PHF, double PMP2,
	2272	double hf_ginter, double ginter)
	2273	{
	2274
	2275	int i, j, k, l, m;
	2276	int jj, kk;
	2277	int jsize, ksize;
	2278	int j_offset, k_offset;
	2279	int jk_index;
	2280	int index;
	2281	int nshell;
	2282	int nbasis;
	2283	int nproc = msg_->n();
	2284	int me = msg_->me();
	2285
	2286	const double *oneebuf; // 1-electron buffer
	2287	double tmpval1, tmpval2;
	2288	double gxyz[3];
	2289	double hf_gxyz[3];
	2290
	2291	// Initialize 1e object
	2292	Ref<OneBodyDerivInt> obintder_ = integral()->hcore_deriv();
	2293	oneebuf = obintder_->buffer();
	2294
	2295	nshell = basis()->nshell();
	2296	nbasis = basis()->nbasis();
	2297
	2298	///////////////////////////////////////////////////////////////////////////////
	2299	// Compute the kinetic and nuclear-electron energy contribution to the gradient
	2300	///////////////////////////////////////////////////////////////////////////////
	2301
	2302	jk_index = 0;
	2303
	2304	for (i=0; i<molecule()->natom(); i++) {
	2305	for (j=0; j<nshell; j++) {
	2306	jsize = basis()->shell(j).nfunction();
	2307	j_offset = basis()->shell_to_function(j);
	2308
	2309	for (k=0; k<=j; k++) {
	2310	ksize = basis()->shell(k).nfunction();
	2311	k_offset = basis()->shell_to_function(k);
	2312
	2313	if (jk_index++%nproc == me) {
	2314	obintder_->compute_shell(j,k,i);
	2315
	2316	for (l=0; l<3; l++) { gxyz[l] = 0.0; hf_gxyz[l] = 0.0; }
	2317
	2318	index = 0;
	2319
	2320	for (jj=0; jj<jsize; jj++) {
	2321	for (kk=0; kk<ksize; kk++) {
	2322	tmpval1 = PMP2[(j_offset + jj)*nbasis + k_offset + kk];
	2323	tmpval2 = PHF[(j_offset + jj)*nbasis + k_offset + kk];
	2324	for (m=0; m<3; m++) {
	2325	gxyz[m] += oneebuf[index] * tmpval1;
	2326	hf_gxyz[m] += oneebuf[index] * tmpval2;
	2327	index++;
	2328	} // exit m loop
	2329	} // exit kk loop
	2330	} // exit jj loop
	2331
	2332	if (j != k) {
	2333	// off-diagonal blocks
	2334	for (l=0; l<3; l++) {
	2335	gxyz[l] *= 2.0;
	2336	hf_gxyz[l] *= 2.0;
	2337	}
	2338	}
	2339
	2340	for (l=0; l<3; l++) {
	2341	ginter[i][l] += gxyz[l];
	2342	hf_ginter[i][l] += hf_gxyz[l];
	2343	}
	2344
	2345	} // exit "if"
	2346	} // exit k loop
	2347	} // exit j loop
	2348	} // exit i loop
	2349
	2350	/* Accumulate the nodes' intermediate gradients on node 0 */
	2351	sum_gradients(msg_, ginter, molecule()->natom(), 3);
	2352	sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
	2353	}
	2354
	2355
	2356	////////////////////////////////////////////////////
	2357	// Compute the overlap contribution to the gradient
	2358	////////////////////////////////////////////////////
	2359	void
	2360	MBPT2::overlap_cs_grad(double WHF, double WMP2,
	2361	double hf_ginter, double ginter)
	2362	{
	2363	int i, j, k, l, m;
	2364	int jj, kk;
	2365	int jj_index, kk_index;
	2366	int jsize, ksize;
	2367	int j_offset, k_offset;
	2368	int jk_index;
	2369	int index;
	2370	int nshell;
	2371	int nbasis;
	2372
	2373	const double *oneebuf; // 1-electron buffer
	2374	double tmpval1, tmpval2;
	2375	double hf_tmpval1, hf_tmpval2;
	2376	double gxyz[3];
	2377	double hf_gxyz[3];
	2378
	2379	// Initialize 1e object
	2380	Ref<OneBodyDerivInt> obintder_ = integral()->overlap_deriv();
	2381	oneebuf = obintder_->buffer();
	2382
	2383	nshell = basis()->nshell();
	2384	nbasis = basis()->nbasis();
	2385	int nproc = msg_->n();
	2386	int me = msg_->me();
	2387
	2388	for (i=0; i<molecule()->natom(); i++) {
	2389	jk_index = 0;
	2390
	2391	for (j=0; j<nshell; j++) {
	2392	j_offset = basis()->shell_to_function(j);
	2393	jsize = basis()->shell(j).nfunction();
	2394
	2395	for (k=0; k<=j; k++) {
	2396	k_offset = basis()->shell_to_function(k);
	2397	ksize = basis()->shell(k).nfunction();
	2398
	2399	if (jk_index++%nproc == me) {
	2400	obintder_->compute_shell(j,k,i);
	2401
	2402	for (l=0; l<3; l++) {
	2403	hf_gxyz[l] = 0.0;
	2404	gxyz[l] = 0.0;
	2405	}
	2406	index = 0;
	2407
	2408	for (jj=0; jj<jsize; jj++) {
	2409	jj_index = j_offset + jj;
	2410	for (kk=0; kk<ksize; kk++) {
	2411	kk_index = k_offset + kk;
	2412	if (kk_index > jj_index) {
	2413	index += 3; // increment index since m-loop will be skipped
	2414	break; // skip to next jj value
	2415	}
	2416	// NB. WMP2 is not a symmetrix matrix
	2417	tmpval1 = WMP2[jj_index*nbasis + kk_index];
	2418	tmpval2 = WMP2[kk_index*nbasis + jj_index];
	2419	hf_tmpval1 = WHF[jj_index*nbasis + kk_index];
	2420	hf_tmpval2 = WHF[kk_index*nbasis + jj_index];
	2421
	2422	for (m=0; m<3; m++) {
	2423	if (jj_index != kk_index) {
	2424	gxyz[m] += oneebuf[index] * (tmpval1 + tmpval2);
	2425	hf_gxyz[m] += oneebuf[index] * (hf_tmpval1 + hf_tmpval2);
	2426	}
	2427	else {
	2428	gxyz[m] += oneebuf[index] * tmpval1;
	2429	hf_gxyz[m] += oneebuf[index] * hf_tmpval1;
	2430	}
	2431	index++;
	2432	} // exit m loop
	2433	} // exit kk loop
	2434	} // exit jj loop
	2435
	2436	for (l=0; l<3; l++) {
	2437	ginter[i][l] += gxyz[l];
	2438	hf_ginter[i][l] += hf_gxyz[l];
	2439	}
	2440	} // exit "if"
	2441	} // exit k loop
	2442	} // exit j loop
	2443	} // exit i loop
	2444
	2445	/* Accumulate the nodes' intermediate gradients on node 0 */
	2446	sum_gradients(msg_, ginter, molecule()->natom(), 3);
	2447	sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
	2448	}
	2449
	2450	static void
	2451	sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2)
	2452	{
	2453	int i;
	2454
	2455	if (msg->n() == 1) return;
	2456
	2457	for (i=0; i<n1; i++) {
	2458	msg->sum(f[i],n2);
	2459	}
	2460	}
	2461
	2462	static void
	2463	zero_gradients(double **f, int n1, int n2)
	2464	{
	2465	for (int i=0; i<n1; i++) {
	2466	for (int j=0; j<3; j++) f[i][j] = 0.0;
	2467	}
	2468	}
	2469
	2470	static void
	2471	accum_gradients(double g, double f, int n1, int n2)
	2472	{
	2473	for (int i=0; i<n1; i++) {
	2474	for (int j=0; j<3; j++) g[i][j] += f[i][j];
	2475	}
	2476	}
	2477
	2478	/////////////////////////////////////
	2479	// Compute the batchsize
	2480	//
	2481	// Only arrays allocated before exiting the loop over
	2482	// i-batches are included here - only these arrays
	2483	// affect the batch size.
	2484	/////////////////////////////////////
	2485	int
	2486	MBPT2::compute_cs_batchsize(size_t mem_static, int nocc_act)
	2487	{
	2488	size_t mem_dyn; // dynamic memory available
	2489	distsize_t maxdyn;
	2490	int ni;
	2491
	2492	///////////////////////////////////////
	2493	// the largest memory requirement will
	2494	// either occur just before the end of
	2495	// the 1. q.b.t. (mem1) or just before
	2496	// the end of the i-batch loop (mem2)
	2497	///////////////////////////////////////
	2498
	2499	if (mem_alloc > mem_static) {
	2500	mem_dyn = mem_alloc - mem_static;
	2501	}
	2502	else {
	2503	mem_dyn = 0;
	2504	}
	2505
	2506	// First determine if calculation is possible at all (i.e., if ni=1 possible)
	2507
	2508	ni = 1;
	2509	maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
	2510
	2511	if (maxdyn > mem_dyn) {
	2512	return 0;
	2513	}
	2514
	2515	ni = 2;
	2516	while (ni<=nocc_act) {
	2517	maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
	2518	if (maxdyn >= mem_dyn) {
	2519	ni--;
	2520	break;
	2521	}
	2522	ni++;
	2523	}
	2524	if (ni > nocc_act) ni = nocc_act;
	2525
	2526	return ni;
	2527	}
	2528
	2529	/////////////////////////////////////
	2530	// Compute required (dynamic) memory
	2531	// for a given batch size
	2532	//
	2533	// Only arrays allocated before exiting the loop over
	2534	// i-batches are included here - only these arrays
	2535	// affect the batch size.
	2536	/////////////////////////////////////
	2537	distsize_t
	2538	MBPT2::compute_cs_dynamic_memory(int ni, int nocc_act)
	2539	{
	2540	int index;
	2541	distsize_t mem1, mem2, mem3;
	2542	distsize_t maxdyn;
	2543	distsize_t tmp;
	2544	int i, j;
	2545	int nij;
	2546	int nproc = msg_->n();
	2547
	2548	///////////////////////////////////////
	2549	// the largest memory requirement will
	2550	// either occur just before the end of
	2551	// the 1. q.b.t. (mem1) or just before
	2552	// the end of the i-batch loop (mem2)
	2553	///////////////////////////////////////
	2554
	2555	// compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes
	2556	index = 0;
	2557	nij = 0;
	2558	for (i=0; i<ni; i++) {
	2559	for (j=0; j<nocc; j++) {
	2560	if (index++ % nproc == 0) nij++;
	2561	}
	2562	}
	2563	mem1 = sizeof(double)((distsize_t)nijnbasis*nbasis
	2564	+ nbasis*nvir);
	2565	mem2 = sizeof(double)((distsize_t)thr_->nthread()ninbasisnfuncmax*nfuncmax
	2566	+ (distsize_t)nijnbasisnbasis
	2567	+ ninbasis + nbasisnfuncmax
	2568	+ 2nfuncmaxnfuncmaxnfuncmaxnfuncmax);
	2569	mem3 = sizeof(double)((distsize_t)ninbasisnfuncmaxnfuncmax
	2570	+ (distsize_t)nijnbasisnbasis
	2571	+ 2(2 + nbasisnfuncmax));
	2572	tmp = (mem2>mem3 ? mem2:mem3);
	2573	maxdyn = (mem1>tmp ? mem1:tmp);
	2574
	2575	return maxdyn;
	2576	}
	2577
	2578	////////////////////////////////////////////////////////////////////////////
	2579
	2580	// Local Variables:
	2581	// mode: c++
	2582	// c-file-style: "CLJ-CONDENSED"
	2583	// End:

Note: See TracBrowser for help on using the repository browser.

Download in other formats: