Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv1.cc@ 70d034

Visit:

ForceAnnealing_with_BondGraph_continued

Last change on this file since 70d034 was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 30.9 KB

Rev	Line
[0b990d]	1	//
	2	// hsosv1.cc
	3	//
	4	// Copyright (C) 1996 Limit Point Systems, Inc.
	5	//
	6	// Author: Ida Nielsen <ida@kemi.aau.dk>
	7	// Maintainer: LPS
	8	//
	9	// This file is part of the SC Toolkit.
	10	//
	11	// The SC Toolkit is free software; you can redistribute it and/or modify
	12	// it under the terms of the GNU Library General Public License as published by
	13	// the Free Software Foundation; either version 2, or (at your option)
	14	// any later version.
	15	//
	16	// The SC Toolkit is distributed in the hope that it will be useful,
	17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	// GNU Library General Public License for more details.
	20	//
	21	// You should have received a copy of the GNU Library General Public License
	22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
	23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	24	//
	25	// The U.S. Government is granted a limited license as per AL 91-7.
	26	//
	27
	28	typedef int dmt_matrix; // NOTE(review): not referenced anywhere in the visible part of this file; presumably a leftover compatibility typedef -- confirm before removing
	29
	30	#include <stdlib.h>
	31	#include <math.h>
	32
	33	#include <util/misc/formio.h>
	34	#include <util/misc/timer.h>
	35	#include <util/class/class.h>
	36	#include <util/state/state.h>
	37	#include <util/group/message.h>
	38	#include <math/scmat/matrix.h>
	39	#include <chemistry/molecule/molecule.h>
	40	#include <chemistry/qc/scf/scf.h>
	41	#include <chemistry/qc/mbpt/mbpt.h>
	42	#include <chemistry/qc/mbpt/bzerofast.h>
	43	#include <chemistry/qc/mbpt/hsosv1e1.h>
	44
	45	using namespace std;
	46	using namespace sc;
	47
	48	static distsize_t
	49	compute_v1_memory(int ni,
	50	int nfuncmax, int nbasis, int noso,
	51	int a_number, int nshell,
	52	int ndocc, int nsocc, int nvir,
	53	int nfzc, int nfzv,
	54	int nproc)
	55	{
	56	distsize_t mem = 0;
	57	int nocc = ndocc + nsocc;
	58	int dim_ij = noccni - (ni(ni-1))/2;
	59	mem += nproc*sizeof(int);
	60	mem += (noso+nsocc-nfzc-nfzv)*sizeof(double);
	61	mem += nfuncmaxnfuncmaxnbasisnisizeof(double);
	62	mem += nfuncmaxnfuncmaxnbasisnisizeof(double);
	63	mem += (distsize_t)nbasisa_numberdim_ij*sizeof(double);
	64	mem += nvira_numbersizeof(double);
	65	mem += nvirnvirsizeof(double);
	66	if (nsocc) {
	67	mem += nsocc*sizeof(double);
	68	mem += ndoccnsocc(nvir-nsocc)*sizeof(double);
	69	mem += ndoccnsocc(nvir-nsocc)*sizeof(double);
	70	}
	71	mem += sizeof(double)(nbasis);
	72	mem += sizeof(double)((nocc+nvir)nbasis);
	73	return mem;
	74	}
	75
	76	void
	77	MBPT2::compute_hsos_v1()
	78	{
	79	int i, j;
	80	int s1, s2;
	81	int a, b;
	82	int isocc, asocc; /* indices running over singly occupied orbitals */
	83	int nfuncmax = basis()->max_nfunction_in_shell();
	84	int nvir;
	85	int nocc=0;
	86	int ndocc=0,nsocc=0;
	87	int i_offset;
	88	int npass, pass;
	89	int ni; /* batch size */
	90	int nr, ns;
	91	int R, S;
	92	int q, r, s;
	93	int bf3,bf4;
	94	int docc_index, socc_index, vir_index;
	95	int me;
	96	int nproc;
	97	int rest;
	98	int a_rest;
	99	int a_number; /* number of a-values processed by each node */
	100	int a_offset;
	101	int a_vector; / each node's # of iajb integrals for one i,j */
	102	int compute_index;
	103	int tmp_index;
	104	int dim_ij;
	105	int nshell;
	106	double evals_open; / reordered scf eigenvalues */
	107	double trans_int1; / partially transformed integrals */
	108	double trans_int2; / partially transformed integrals */
	109	double trans_int3; / partially transformed integrals */
	110	double trans_int4_node;/ each node's subset of fully transf. integrals */
	111	double trans_int4; / fully transformed integrals */
	112	double mo_int_do_so_vir=0;/mo integral (is\|sa); i:d.o.,s:s.o.,a:vir */
	113	double mo_int_tmp=0; / scratch array used in global summations */
	114	double socc_sum=0; / sum of 2-el integrals involving only s.o.'s */
	115	double *iqrs;
	116	double iars_ptr, iajs_ptr, *iajr_ptr;
	117	double iajr;
	118	double iars;
	119	double *iajb;
	120	double *c_qa;
	121	double c_rb, c_rj, *c_sj;
	122	double delta_ijab;
	123	double delta;
	124	double contrib1, contrib2;
	125	double ecorr_opt2=0,ecorr_opt1=0;
	126	double ecorr_zapt2;
	127	double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
	128	double escf;
	129	double eopt2,eopt1,ezapt2;
	130	double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
	131	int ithread;
	132
	133	me = msg_->me();
	134
	135	ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v1)" << endl;
	136
	137	tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
	138
	139	nproc = msg_->n();
	140	ExEnv::out0() << indent << "nproc = " << nproc << endl;
	141
	142	ndocc = nsocc = 0;
	143	const double epsilon = 1.0e-4;
	144	for (i=0; i<oso_dimension()->n(); i++) {
	145	if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
	146	else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
	147	}
	148
	149	/* do a few preliminary tests to make sure the desired calculation *
	150	* can be done (and appears to be meaningful!) */
	151
	152	if (ndocc == 0 && nsocc == 0) {
	153	ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
	154	abort();
	155	}
	156
	157	if (nfzc > ndocc) {
	158	ExEnv::err0()
	159	<< "The number of frozen core orbitals exceeds the number" << endl
	160	<< "of doubly occupied orbitals; program exiting" << endl;
	161	abort();
	162	}
	163
	164	if (nfzv > noso - ndocc - nsocc) {
	165	ExEnv::err0()
	166	<< "The number of frozen virtual orbitals exceeds the number" << endl
	167	<< "of unoccupied orbitals; program exiting" << endl;
	168	abort();
	169	}
	170
	171	ndocc = ndocc - nfzc;
	172	/* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
	173	nvir = noso - ndocc - nfzc - nfzv;
	174	/* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
	175	nocc = ndocc + nsocc;
	176
	177
	178	/* compute number of a-values (a_number) processed by each node */
	179
	180	a_number = nvir/nproc;
	181	a_rest = nvir%nproc;
	182	if (me < a_rest) a_number++;
	183
	184	if (me == 0 && a_number < nsocc) {
	185	ExEnv::err0() << "not enough memory allocated" << endl;
	186	/* must have all socc's on node 0 for computation of socc_sum*/
	187	abort();
	188	}
	189
	190	if (me < a_rest) a_offset = mea_number; / a_offset for each node */
	191	else a_offset = a_rest(a_number + 1) + (me - a_rest)a_number;
	192
	193	/* fill in elements of a_vector for gcollect */
	194
	195	a_vector = (int) malloc(nprocsizeof(int));
	196	if (!a_vector) {
	197	ExEnv::errn() << "could not allocate storage for a_vector" << endl;
	198	abort();
	199	}
	200	for (i=0; i<nproc; i++) {
	201	a_vector[i] = nvir(nvir/nproc)sizeof(double);
	202	}
	203	for (i=0; i<a_rest; i++) {
	204	a_vector[i] += nvirsizeof(double); / first a_rest nodes hold an extra a */
	205	}
	206
	207	// Cannot restart when singly occupied orbitals are present
	208	if (nsocc) {
	209	restart_orbital_v1_ = 0;
	210	}
	211	else if (restart_orbital_v1_) {
	212	ExEnv::out0() << indent
	213	<< scprintf("Restarting at orbital %d with partial energy %18.14f",
	214	restart_orbital_v1_, restart_ecorr_)
	215	<< endl;
	216	}
	217
	218	/* compute batch size ni for opt2 loops *
	219	* need to store the following arrays: trans_int1-4, trans_int4_node, *
	220	* scf_vector, evals_open, socc_sum, mo_int_do_so_vir, mo_int_tmp and *
	221	* a_vector; *
	222	* since a_number is not the same on all nodes, use node 0's a_number *
	223	* (which is >= all other a_numbers) and broadcast ni afterwords */
	224
	225	nshell = basis()->nshell();
	226	size_t memused = 0;
	227	ni = 0;
	228	for (i=1; i<=nocc-restart_orbital_v1_; i++) {
	229	distsize_t tmpmem = compute_v1_memory(i,
	230	nfuncmax, nbasis, noso,
	231	a_number, nshell,
	232	ndocc, nsocc, nvir,
	233	nfzc, nfzv, nproc);
	234	if (tmpmem > mem_alloc) break;
	235	ni = i;
	236	memused = distsize_to_size(tmpmem);
	237	}
	238
	239	size_t mem_remaining = mem_alloc - memused;
	240
	241	/* set ni equal to the smallest batch size for any node */
	242	msg_->min(ni);
	243	msg_->bcast(ni);
	244
	245	ExEnv::out0() << indent
	246	<< "Memory available per node: " << mem_alloc << " Bytes"
	247	<< endl;
	248	ExEnv::out0() << indent
	249	<< "Total memory used per node: " << memused << " Bytes"
	250	<< endl;
	251	ExEnv::out0() << indent
	252	<< "Memory required for one pass: "
	253	<< compute_v1_memory(nocc-restart_orbital_v1_,
	254	nfuncmax, nbasis, noso, a_number, nshell,
	255	ndocc, nsocc, nvir, nfzc, nfzv, nproc)
	256	<< " Bytes"
	257	<< endl;
	258	ExEnv::out0() << indent
	259	<< "Minimum memory required: "
	260	<< compute_v1_memory(1,
	261	nfuncmax, nbasis, noso, a_number, nshell,
	262	ndocc, nsocc, nvir, nfzc, nfzv, nproc)
	263	<< " Bytes"
	264	<< endl;
	265	ExEnv::out0() << indent
	266	<< "Batch size: " << ni
	267	<< endl;
	268
	269	if (ni < nsocc) {
	270	ExEnv::out0() << indent << "Not enough memory allocated to handle"
	271	<< " SOCC orbs in first pass" << endl;
	272	abort();
	273	}
	274
	275	if (ni < 1) {
	276	ExEnv::out0() << indent << "Not enough memory allocated" << endl;
	277	abort();
	278	}
	279
	280	rest = (nocc-restart_orbital_v1_)%ni;
	281	npass = (nocc - restart_orbital_v1_ - rest)/ni + 1;
	282	if (rest == 0) npass--;
	283
	284	if (me == 0) {
	285	ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
	286	<< " ndocc nsocc nvir nfzc nfzv" << endl;
	287	ExEnv::out0() << indent << scprintf(" %-4i %-3i %-5i %-4i %-3i"
	288	" %-3i %-3i %-3i %-3i %-3i",
	289	npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv)
	290	<< endl;
	291	}
	292
	293	/* the scf vector might be distributed between the nodes, but for OPT2 *
	294	* each node needs its own copy of the vector; *
	295	* therefore, put a copy of the scf vector on each node; *
	296	* while doing this, duplicate columns corresponding to singly *
	297	* occupied orbitals and order columns as [socc docc socc unocc] */
	298	/* also rearrange scf eigenvalues as [socc docc socc unocc] *
	299	* want socc first to get the socc's in the first batch *
	300	* (need socc's to compute energy denominators - see *
	301	* socc_sum comment below) */
	302
	303	evals_open = (double) malloc((noso+nsocc-nfzc-nfzv)sizeof(double));
	304	if (!evals_open) {
	305	ExEnv::errn() << "could not allocate storage for evals_open" << endl;
	306	abort();
	307	}
	308
	309	RefDiagSCMatrix occ;
	310	RefDiagSCMatrix evals;
	311	RefSCMatrix Scf_Vec;
	312	eigen(evals, Scf_Vec, occ);
	313
	314	if (debug_>0) ExEnv::out0() << indent << "eigvenvectors computed" << endl;
	315	if (debug_>1) evals.print("eigenvalues");
	316	if (debug_>2) Scf_Vec.print("eigenvectors");
	317
	318	double scf_vectort_dat = new double[nosonbasis];
	319	Scf_Vec->convert(scf_vectort_dat);
	320
	321	double** scf_vectort = new double*[nocc + nvir];
	322
	323	int idoc = 0, ivir = 0, isoc = 0;
	324	for (i=nfzc; i<noso-nfzv; i++) {
	325	if (occ(i) >= 2.0 - epsilon) {
	326	evals_open[idoc+nsocc] = evals(i);
	327	scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
	328	idoc++;
	329	}
	330	else if (occ(i) >= 1.0 - epsilon) {
	331	evals_open[isoc] = evals(i);
	332	scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
	333	evals_open[isoc+nocc] = evals(i);
	334	scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
	335	isoc++;
	336	}
	337	else {
	338	if (ivir < nvir) {
	339	evals_open[ivir+nocc+nsocc] = evals(i);
	340	scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
	341	}
	342	ivir++;
	343	}
	344	}
	345
	346	// need the transpose of the vector
	347	if (debug_>0) ExEnv::out0() << indent << "allocating scf_vector" << endl;
	348	double *scf_vector = new double[nbasis];
	349	double scf_vector_dat = new double[(nocc+nvir)nbasis];
	350	for (i=0; i<nbasis; i++) {
	351	scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
	352	for (j=0; j<nocc+nvir; j++) {
	353	scf_vector[i][j] = scf_vectort[j][i];
	354	}
	355	}
	356	delete[] scf_vectort;
	357	delete[] scf_vectort_dat;
	358
	359	if (debug_>2) {
	360	ExEnv::out0() << indent << "Final eigenvalues and vectors" << endl;
	361	for (i=0; i<nocc+nvir; i++) {
	362	ExEnv::out0() << indent << evals_open[i];
	363	for (j=0; j<nbasis; j++) {
	364	ExEnv::out0() << " " << scf_vector[j][i];
	365	}
	366	ExEnv::out0()<< endl;
	367	}
	368	ExEnv::out0() << endl;
	369	}
	370
	371	/* allocate storage for integral arrays */
	372	if (debug_>0) ExEnv::out0() << indent << "allocating intermediates" << endl;
	373	dim_ij = noccni - ni(ni-1)/2;
	374
	375	trans_int1 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
	376	trans_int2 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
	377	trans_int3 = (double) malloc(nbasisa_numberdim_ijsizeof(double));
	378	trans_int4_node= (double) malloc(nvira_number*sizeof(double));
	379	trans_int4 = (double) malloc(nvirnvir*sizeof(double));
	380	if (!(trans_int1 && trans_int2
	381	&& (!a_number \|\| trans_int3)
	382	&& (!a_number \|\| trans_int4_node) && trans_int4)){
	383	ExEnv::errn() << "could not allocate storage for integral arrays" << endl;
	384	abort();
	385	}
	386	if (nsocc) socc_sum = (double) malloc(nsoccsizeof(double));
	387	if (nsocc) mo_int_do_so_vir =
	388	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
	389	if (nsocc) mo_int_tmp =
	390	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
	391
	392	if (nsocc) bzerofast(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc));
	393
	394	// create the integrals object
	395	if (debug_>0) ExEnv::out0() << indent << "allocating integrals" << endl;
	396	integral()->set_storage(mem_remaining);
	397	Ref<TwoBodyInt> *tbint = new Ref<TwoBodyInt>[thr_->nthread()];
	398	for (ithread=0; ithread<thr_->nthread(); ithread++) {
	399	tbint[ithread] = integral()->electron_repulsion();
	400	}
	401
	402	// set up the thread objects
	403	Ref<ThreadLock> lock = thr_->new_lock();
	404	HSOSV1Erep1Qtr** e1thread = new HSOSV1Erep1Qtr*[thr_->nthread()];
	405	for (ithread=0; ithread<thr_->nthread(); ithread++) {
	406	e1thread[ithread] = new HSOSV1Erep1Qtr(ithread, thr_->nthread(), me, nproc,
	407	lock, basis(), tbint[ithread], ni,
	408	scf_vector, tol, debug_);
	409	}
	410
	411	if (debug_>0) ExEnv::out0() << indent << "beginning passes" << endl;
	412
	413	/**************************************************************************
	414	* begin opt2 loops *
	415	***************************************************************************/
	416
	417	int work = ((nshell*(nshell+1))/2);
	418	int print_interval = work/100;
	419	if (print_interval == 0) print_interval = 1;
	420	if (work == 0) work = 1;
	421
	422	for (pass=0; pass<npass; pass++) {
	423	if (debug_) {
	424	ExEnv::out0() << indent << "Beginning pass " << pass << endl;
	425	}
	426
	427	int print_index = 0;
	428
	429	i_offset= pass*ni + restart_orbital_v1_;
	430	if ((pass == npass - 1) && (rest != 0)) ni = rest;
	431	bzerofast(trans_int3,nbasisa_numberdim_ij);
	432
	433	tim_enter("RS loop");
	434	for (R = 0; R < basis()->nshell(); R++) {
	435	nr = basis()->shell(R).nfunction();
	436
	437	for (S = 0; S <= R; S++) {
	438	ns = basis()->shell(S).nfunction();
	439	tim_enter("bzerofast trans_int1");
	440	bzerofast(trans_int1,nfuncmaxnfuncmaxnbasis*ni);
	441	tim_exit("bzerofast trans_int1");
	442
	443	if (debug_ && (print_index++)%print_interval == 0) {
	444	lock->lock();
	445	ExEnv::outn() << scprintf("%d: (PQ\|%d %d) %d%%",
	446	me,R,S,(100*print_index)/work)
	447	<< endl;
	448	lock->unlock();
	449	}
	450
	451	tim_enter("PQ loop");
	452
	453	for (ithread=0; ithread<thr_->nthread(); ithread++) {
	454	e1thread[ithread]->set_data(R,nr,S,ns,ni,i_offset);
	455	thr_->add_thread(ithread,e1thread[ithread]);
	456	}
	457	thr_->start_threads();
	458	thr_->wait_threads();
	459	for (ithread=0; ithread<thr_->nthread(); ithread++) {
	460	e1thread[ithread]->accum_buffer(trans_int1);
	461	}
	462
	463	tim_exit("PQ loop");
	464
	465	tim_enter("sum int");
	466	msg_->sum(trans_int1,nrnsnbasis*ni,trans_int2);
	467	tim_exit("sum int");
	468
	469	/* begin second quarter transformation */
	470
	471	tim_enter("bzerofast trans_int2");
	472	bzerofast(trans_int2,nfuncmaxnfuncmaxnbasis*ni);
	473	tim_exit("bzerofast trans_int2");
	474
	475	tim_enter("2. quart. tr.");
	476
	477	for (bf3 = 0; bf3 < nr; bf3++) {
	478
	479	for (bf4 = 0; bf4 < ns; bf4++) {
	480	if (R == S && bf4 > bf3) continue;
	481
	482	for (q = 0; q < nbasis; q++) {
	483	c_qa = &scf_vector[q][nocc + a_offset];
	484	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
	485	iars_ptr = &trans_int2[((bf4nr + bf3)a_number)*ni];
	486
	487	for (a = 0; a < a_number; a++) {
	488
	489	for (i=ni; i; i--) {
	490	iars_ptr++ += c_qa * *iqrs++;
	491	}
	492
	493	iqrs -= ni;
	494	c_qa++;
	495	}
	496	}
	497	}
	498	}
	499	tim_exit("2. quart. tr.");
	500
	501	/* begin third quarter transformation */
	502	tim_enter("3. quart. tr.");
	503
	504
	505	for (bf3 = 0; bf3<nr; bf3++) {
	506	r = basis()->shell_to_function(R) + bf3;
	507
	508	for (bf4 = 0; bf4 <= (R == S ? bf3:(ns-1)); bf4++) {
	509	s = basis()->shell_to_function(S) + bf4;
	510
	511	for (i=0; i<ni; i++) {
	512	tmp_index = i(i+1)/2 + ii_offset;
	513
	514	for (a=0; a<a_number; a++) {
	515	iars = trans_int2[((bf4nr + bf3)a_number + a)*ni + i];
	516	if (r == s) iars *= 0.5;
	517	iajs_ptr = &trans_int3[tmp_index + dim_ij(a + a_numbers)];
	518	iajr_ptr = &trans_int3[tmp_index + dim_ij(a + a_numberr)];
	519	c_rj = scf_vector[r];
	520	c_sj = scf_vector[s];
	521
	522	for (j=0; j<=i+i_offset; j++) {
	523	iajs_ptr++ += c_rj++ * iars;
	524	iajr_ptr++ += c_sj++ * iars;
	525	}
	526	}
	527	}
	528	} /* exit bf4 loop */
	529	} /* exit bf3 loop */
	530	tim_exit("3. quart. tr.");
	531	} /* exit S loop */
	532	} /* exit R loop */
	533	tim_exit("RS loop");
	534
	535	/* begin fourth quarter transformation; *
	536	* first tansform integrals with only s.o. indices; *
	537	* these integrals are needed to compute the denominators *
	538	* in the various terms contributing to the correlation energy *
	539	* and must all be computed in the first pass; *
	540	* the integrals are summed into the array socc_sum: *
	541	* socc_sum[isocc] = sum over asocc of (isocc asocc\|asocc isocc) *
	542	* (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
	543	* the individual integrals are not saved here, only the sums are kept */
	544
	545	if (debug_) {
	546	ExEnv::out0() << indent << "Beginning 4. quarter transform" << endl;
	547	}
	548
	549	tim_enter("4. quart. tr.");
	550	if (pass == 0 && me == 0) {
	551	if (nsocc) bzerofast(socc_sum,nsocc);
	552	for (isocc=0; isocc<nsocc; isocc++) {
	553
	554	for (r=0; r<nbasis; r++) {
	555
	556	for (asocc=0; asocc<nsocc; asocc++) {
	557	socc_sum[isocc] += scf_vector[r][nocc+asocc]*
	558	trans_int3[isocc(isocc+1)/2 + isocci_offset
	559	+ isocc + dim_ij(asocc + a_numberr)];
	560	}
	561	}
	562	}
	563	}
	564
	565	tim_enter("bcast0 socc_sum");
	566	if (nsocc) msg_->bcast(socc_sum,nsocc);
	567	tim_exit("bcast0 socc_sum");
	568
	569	tim_exit("4. quart. tr.");
	570
	571	/* now we have all the sums of integrals involving s.o.'s (socc_sum); *
	572	* begin fourth quarter transformation for all integrals (including *
	573	* integrals with only s.o. indices); use restriction j <= (i_offset+i) *
	574	* to save flops */
	575
	576	compute_index = 0;
	577
	578	for (i=0; i<ni; i++) {
	579
	580	for (j=0; j <= (i_offset+i); j++) {
	581
	582	tim_enter("4. quart. tr.");
	583
	584	bzerofast(trans_int4_node,nvir*a_number);
	585
	586	for (r=0; r<nbasis; r++) {
	587
	588	for (a=0; a<a_number; a++) {
	589	iajb = &trans_int4_node[a*nvir];
	590	c_rb = &scf_vector[r][nocc];
	591	iajr = trans_int3[i(i+1)/2 + ii_offset + j + dim_ij(a+a_numberr)];
	592
	593	for (b=0; b<nvir; b++) {
	594	iajb++ += c_rb++ * iajr;
	595	}
	596	}
	597	}
	598
	599	tim_exit("4. quart. tr.");
	600
	601	/* collect each node's part of fully transf. int. into trans_int4 */
	602	tim_enter("collect");
	603	msg_->collect(trans_int4_node,a_vector,trans_int4);
	604	tim_exit("collect");
	605
	606
	607	/* we now have the fully transformed integrals (ia\|jb) *
	608	* for one i, one j (j <= i_offset+i), and all a and b; *
	609	* compute contribution to the OPT1 and OPT2 correlation *
	610	* energies; use restriction b <= a to save flops */
	611
	612	tim_enter("compute ecorr");
	613
	614	for (a=0; a<nvir; a++) {
	615	for (b=0; b<=a; b++) {
	616	compute_index++;
	617	if (compute_index%nproc != me) continue;
	618
	619	docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
	620	+ (j >= nsocc && j < nocc);
	621	socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
	622	vir_index = (a >= nsocc) + (b >= nsocc);
	623
	624	if (socc_index >= 3) continue; /* skip to next b value */
	625
	626	delta_ijab = evals_open[i_offset+i] + evals_open[j]
	627	- evals_open[nocc+a] - evals_open[nocc+b];
	628
	629	/* determine integral type and compute energy contribution */
	630	if (docc_index == 2 && vir_index == 2) {
	631	if (i_offset+i == j && a == b) {
	632	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	633	ecorr_opt2 += contrib1/delta_ijab;
	634	ecorr_opt1 += contrib1/delta_ijab;
	635	}
	636	else if (i_offset+i == j \|\| a == b) {
	637	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	638	ecorr_opt2 += 2*contrib1/delta_ijab;
	639	ecorr_opt1 += 2*contrib1/delta_ijab;
	640	}
	641	else {
	642	contrib1 = trans_int4[a*nvir + b];
	643	contrib2 = trans_int4[b*nvir + a];
	644	ecorr_opt2 += 4(contrib1contrib1 + contrib2*contrib2
	645	- contrib1*contrib2)/delta_ijab;
	646	ecorr_opt1 += 4(contrib1contrib1 + contrib2*contrib2
	647	- contrib1*contrib2)/delta_ijab;
	648	}
	649	}
	650	else if (docc_index == 2 && socc_index == 2) {
	651	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
	652	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
	653	ecorr_opt2 += contrib1/
	654	(delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
	655	ecorr_opt1 += contrib1/delta_ijab;
	656	}
	657	else if (socc_index == 2 && vir_index == 2) {
	658	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
	659	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
	660	ecorr_opt2 += contrib1/
	661	(delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
	662	ecorr_opt1 += contrib1/delta_ijab;
	663	}
	664	else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
	665	if (i_offset+i == j) {
	666	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	667	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
	668	ecorr_opt1 += contrib1/delta_ijab;
	669	}
	670	else {
	671	contrib1 = trans_int4[a*nvir + b];
	672	contrib2 = trans_int4[b*nvir + a];
	673	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
	674	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[b]);
	675	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
	676	- contrib1*contrib2)/delta_ijab;
	677	}
	678	}
	679	else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
	680	contrib1 = trans_int4[bnvir+a]trans_int4[b*nvir+a];
	681	if (j == b) {
	682	/* to compute the total energy contribution from an integral *
	683	* of the type (is1\|s1a) (i=d.o., s1=s.o., a=unocc.), we need *
	684	* the (is\|sa) integrals for all s=s.o.; these integrals are *
	685	* therefore stored here in the array mo_int_do_so_vir, and *
	686	* the energy contribution is computed after exiting the loop *
	687	* over i-batches (pass) */
	688	mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
	689	(i_offset+i-nsocc + ndocc*b)] =
	690	trans_int4[b*nvir + a];
	691	ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
	692	ecorr_opt1 += 1.5*contrib1/delta_ijab;
	693	ecorr_zapt2_contrib += contrib1/
	694	(delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
	695	+ 0.5*contrib1/delta_ijab;
	696	}
	697	else {
	698	ecorr_opt2 += contrib1/
	699	(delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
	700	ecorr_opt1 += contrib1/delta_ijab;
	701	}
	702	}
	703	else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
	704	if (a == b) {
	705	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	706	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
	707	ecorr_opt1 += contrib1/delta_ijab;
	708	}
	709	else {
	710	contrib1 = trans_int4[a*nvir + b];
	711	contrib2 = trans_int4[b*nvir + a];
	712	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
	713	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[j]);
	714	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
	715	- contrib1*contrib2)/delta_ijab;
	716	}
	717	}
	718	} /* exit b loop */
	719	} /* exit a loop */
	720	tim_exit("compute ecorr");
	721	} /* exit j loop */
	722	} /* exit i loop */
	723
	724	if (nsocc == 0 && npass > 1 && pass < npass - 1) {
	725	double passe = ecorr_opt2;
	726	msg_->sum(passe);
	727	ExEnv::out0() << indent
	728	<< "Partial correlation energy for pass " << pass << ":" << endl;
	729	ExEnv::out0() << indent
	730	<< scprintf(" restart_ecorr = %18.14f", passe)
	731	<< endl;
	732	ExEnv::out0() << indent
	733	<< scprintf(" restart_orbital_v1 = %d", ((pass+1) * ni))
	734	<< endl;
	735	}
	736	} /* exit loop over i-batches (pass) */
	737
	738	// don't need the AO integrals and threads anymore
	739	double aoint_computed = 0.0;
	740	for (i=0; i<thr_->nthread(); i++) {
	741	tbint[i] = 0;
	742	aoint_computed += e1thread[i]->aoint_computed();
	743	delete e1thread[i];
	744	}
	745	delete[] e1thread;
	746	delete[] tbint;
	747
	748	/* compute contribution from excitations of the type is1 -> s1a where *
	749	* i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
	750	* where i and a have the same spin, contribute to this term; *
	751	* (Brillouin's theorem not satisfied for ROHF wave functions); */
	752
	753	tim_enter("compute ecorr");
	754
	755	if (nsocc > 0) {
	756	tim_enter("sum mo_int_do_so_vir");
	757	msg_->sum(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc),mo_int_tmp);
	758	tim_exit("sum mo_int_do_so_vir");
	759	}
	760
	761	/* add extra contribution for triplet and higher spin multiplicities *
	762	* contribution = sum over s1 and s2<s1 of (is1\|s1a)(is2\|s2a)/delta /
	763
	764	if (me == 0 && nsocc) {
	765	for (i=0; i<ndocc; i++) {
	766
	767	for (a=0; a<nvir-nsocc; a++) {
	768	delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
	769
	770	for (s1=0; s1<nsocc; s1++) {
	771
	772	for (s2=0; s2<s1; s2++) {
	773	contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs1)]*
	774	mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs2)]/delta;
	775	ecorr_opt2 += contrib1;
	776	ecorr_opt1 += contrib1;
	777	}
	778	}
	779	} /* exit a loop */
	780	} /* exit i loop */
	781	}
	782
	783	tim_exit("compute ecorr");
	784
	785	ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
	786	ecorr_opt2 += ecorr_opt2_contrib;
	787	msg_->sum(ecorr_opt1);
	788	msg_->sum(ecorr_opt2);
	789	msg_->sum(ecorr_zapt2);
	790	msg_->sum(aoint_computed);
	791
	792	if (restart_orbital_v1_) {
	793	ecorr_opt1 += restart_ecorr_;
	794	ecorr_opt2 += restart_ecorr_;
	795	ecorr_zapt2 += restart_ecorr_;
	796	}
	797
	798	escf = reference_->energy();
	799	hf_energy_ = escf;
	800
	801	if (me == 0) {
	802	eopt2 = escf + ecorr_opt2;
	803	eopt1 = escf + ecorr_opt1;
	804	ezapt2 = escf + ecorr_zapt2;
	805
	806	/* print out various energies etc.*/
	807
	808	ExEnv::out0() << indent
	809	<< "Number of shell quartets for which AO integrals would" << endl
	810	<< indent
	811	<< "have been computed without bounds checking: "
	812	<< npassnshellnshell(nshell+1)(nshell+1)/4 << endl;
	813	ExEnv::out0() << indent
	814	<< "Number of shell quartets for which AO integrals" << endl
	815	<< indent << "were computed: " << aoint_computed << endl;
	816	ExEnv::out0() << indent
	817	<< scprintf("ROHF energy [au]: %17.12lf\n", escf);
	818	ExEnv::out0() << indent
	819	<< scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
	820	ExEnv::out0() << indent
	821	<< scprintf("OPT2 second order correction [au]: %17.12lf\n", ecorr_opt2);
	822	ExEnv::out0() << indent
	823	<< scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
	824	ExEnv::out0() << indent
	825	<< scprintf("ZAPT2 correlation energy [au]: %17.12lf\n", ecorr_zapt2);
	826	ExEnv::out0() << indent
	827	<< scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
	828	}
	829	msg_->bcast(eopt1);
	830	msg_->bcast(eopt2);
	831	msg_->bcast(ezapt2);
	832
	833	if (method_ && !strcmp(method_,"opt1")) {
	834	set_energy(eopt1);
	835	set_actual_value_accuracy(reference_->actual_value_accuracy()
	836	*ref_to_mp2_acc);
	837	}
	838	else if (method_ && !strcmp(method_,"opt2")) {
	839	set_energy(eopt2);
	840	set_actual_value_accuracy(reference_->actual_value_accuracy()
	841	*ref_to_mp2_acc);
	842	}
	843	else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
	844	set_energy(ezapt2);
	845	set_actual_value_accuracy(reference_->actual_value_accuracy()
	846	*ref_to_mp2_acc);
	847	}
	848	else {
	849	if (!(!method_ \|\| !strcmp(method_,"zapt"))) {
	850	ExEnv::out0() << indent
	851	<< "MBPT2: bad method: " << method_ << ", using zapt" << endl;
	852	}
	853	set_energy(ezapt2);
	854	set_actual_value_accuracy(reference_->actual_value_accuracy()
	855	*ref_to_mp2_acc);
	856	}
	857
	858	free(trans_int1);
	859	free(trans_int2);
	860	free(trans_int3);
	861	free(trans_int4_node);
	862	free(trans_int4);
	863	free(a_vector);
	864	if (nsocc) free(socc_sum);
	865	if (nsocc) free(mo_int_do_so_vir);
	866	if (nsocc) free(mo_int_tmp);
	867	free(evals_open);
	868
	869	delete[] scf_vector;
	870	delete[] scf_vector_dat;
	871	}
	872
	873	////////////////////////////////////////////////////////////////////////////
	874
	875	// Local Variables:
	876	// mode: c++
	877	// c-file-style: "CLJ-CONDENSED"
	878	// End:

Note: See TracBrowser for help on using the repository browser.

Download in other formats: