Context Navigation

hsosv2.cc@ 4f20e7

Visit:

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable

Last change on this file since 4f20e7 was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 32.6 KB

Rev	Line
[0b990d]	1	//
	2	// hsosv2.cc
	3	//
	4	// Copyright (C) 1996 Limit Point Systems, Inc.
	5	//
	6	// Author: Ida Nielsen <ida@kemi.aau.dk>
	7	// Maintainer: LPS
	8	//
	9	// This file is part of the SC Toolkit.
	10	//
	11	// The SC Toolkit is free software; you can redistribute it and/or modify
	12	// it under the terms of the GNU Library General Public License as published by
	13	// the Free Software Foundation; either version 2, or (at your option)
	14	// any later version.
	15	//
	16	// The SC Toolkit is distributed in the hope that it will be useful,
	17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	// GNU Library General Public License for more details.
	20	//
	21	// You should have received a copy of the GNU Library General Public License
	22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
	23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	24	//
	25	// The U.S. Government is granted a limited license as per AL 91-7.
	26	//
	27
	28	#include <iostream>
	29	#include <math.h>
	30
	31	#include <util/misc/formio.h>
	32	#include <util/misc/timer.h>
	33	#include <util/group/message.h>
	34	#include <math/scmat/matrix.h>
	35	#include <chemistry/molecule/molecule.h>
	36	#include <chemistry/qc/mbpt/mbpt.h>
	37	#include <chemistry/qc/mbpt/bzerofast.h>
	38
	39	using namespace std;
	40	using namespace sc;
	41
	42	static void iqs(int item,int index,int left,int right);
	43	static void iquicksort(int item,int index,int n);
	44
	45	void
	46	MBPT2::compute_hsos_v2()
	47	{
	48	int i, j, k;
	49	int s1, s2;
	50	int a, b;
	51	int isocc, asocc; /* indices running over singly occupied orbitals */
	52	int nfuncmax = basis()->max_nfunction_in_shell();
	53	int nvir;
	54	int nshell;
	55	int nocc=0,ndocc=0,nsocc=0;
	56	int i_offset;
	57	int npass, pass;
	58	int ni;
	59	int np, nq, nr, ns;
	60	int P, Q, R, S;
	61	int p, q, r, s;
	62	int bf1, bf2, bf3, bf4;
	63	int index;
	64	int compute_index;
	65	int col_index;
	66	int tmp_index;
	67	int dim_ij;
	68	int docc_index, socc_index, vir_index;
	69	int me;
	70	int nproc;
	71	int rest;
	72	int r_offset;
	73	int sum;
	74	int min;
	75	int iproc;
	76	int nRshell;
	77	int imyshell;
	78	int myshells; / the R indices processed by node me */
	79	int shellsize; / size of each shell */
	80	int sorted_shells; / sorted shell indices: large shells->small shells */
	81	int nbf; / number of basis functions processed by each node */
	82	int proc; / element k: processor which will process shell k */
	83	int aoint_computed = 0;
	84	double evals_open; / reordered scf eigenvalues */
	85	const double intbuf; / 2-electron AO integral buffer */
	86	double trans_int1; / partially transformed integrals */
	87	double trans_int2; / partially transformed integrals */
	88	double trans_int3; / partially transformed integrals */
	89	double trans_int4; / fully transformed integrals */
	90	double trans_int4_tmp; / scratch array */
	91	double mo_int_do_so_vir=0;/mo integral (is\|sa); i:d.o.,s:s.o.,a:vir */
	92	double mo_int_tmp=0; / scratch array used in global summations */
	93	double socc_sum=0; / sum of 2-el integrals involving only s.o.'s */
	94	double socc_sum_tmp=0;/ scratch array */
	95	double iqrs, iprs;
	96	double *iars_ptr;
	97	double iars;
	98	double iajr;
	99	double *iajr_ptr;
	100	double *iajb;
	101	double pqrs;
	102	double *c_qa;
	103	double c_rb, c_pi, c_qi, c_sj;
	104	double delta_ijab;
	105	double delta;
	106	double contrib1, contrib2;
	107	double ecorr_opt2=0,ecorr_opt1=0;
	108	double ecorr_zapt2;
	109	double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
	110	double escf;
	111	double eopt2,eopt1,ezapt2;
	112	double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
	113
	114	me = msg_->me();
	115
	116	ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v2)" << endl;
	117
	118	tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
	119
	120	nproc = msg_->n();
	121
	122	ndocc = nsocc = 0;
	123	const double epsilon = 1.0e-4;
	124	for (i=0; i<oso_dimension()->n(); i++) {
	125	if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
	126	else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
	127	}
	128
	129	/* do a few preliminary tests to make sure the desired calculation *
	130	* can be done (and appears to be meaningful!) */
	131
	132	if (ndocc == 0 && nsocc == 0) {
	133	ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
	134	abort();
	135	}
	136
	137	if (nfzc > ndocc) {
	138	ExEnv::err0()
	139	<< "The number of frozen core orbitals exceeds the number" << endl
	140	<< "of doubly occupied orbitals; program exiting" << endl;
	141	abort();
	142	}
	143
	144	if (nfzv > noso - ndocc - nsocc) {
	145	ExEnv::err0()
	146	<< "The number of frozen virtual orbitals exceeds the number" << endl
	147	<< "of unoccupied orbitals; program exiting" << endl;
	148	abort();
	149	}
	150
	151	ndocc = ndocc - nfzc;
	152	/* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
	153	nvir = noso - ndocc - nfzc - nfzv;
	154	/* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
	155	nocc = ndocc + nsocc;
	156	nshell = basis()->nshell();
	157
	158	/* allocate storage for some arrays used for keeping track of which R *
	159	* indices are processed by each node */
	160	shellsize = (int) malloc(nshellsizeof(int));
	161	sorted_shells = (int) malloc(nshellsizeof(int));
	162	nbf = (int) malloc(nprocsizeof(int));
	163	proc = (int) malloc(nshellsizeof(int));
	164
	165
	166	/******************************************************
	167	* Begin distributing R shells between nodes so each *
	168	* node gets ca. the same number of r basis functions *
	169	******************************************************/
	170
	171	/* compute size of each shell */
	172	for (i=0; i<nshell; i++) {
	173	shellsize[i] = basis()->shell(i).nfunction();
	174	}
	175
	176	/* do an index sort (large -> small) of shellsize to form sorted_shells */
	177	iquicksort(shellsize,sorted_shells,nshell);
	178
	179	/* initialize nbf */
	180	for (i=0; i<nproc; i++) nbf[i] = 0;
	181
	182	for (i=0; i<nshell; i++) {
	183	min = nbf[0];
	184	iproc = 0;
	185	for (j=1; j<nproc; j++) {
	186	if (nbf[j] < min) {
	187	iproc = j;
	188	min = nbf[j];
	189	}
	190	}
	191	proc[sorted_shells[i]] = iproc;
	192	nbf[iproc] += shellsize[sorted_shells[i]];
	193	}
	194	if (me == 0) {
	195	ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
	196	for (i=0; i<nproc; i++) {
	197	if (i%12 == 0) ExEnv::out0() << indent;
	198	ExEnv::out0() << scprintf(" %4i",nbf[i]);
	199	if ((i+1)%12 == 0) ExEnv::out0() << endl;
	200	}
	201	ExEnv::out0() << endl;
	202	}
	203
	204	/* determine which shells are to be processed by node me */
	205	nRshell = 0;
	206	for (i=0; i<nshell; i++) {
	207	if (proc[i] == me) nRshell++;
	208	}
	209	myshells = (int) malloc(nRshellsizeof(int));
	210	imyshell = 0;
	211	for (i=0; i<nshell; i++) {
	212	if (proc[i] == me) {
	213	myshells[imyshell] = i;
	214	imyshell++;
	215	}
	216	}
	217
	218	/************************************************
	219	* End of distribution of R shells between nodes *
	220	************************************************/
	221
	222
	223	/* compute batch size ni for opt2 loops; *
	224	* need to store the following arrays of type double : trans_int1-4, *
	225	* trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp, *
	226	* mo_int_do_so_vir, mo_int_tmp, *
	227	* and the following arrays of type int: myshells, shellsize, *
	228	* sorted_shells, nbf, and proc */
	229
	230	size_t memused = 0;
	231	ni = 0;
	232	for (i=1; i<=nocc; i++) {
	233	distsize_t tmpmem = compute_v2_memory(i,
	234	nfuncmax, nbf[me], nshell,
	235	ndocc, nsocc, nvir, nproc);
	236	if (tmpmem > mem_alloc) break;
	237	ni = i;
	238	memused = distsize_to_size(tmpmem);
	239	}
	240
	241	size_t mem_remaining = mem_alloc - memused;
	242
	243	/* set ni equal to the smallest batch size for any node */
	244	msg_->min(ni);
	245	msg_->bcast(ni);
	246
	247	int nbfmax = nbf[me];
	248	msg_->max(nbfmax);
	249
	250	if (me == 0) {
	251	ExEnv::out0() << indent << " nproc nbasis nshell nfuncmax"
	252	" ndocc nsocc nvir nfzc nfzv" << endl;
	253	ExEnv::out0() << indent << scprintf(" %-4i %-5i %-4i %-3i"
	254	" %-3i %-3i %-3i %-3i %-3i\n",
	255	nproc,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
	256	}
	257
	258
	259	ExEnv::out0() << indent
	260	<< "Memory available per node: " << mem_alloc << " Bytes"
	261	<< endl;
	262	ExEnv::out0() << indent
	263	<< "Total memory used per node: " << memused << " Bytes"
	264	<< endl;
	265	ExEnv::out0() << indent
	266	<< "Memory required for one pass: "
	267	<< compute_v2_memory(nocc,
	268	nfuncmax, nbfmax, nshell,
	269	ndocc, nsocc, nvir, nproc)
	270	<< " Bytes"
	271	<< endl;
	272	ExEnv::out0() << indent
	273	<< "Minimum memory required: "
	274	<< compute_v2_memory(1,
	275	nfuncmax, nbfmax, nshell,
	276	ndocc, nsocc, nvir, nproc)
	277	<< " Bytes"
	278	<< endl;
	279	ExEnv::out0() << indent
	280	<< "Batch size: " << ni
	281	<< endl;
	282
	283	if (ni < nsocc) {
	284	ExEnv::err0() << "Not enough memory allocated" << endl;
	285	abort();
	286	}
	287
	288	if (ni < 1) { /* this applies only to a closed shell case */
	289	ExEnv::err0() << "Not enough memory allocated" << endl;
	290	abort();
	291	}
	292
	293	if (nocc == ni) {
	294	npass = 1;
	295	rest = 0;
	296	}
	297	else {
	298	rest = nocc%ni;
	299	npass = (nocc - rest)/ni + 1;
	300	if (rest == 0) npass--;
	301	}
	302
	303	ExEnv::out0() << indent
	304	<< "npass = " << npass
	305	<< " rest = " << rest
	306	<< endl;
	307
	308	/* the scf vector might be distributed between the nodes, but for OPT2 *
	309	* each node needs its own copy of the vector; *
	310	* therefore, put a copy of the scf vector on each node; *
	311	* while doing this, duplicate columns corresponding to singly *
	312	* occupied orbitals and order columns as [socc docc socc unocc] */
	313	/* also rearrange scf eigenvalues as [socc docc socc unocc] *
	314	* want socc first to get the socc's in the first batch *
	315	* (need socc's to compute energy denominators - see *
	316	* socc_sum comment below) */
	317
	318	evals_open = (double) malloc((noso+nsocc-nfzc-nfzv)sizeof(double));
	319
	320	RefDiagSCMatrix occ;
	321	RefDiagSCMatrix evals;
	322	RefSCMatrix Scf_Vec;
	323	eigen(evals, Scf_Vec, occ);
	324
	325	if (debug_) {
	326	evals.print("eigenvalues");
	327	Scf_Vec.print("eigenvectors");
	328	}
	329
	330	double scf_vectort_dat = new double[nbasisnoso];
	331	Scf_Vec->convert(scf_vectort_dat);
	332
	333	double** scf_vectort = new double*[nocc + nvir];
	334
	335	int idoc = 0, ivir = 0, isoc = 0;
	336	for (i=nfzc; i<noso-nfzv; i++) {
	337	if (occ(i) >= 2.0 - epsilon) {
	338	evals_open[idoc+nsocc] = evals(i);
	339	scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
	340	idoc++;
	341	}
	342	else if (occ(i) >= 1.0 - epsilon) {
	343	evals_open[isoc] = evals(i);
	344	scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
	345	evals_open[isoc+nocc] = evals(i);
	346	scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
	347	isoc++;
	348	}
	349	else {
	350	if (ivir < nvir) {
	351	evals_open[ivir+nocc+nsocc] = evals(i);
	352	scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
	353	}
	354	ivir++;
	355	}
	356	}
	357	// need the transpose of the vector
	358	double *scf_vector = new double[nbasis];
	359	double scf_vector_dat = new double[(nocc+nvir)nbasis];
	360	for (i=0; i<nbasis; i++) {
	361	scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
	362	for (j=0; j<nocc+nvir; j++) {
	363	scf_vector[i][j] = scf_vectort[j][i];
	364	}
	365	}
	366	delete[] scf_vectort;
	367	delete[] scf_vectort_dat;
	368
	369	/* allocate storage for various arrays */
	370
	371	dim_ij = noccni - (ni(ni - 1))/2;
	372
	373	trans_int1 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
	374	trans_int2 = (double) malloc(nvirni*sizeof(double));
	375	trans_int3 = (double) malloc(nbf[me]nvirdim_ijsizeof(double));
	376	trans_int4 = (double) malloc(nvirnvir*sizeof(double));
	377	trans_int4_tmp = (double) malloc(nvirnvir*sizeof(double));
	378	if (nsocc) socc_sum = (double) malloc(nsoccsizeof(double));
	379	if (nsocc) socc_sum_tmp = (double) malloc(nsoccsizeof(double));
	380	if (nsocc) mo_int_do_so_vir =
	381	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
	382	if (nsocc) mo_int_tmp =
	383	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
	384
	385	if (nsocc) bzerofast(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc));
	386
	387
	388	// create the integrals object
	389	integral()->set_storage(mem_remaining);
	390	tbint_ = integral()->electron_repulsion();
	391	intbuf = tbint_->buffer();
	392
	393	/**************************************************************************
	394	* begin opt2 loops *
	395	**************************************************************************/
	396
	397
	398	for (pass=0; pass<npass; pass++) {
	399	i_offset = pass*ni;
	400	if ((pass == npass - 1) && (rest != 0)) ni = rest;
	401
	402	r_offset = 0;
	403	bzerofast(trans_int3,nbf[me]nvirdim_ij);
	404
	405	tim_enter("RS loop");
	406
	407	for (imyshell=0; imyshell<nRshell; imyshell++) {
	408
	409	R = myshells[imyshell];
	410	nr = basis()->shell(R).nfunction();
	411
	412	for (S = 0; S < nshell; S++) {
	413	ns = basis()->shell(S).nfunction();
	414	tim_enter("bzerofast trans_int1");
	415	bzerofast(trans_int1,nfuncmaxnfuncmaxnbasis*ni);
	416	tim_exit("bzerofast trans_int1");
	417
	418	tim_enter("PQ loop");
	419
	420	for (P = 0; P < nshell; P++) {
	421	np = basis()->shell(P).nfunction();
	422
	423	for (Q = 0; Q <= P; Q++) {
	424	if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
	425	continue; /* skip ereps less than tol */
	426	}
	427
	428	aoint_computed++;
	429
	430	nq = basis()->shell(Q).nfunction();
	431
	432	tim_enter("erep");
	433	tbint_->compute_shell(P,Q,R,S);
	434	tim_exit("erep");
	435
	436	tim_enter("1. quart. tr.");
	437
	438	index = 0;
	439
	440	for (bf1 = 0; bf1 < np; bf1++) {
	441	p = basis()->shell_to_function(P) + bf1;
	442
	443	for (bf2 = 0; bf2 < nq; bf2++) {
	444	q = basis()->shell_to_function(Q) + bf2;
	445	if (q > p) {
	446	/* if q > p: want to skip the loops over bf3-4 */
	447	/* and larger bf2 values, so increment bf1 by 1 */
	448	/* ("break") and adjust the value of index */
	449	index = (bf1 + 1) * nq * nr * ns;
	450	break;
	451	}
	452
	453	for (bf3 = 0; bf3 < nr; bf3++) {
	454
	455	for (bf4 = 0; bf4 < ns; bf4++,index++) {
	456
	457	if (fabs(intbuf[index]) > 1.0e-15) {
	458	pqrs = intbuf[index];
	459
	460	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
	461	iprs = &trans_int1[((bf4nr + bf3)nbasis + p)*ni];
	462
	463	if (p == q) pqrs *= 0.5;
	464	col_index = i_offset;
	465	c_pi = &scf_vector[p][col_index];
	466	c_qi = &scf_vector[q][col_index];
	467
	468	for (i=ni; i; i--) {
	469	iqrs++ += pqrs *c_pi++;
	470	iprs++ += pqrs *c_qi++;
	471	}
	472	}
	473	} /* exit bf4 loop */
	474	} /* exit bf3 loop */
	475	} /* exit bf2 loop */
	476	} /* exit bf1 loop */
	477	tim_exit("1. quart. tr.");
	478	} /* exit Q loop */
	479	} /* exit P loop */
	480	tim_exit("PQ loop");
	481
	482	/* begin second and third quarter transformations */
	483
	484	for (bf3 = 0; bf3 < nr; bf3++) {
	485	r = r_offset + bf3;
	486
	487	for (bf4 = 0; bf4 < ns; bf4++) {
	488	s = basis()->shell_to_function(S) + bf4;
	489
	490	tim_enter("bzerofast trans_int2");
	491	bzerofast(trans_int2,nvir*ni);
	492	tim_exit("bzerofast trans_int2");
	493
	494	tim_enter("2. quart. tr.");
	495
	496	for (q = 0; q < nbasis; q++) {
	497	iars_ptr = trans_int2;
	498	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
	499	c_qa = &scf_vector[q][nocc];
	500
	501	for (a = 0; a < nvir; a++) {
	502
	503	for (i=ni; i; i--) {
	504	iars_ptr++ += c_qa * *iqrs++;
	505	}
	506
	507	iqrs -= ni;
	508	c_qa++;
	509	}
	510	} /* exit q loop */
	511	tim_exit("2. quart. tr.");
	512
	513	/* begin third quarter transformation */
	514
	515	tim_enter("3. quart. tr.");
	516
	517	for (i=0; i<ni; i++) {
	518	tmp_index = i(i+1)/2 + ii_offset;
	519
	520	for (a=0; a<nvir; a++) {
	521	iars = trans_int2[a*ni + i];
	522	c_sj = scf_vector[s];
	523	iajr_ptr = &trans_int3[tmp_index + dim_ij(a + nvirr)];
	524
	525	for (j=0; j<=i+i_offset; j++) {
	526	iajr_ptr++ += c_sj++ * iars;
	527	}
	528	}
	529	} /* exit i loop */
	530	tim_exit("3. quart. tr.");
	531
	532	} /* exit bf4 loop */
	533	} /* exit bf3 loop */
	534
	535	} /* exit S loop */
	536	r_offset += nr;
	537	} /* exit R loop */
	538	tim_exit("RS loop");
	539
	540
	541	/* begin fourth quarter transformation; *
	542	* first tansform integrals with only s.o. indices; *
	543	* these integrals are needed to compute the denominators *
	544	* in the various terms contributing to the correlation energy *
	545	* and must all be computed in the first pass; *
	546	* the integrals are summed into the array socc_sum: *
	547	* socc_sum[isocc] = sum over asocc of (isocc asocc\|asocc isocc) *
	548	* (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
	549	* the individual integrals are not saved here, only the sums are kept */
	550
	551
	552	if (pass == 0) {
	553	tim_enter("4. quart. tr.");
	554	if (nsocc) bzerofast(socc_sum,nsocc);
	555	for (isocc=0; isocc<nsocc; isocc++) {
	556
	557	for (index=0; index<nbf[me]; index++) {
	558	i = 0;
	559	sum = basis()->shell(myshells[i]).nfunction();
	560	while (sum <= index) {
	561	i++;
	562	sum += basis()->shell(myshells[i]).nfunction();
	563	}
	564	sum -= basis()->shell(myshells[i]).nfunction();
	565	r = basis()->shell_to_function(myshells[i]) + index - sum;
	566
	567	for (asocc=0; asocc<nsocc; asocc++) {
	568	socc_sum[isocc] += scf_vector[r][nocc+asocc]*
	569	trans_int3[isocc(isocc+1)/2 + isocci_offset
	570	+ isocc + dim_ij(asocc + nvirindex)];
	571	}
	572	}
	573	} /* exit isocc loop */
	574
	575	tim_exit("4. quart. tr.");
	576
	577	/* sum socc_sum contributions from each node (only if nsocc > 0 *
	578	* since gop1 will fail if nsocc = 0) */
	579	if (nsocc > 0) {
	580	tim_enter("global sum socc_sum");
	581	msg_->sum(socc_sum,nsocc,socc_sum_tmp);
	582	tim_exit("global sum socc_sum");
	583	}
	584
	585	}
	586
	587	/* now we have all the sums of integrals involving s.o.'s (socc_sum); *
	588	* begin fourth quarter transformation for all integrals (including *
	589	* integrals with only s.o. indices); use restriction j <= (i_offset+i) *
	590	* to save flops */
	591
	592	compute_index = 0;
	593
	594	for (i=0; i<ni; i++) {
	595
	596	for (j=0; j <= (i_offset+i); j++) {
	597
	598	tim_enter("4. quart. tr.");
	599
	600	bzerofast(trans_int4,nvir*nvir);
	601
	602	for (index=0; index<nbf[me]; index++) {
	603	k = 0;
	604	sum = basis()->shell(myshells[k]).nfunction();
	605	while (sum <= index) {
	606	k++;
	607	sum += basis()->shell(myshells[k]).nfunction();
	608	}
	609	sum -= basis()->shell(myshells[k]).nfunction();
	610	r = basis()->shell_to_function(myshells[k]) + index - sum;
	611
	612	for (a=0; a<nvir; a++) {
	613	iajb = &trans_int4[a*nvir];
	614	iajr = trans_int3[i(i+1)/2 + ii_offset + j + dim_ij(a+nvirindex)];
	615	c_rb = &scf_vector[r][nocc];
	616
	617	for (b=0; b<nvir; b++) {
	618	iajb++ += c_rb++ * iajr;
	619	}
	620	}
	621	}
	622
	623	tim_exit("4. quart. tr.");
	624
	625	tim_enter("global sum trans_int4");
	626	msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
	627	tim_exit("global sum trans_int4");
	628
	629	/* we now have the fully transformed integrals (ia\|jb) *
	630	* for one i, one j (j <= i_offset+i), and all a and b; *
	631	* compute contribution to the OPT1 and OPT2 correlation *
	632	* energies; use restriction b <= a to save flops */
	633
	634	tim_enter("compute ecorr");
	635
	636	for (a=0; a<nvir; a++) {
	637	for (b=0; b<=a; b++) {
	638	compute_index++;
	639	if (compute_index%nproc != me) continue;
	640
	641	docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
	642	+ (j >= nsocc && j < nocc);
	643	socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
	644	vir_index = (a >= nsocc) + (b >= nsocc);
	645
	646	if (socc_index >= 3) continue; /* skip to next b value */
	647
	648	delta_ijab = evals_open[i_offset+i] + evals_open[j]
	649	- evals_open[nocc+a] - evals_open[nocc+b];
	650
	651	/* determine integral type and compute energy contribution */
	652	if (docc_index == 2 && vir_index == 2) {
	653	if (i_offset+i == j && a == b) {
	654	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	655	ecorr_opt2 += contrib1/delta_ijab;
	656	ecorr_opt1 += contrib1/delta_ijab;
	657	}
	658	else if (i_offset+i == j \|\| a == b) {
	659	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	660	ecorr_opt2 += 2*contrib1/delta_ijab;
	661	ecorr_opt1 += 2*contrib1/delta_ijab;
	662	}
	663	else {
	664	contrib1 = trans_int4[a*nvir + b];
	665	contrib2 = trans_int4[b*nvir + a];
	666	ecorr_opt2 += 4(contrib1contrib1 + contrib2*contrib2
	667	- contrib1*contrib2)/delta_ijab;
	668	ecorr_opt1 += 4(contrib1contrib1 + contrib2*contrib2
	669	- contrib1*contrib2)/delta_ijab;
	670	}
	671	}
	672	else if (docc_index == 2 && socc_index == 2) {
	673	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
	674	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
	675	ecorr_opt2 += contrib1/
	676	(delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
	677	ecorr_opt1 += contrib1/delta_ijab;
	678	}
	679	else if (socc_index == 2 && vir_index == 2) {
	680	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
	681	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
	682	ecorr_opt2 += contrib1/
	683	(delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
	684	ecorr_opt1 += contrib1/delta_ijab;
	685	}
	686	else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
	687	if (i_offset+i == j) {
	688	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	689	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
	690	ecorr_opt1 += contrib1/delta_ijab;
	691	}
	692	else {
	693	contrib1 = trans_int4[a*nvir + b];
	694	contrib2 = trans_int4[b*nvir + a];
	695	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
	696	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[b]);
	697	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
	698	- contrib1*contrib2)/delta_ijab;
	699	}
	700	}
	701	else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
	702	contrib1 = trans_int4[bnvir+a]trans_int4[b*nvir+a];
	703	if (j == b) {
	704	/* to compute the energy contribution from an integral of the *
	705	* type (is1\|s1a) (i=d.o., s1=s.o., a=unocc.), we need the *
	706	* (is\|sa) integrals for all s=s.o.; these integrals are *
	707	* therefore stored here in the array mo_int_do_so_vir, and *
	708	* the energy contribution is computed after exiting the loop *
	709	* over i-batches (pass) */
	710	mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
	711	(i_offset+i-nsocc + ndocc*b)] =
	712	trans_int4[b*nvir + a];
	713	ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
	714	ecorr_opt1 += 1.5*contrib1/delta_ijab;
	715	ecorr_zapt2_contrib += contrib1/
	716	(delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
	717	+ 0.5*contrib1/delta_ijab;
	718	}
	719	else {
	720	ecorr_opt2 += contrib1/
	721	(delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
	722	ecorr_opt1 += contrib1/delta_ijab;
	723	}
	724	}
	725	else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
	726	if (a == b) {
	727	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	728	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
	729	ecorr_opt1 += contrib1/delta_ijab;
	730	}
	731	else {
	732	contrib1 = trans_int4[a*nvir + b];
	733	contrib2 = trans_int4[b*nvir + a];
	734	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
	735	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[j]);
	736	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
	737	- contrib1*contrib2)/delta_ijab;
	738	}
	739	}
	740	} /* exit b loop */
	741	} /* exit a loop */
	742	tim_exit("compute ecorr");
	743	} /* exit j loop */
	744	} /* exit i loop */
	745
	746	if (nsocc == 0 && npass > 1 && pass < npass - 1) {
	747	double passe = ecorr_opt2;
	748	msg_->sum(passe);
	749	ExEnv::out0() << indent
	750	<< "Partial correlation energy for pass " << pass << ":" << endl;
	751	ExEnv::out0() << indent
	752	<< scprintf(" restart_ecorr = %14.10f", passe)
	753	<< endl;
	754	ExEnv::out0() << indent
	755	<< scprintf(" restart_orbital_v2 = %d", ((pass+1) * ni))
	756	<< endl;
	757	}
	758	} /* exit loop over i-batches (pass) */
	759
	760
	761
	762	/* compute contribution from excitations of the type is1 -> s1a where *
	763	* i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
	764	* where i and a have the same spin, contribute to this term; *
	765	* (Brillouin's theorem not satisfied for ROHF wave functions); *
	766	* do this only if nsocc > 0 since gop1 will fail otherwise */
	767
	768	tim_enter("compute ecorr");
	769
	770	if (nsocc > 0) {
	771	tim_enter("global sum mo_int_do_so_vir");
	772	msg_->sum(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc),mo_int_tmp);
	773	tim_exit("global sum mo_int_do_so_vir");
	774	}
	775
	776	/* add extra contribution for triplet and higher spin multiplicities *
	777	* contribution = sum over s1 and s2<s1 of (is1\|s1a)(is2\|s2a)/delta /
	778
	779	if (me == 0 && nsocc) {
	780	for (i=0; i<ndocc; i++) {
	781
	782	for (a=0; a<nvir-nsocc; a++) {
	783	delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
	784
	785	for (s1=0; s1<nsocc; s1++) {
	786
	787	for (s2=0; s2<s1; s2++) {
	788	contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs1)]*
	789	mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs2)]/delta;
	790	ecorr_opt2 += contrib1;
	791	ecorr_opt1 += contrib1;
	792	}
	793	}
	794	} /* exit a loop */
	795	} /* exit i loop */
	796	}
	797
	798	tim_exit("compute ecorr");
	799
	800	ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
	801	ecorr_opt2 += ecorr_opt2_contrib;
	802	msg_->sum(ecorr_opt1);
	803	msg_->sum(ecorr_opt2);
	804	msg_->sum(ecorr_zapt2);
	805	msg_->sum(aoint_computed);
	806
	807	escf = reference_->energy();
	808	hf_energy_ = escf;
	809
	810	if (me == 0) {
	811	eopt2 = escf + ecorr_opt2;
	812	eopt1 = escf + ecorr_opt1;
	813	ezapt2 = escf + ecorr_zapt2;
	814
	815	/* print out various energies etc.*/
	816
	817	ExEnv::out0() << indent
	818	<< "Number of shell quartets for which AO integrals would" << endl
	819	<< indent << "have been computed without bounds checking: "
	820	<< npassnshellnshell(nshell+1)(nshell+1)/2 << endl;
	821	ExEnv::out0() << indent
	822	<< "Number of shell quartets for which AO integrals" << endl
	823	<< indent << "were computed: " << aoint_computed << endl;
	824
	825	ExEnv::out0() << indent
	826	<< scprintf("ROHF energy [au]: %17.12lf\n", escf);
	827	ExEnv::out0() << indent
	828	<< scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
	829	ExEnv::out0() << indent
	830	<< scprintf("OPT2 second order correction [au]: %17.12lf\n",
	831	ecorr_opt2);
	832	ExEnv::out0() << indent
	833	<< scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
	834	ExEnv::out0() << indent
	835	<< scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
	836	ecorr_zapt2);
	837	ExEnv::out0() << indent
	838	<< scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
	839	ExEnv::out0().flush();
	840	}
	841
	842	msg_->bcast(eopt1);
	843	msg_->bcast(eopt2);
	844	msg_->bcast(ezapt2);
	845
	846	if (method_ && !strcmp(method_,"opt1")) {
	847	set_energy(eopt1);
	848	set_actual_value_accuracy(reference_->actual_value_accuracy()
	849	*ref_to_mp2_acc);
	850	}
	851	else if (method_ && !strcmp(method_,"opt2")) {
	852	set_energy(eopt2);
	853	set_actual_value_accuracy(reference_->actual_value_accuracy()
	854	*ref_to_mp2_acc);
	855	}
	856	else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
	857	set_energy(ezapt2);
	858	set_actual_value_accuracy(reference_->actual_value_accuracy()
	859	*ref_to_mp2_acc);
	860	}
	861	else {
	862	if (!(!method_ \|\| !strcmp(method_,"zapt"))) {
	863	ExEnv::out0() << indent
	864	<< "MBPT2: bad method: " << method_ << ", using zapt" << endl;
	865	}
	866	set_energy(ezapt2);
	867	set_actual_value_accuracy(reference_->actual_value_accuracy()
	868	*ref_to_mp2_acc);
	869	}
	870
	871	free(trans_int1);
	872	free(trans_int2);
	873	free(trans_int3);
	874	free(trans_int4);
	875	free(trans_int4_tmp);
	876	if (nsocc) free(socc_sum);
	877	if (nsocc) free(socc_sum_tmp);
	878	if (nsocc) free(mo_int_do_so_vir);
	879	if (nsocc) free(mo_int_tmp);
	880	free(evals_open);
	881	free(myshells);
	882	free(shellsize);
	883	free(sorted_shells);
	884	free(nbf);
	885	free(proc);
	886
	887	delete[] scf_vector;
	888	delete[] scf_vector_dat;
	889
	890	}
	891
	892
	893	/* Do a quick sort (larger -> smaller) of the integer data in item *
	894	* by the integer indices in index; *
	895	* data in item remain unchanged */
	896
	897	static void
	898	iquicksort(int item,int index,int n)
	899	{
	900	int i;
	901	if (n<=0) return;
	902	for (i=0; i<n; i++) {
	903	index[i] = i;
	904	}
	905	iqs(item,index,0,n-1);
	906	}
	907
	908	static void
	909	iqs(int item,int index,int left,int right)
	910	{
	911	register int i,j;
	912	int x,y;
	913
	914	i=left; j=right;
	915	x=item[index[(left+right)/2]];
	916
	917	do {
	918	while(item[index[i]]>x && i<right) i++;
	919	while(x>item[index[j]] && j>left) j--;
	920
	921	if (i<=j) {
	922	if (item[index[i]] != item[index[j]]) {
	923	y=index[i];
	924	index[i]=index[j];
	925	index[j]=y;
	926	}
	927	i++; j--;
	928	}
	929	} while(i<=j);
	930
	931	if (left<j) iqs(item,index,left,j);
	932	if (i<right) iqs(item,index,i,right);
	933	}
	934
	935	distsize_t
	936	MBPT2::compute_v2_memory(int ni,
	937	int nfuncmax, int nbfme, int nshell,
	938	int ndocc, int nsocc, int nvir, int nproc
	939	)
	940	{
	941	distsize_t result = 0;
	942	distsize_t dsize = sizeof(double);
	943	distsize_t isize = sizeof(int);
	944	int dim_ij = noccni - (ni(ni - 1))/2;
	945
	946	result += nfuncmaxnfuncmax(distsize_t)nbasis(distsize_t)nidsize;
	947	result += nvirnidsize;
	948	result += nbfmenvir(distsize_t)dim_ij*dsize;
	949	result += nvirnvirdsize;
	950	result += nvirnvirdsize;
	951	result += nsocc*dsize;
	952	result += nsocc*dsize;
	953	result += ndoccnsocc(distsize_t)(nvir-nsocc)*dsize;
	954	result += ndoccnsocc(distsize_t)(nvir-nsocc)*dsize;
	955	result += (noso+nsocc-nfzc-nfzv)*dsize;
	956	result += nshell*isize;
	957	result += nshell*isize;
	958	result += nproc*isize;
	959	result += nshell*isize;
	960
	961	return result;
	962	}
	963
	964	////////////////////////////////////////////////////////////////////////////
	965
	966	// Local Variables:
	967	// mode: c++
	968	// c-file-style: "CLJ-CONDENSED"
	969	// End:

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv2.cc@ 4f20e7

Download in other formats: