Context Navigation

hsosv2lb.cc@ 416a65

Visit:

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable

Last change on this file since 416a65 was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 35.5 KB

Rev	Line
[0b990d]	1	//
	2	// hsosv2lb.cc
	3	//
	4	// Copyright (C) 1996 Limit Point Systems, Inc.
	5	//
	6	// Author: Ida Nielsen <ida@kemi.aau.dk>
	7	// Maintainer: LPS
	8	//
	9	// This file is part of the SC Toolkit.
	10	//
	11	// The SC Toolkit is free software; you can redistribute it and/or modify
	12	// it under the terms of the GNU Library General Public License as published by
	13	// the Free Software Foundation; either version 2, or (at your option)
	14	// any later version.
	15	//
	16	// The SC Toolkit is distributed in the hope that it will be useful,
	17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	// GNU Library General Public License for more details.
	20	//
	21	// You should have received a copy of the GNU Library General Public License
	22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
	23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	24	//
	25	// The U.S. Government is granted a limited license as per AL 91-7.
	26	//
	27
	28	#include <math.h>
	29
	30	#include <util/misc/timer.h>
	31	#include <util/misc/formio.h>
	32	#include <chemistry/molecule/molecule.h>
	33	#include <chemistry/qc/mbpt/mbpt.h>
	34	#include <chemistry/qc/mbpt/bzerofast.h>
	35
	36	using namespace std;
	37	using namespace sc;
	38
	39	static void iqs(int item,int index,int left,int right);
	40	static void iquicksort(int item,int index,int n);
	41	static void findprocminmax(int *nbf, int nproc,
	42	int procmin, int procmax, int minbf, int maxbf);
	43	static void findshellmax(int myshellsizes, int nRshell, int shellmax,
	44	int *shellmaxindex);
	45	static void expandintarray(int *&a, int dim);
	46
		// MBPT2::compute_hsos_v2_lb
		//
		// Evaluates the open-shell second-order energies that this routine
		// prints as OPT1, OPT2 and ZAPT2, and hands one of them to set_energy():
		// eopt1 for method_ "opt1", eopt2 for method_ "opt2", ezapt2 in every
		// other case (a "bad method" message is printed first unless method_
		// is unset, "zapt", or "mp" with no singly occupied orbitals).
		//
		// Outline, as visible in the body:
		//  1. count doubly/singly occupied orbitals from reference_ and abort
		//     if the frozen-orbital counts are inconsistent;
		//  2. assign R shells to nodes, then move individual r basis functions
		//     from the fullest node to the emptiest one (splitting shells)
		//     while the fullest node holds more than the average nbfav;
		//  3. derive the i-batch size ni from mem_alloc and use the smallest
		//     value found on any node;
		//  4. build a per-node copy of the eigenvalues and SCF vector ordered
		//     [socc docc socc unocc];
		//  5. for each batch (pass) run the four quarter transformations and
		//     accumulate the energy contributions;
		//  6. sum the contributions over nodes, print the energies on node 0
		//     and release the work arrays.
		//
		// NOTE(review): this copy of the file looks damaged by extraction:
		// every line carries a line-number column, `*` characters seem to be
		// missing in many expressions (e.g. `nshellsizeof(int)`,
		// `noccni - ni(ni - 1)/2`) and the logical-or operator shows up with
		// backslash escapes. Presumably the upstream file has the
		// multiplications and pointer declarators intact -- TODO confirm
		// against the original hsosv2lb.cc before building.
	47	void
	48	MBPT2::compute_hsos_v2_lb()
	49	{
	50	int i, j, k, l;
	51	int s1, s2;
	52	int a, b;
	53	int isocc, asocc; // indices running over singly occupied orbitals
	54	int nfuncmax = basis()->max_nfunction_in_shell();
	55	int nvir;
	56	int nshell;
	57	int shellmax;
	58	int shellmaxindex;
	59	int nocc=0,ndocc=0,nsocc=0;
	60	int i_offset;
	61	int npass, pass;
	62	int ni;
	63	int np, nq, nr, ns;
	64	int P, Q, R, S;
	65	int p, q, r, s;
	66	int bf1, bf2, bf3, bf4;
	67	int bf3_offset;
	68	int nbfmoved;
	69	int nbfav; // average number of r basis functions per node
	70	int minbf, maxbf; // max/min number of (r) basis functions on a node
	71	int index;
	72	int compute_index;
	73	int col_index;
	74	int tmp_index;
	75	int dim_ij;
	76	int docc_index, socc_index, vir_index;
	77	int me;
	78	int nproc;
	79	int procmin, procmax; // processor with most/fewest basis functions
	80	int rest;
	81	int r_offset;
	82	int min;
	83	int iproc;
	84	int nRshell;
	85	int imyshell;
	86	int *myshells; // the R indices processed by node me
	87	int *myshellsizes; // sizes of the shells (after split) on node me
	88	int *split_info; // on each node: offset for each shell; -1 if shell not split
	89	int *shellsize; // size of each shell
	90	int *sorted_shells; // sorted shell indices: large shells->small shells
	91	int *nbf; // number of basis functions processed by each node
	92	int *proc; // element k: processor which will process shell k
	93	int aoint_computed = 0;
	94	double A, B, C, ni_top, max, ni_double; // variables used to compute ni
	95	double *evals_open; // reordered scf eigenvalues
	96	const double *intbuf; // 2-electron AO integral buffer
	97	double *trans_int1; // partially transformed integrals
	98	double *trans_int2; // partially transformed integrals
	99	double *trans_int3; // partially transformed integrals
	100	double *trans_int4; // fully transformed integrals
	101	double *trans_int4_tmp; // scratch array
	102	double *mo_int_do_so_vir=0;//mo integral (is\|sa); i:d.o.,s:s.o.,a:vir
	103	double *mo_int_tmp=0; // scratch array used in global summations
	104	double *socc_sum=0; // sum of 2-el integrals involving only s.o.'s
	105	double *socc_sum_tmp=0;// scratch array
	106	double iqrs, iprs;
	107	double *iars_ptr;
	108	double iars;
	109	double iajr;
	110	double *iajr_ptr;
	111	double *iajb;
	112	double pqrs;
	113	double *c_qa;
	114	double c_rb, c_pi, c_qi, c_sj;
	115	double delta_ijab;
	116	double delta;
	117	double contrib1, contrib2;
	118	double ecorr_opt2=0,ecorr_opt1=0;
	119	double ecorr_zapt2;
	120	double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
	121	double escf;
	122	double eopt2,eopt1,ezapt2;
	123	double tol; // log2 of the erep tolerance (erep < 2^tol => discard)
	124
	125	me = msg_->me();
	126
	127	ExEnv::out0() << indent << "Just entered OPT2 program (opt2v2lb)" << endl;
	128
	129	tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
	130
	131	nproc = msg_->n();
	132	ExEnv::out0() << indent << "nproc = " << nproc << endl;
	133
	134	ndocc = nsocc = 0;
	135	const double epsilon = 1.0e-4;
	136	for (i=0; i<oso_dimension()->n(); i++) {
	137	if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
	138	else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
	139	}
	140
	141	// Do a few preliminary tests to make sure the desired calculation
	142	// can be done (and appears to be meaningful!)
	143
	144	if (ndocc == 0 && nsocc == 0) {
	145	ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
	146	abort();
	147	}
	148
	149	if (nfzc > ndocc) {
	150	ExEnv::err0()
	151	<< "The number of frozen core orbitals exceeds the number" << endl
	152	<< "of doubly occupied orbitals; program exiting" << endl;
	153	abort();
	154	}
	155
	156	if (nfzv > noso - ndocc - nsocc) {
	157	ExEnv::err0()
	158	<< "The number of frozen virtual orbitals exceeds the number" << endl
	159	<< "of unoccupied orbitals; program exiting" << endl;
	160	abort();
	161	}
	162
	163	ndocc = ndocc - nfzc;
	164	// nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb.
	165	nvir = noso - ndocc - nfzc - nfzv;
	166	// nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb.
	167	nocc = ndocc + nsocc;
	168	nshell = basis()->nshell();
	169
	170	// Allocate storage for some arrays used for keeping track of which R
	171	// indices are processed by each node
	172	shellsize = (int) malloc(nshellsizeof(int));
	173	sorted_shells = (int) malloc(nshellsizeof(int));
	174	nbf = (int) malloc(nprocsizeof(int));
	175	proc = (int) malloc(nshellsizeof(int));
	176
	177
	178	///////////////////////////////////////////////////////
	179	// Begin distributing R shells between nodes so all
	180	// nodes get ca. the same number of r basis functions
	181	///////////////////////////////////////////////////////
	182
	183	// Compute the size of each shell
	184	for (i=0; i<nshell; i++) {
	185	shellsize[i] = basis()->shell(i).nfunction();
	186	}
	187
	188	// Do an index sort (large -> small) of shellsize to form sorted_shells
	189	iquicksort(shellsize,sorted_shells,nshell);
	190
	191	// Initialize nbf
	192	for (i=0; i<nproc; i++) nbf[i] = 0;
	193
		// Each shell, taken in sorted_shells order, goes to the node that
		// currently holds the fewest basis functions (the lowest-numbered
		// node wins ties because the comparison below is strict).
	194	for (i=0; i<nshell; i++) {
	195	min = nbf[0];
	196	iproc = 0;
	197	for (j=1; j<nproc; j++) {
	198	if (nbf[j] < min) {
	199	iproc = j;
	200	min = nbf[j];
	201	}
	202	}
	203	proc[sorted_shells[i]] = iproc;
	204	nbf[iproc] += shellsize[sorted_shells[i]];
	205	}
	206	if (me == 0) {
	207	ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
	208	for (i=0; i<nproc; i++) {
	209	if (i%12 == 0) ExEnv::out0() << indent;
	210	ExEnv::out0() << scprintf(" %4i",nbf[i]);
	211	if ((i+1)%12 == 0) ExEnv::out0() << endl;
	212	}
	213	ExEnv::out0() << endl;
	214	}
	215
	216	// Determine which shells are to be processed by node me
	217	nRshell = 0;
	218	for (i=0; i<nshell; i++) {
	219	if (proc[i] == me) nRshell++;
	220	}
	221	myshells = (int) malloc(nRshellsizeof(int));
	222	imyshell = 0;
	223	for (i=0; i<nshell; i++) {
	224	if (proc[i] == me) {
	225	myshells[imyshell] = i;
	226	imyshell++;
	227	}
	228	}
	229
	230	/////////////////////////////////////////////////////////////
	231	// End of preliminary distribution of R shells between nodes
	232	/////////////////////////////////////////////////////////////
	233
	234	// Compute the average number of basis functions per node
	235	nbfav = nbasis/nproc;
	236	if (nbasis%nproc) nbfav++;
	237
	238	myshellsizes = (int) malloc(nRshellsizeof(int));
	239	split_info = (int) malloc(nRshellsizeof(int));
	240	for (j=0; j<nRshell; j++) {
	241	myshellsizes[j] = basis()->shell(myshells[j]).nfunction();
	242	split_info[j] = -1;
	243	}
	244
	245	// Find the processor with the most/fewest basis functions
	246	findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
	247	if (maxbf > nbfav) {
	248	ExEnv::out0() << indent << "Redistributing basis functions" << endl;
	249	}
	250
		// Every node runs this loop. Node procmax selects one of its shell
		// pieces via findshellmax, shaves basis functions off it and sends the
		// shell index and offset to procmin; nbfmoved is broadcast so that
		// all nodes apply the same update to nbf[].
	251	while (maxbf > nbfav) {
	252	msg_->sync();
	253	if (me == procmax) {
	254
	255	findshellmax(myshellsizes, nRshell, &shellmax, &shellmaxindex);
	256	nbfmoved = 0;
	257	while (maxbf>nbfav && minbf<nbfav && shellmax>1) {
	258	shellmax--;
	259	nbfmoved++;
	260	maxbf--;
	261	minbf++;
	262	}
	263	myshellsizes[shellmaxindex] = shellmax;
	264	if (split_info[shellmaxindex] == -1) split_info[shellmaxindex] = 0;
	265	shellmax += nbfmoved;
	266
	267	// Send nbfmoved from procmax to all other nodes
	268	msg_->bcast(nbfmoved,procmax);
	269
	270	// Send variables to node procmin
	271	msg_->send(procmin,&myshells[shellmaxindex],1);
	272	msg_->send(procmin,&shellmax,1);
	273
	274	}
	275	else {
	276	// Receive nbfmoved from procmax
	277	msg_->bcast(nbfmoved,procmax);
	278	}
	279
	280	nbf[procmax] -= nbfmoved;
	281
	282	if (me == procmin) {
	283	expandintarray(myshellsizes,nRshell);
	284	expandintarray(myshells,nRshell);
	285	expandintarray(split_info,nRshell);
	286	nRshell++;
	287	myshellsizes[nRshell-1] = nbfmoved;
	288	msg_->recv(procmax,&myshells[nRshell-1],1);
	289	msg_->recv(procmax,&split_info[nRshell-1],1);
	290	split_info[nRshell-1] -= myshellsizes[nRshell-1];
	291	}
	292
	293	nbf[procmin] += nbfmoved;
	294	msg_->sync();
	295	findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
	296
	297	}
	298
	299	if (me == 0) {
	300	ExEnv::out0() << indent
	301	<< "New distribution of basis functions between nodes:" << endl;
	302	for (i=0; i<nproc; i++) {
	303	if (i%12 == 0) ExEnv::out0() << indent;
	304	ExEnv::out0() << scprintf(" %4i",nbf[i]);
	305	if ((i+1)%12 == 0) ExEnv::out0() << endl;
	306	}
	307	ExEnv::out0() << endl;
	308	}
	309
	310
	311	//////////////////////////////////////////////////////////
	312	// End of distribution of R shells and r basis functions
	313	//////////////////////////////////////////////////////////
	314
	315	// Compute batch size ni for opt2 loops;
	316	// need to store the following arrays of type double : trans_int1-4,
	317	// trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp,
	318	// mo_int_do_so_vir, mo_int_tmp,
	319	// and the following arrays of type int: myshells, shellsize,
	320	// sorted_shells, nbf, and proc
		// A, B and C look like the coefficients of a memory estimate that is
		// quadratic in ni (compare the "Memory used" printout further down).
		// NOTE(review): operators are partly missing from these expressions
		// in this copy -- confirm against the upstream file.
	321	A = -0.5sizeof(double)nbf[me]*nvir;
	322	B = sizeof(double)(nfuncmaxnfuncmaxnbasis + nvir + noccnbf[me]*nvir
	323	+ nbf[me]nvir0.5);
	324	C = sizeof(double)(2nvirnvir + (nbasis+1)(nvir+nocc) + 2*nsocc
	325	+ 2ndoccnsocc*(nvir-nsocc))
	326	+ sizeof(int)(3nshell + nproc + nRshell);
	327	ni_top = -B/(2*A);
	328	max = Ani_topni_top + B*ni_top +C;
	329	if (max <= mem_alloc) {
	330	ni = nocc;
	331	}
	332	else {
	333	ni_double = (-B + sqrt((double)(BB - 4A(C-mem_alloc))))/(2A);
	334	ni = (int) ni_double;
	335	if (ni > nocc) ni = nocc;
	336	max = mem_alloc;
	337	}
	338
	339	size_t mem_remaining = mem_alloc - (size_t)max;
	340
	341	// Set ni equal to the smallest batch size for any node
	342	msg_->min(ni);
	343	msg_->bcast(ni);
	344
	345	if (ni < nsocc) {
	346	ExEnv::err0() << "Not enough memory allocated" << endl;
	347	abort();
	348	}
	349
	350	if (ni < 1) { // this applies only to a closed shell case
	351	ExEnv::err0() << "Not enough memory allocated" << endl;
	352	abort();
	353	}
	354
	355	ExEnv::out0() << indent << "Computed batchsize: " << ni << endl;
	356
	357	if (nocc == ni) {
	358	npass = 1;
	359	rest = 0;
	360	}
	361	else {
	362	rest = nocc%ni;
	363	npass = (nocc - rest)/ni + 1;
	364	if (rest == 0) npass--;
	365	}
	366
	367	if (me == 0) {
	368	ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
	369	" ndocc nsocc nvir nfzc nfzv" << endl;
	370	ExEnv::out0() << indent
	371	<< scprintf(" %-4i %-3i %-5i %-4i %-3i"
	372	" %-3i %-3i %-3i %-3i %-3i\n",
	373	npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
		// NOTE(review): mem_alloc is formatted with %i here and below; its
		// declaration is not visible in this file, but it is mixed with
		// size_t above -- confirm the format specifier matches its type.
	374	ExEnv::out0() << indent
	375	<< scprintf("Using %i bytes of memory",mem_alloc) << endl;
	376	}
	377
	378	//////////////////////
	379	// Test that ni is OK
	380	//////////////////////
	381	if (me == 0) {
	382	ExEnv::out0() << indent
	383	<< scprintf("Memory allocated: %i", mem_alloc) << endl;
	384	ExEnv::out0() << indent
	385	<< scprintf("Memory used : %lf", Anini+B*ni+C) << endl;
	386	if (Anini + B*ni +C > mem_alloc) {
	387	ExEnv::err0() << "Problems with memory allocation: "
	388	<< "Using more memory than allocated" << endl;
	389	abort();
	390	}
	391	}
	392
	393	//////////////////////////////////////////////////////////////////
	394	// The scf vector might be distributed between the nodes,
	395	// but for OPT2 each node needs its own copy of the vector;
	396	// therefore, put a copy of the scf vector on each node;
	397	// while doing this, duplicate columns corresponding to singly
	398	// occupied orbitals and order columns as [socc docc socc unocc]
	399	// Also rearrange scf eigenvalues as [socc docc socc unocc]
	400	// want socc first to get the socc's in the first batch
	401	// (need socc's to compute energy denominators - see
	402	// socc_sum comment below)
	403	/////////////////////////////////////////////////////////
	404	evals_open = (double) malloc((noso+nsocc-nfzc-nfzv)sizeof(double));
	405
	406	RefDiagSCMatrix occ;
	407	RefDiagSCMatrix evals;
	408	RefSCMatrix Scf_Vec;
	409	eigen(evals, Scf_Vec, occ);
	410
	411	if (debug_) {
	412	evals.print("eigenvalues");
	413	Scf_Vec.print("eigenvectors");
	414	}
	415
	416	double scf_vectort_dat = new double[nbasisnoso];
	417	Scf_Vec->convert(scf_vectort_dat);
	418
	419	double** scf_vectort = new double*[nocc + nvir];
	420
	421	int idoc = 0, ivir = 0, isoc = 0;
	422	for (i=nfzc; i<noso-nfzv; i++) {
	423	if (occ(i) >= 2.0 - epsilon) {
	424	evals_open[idoc+nsocc] = evals(i);
	425	scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
	426	idoc++;
	427	}
	428	else if (occ(i) >= 1.0 - epsilon) {
	429	evals_open[isoc] = evals(i);
	430	scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
	431	evals_open[isoc+nocc] = evals(i);
	432	scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
	433	isoc++;
	434	}
	435	else {
	436	if (ivir < nvir) {
	437	evals_open[ivir+nocc+nsocc] = evals(i);
	438	scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
	439	}
	440	ivir++;
	441	}
	442	}
	443	// need the transpose of the vector
	444	double *scf_vector = new double[nbasis];
	445	double scf_vector_dat = new double[(nocc+nvir)nbasis];
	446	for (i=0; i<nbasis; i++) {
	447	scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
	448	for (j=0; j<nocc+nvir; j++) {
	449	scf_vector[i][j] = scf_vectort[j][i];
	450	}
	451	}
	452	delete[] scf_vectort;
	453	delete[] scf_vectort_dat;
	454
	455	////////////////////////////////////////
	456	// Allocate storage for various arrays
	457	////////////////////////////////////////
	458
	459	dim_ij = noccni - ni(ni - 1)/2;
	460
	461	trans_int1 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
	462	trans_int2 = (double) malloc(nvirni*sizeof(double));
	463	trans_int3 = (double) malloc(nbf[me]nvirdim_ijsizeof(double));
	464	trans_int4 = (double) malloc(nvirnvir*sizeof(double));
	465	trans_int4_tmp = (double) malloc(nvirnvir*sizeof(double));
	466	if (nsocc) socc_sum = (double) malloc(nsoccsizeof(double));
	467	if (nsocc) socc_sum_tmp = (double) malloc(nsoccsizeof(double));
	468	if (nsocc) mo_int_do_so_vir =
	469	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
	470	if (nsocc) mo_int_tmp =
	471	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
	472
	473	if (nsocc) bzerofast(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc));
	474
	475	// create the integrals object
	476	integral()->set_storage(mem_remaining);
	477	tbint_ = integral()->electron_repulsion();
	478	intbuf = tbint_->buffer();
	479
	480	/////////////////////////////////////
	481	// Begin opt2 loops
	482	/////////////////////////////////////
	483
	484
	485	for (pass=0; pass<npass; pass++) {
	486	i_offset = pass*ni;
		// The final pass only covers the leftover orbitals when nocc is
		// not a multiple of the batch size.
	487	if ((pass == npass - 1) && (rest != 0)) ni = rest;
	488
	489	r_offset = 0;
	490	bzerofast(trans_int3,nbf[me]nvirdim_ij);
	491
	492	tim_enter("RS loop");
	493
	494	for (imyshell=0; imyshell<nRshell; imyshell++) {
	495
	496	R = myshells[imyshell];
	497	nr = myshellsizes[imyshell];
	498
	499	for (S = 0; S < nshell; S++) {
	500	ns = basis()->shell(S).nfunction();
	501	tim_enter("bzerofast trans_int1");
	502	bzerofast(trans_int1,nfuncmaxnfuncmaxnbasis*ni);
	503	tim_exit("bzerofast trans_int1");
	504
	505	tim_enter("PQ loop");
	506
	507	for (P = 0; P < nshell; P++) {
	508	np = basis()->shell(P).nfunction();
	509
	510	for (Q = 0; Q <= P; Q++) {
	511	if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
	512	continue; // skip ereps less than tol
	513	}
	514
	515	aoint_computed++;
	516
	517	nq = basis()->shell(Q).nfunction();
	518
	519	tim_enter("erep");
	520	tbint_->compute_shell(P,Q,R,S);
	521	tim_exit("erep");
	522
	523	tim_enter("1. quart. tr.");
	524
	525	for (bf1 = 0; bf1 < np; bf1++) {
	526	p = basis()->shell_to_function(P) + bf1;
	527
	528	for (bf2 = 0; bf2 < nq; bf2++) {
	529	q = basis()->shell_to_function(Q) + bf2;
	530	if (q > p) {
	531	// if q > p: want to skip the loops over bf3-4
	532	// and larger bf2 values, so increment bf1 by 1
	533	// ("break")
	534	break;
	535	}
	536
	537	for (bf3 = 0; bf3 < nr; bf3++) {
	538	bf3_offset = 0;
	539	if (split_info[imyshell] != -1) bf3_offset = split_info[imyshell];
	540
	541	for (bf4 = 0; bf4 < ns; bf4++) {
	542
	543	index = bf4 + ns*(bf3+bf3_offset +
	544	basis()->shell(R).nfunction()(bf2 + nqbf1));
	545
	546	if (fabs(intbuf[index]) > 1.0e-15) {
	547	pqrs = intbuf[index];
	548
	549	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
	550	iprs = &trans_int1[((bf4nr + bf3)nbasis + p)*ni];
	551
	552	if (p == q) pqrs *= 0.5;
	553	col_index = i_offset;
	554	c_pi = &scf_vector[p][col_index];
	555	c_qi = &scf_vector[q][col_index];
	556
	557	for (i=ni; i; i--) {
	558	iqrs++ += pqrs *c_pi++;
	559	iprs++ += pqrs *c_qi++;
	560	}
	561	}
	562	} // exit bf4 loop
	563	} // exit bf3 loop
	564	} // exit bf2 loop
	565	} // exit bf1 loop
	566	tim_exit("1. quart. tr.");
	567	} // exit Q loop
	568	} // exit P loop
	569	tim_exit("PQ loop");
	570
	571	// Begin second and third quarter transformations
	572
	573	for (bf3 = 0; bf3 < nr; bf3++) {
	574	r = r_offset + bf3;
	575
	576	for (bf4 = 0; bf4 < ns; bf4++) {
	577	s = basis()->shell_to_function(S) + bf4;
	578
	579	tim_enter("bzerofast trans_int2");
	580	bzerofast(trans_int2,nvir*ni);
	581	tim_exit("bzerofast trans_int2");
	582
	583	tim_enter("2. quart. tr.");
	584
	585	for (q = 0; q < nbasis; q++) {
	586	iars_ptr = trans_int2;
	587	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
	588	c_qa = &scf_vector[q][nocc];
	589
	590	for (a = 0; a < nvir; a++) {
	591
	592	for (i=ni; i; i--) {
	593	iars_ptr++ += c_qa * *iqrs++;
	594	}
	595
	596	iqrs -= ni;
	597	c_qa++;
	598	}
	599	} // exit q loop
	600	tim_exit("2. quart. tr.");
	601
	602	// Begin third quarter transformation
	603
	604	tim_enter("3. quart. tr.");
	605
	606	for (i=0; i<ni; i++) {
	607	tmp_index = i(i+1)/2 + ii_offset;
	608
	609	for (a=0; a<nvir; a++) {
	610	iars = trans_int2[a*ni + i];
	611	c_sj = scf_vector[s];
	612	iajr_ptr = &trans_int3[tmp_index + dim_ij(a + nvirr)];
	613
	614	for (j=0; j<=i+i_offset; j++) {
	615	iajr_ptr++ += c_sj++ * iars;
	616	}
	617	}
	618	} // exit i loop
	619	tim_exit("3. quart. tr.");
	620
	621	} // exit bf4 loop
	622	} // exit bf3 loop
	623
	624	} // exit S loop
	625	r_offset += nr;
	626	} // exit R loop
	627	tim_exit("RS loop");
	628
	629	// Begin fourth quarter transformation;
	630	// first tansform integrals with only s.o. indices;
	631	// these integrals are needed to compute the denominators
	632	// in the various terms contributing to the correlation energy
	633	// and must all be computed in the first pass;
	634	// the integrals are summed into the array socc_sum:
	635	// socc_sum[isocc] = sum over asocc of (isocc asocc\|asocc isocc)
	636	// (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s)
	637	// the individual integrals are not saved here, only the sums are kept
	638
	639	if (pass == 0) {
	640	tim_enter("4. quart. tr.");
	641	if (nsocc) bzerofast(socc_sum,nsocc);
	642	for (isocc=0; isocc<nsocc; isocc++) {
	643
	644	index = 0;
	645	for (i=0; i<nRshell; i++) {
	646	for (j=0; j<myshellsizes[i]; j++) {
	647	r = basis()->shell_to_function(myshells[i]) + j;
	648	if (split_info[i] != -1) r += split_info[i];
	649
	650	for (asocc=0; asocc<nsocc; asocc++) {
	651	socc_sum[isocc] += scf_vector[r][nocc+asocc]*
	652	trans_int3[isocc(isocc+1)/2 + isocci_offset
	653	+ isocc + dim_ij(asocc + nvirindex)];
	654	}
	655	index++;
	656	}
	657	}
	658	} // exit i loop
	659
	660	tim_exit("4. quart. tr.");
	661
	662	// Sum socc_sum contributions from each node (only if nsocc > 0
	663	// since gop1 will fail if nsocc = 0)
	664	if (nsocc > 0) {
	665	tim_enter("global sum socc_sum");
	666	msg_->sum(socc_sum,nsocc,socc_sum_tmp);
	667	tim_exit("global sum socc_sum");
	668	}
	669
	670	}
	671
	672	// Now we have all the sums of integrals involving s.o.'s (socc_sum);
	673	// begin fourth quarter transformation for all integrals (including
	674	// integrals with only s.o. indices); use restriction j <= (i_offset+i)
	675	// to save flops
	676
	677	compute_index = 0;
	678
	679	for (i=0; i<ni; i++) {
	680
	681	for (j=0; j <= (i_offset+i); j++) {
	682
	683	tim_enter("4. quart. tr.");
	684
	685	bzerofast(trans_int4,nvir*nvir);
	686
	687	index = 0;
	688	for (k=0; k<nRshell; k++) {
	689	for (l=0; l<myshellsizes[k]; l++) {
	690	r = basis()->shell_to_function(myshells[k]) + l;
	691	if (split_info[k] != -1) r += split_info[k];
	692
	693	for (a=0; a<nvir; a++) {
	694	iajb = &trans_int4[a*nvir];
	695	iajr = trans_int3[i(i+1)/2 + ii_offset + j + dim_ij(a+nvirindex)];
	696	c_rb = &scf_vector[r][nocc];
	697
	698	for (b=0; b<nvir; b++) {
	699	iajb++ += c_rb++ * iajr;
	700	}
	701	}
	702	index++;
	703	}
	704	} // end of k loop
	705
	706	tim_exit("4. quart. tr.");
	707
	708	tim_enter("global sum trans_int4");
	709	msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
	710	tim_exit("global sum trans_int4");
	711
	712	// We now have the fully transformed integrals (ia\|jb)
	713	// for one i, one j (j <= i_offset+i), and all a and b;
	714	// compute contribution to the OPT1 and OPT2 correlation
	715	// energies; use restriction b <= a to save flops
	716
	717	tim_enter("compute ecorr");
	718
	719	for (a=0; a<nvir; a++) {
	720	for (b=0; b<=a; b++) {
	721	compute_index++;
	722	if (compute_index%nproc != me) continue;
	723
	724	docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
	725	+ (j >= nsocc && j < nocc);
	726	socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
	727	vir_index = (a >= nsocc) + (b >= nsocc);
	728
	729	if (socc_index >= 3) continue; // skip to next b value
	730
	731	delta_ijab = evals_open[i_offset+i] + evals_open[j]
	732	- evals_open[nocc+a] - evals_open[nocc+b];
	733
	734	// Determine integral type and compute energy contribution
	735	if (docc_index == 2 && vir_index == 2) {
	736	if (i_offset+i == j && a == b) {
	737	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	738	ecorr_opt2 += contrib1/delta_ijab;
	739	ecorr_opt1 += contrib1/delta_ijab;
	740	}
	741	else if (i_offset+i == j \|\| a == b) {
	742	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	743	ecorr_opt2 += 2*contrib1/delta_ijab;
	744	ecorr_opt1 += 2*contrib1/delta_ijab;
	745	}
	746	else {
	747	contrib1 = trans_int4[a*nvir + b];
	748	contrib2 = trans_int4[b*nvir + a];
	749	ecorr_opt2 += 4(contrib1contrib1 + contrib2*contrib2
	750	- contrib1*contrib2)/delta_ijab;
	751	ecorr_opt1 += 4(contrib1contrib1 + contrib2*contrib2
	752	- contrib1*contrib2)/delta_ijab;
	753	}
	754	}
	755	else if (docc_index == 2 && socc_index == 2) {
	756	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
	757	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
	758	ecorr_opt2 += contrib1/
	759	(delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
	760	ecorr_opt1 += contrib1/delta_ijab;
	761	}
	762	else if (socc_index == 2 && vir_index == 2) {
	763	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
	764	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
	765	ecorr_opt2 += contrib1/
	766	(delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
	767	ecorr_opt1 += contrib1/delta_ijab;
	768	}
	769	else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
	770	if (i_offset+i == j) {
	771	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	772	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
	773	ecorr_opt1 += contrib1/delta_ijab;
	774	}
	775	else {
	776	contrib1 = trans_int4[a*nvir + b];
	777	contrib2 = trans_int4[b*nvir + a];
	778	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
	779	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[b]);
	780	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
	781	- contrib1*contrib2)/delta_ijab;
	782	}
	783	}
	784	else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
	785	contrib1 = trans_int4[bnvir+a]trans_int4[b*nvir+a];
	786	if (j == b) {
	787	// To compute the energy contribution from an integral of the
	788	// type (is1\|s1a) (i=d.o., s1=s.o., a=unocc.), we need the
	789	// (is\|sa) integrals for all s=s.o.; these integrals are
	790	// therefore stored here in the array mo_int_do_so_vir, and
	791	// the energy contribution is computed after exiting the loop
	792	// over i-batches (pass)
	793	mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
	794	(i_offset+i-nsocc + ndocc*b)] =
	795	trans_int4[b*nvir + a];
	796	ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
	797	ecorr_opt1 += 1.5*contrib1/delta_ijab;
	798	ecorr_zapt2_contrib += contrib1/
	799	(delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
	800	+ 0.5*contrib1/delta_ijab;
	801	}
	802	else {
	803	ecorr_opt2 += contrib1/
	804	(delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
	805	ecorr_opt1 += contrib1/delta_ijab;
	806	}
	807	}
	808	else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
	809	if (a == b) {
	810	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
	811	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
	812	ecorr_opt1 += contrib1/delta_ijab;
	813	}
	814	else {
	815	contrib1 = trans_int4[a*nvir + b];
	816	contrib2 = trans_int4[b*nvir + a];
	817	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
	818	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[j]);
	819	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
	820	- contrib1*contrib2)/delta_ijab;
	821	}
	822	}
	823	} // exit b loop
	824	} // exit a loop
	825	tim_exit("compute ecorr");
	826	} // exit j loop
	827	} // exit i loop
	828
	829	if (nsocc == 0 && npass > 1 && pass < npass - 1) {
	830	double passe = ecorr_opt2;
	831	msg_->sum(passe);
	832	ExEnv::out0() << indent
	833	<< "Partial correlation energy for pass " << pass << ":" << endl;
	834	ExEnv::out0() << indent
	835	<< scprintf(" restart_ecorr = %14.10f", passe)
	836	<< endl;
	837	ExEnv::out0() << indent
	838	<< scprintf(" restart_orbital_v2lb = %d", ((pass+1) * ni))
	839	<< endl;
	840	}
	841	} // exit loop over i-batches (pass)
	842
	843
	844
	845	// Compute contribution from excitations of the type is1 -> s1a where
	846	// i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a,
	847	// where i and a have the same spin, contribute to this term;
	848	// (Brillouin's theorem not satisfied for ROHF wave functions);
	849	// do this only if nsocc > 0 since gop1 will fail otherwise
	850
	851	tim_enter("compute ecorr");
	852
	853	if (nsocc > 0) {
	854	tim_enter("global sum mo_int_do_so_vir");
	855	msg_->sum(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc),mo_int_tmp);
	856	tim_exit("global sum mo_int_do_so_vir");
	857	}
	858
	859	// Add extra contribution for triplet and higher spin multiplicities
	860	// contribution = sum over s1 and s2<s1 of (is1\|s1a)*(is2\|s2a)/delta
	861
	862	if (me == 0 && nsocc) {
	863	for (i=0; i<ndocc; i++) {
	864
	865	for (a=0; a<nvir-nsocc; a++) {
	866	delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
	867
	868	for (s1=0; s1<nsocc; s1++) {
	869
	870	for (s2=0; s2<s1; s2++) {
	871	contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs1)]*
	872	mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs2)]/delta;
	873	ecorr_opt2 += contrib1;
	874	ecorr_opt1 += contrib1;
	875	}
	876	}
	877	} // exit a loop
	878	} // exit i loop
	879	}
	880
	881	tim_exit("compute ecorr");
	882
	883	ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
	884	ecorr_opt2 += ecorr_opt2_contrib;
	885	msg_->sum(ecorr_opt1);
	886	msg_->sum(ecorr_opt2);
	887	msg_->sum(ecorr_zapt2);
	888	msg_->sum(aoint_computed);
	889
	890	escf = reference_->energy();
	891	hf_energy_ = escf;
	892
		// The total energies are formed on node 0 only; the bcast calls
		// after this block pass eopt1, eopt2 and ezapt2 to msg_.
	893	if (me == 0) {
	894	eopt2 = escf + ecorr_opt2;
	895	eopt1 = escf + ecorr_opt1;
	896	ezapt2 = escf + ecorr_zapt2;
	897
	898	// Print out various energies etc.
	899	ExEnv::out0() << indent
	900	<< "Number of shell quartets for which AO integrals would" << endl
	901	<< indent << "have been computed without bounds checking: "
	902	<< npassnshellnshell(nshell+1)(nshell+1)/2 << endl;
	903	ExEnv::out0() << indent
	904	<< "Number of shell quartets for which AO integrals" << endl
	905	<< indent << "were computed: " << aoint_computed << endl;
	906
	907	ExEnv::out0() << indent
	908	<< scprintf("ROHF energy [au]: %17.12lf\n", escf);
	909	ExEnv::out0() << indent
	910	<< scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
	911	ExEnv::out0() << indent
	912	<< scprintf("OPT2 second order correction [au]: %17.12lf\n",
	913	ecorr_opt2);
	914	ExEnv::out0() << indent
	915	<< scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
	916	ExEnv::out0() << indent
	917	<< scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
	918	ecorr_zapt2);
	919	ExEnv::out0() << indent
	920	<< scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
	921	ExEnv::out0().flush();
	922	}
	923
	924	msg_->bcast(eopt1);
	925	msg_->bcast(eopt2);
	926	msg_->bcast(ezapt2);
	927
	928	if (method_ && !strcmp(method_,"opt1")) {
	929	set_energy(eopt1);
	930	set_actual_value_accuracy(reference_->actual_value_accuracy()
	931	*ref_to_mp2_acc);
	932	}
	933	else if (method_ && !strcmp(method_,"opt2")) {
	934	set_energy(eopt2);
	935	set_actual_value_accuracy(reference_->actual_value_accuracy()
	936	*ref_to_mp2_acc);
	937	}
	938	else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
	939	set_energy(ezapt2);
	940	set_actual_value_accuracy(reference_->actual_value_accuracy()
	941	*ref_to_mp2_acc);
	942	}
	943	else {
	944	if (!(!method_ \|\| !strcmp(method_,"zapt"))) {
	945	ExEnv::out0() << indent
	946	<< "MBPT2: bad method: " << method_ << ", using zapt" << endl;
	947	}
	948	set_energy(ezapt2);
	949	set_actual_value_accuracy(reference_->actual_value_accuracy()
	950	*ref_to_mp2_acc);
	951	}
	952
	953	free(trans_int1);
	954	free(trans_int2);
	955	free(trans_int3);
	956	free(trans_int4);
	957	free(trans_int4_tmp);
	958	if (nsocc) free(socc_sum);
	959	if (nsocc) free(socc_sum_tmp);
	960	if (nsocc) free(mo_int_do_so_vir);
	961	if (nsocc) free(mo_int_tmp);
	962	free(evals_open);
	963	free(myshells);
	964	free(shellsize);
	965	free (myshellsizes);
	966	free (split_info);
	967	free(sorted_shells);
	968	free(nbf);
	969	free(proc);
	970
	971	delete[] scf_vector;
	972	delete[] scf_vector_dat;
	973
	974	}
	975
	976	/////////////////////////////////////////////////////////////////
	977	// Function iquicksort performs a quick sort (larger -> smaller)
	978	// of the integer data in item by the integer indices in index;
	979	// data in item remain unchanged
	980	/////////////////////////////////////////////////////////////////
	981	static void
	982	iquicksort(int item,int index,int n)
	983	{
	984	int i;
	985	if (n<=0) return;
	986	for (i=0; i<n; i++) {
	987	index[i] = i;
	988	}
	989	iqs(item,index,0,n-1);
	990	}
	991
	/////////////////////////////////////////////////////////////////
	// Function iqs is the recursive worker used by iquicksort: it
	// reorders index[left..right] so that the values item[index[k]]
	// are in non-increasing order. Only index is modified; item is
	// read only. Requires left <= right and both within the arrays.
	/////////////////////////////////////////////////////////////////
	static void
	iqs(int *item,int *index,int left,int right)
	{
	  int i = left;
	  int j = right;
	  // pivot value taken from the middle of the current range
	  const int x = item[index[(left+right)/2]];

	  do {
	    // skip entries that are already on the correct side of the pivot
	    while (item[index[i]]>x && i<right) i++;
	    while (x>item[index[j]] && j>left) j--;

	    if (i<=j) {
	      // entries with equal keys need no exchange
	      if (item[index[i]] != item[index[j]]) {
	        const int y = index[i];
	        index[i] = index[j];
	        index[j] = y;
	      }
	      i++; j--;
	    }
	  } while (i<=j);

	  // recurse into whatever remains unsorted on either side
	  if (left<j) iqs(item,index,left,j);
	  if (i<right) iqs(item,index,i,right);
	}
	1018
	////////////////////////////////////////////////////////////////////
	// Function findprocminmax finds the processor with the most/fewest
	// basis functions and the corresponding number of basis functions
	//
	//   nbf     - array of nproc counts, one per processor; must hold at
	//             least one entry because nbf[0] is read unconditionally
	//   nproc   - number of entries in nbf
	//   procmin - out: index of the smallest entry of nbf
	//   procmax - out: index of the largest entry of nbf
	//   minbf   - out: the smallest entry of nbf
	//   maxbf   - out: the largest entry of nbf
	//
	// When several entries tie, the lowest index is reported.
	////////////////////////////////////////////////////////////////////
	static void
	findprocminmax(int *nbf, int nproc,
	               int *procmin, int *procmax, int *minbf, int *maxbf)
	{
	  int i;

	  *procmax = *procmin = 0;
	  *maxbf = nbf[0];
	  *minbf = nbf[0];

	  for (i=1; i<nproc; i++) {
	    if (nbf[i] > *maxbf) {
	      *maxbf = nbf[i];
	      *procmax = i;
	    }
	    if (nbf[i] < *minbf) {
	      *minbf = nbf[i];
	      *procmin = i;
	    }
	  }
	}
	1044
	/////////////////////////////////////////////////////////////////
	// Function findshellmax finds the largest shell on a processor
	//
	//   myshellsizes  - array of nRshell shell sizes; must hold at least
	//                   one entry because element 0 is read unconditionally
	//   nRshell       - number of entries in myshellsizes
	//   shellmax      - out: the largest entry of myshellsizes
	//   shellmaxindex - out: index of that entry (lowest index on ties)
	/////////////////////////////////////////////////////////////////
	static void
	findshellmax(int *myshellsizes, int nRshell, int *shellmax, int *shellmaxindex)
	{
	  int i;

	  *shellmax = myshellsizes[0];
	  *shellmaxindex = 0;

	  for (i=1; i<nRshell; i++) {
	    if (myshellsizes[i] > *shellmax) {
	      *shellmax = myshellsizes[i];
	      *shellmaxindex = i;
	    }
	  }
	}
	1063
	//////////////////////////////////////////////////////////////
	// Function expandintarray expands the dimension of an array of
	// ints by 1; the existing olddim entries are preserved and the
	// new last entry is set to 0.
	// NB: THE ARRAY MUST HAVE BEEN ALLOCATED WITH MALLOC
	//
	//   a      - in/out: pointer to the array; on return it points to
	//            the enlarged storage (the old pointer value is invalid)
	//   olddim - number of entries currently held in a
	//
	// Aborts if the enlarged array cannot be allocated.
	//////////////////////////////////////////////////////////////
	static void
	expandintarray(int *&a, int olddim)
	{
	  // realloc keeps the first olddim entries, so no manual copy is needed
	  int *tmp = (int*) realloc(a, ((size_t) olddim + 1)*sizeof(int));

	  // on failure a is still valid but cannot be grown; stop here instead
	  // of writing through a null pointer
	  if (tmp == 0) abort();

	  tmp[olddim] = 0;

	  a = tmp;
	}
	1086
	1087	////////////////////////////////////////////////////////////////////////////
	1088
	1089	// Local Variables:
	1090	// mode: c++
	1091	// c-file-style: "CLJ-CONDENSED"
	1092	// End:

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv2lb.cc@ 416a65

Download in other formats: