Context Navigation

hsosv2.cc@ aae63a

Visit:

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable

Last change on this file since aae63a was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 32.6 KB

Line
1	//
2	// hsosv2.cc
3	//
4	// Copyright (C) 1996 Limit Point Systems, Inc.
5	//
6	// Author: Ida Nielsen <ida@kemi.aau.dk>
7	// Maintainer: LPS
8	//
9	// This file is part of the SC Toolkit.
10	//
11	// The SC Toolkit is free software; you can redistribute it and/or modify
12	// it under the terms of the GNU Library General Public License as published by
13	// the Free Software Foundation; either version 2, or (at your option)
14	// any later version.
15	//
16	// The SC Toolkit is distributed in the hope that it will be useful,
17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	// GNU Library General Public License for more details.
20	//
21	// You should have received a copy of the GNU Library General Public License
22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24	//
25	// The U.S. Government is granted a limited license as per AL 91-7.
26	//
27
28	#include <iostream>
29	#include <math.h>
30
31	#include <util/misc/formio.h>
32	#include <util/misc/timer.h>
33	#include <util/group/message.h>
34	#include <math/scmat/matrix.h>
35	#include <chemistry/molecule/molecule.h>
36	#include <chemistry/qc/mbpt/mbpt.h>
37	#include <chemistry/qc/mbpt/bzerofast.h>
38
39	using namespace std;
40	using namespace sc;
41
42	static void iqs(int item,int index,int left,int right);
43	static void iquicksort(int item,int index,int n);
44
45	void
46	MBPT2::compute_hsos_v2()
47	{
48	int i, j, k;
49	int s1, s2;
50	int a, b;
51	int isocc, asocc; /* indices running over singly occupied orbitals */
52	int nfuncmax = basis()->max_nfunction_in_shell();
53	int nvir;
54	int nshell;
55	int nocc=0,ndocc=0,nsocc=0;
56	int i_offset;
57	int npass, pass;
58	int ni;
59	int np, nq, nr, ns;
60	int P, Q, R, S;
61	int p, q, r, s;
62	int bf1, bf2, bf3, bf4;
63	int index;
64	int compute_index;
65	int col_index;
66	int tmp_index;
67	int dim_ij;
68	int docc_index, socc_index, vir_index;
69	int me;
70	int nproc;
71	int rest;
72	int r_offset;
73	int sum;
74	int min;
75	int iproc;
76	int nRshell;
77	int imyshell;
78	int myshells; / the R indices processed by node me */
79	int shellsize; / size of each shell */
80	int sorted_shells; / sorted shell indices: large shells->small shells */
81	int nbf; / number of basis functions processed by each node */
82	int proc; / element k: processor which will process shell k */
83	int aoint_computed = 0;
84	double evals_open; / reordered scf eigenvalues */
85	const double intbuf; / 2-electron AO integral buffer */
86	double trans_int1; / partially transformed integrals */
87	double trans_int2; / partially transformed integrals */
88	double trans_int3; / partially transformed integrals */
89	double trans_int4; / fully transformed integrals */
90	double trans_int4_tmp; / scratch array */
91	double mo_int_do_so_vir=0;/mo integral (is\|sa); i:d.o.,s:s.o.,a:vir */
92	double mo_int_tmp=0; / scratch array used in global summations */
93	double socc_sum=0; / sum of 2-el integrals involving only s.o.'s */
94	double socc_sum_tmp=0;/ scratch array */
95	double iqrs, iprs;
96	double *iars_ptr;
97	double iars;
98	double iajr;
99	double *iajr_ptr;
100	double *iajb;
101	double pqrs;
102	double *c_qa;
103	double c_rb, c_pi, c_qi, c_sj;
104	double delta_ijab;
105	double delta;
106	double contrib1, contrib2;
107	double ecorr_opt2=0,ecorr_opt1=0;
108	double ecorr_zapt2;
109	double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
110	double escf;
111	double eopt2,eopt1,ezapt2;
112	double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
113
114	me = msg_->me();
115
116	ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v2)" << endl;
117
118	tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
119
120	nproc = msg_->n();
121
122	ndocc = nsocc = 0;
123	const double epsilon = 1.0e-4;
124	for (i=0; i<oso_dimension()->n(); i++) {
125	if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
126	else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
127	}
128
129	/* do a few preliminary tests to make sure the desired calculation *
130	* can be done (and appears to be meaningful!) */
131
132	if (ndocc == 0 && nsocc == 0) {
133	ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
134	abort();
135	}
136
137	if (nfzc > ndocc) {
138	ExEnv::err0()
139	<< "The number of frozen core orbitals exceeds the number" << endl
140	<< "of doubly occupied orbitals; program exiting" << endl;
141	abort();
142	}
143
144	if (nfzv > noso - ndocc - nsocc) {
145	ExEnv::err0()
146	<< "The number of frozen virtual orbitals exceeds the number" << endl
147	<< "of unoccupied orbitals; program exiting" << endl;
148	abort();
149	}
150
151	ndocc = ndocc - nfzc;
152	/* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
153	nvir = noso - ndocc - nfzc - nfzv;
154	/* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
155	nocc = ndocc + nsocc;
156	nshell = basis()->nshell();
157
158	/* allocate storage for some arrays used for keeping track of which R *
159	* indices are processed by each node */
160	shellsize = (int) malloc(nshellsizeof(int));
161	sorted_shells = (int) malloc(nshellsizeof(int));
162	nbf = (int) malloc(nprocsizeof(int));
163	proc = (int) malloc(nshellsizeof(int));
164
165
166	/******************************************************
167	* Begin distributing R shells between nodes so each *
168	* node gets ca. the same number of r basis functions *
169	******************************************************/
170
171	/* compute size of each shell */
172	for (i=0; i<nshell; i++) {
173	shellsize[i] = basis()->shell(i).nfunction();
174	}
175
176	/* do an index sort (large -> small) of shellsize to form sorted_shells */
177	iquicksort(shellsize,sorted_shells,nshell);
178
179	/* initialize nbf */
180	for (i=0; i<nproc; i++) nbf[i] = 0;
181
182	for (i=0; i<nshell; i++) {
183	min = nbf[0];
184	iproc = 0;
185	for (j=1; j<nproc; j++) {
186	if (nbf[j] < min) {
187	iproc = j;
188	min = nbf[j];
189	}
190	}
191	proc[sorted_shells[i]] = iproc;
192	nbf[iproc] += shellsize[sorted_shells[i]];
193	}
194	if (me == 0) {
195	ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
196	for (i=0; i<nproc; i++) {
197	if (i%12 == 0) ExEnv::out0() << indent;
198	ExEnv::out0() << scprintf(" %4i",nbf[i]);
199	if ((i+1)%12 == 0) ExEnv::out0() << endl;
200	}
201	ExEnv::out0() << endl;
202	}
203
204	/* determine which shells are to be processed by node me */
205	nRshell = 0;
206	for (i=0; i<nshell; i++) {
207	if (proc[i] == me) nRshell++;
208	}
209	myshells = (int) malloc(nRshellsizeof(int));
210	imyshell = 0;
211	for (i=0; i<nshell; i++) {
212	if (proc[i] == me) {
213	myshells[imyshell] = i;
214	imyshell++;
215	}
216	}
217
218	/************************************************
219	* End of distribution of R shells between nodes *
220	************************************************/
221
222
223	/* compute batch size ni for opt2 loops; *
224	* need to store the following arrays of type double : trans_int1-4, *
225	* trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp, *
226	* mo_int_do_so_vir, mo_int_tmp, *
227	* and the following arrays of type int: myshells, shellsize, *
228	* sorted_shells, nbf, and proc */
229
230	size_t memused = 0;
231	ni = 0;
232	for (i=1; i<=nocc; i++) {
233	distsize_t tmpmem = compute_v2_memory(i,
234	nfuncmax, nbf[me], nshell,
235	ndocc, nsocc, nvir, nproc);
236	if (tmpmem > mem_alloc) break;
237	ni = i;
238	memused = distsize_to_size(tmpmem);
239	}
240
241	size_t mem_remaining = mem_alloc - memused;
242
243	/* set ni equal to the smallest batch size for any node */
244	msg_->min(ni);
245	msg_->bcast(ni);
246
247	int nbfmax = nbf[me];
248	msg_->max(nbfmax);
249
250	if (me == 0) {
251	ExEnv::out0() << indent << " nproc nbasis nshell nfuncmax"
252	" ndocc nsocc nvir nfzc nfzv" << endl;
253	ExEnv::out0() << indent << scprintf(" %-4i %-5i %-4i %-3i"
254	" %-3i %-3i %-3i %-3i %-3i\n",
255	nproc,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
256	}
257
258
259	ExEnv::out0() << indent
260	<< "Memory available per node: " << mem_alloc << " Bytes"
261	<< endl;
262	ExEnv::out0() << indent
263	<< "Total memory used per node: " << memused << " Bytes"
264	<< endl;
265	ExEnv::out0() << indent
266	<< "Memory required for one pass: "
267	<< compute_v2_memory(nocc,
268	nfuncmax, nbfmax, nshell,
269	ndocc, nsocc, nvir, nproc)
270	<< " Bytes"
271	<< endl;
272	ExEnv::out0() << indent
273	<< "Minimum memory required: "
274	<< compute_v2_memory(1,
275	nfuncmax, nbfmax, nshell,
276	ndocc, nsocc, nvir, nproc)
277	<< " Bytes"
278	<< endl;
279	ExEnv::out0() << indent
280	<< "Batch size: " << ni
281	<< endl;
282
283	if (ni < nsocc) {
284	ExEnv::err0() << "Not enough memory allocated" << endl;
285	abort();
286	}
287
288	if (ni < 1) { /* this applies only to a closed shell case */
289	ExEnv::err0() << "Not enough memory allocated" << endl;
290	abort();
291	}
292
293	if (nocc == ni) {
294	npass = 1;
295	rest = 0;
296	}
297	else {
298	rest = nocc%ni;
299	npass = (nocc - rest)/ni + 1;
300	if (rest == 0) npass--;
301	}
302
303	ExEnv::out0() << indent
304	<< "npass = " << npass
305	<< " rest = " << rest
306	<< endl;
307
308	/* the scf vector might be distributed between the nodes, but for OPT2 *
309	* each node needs its own copy of the vector; *
310	* therefore, put a copy of the scf vector on each node; *
311	* while doing this, duplicate columns corresponding to singly *
312	* occupied orbitals and order columns as [socc docc socc unocc] */
313	/* also rearrange scf eigenvalues as [socc docc socc unocc] *
314	* want socc first to get the socc's in the first batch *
315	* (need socc's to compute energy denominators - see *
316	* socc_sum comment below) */
317
318	evals_open = (double) malloc((noso+nsocc-nfzc-nfzv)sizeof(double));
319
320	RefDiagSCMatrix occ;
321	RefDiagSCMatrix evals;
322	RefSCMatrix Scf_Vec;
323	eigen(evals, Scf_Vec, occ);
324
325	if (debug_) {
326	evals.print("eigenvalues");
327	Scf_Vec.print("eigenvectors");
328	}
329
330	double scf_vectort_dat = new double[nbasisnoso];
331	Scf_Vec->convert(scf_vectort_dat);
332
333	double** scf_vectort = new double*[nocc + nvir];
334
335	int idoc = 0, ivir = 0, isoc = 0;
336	for (i=nfzc; i<noso-nfzv; i++) {
337	if (occ(i) >= 2.0 - epsilon) {
338	evals_open[idoc+nsocc] = evals(i);
339	scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
340	idoc++;
341	}
342	else if (occ(i) >= 1.0 - epsilon) {
343	evals_open[isoc] = evals(i);
344	scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
345	evals_open[isoc+nocc] = evals(i);
346	scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
347	isoc++;
348	}
349	else {
350	if (ivir < nvir) {
351	evals_open[ivir+nocc+nsocc] = evals(i);
352	scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
353	}
354	ivir++;
355	}
356	}
357	// need the transpose of the vector
358	double *scf_vector = new double[nbasis];
359	double scf_vector_dat = new double[(nocc+nvir)nbasis];
360	for (i=0; i<nbasis; i++) {
361	scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
362	for (j=0; j<nocc+nvir; j++) {
363	scf_vector[i][j] = scf_vectort[j][i];
364	}
365	}
366	delete[] scf_vectort;
367	delete[] scf_vectort_dat;
368
369	/* allocate storage for various arrays */
370
371	dim_ij = noccni - (ni(ni - 1))/2;
372
373	trans_int1 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
374	trans_int2 = (double) malloc(nvirni*sizeof(double));
375	trans_int3 = (double) malloc(nbf[me]nvirdim_ijsizeof(double));
376	trans_int4 = (double) malloc(nvirnvir*sizeof(double));
377	trans_int4_tmp = (double) malloc(nvirnvir*sizeof(double));
378	if (nsocc) socc_sum = (double) malloc(nsoccsizeof(double));
379	if (nsocc) socc_sum_tmp = (double) malloc(nsoccsizeof(double));
380	if (nsocc) mo_int_do_so_vir =
381	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
382	if (nsocc) mo_int_tmp =
383	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
384
385	if (nsocc) bzerofast(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc));
386
387
388	// create the integrals object
389	integral()->set_storage(mem_remaining);
390	tbint_ = integral()->electron_repulsion();
391	intbuf = tbint_->buffer();
392
393	/**************************************************************************
394	* begin opt2 loops *
395	**************************************************************************/
396
397
398	for (pass=0; pass<npass; pass++) {
399	i_offset = pass*ni;
400	if ((pass == npass - 1) && (rest != 0)) ni = rest;
401
402	r_offset = 0;
403	bzerofast(trans_int3,nbf[me]nvirdim_ij);
404
405	tim_enter("RS loop");
406
407	for (imyshell=0; imyshell<nRshell; imyshell++) {
408
409	R = myshells[imyshell];
410	nr = basis()->shell(R).nfunction();
411
412	for (S = 0; S < nshell; S++) {
413	ns = basis()->shell(S).nfunction();
414	tim_enter("bzerofast trans_int1");
415	bzerofast(trans_int1,nfuncmaxnfuncmaxnbasis*ni);
416	tim_exit("bzerofast trans_int1");
417
418	tim_enter("PQ loop");
419
420	for (P = 0; P < nshell; P++) {
421	np = basis()->shell(P).nfunction();
422
423	for (Q = 0; Q <= P; Q++) {
424	if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
425	continue; /* skip ereps less than tol */
426	}
427
428	aoint_computed++;
429
430	nq = basis()->shell(Q).nfunction();
431
432	tim_enter("erep");
433	tbint_->compute_shell(P,Q,R,S);
434	tim_exit("erep");
435
436	tim_enter("1. quart. tr.");
437
438	index = 0;
439
440	for (bf1 = 0; bf1 < np; bf1++) {
441	p = basis()->shell_to_function(P) + bf1;
442
443	for (bf2 = 0; bf2 < nq; bf2++) {
444	q = basis()->shell_to_function(Q) + bf2;
445	if (q > p) {
446	/* if q > p: want to skip the loops over bf3-4 */
447	/* and larger bf2 values, so increment bf1 by 1 */
448	/* ("break") and adjust the value of index */
449	index = (bf1 + 1) * nq * nr * ns;
450	break;
451	}
452
453	for (bf3 = 0; bf3 < nr; bf3++) {
454
455	for (bf4 = 0; bf4 < ns; bf4++,index++) {
456
457	if (fabs(intbuf[index]) > 1.0e-15) {
458	pqrs = intbuf[index];
459
460	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
461	iprs = &trans_int1[((bf4nr + bf3)nbasis + p)*ni];
462
463	if (p == q) pqrs *= 0.5;
464	col_index = i_offset;
465	c_pi = &scf_vector[p][col_index];
466	c_qi = &scf_vector[q][col_index];
467
468	for (i=ni; i; i--) {
469	iqrs++ += pqrs *c_pi++;
470	iprs++ += pqrs *c_qi++;
471	}
472	}
473	} /* exit bf4 loop */
474	} /* exit bf3 loop */
475	} /* exit bf2 loop */
476	} /* exit bf1 loop */
477	tim_exit("1. quart. tr.");
478	} /* exit Q loop */
479	} /* exit P loop */
480	tim_exit("PQ loop");
481
482	/* begin second and third quarter transformations */
483
484	for (bf3 = 0; bf3 < nr; bf3++) {
485	r = r_offset + bf3;
486
487	for (bf4 = 0; bf4 < ns; bf4++) {
488	s = basis()->shell_to_function(S) + bf4;
489
490	tim_enter("bzerofast trans_int2");
491	bzerofast(trans_int2,nvir*ni);
492	tim_exit("bzerofast trans_int2");
493
494	tim_enter("2. quart. tr.");
495
496	for (q = 0; q < nbasis; q++) {
497	iars_ptr = trans_int2;
498	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
499	c_qa = &scf_vector[q][nocc];
500
501	for (a = 0; a < nvir; a++) {
502
503	for (i=ni; i; i--) {
504	iars_ptr++ += c_qa * *iqrs++;
505	}
506
507	iqrs -= ni;
508	c_qa++;
509	}
510	} /* exit q loop */
511	tim_exit("2. quart. tr.");
512
513	/* begin third quarter transformation */
514
515	tim_enter("3. quart. tr.");
516
517	for (i=0; i<ni; i++) {
518	tmp_index = i(i+1)/2 + ii_offset;
519
520	for (a=0; a<nvir; a++) {
521	iars = trans_int2[a*ni + i];
522	c_sj = scf_vector[s];
523	iajr_ptr = &trans_int3[tmp_index + dim_ij(a + nvirr)];
524
525	for (j=0; j<=i+i_offset; j++) {
526	iajr_ptr++ += c_sj++ * iars;
527	}
528	}
529	} /* exit i loop */
530	tim_exit("3. quart. tr.");
531
532	} /* exit bf4 loop */
533	} /* exit bf3 loop */
534
535	} /* exit S loop */
536	r_offset += nr;
537	} /* exit R loop */
538	tim_exit("RS loop");
539
540
541	/* begin fourth quarter transformation; *
542	* first tansform integrals with only s.o. indices; *
543	* these integrals are needed to compute the denominators *
544	* in the various terms contributing to the correlation energy *
545	* and must all be computed in the first pass; *
546	* the integrals are summed into the array socc_sum: *
547	* socc_sum[isocc] = sum over asocc of (isocc asocc\|asocc isocc) *
548	* (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
549	* the individual integrals are not saved here, only the sums are kept */
550
551
552	if (pass == 0) {
553	tim_enter("4. quart. tr.");
554	if (nsocc) bzerofast(socc_sum,nsocc);
555	for (isocc=0; isocc<nsocc; isocc++) {
556
557	for (index=0; index<nbf[me]; index++) {
558	i = 0;
559	sum = basis()->shell(myshells[i]).nfunction();
560	while (sum <= index) {
561	i++;
562	sum += basis()->shell(myshells[i]).nfunction();
563	}
564	sum -= basis()->shell(myshells[i]).nfunction();
565	r = basis()->shell_to_function(myshells[i]) + index - sum;
566
567	for (asocc=0; asocc<nsocc; asocc++) {
568	socc_sum[isocc] += scf_vector[r][nocc+asocc]*
569	trans_int3[isocc(isocc+1)/2 + isocci_offset
570	+ isocc + dim_ij(asocc + nvirindex)];
571	}
572	}
573	} /* exit isocc loop */
574
575	tim_exit("4. quart. tr.");
576
577	/* sum socc_sum contributions from each node (only if nsocc > 0 *
578	* since gop1 will fail if nsocc = 0) */
579	if (nsocc > 0) {
580	tim_enter("global sum socc_sum");
581	msg_->sum(socc_sum,nsocc,socc_sum_tmp);
582	tim_exit("global sum socc_sum");
583	}
584
585	}
586
587	/* now we have all the sums of integrals involving s.o.'s (socc_sum); *
588	* begin fourth quarter transformation for all integrals (including *
589	* integrals with only s.o. indices); use restriction j <= (i_offset+i) *
590	* to save flops */
591
592	compute_index = 0;
593
594	for (i=0; i<ni; i++) {
595
596	for (j=0; j <= (i_offset+i); j++) {
597
598	tim_enter("4. quart. tr.");
599
600	bzerofast(trans_int4,nvir*nvir);
601
602	for (index=0; index<nbf[me]; index++) {
603	k = 0;
604	sum = basis()->shell(myshells[k]).nfunction();
605	while (sum <= index) {
606	k++;
607	sum += basis()->shell(myshells[k]).nfunction();
608	}
609	sum -= basis()->shell(myshells[k]).nfunction();
610	r = basis()->shell_to_function(myshells[k]) + index - sum;
611
612	for (a=0; a<nvir; a++) {
613	iajb = &trans_int4[a*nvir];
614	iajr = trans_int3[i(i+1)/2 + ii_offset + j + dim_ij(a+nvirindex)];
615	c_rb = &scf_vector[r][nocc];
616
617	for (b=0; b<nvir; b++) {
618	iajb++ += c_rb++ * iajr;
619	}
620	}
621	}
622
623	tim_exit("4. quart. tr.");
624
625	tim_enter("global sum trans_int4");
626	msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
627	tim_exit("global sum trans_int4");
628
629	/* we now have the fully transformed integrals (ia\|jb) *
630	* for one i, one j (j <= i_offset+i), and all a and b; *
631	* compute contribution to the OPT1 and OPT2 correlation *
632	* energies; use restriction b <= a to save flops */
633
634	tim_enter("compute ecorr");
635
636	for (a=0; a<nvir; a++) {
637	for (b=0; b<=a; b++) {
638	compute_index++;
639	if (compute_index%nproc != me) continue;
640
641	docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
642	+ (j >= nsocc && j < nocc);
643	socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
644	vir_index = (a >= nsocc) + (b >= nsocc);
645
646	if (socc_index >= 3) continue; /* skip to next b value */
647
648	delta_ijab = evals_open[i_offset+i] + evals_open[j]
649	- evals_open[nocc+a] - evals_open[nocc+b];
650
651	/* determine integral type and compute energy contribution */
652	if (docc_index == 2 && vir_index == 2) {
653	if (i_offset+i == j && a == b) {
654	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
655	ecorr_opt2 += contrib1/delta_ijab;
656	ecorr_opt1 += contrib1/delta_ijab;
657	}
658	else if (i_offset+i == j \|\| a == b) {
659	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
660	ecorr_opt2 += 2*contrib1/delta_ijab;
661	ecorr_opt1 += 2*contrib1/delta_ijab;
662	}
663	else {
664	contrib1 = trans_int4[a*nvir + b];
665	contrib2 = trans_int4[b*nvir + a];
666	ecorr_opt2 += 4(contrib1contrib1 + contrib2*contrib2
667	- contrib1*contrib2)/delta_ijab;
668	ecorr_opt1 += 4(contrib1contrib1 + contrib2*contrib2
669	- contrib1*contrib2)/delta_ijab;
670	}
671	}
672	else if (docc_index == 2 && socc_index == 2) {
673	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
674	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
675	ecorr_opt2 += contrib1/
676	(delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
677	ecorr_opt1 += contrib1/delta_ijab;
678	}
679	else if (socc_index == 2 && vir_index == 2) {
680	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
681	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
682	ecorr_opt2 += contrib1/
683	(delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
684	ecorr_opt1 += contrib1/delta_ijab;
685	}
686	else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
687	if (i_offset+i == j) {
688	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
689	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
690	ecorr_opt1 += contrib1/delta_ijab;
691	}
692	else {
693	contrib1 = trans_int4[a*nvir + b];
694	contrib2 = trans_int4[b*nvir + a];
695	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
696	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[b]);
697	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
698	- contrib1*contrib2)/delta_ijab;
699	}
700	}
701	else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
702	contrib1 = trans_int4[bnvir+a]trans_int4[b*nvir+a];
703	if (j == b) {
704	/* to compute the energy contribution from an integral of the *
705	* type (is1\|s1a) (i=d.o., s1=s.o., a=unocc.), we need the *
706	* (is\|sa) integrals for all s=s.o.; these integrals are *
707	* therefore stored here in the array mo_int_do_so_vir, and *
708	* the energy contribution is computed after exiting the loop *
709	* over i-batches (pass) */
710	mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
711	(i_offset+i-nsocc + ndocc*b)] =
712	trans_int4[b*nvir + a];
713	ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
714	ecorr_opt1 += 1.5*contrib1/delta_ijab;
715	ecorr_zapt2_contrib += contrib1/
716	(delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
717	+ 0.5*contrib1/delta_ijab;
718	}
719	else {
720	ecorr_opt2 += contrib1/
721	(delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
722	ecorr_opt1 += contrib1/delta_ijab;
723	}
724	}
725	else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
726	if (a == b) {
727	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
728	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
729	ecorr_opt1 += contrib1/delta_ijab;
730	}
731	else {
732	contrib1 = trans_int4[a*nvir + b];
733	contrib2 = trans_int4[b*nvir + a];
734	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
735	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[j]);
736	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
737	- contrib1*contrib2)/delta_ijab;
738	}
739	}
740	} /* exit b loop */
741	} /* exit a loop */
742	tim_exit("compute ecorr");
743	} /* exit j loop */
744	} /* exit i loop */
745
746	if (nsocc == 0 && npass > 1 && pass < npass - 1) {
747	double passe = ecorr_opt2;
748	msg_->sum(passe);
749	ExEnv::out0() << indent
750	<< "Partial correlation energy for pass " << pass << ":" << endl;
751	ExEnv::out0() << indent
752	<< scprintf(" restart_ecorr = %14.10f", passe)
753	<< endl;
754	ExEnv::out0() << indent
755	<< scprintf(" restart_orbital_v2 = %d", ((pass+1) * ni))
756	<< endl;
757	}
758	} /* exit loop over i-batches (pass) */
759
760
761
762	/* compute contribution from excitations of the type is1 -> s1a where *
763	* i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
764	* where i and a have the same spin, contribute to this term; *
765	* (Brillouin's theorem not satisfied for ROHF wave functions); *
766	* do this only if nsocc > 0 since gop1 will fail otherwise */
767
768	tim_enter("compute ecorr");
769
770	if (nsocc > 0) {
771	tim_enter("global sum mo_int_do_so_vir");
772	msg_->sum(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc),mo_int_tmp);
773	tim_exit("global sum mo_int_do_so_vir");
774	}
775
776	/* add extra contribution for triplet and higher spin multiplicities *
777	* contribution = sum over s1 and s2<s1 of (is1\|s1a)(is2\|s2a)/delta /
778
779	if (me == 0 && nsocc) {
780	for (i=0; i<ndocc; i++) {
781
782	for (a=0; a<nvir-nsocc; a++) {
783	delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
784
785	for (s1=0; s1<nsocc; s1++) {
786
787	for (s2=0; s2<s1; s2++) {
788	contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs1)]*
789	mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs2)]/delta;
790	ecorr_opt2 += contrib1;
791	ecorr_opt1 += contrib1;
792	}
793	}
794	} /* exit a loop */
795	} /* exit i loop */
796	}
797
798	tim_exit("compute ecorr");
799
800	ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
801	ecorr_opt2 += ecorr_opt2_contrib;
802	msg_->sum(ecorr_opt1);
803	msg_->sum(ecorr_opt2);
804	msg_->sum(ecorr_zapt2);
805	msg_->sum(aoint_computed);
806
807	escf = reference_->energy();
808	hf_energy_ = escf;
809
810	if (me == 0) {
811	eopt2 = escf + ecorr_opt2;
812	eopt1 = escf + ecorr_opt1;
813	ezapt2 = escf + ecorr_zapt2;
814
815	/* print out various energies etc.*/
816
817	ExEnv::out0() << indent
818	<< "Number of shell quartets for which AO integrals would" << endl
819	<< indent << "have been computed without bounds checking: "
820	<< npassnshellnshell(nshell+1)(nshell+1)/2 << endl;
821	ExEnv::out0() << indent
822	<< "Number of shell quartets for which AO integrals" << endl
823	<< indent << "were computed: " << aoint_computed << endl;
824
825	ExEnv::out0() << indent
826	<< scprintf("ROHF energy [au]: %17.12lf\n", escf);
827	ExEnv::out0() << indent
828	<< scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
829	ExEnv::out0() << indent
830	<< scprintf("OPT2 second order correction [au]: %17.12lf\n",
831	ecorr_opt2);
832	ExEnv::out0() << indent
833	<< scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
834	ExEnv::out0() << indent
835	<< scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
836	ecorr_zapt2);
837	ExEnv::out0() << indent
838	<< scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
839	ExEnv::out0().flush();
840	}
841
842	msg_->bcast(eopt1);
843	msg_->bcast(eopt2);
844	msg_->bcast(ezapt2);
845
846	if (method_ && !strcmp(method_,"opt1")) {
847	set_energy(eopt1);
848	set_actual_value_accuracy(reference_->actual_value_accuracy()
849	*ref_to_mp2_acc);
850	}
851	else if (method_ && !strcmp(method_,"opt2")) {
852	set_energy(eopt2);
853	set_actual_value_accuracy(reference_->actual_value_accuracy()
854	*ref_to_mp2_acc);
855	}
856	else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
857	set_energy(ezapt2);
858	set_actual_value_accuracy(reference_->actual_value_accuracy()
859	*ref_to_mp2_acc);
860	}
861	else {
862	if (!(!method_ \|\| !strcmp(method_,"zapt"))) {
863	ExEnv::out0() << indent
864	<< "MBPT2: bad method: " << method_ << ", using zapt" << endl;
865	}
866	set_energy(ezapt2);
867	set_actual_value_accuracy(reference_->actual_value_accuracy()
868	*ref_to_mp2_acc);
869	}
870
871	free(trans_int1);
872	free(trans_int2);
873	free(trans_int3);
874	free(trans_int4);
875	free(trans_int4_tmp);
876	if (nsocc) free(socc_sum);
877	if (nsocc) free(socc_sum_tmp);
878	if (nsocc) free(mo_int_do_so_vir);
879	if (nsocc) free(mo_int_tmp);
880	free(evals_open);
881	free(myshells);
882	free(shellsize);
883	free(sorted_shells);
884	free(nbf);
885	free(proc);
886
887	delete[] scf_vector;
888	delete[] scf_vector_dat;
889
890	}
891
892
893	/* Do a quick sort (larger -> smaller) of the integer data in item *
894	* by the integer indices in index; *
895	* data in item remain unchanged */
896
897	static void
898	iquicksort(int item,int index,int n)
899	{
900	int i;
901	if (n<=0) return;
902	for (i=0; i<n; i++) {
903	index[i] = i;
904	}
905	iqs(item,index,0,n-1);
906	}
907
908	static void
909	iqs(int item,int index,int left,int right)
910	{
911	register int i,j;
912	int x,y;
913
914	i=left; j=right;
915	x=item[index[(left+right)/2]];
916
917	do {
918	while(item[index[i]]>x && i<right) i++;
919	while(x>item[index[j]] && j>left) j--;
920
921	if (i<=j) {
922	if (item[index[i]] != item[index[j]]) {
923	y=index[i];
924	index[i]=index[j];
925	index[j]=y;
926	}
927	i++; j--;
928	}
929	} while(i<=j);
930
931	if (left<j) iqs(item,index,left,j);
932	if (i<right) iqs(item,index,i,right);
933	}
934
935	distsize_t
936	MBPT2::compute_v2_memory(int ni,
937	int nfuncmax, int nbfme, int nshell,
938	int ndocc, int nsocc, int nvir, int nproc
939	)
940	{
941	distsize_t result = 0;
942	distsize_t dsize = sizeof(double);
943	distsize_t isize = sizeof(int);
944	int dim_ij = noccni - (ni(ni - 1))/2;
945
946	result += nfuncmaxnfuncmax(distsize_t)nbasis(distsize_t)nidsize;
947	result += nvirnidsize;
948	result += nbfmenvir(distsize_t)dim_ij*dsize;
949	result += nvirnvirdsize;
950	result += nvirnvirdsize;
951	result += nsocc*dsize;
952	result += nsocc*dsize;
953	result += ndoccnsocc(distsize_t)(nvir-nsocc)*dsize;
954	result += ndoccnsocc(distsize_t)(nvir-nsocc)*dsize;
955	result += (noso+nsocc-nfzc-nfzv)*dsize;
956	result += nshell*isize;
957	result += nshell*isize;
958	result += nproc*isize;
959	result += nshell*isize;
960
961	return result;
962	}
963
964	////////////////////////////////////////////////////////////////////////////
965
966	// Local Variables:
967	// mode: c++
968	// c-file-style: "CLJ-CONDENSED"
969	// End:

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv2.cc@ aae63a

Download in other formats: