source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv2.cc@ aae63a

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since aae63a was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 32.6 KB
Line 
1//
2// hsosv2.cc
3//
4// Copyright (C) 1996 Limit Point Systems, Inc.
5//
6// Author: Ida Nielsen <ida@kemi.aau.dk>
7// Maintainer: LPS
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28#include <iostream>
29#include <math.h>
30
31#include <util/misc/formio.h>
32#include <util/misc/timer.h>
33#include <util/group/message.h>
34#include <math/scmat/matrix.h>
35#include <chemistry/molecule/molecule.h>
36#include <chemistry/qc/mbpt/mbpt.h>
37#include <chemistry/qc/mbpt/bzerofast.h>
38
39using namespace std;
40using namespace sc;
41
42static void iqs(int *item,int *index,int left,int right);
43static void iquicksort(int *item,int *index,int n);
44
45void
46MBPT2::compute_hsos_v2()
47{
48 int i, j, k;
49 int s1, s2;
50 int a, b;
51 int isocc, asocc; /* indices running over singly occupied orbitals */
52 int nfuncmax = basis()->max_nfunction_in_shell();
53 int nvir;
54 int nshell;
55 int nocc=0,ndocc=0,nsocc=0;
56 int i_offset;
57 int npass, pass;
58 int ni;
59 int np, nq, nr, ns;
60 int P, Q, R, S;
61 int p, q, r, s;
62 int bf1, bf2, bf3, bf4;
63 int index;
64 int compute_index;
65 int col_index;
66 int tmp_index;
67 int dim_ij;
68 int docc_index, socc_index, vir_index;
69 int me;
70 int nproc;
71 int rest;
72 int r_offset;
73 int sum;
74 int min;
75 int iproc;
76 int nRshell;
77 int imyshell;
78 int *myshells; /* the R indices processed by node me */
79 int *shellsize; /* size of each shell */
80 int *sorted_shells; /* sorted shell indices: large shells->small shells */
81 int *nbf; /* number of basis functions processed by each node */
82 int *proc; /* element k: processor which will process shell k */
83 int aoint_computed = 0;
84 double *evals_open; /* reordered scf eigenvalues */
85 const double *intbuf; /* 2-electron AO integral buffer */
86 double *trans_int1; /* partially transformed integrals */
87 double *trans_int2; /* partially transformed integrals */
88 double *trans_int3; /* partially transformed integrals */
89 double *trans_int4; /* fully transformed integrals */
90 double *trans_int4_tmp; /* scratch array */
91 double *mo_int_do_so_vir=0;/*mo integral (is|sa); i:d.o.,s:s.o.,a:vir */
92 double *mo_int_tmp=0; /* scratch array used in global summations */
93 double *socc_sum=0; /* sum of 2-el integrals involving only s.o.'s */
94 double *socc_sum_tmp=0;/* scratch array */
95 double *iqrs, *iprs;
96 double *iars_ptr;
97 double iars;
98 double iajr;
99 double *iajr_ptr;
100 double *iajb;
101 double pqrs;
102 double *c_qa;
103 double *c_rb, *c_pi, *c_qi, *c_sj;
104 double delta_ijab;
105 double delta;
106 double contrib1, contrib2;
107 double ecorr_opt2=0,ecorr_opt1=0;
108 double ecorr_zapt2;
109 double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
110 double escf;
111 double eopt2,eopt1,ezapt2;
112 double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
113
114 me = msg_->me();
115
116 ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v2)" << endl;
117
118 tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
119
120 nproc = msg_->n();
121
122 ndocc = nsocc = 0;
123 const double epsilon = 1.0e-4;
124 for (i=0; i<oso_dimension()->n(); i++) {
125 if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
126 else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
127 }
128
129 /* do a few preliminary tests to make sure the desired calculation *
130 * can be done (and appears to be meaningful!) */
131
132 if (ndocc == 0 && nsocc == 0) {
133 ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
134 abort();
135 }
136
137 if (nfzc > ndocc) {
138 ExEnv::err0()
139 << "The number of frozen core orbitals exceeds the number" << endl
140 << "of doubly occupied orbitals; program exiting" << endl;
141 abort();
142 }
143
144 if (nfzv > noso - ndocc - nsocc) {
145 ExEnv::err0()
146 << "The number of frozen virtual orbitals exceeds the number" << endl
147 << "of unoccupied orbitals; program exiting" << endl;
148 abort();
149 }
150
151 ndocc = ndocc - nfzc;
152 /* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
153 nvir = noso - ndocc - nfzc - nfzv;
154 /* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
155 nocc = ndocc + nsocc;
156 nshell = basis()->nshell();
157
158 /* allocate storage for some arrays used for keeping track of which R *
159 * indices are processed by each node */
160 shellsize = (int*) malloc(nshell*sizeof(int));
161 sorted_shells = (int*) malloc(nshell*sizeof(int));
162 nbf = (int*) malloc(nproc*sizeof(int));
163 proc = (int*) malloc(nshell*sizeof(int));
164
165
166 /******************************************************
167 * Begin distributing R shells between nodes so each *
168 * node gets ca. the same number of r basis functions *
169 ******************************************************/
170
171 /* compute size of each shell */
172 for (i=0; i<nshell; i++) {
173 shellsize[i] = basis()->shell(i).nfunction();
174 }
175
176 /* do an index sort (large -> small) of shellsize to form sorted_shells */
177 iquicksort(shellsize,sorted_shells,nshell);
178
179 /* initialize nbf */
180 for (i=0; i<nproc; i++) nbf[i] = 0;
181
182 for (i=0; i<nshell; i++) {
183 min = nbf[0];
184 iproc = 0;
185 for (j=1; j<nproc; j++) {
186 if (nbf[j] < min) {
187 iproc = j;
188 min = nbf[j];
189 }
190 }
191 proc[sorted_shells[i]] = iproc;
192 nbf[iproc] += shellsize[sorted_shells[i]];
193 }
194 if (me == 0) {
195 ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
196 for (i=0; i<nproc; i++) {
197 if (i%12 == 0) ExEnv::out0() << indent;
198 ExEnv::out0() << scprintf(" %4i",nbf[i]);
199 if ((i+1)%12 == 0) ExEnv::out0() << endl;
200 }
201 ExEnv::out0() << endl;
202 }
203
204 /* determine which shells are to be processed by node me */
205 nRshell = 0;
206 for (i=0; i<nshell; i++) {
207 if (proc[i] == me) nRshell++;
208 }
209 myshells = (int*) malloc(nRshell*sizeof(int));
210 imyshell = 0;
211 for (i=0; i<nshell; i++) {
212 if (proc[i] == me) {
213 myshells[imyshell] = i;
214 imyshell++;
215 }
216 }
217
218 /************************************************
219 * End of distribution of R shells between nodes *
220 ************************************************/
221
222
223 /* compute batch size ni for opt2 loops; *
224 * need to store the following arrays of type double : trans_int1-4, *
225 * trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp, *
226 * mo_int_do_so_vir, mo_int_tmp, *
227 * and the following arrays of type int: myshells, shellsize, *
228 * sorted_shells, nbf, and proc */
229
230 size_t memused = 0;
231 ni = 0;
232 for (i=1; i<=nocc; i++) {
233 distsize_t tmpmem = compute_v2_memory(i,
234 nfuncmax, nbf[me], nshell,
235 ndocc, nsocc, nvir, nproc);
236 if (tmpmem > mem_alloc) break;
237 ni = i;
238 memused = distsize_to_size(tmpmem);
239 }
240
241 size_t mem_remaining = mem_alloc - memused;
242
243 /* set ni equal to the smallest batch size for any node */
244 msg_->min(ni);
245 msg_->bcast(ni);
246
247 int nbfmax = nbf[me];
248 msg_->max(nbfmax);
249
250 if (me == 0) {
251 ExEnv::out0() << indent << " nproc nbasis nshell nfuncmax"
252 " ndocc nsocc nvir nfzc nfzv" << endl;
253 ExEnv::out0() << indent << scprintf(" %-4i %-5i %-4i %-3i"
254 " %-3i %-3i %-3i %-3i %-3i\n",
255 nproc,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
256 }
257
258
259 ExEnv::out0() << indent
260 << "Memory available per node: " << mem_alloc << " Bytes"
261 << endl;
262 ExEnv::out0() << indent
263 << "Total memory used per node: " << memused << " Bytes"
264 << endl;
265 ExEnv::out0() << indent
266 << "Memory required for one pass: "
267 << compute_v2_memory(nocc,
268 nfuncmax, nbfmax, nshell,
269 ndocc, nsocc, nvir, nproc)
270 << " Bytes"
271 << endl;
272 ExEnv::out0() << indent
273 << "Minimum memory required: "
274 << compute_v2_memory(1,
275 nfuncmax, nbfmax, nshell,
276 ndocc, nsocc, nvir, nproc)
277 << " Bytes"
278 << endl;
279 ExEnv::out0() << indent
280 << "Batch size: " << ni
281 << endl;
282
283 if (ni < nsocc) {
284 ExEnv::err0() << "Not enough memory allocated" << endl;
285 abort();
286 }
287
288 if (ni < 1) { /* this applies only to a closed shell case */
289 ExEnv::err0() << "Not enough memory allocated" << endl;
290 abort();
291 }
292
293 if (nocc == ni) {
294 npass = 1;
295 rest = 0;
296 }
297 else {
298 rest = nocc%ni;
299 npass = (nocc - rest)/ni + 1;
300 if (rest == 0) npass--;
301 }
302
303 ExEnv::out0() << indent
304 << "npass = " << npass
305 << " rest = " << rest
306 << endl;
307
308 /* the scf vector might be distributed between the nodes, but for OPT2 *
309 * each node needs its own copy of the vector; *
310 * therefore, put a copy of the scf vector on each node; *
311 * while doing this, duplicate columns corresponding to singly *
312 * occupied orbitals and order columns as [socc docc socc unocc] */
313 /* also rearrange scf eigenvalues as [socc docc socc unocc] *
314 * want socc first to get the socc's in the first batch *
315 * (need socc's to compute energy denominators - see *
316 * socc_sum comment below) */
317
318 evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));
319
320 RefDiagSCMatrix occ;
321 RefDiagSCMatrix evals;
322 RefSCMatrix Scf_Vec;
323 eigen(evals, Scf_Vec, occ);
324
325 if (debug_) {
326 evals.print("eigenvalues");
327 Scf_Vec.print("eigenvectors");
328 }
329
330 double *scf_vectort_dat = new double[nbasis*noso];
331 Scf_Vec->convert(scf_vectort_dat);
332
333 double** scf_vectort = new double*[nocc + nvir];
334
335 int idoc = 0, ivir = 0, isoc = 0;
336 for (i=nfzc; i<noso-nfzv; i++) {
337 if (occ(i) >= 2.0 - epsilon) {
338 evals_open[idoc+nsocc] = evals(i);
339 scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
340 idoc++;
341 }
342 else if (occ(i) >= 1.0 - epsilon) {
343 evals_open[isoc] = evals(i);
344 scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
345 evals_open[isoc+nocc] = evals(i);
346 scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
347 isoc++;
348 }
349 else {
350 if (ivir < nvir) {
351 evals_open[ivir+nocc+nsocc] = evals(i);
352 scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
353 }
354 ivir++;
355 }
356 }
357 // need the transpose of the vector
358 double **scf_vector = new double*[nbasis];
359 double *scf_vector_dat = new double[(nocc+nvir)*nbasis];
360 for (i=0; i<nbasis; i++) {
361 scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
362 for (j=0; j<nocc+nvir; j++) {
363 scf_vector[i][j] = scf_vectort[j][i];
364 }
365 }
366 delete[] scf_vectort;
367 delete[] scf_vectort_dat;
368
369 /* allocate storage for various arrays */
370
371 dim_ij = nocc*ni - (ni*(ni - 1))/2;
372
373 trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
374 trans_int2 = (double*) malloc(nvir*ni*sizeof(double));
375 trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double));
376 trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));
377 trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double));
378 if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));
379 if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double));
380 if (nsocc) mo_int_do_so_vir =
381 (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
382 if (nsocc) mo_int_tmp =
383 (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
384
385 if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));
386
387
388 // create the integrals object
389 integral()->set_storage(mem_remaining);
390 tbint_ = integral()->electron_repulsion();
391 intbuf = tbint_->buffer();
392
393/**************************************************************************
394 * begin opt2 loops *
395 **************************************************************************/
396
397
398 for (pass=0; pass<npass; pass++) {
399 i_offset = pass*ni;
400 if ((pass == npass - 1) && (rest != 0)) ni = rest;
401
402 r_offset = 0;
403 bzerofast(trans_int3,nbf[me]*nvir*dim_ij);
404
405 tim_enter("RS loop");
406
407 for (imyshell=0; imyshell<nRshell; imyshell++) {
408
409 R = myshells[imyshell];
410 nr = basis()->shell(R).nfunction();
411
412 for (S = 0; S < nshell; S++) {
413 ns = basis()->shell(S).nfunction();
414 tim_enter("bzerofast trans_int1");
415 bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);
416 tim_exit("bzerofast trans_int1");
417
418 tim_enter("PQ loop");
419
420 for (P = 0; P < nshell; P++) {
421 np = basis()->shell(P).nfunction();
422
423 for (Q = 0; Q <= P; Q++) {
424 if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
425 continue; /* skip ereps less than tol */
426 }
427
428 aoint_computed++;
429
430 nq = basis()->shell(Q).nfunction();
431
432 tim_enter("erep");
433 tbint_->compute_shell(P,Q,R,S);
434 tim_exit("erep");
435
436 tim_enter("1. quart. tr.");
437
438 index = 0;
439
440 for (bf1 = 0; bf1 < np; bf1++) {
441 p = basis()->shell_to_function(P) + bf1;
442
443 for (bf2 = 0; bf2 < nq; bf2++) {
444 q = basis()->shell_to_function(Q) + bf2;
445 if (q > p) {
446 /* if q > p: want to skip the loops over bf3-4 */
447 /* and larger bf2 values, so increment bf1 by 1 */
448 /* ("break") and adjust the value of index */
449 index = (bf1 + 1) * nq * nr * ns;
450 break;
451 }
452
453 for (bf3 = 0; bf3 < nr; bf3++) {
454
455 for (bf4 = 0; bf4 < ns; bf4++,index++) {
456
457 if (fabs(intbuf[index]) > 1.0e-15) {
458 pqrs = intbuf[index];
459
460 iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
461 iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni];
462
463 if (p == q) pqrs *= 0.5;
464 col_index = i_offset;
465 c_pi = &scf_vector[p][col_index];
466 c_qi = &scf_vector[q][col_index];
467
468 for (i=ni; i; i--) {
469 *iqrs++ += pqrs * *c_pi++;
470 *iprs++ += pqrs * *c_qi++;
471 }
472 }
473 } /* exit bf4 loop */
474 } /* exit bf3 loop */
475 } /* exit bf2 loop */
476 } /* exit bf1 loop */
477 tim_exit("1. quart. tr.");
478 } /* exit Q loop */
479 } /* exit P loop */
480 tim_exit("PQ loop");
481
482 /* begin second and third quarter transformations */
483
484 for (bf3 = 0; bf3 < nr; bf3++) {
485 r = r_offset + bf3;
486
487 for (bf4 = 0; bf4 < ns; bf4++) {
488 s = basis()->shell_to_function(S) + bf4;
489
490 tim_enter("bzerofast trans_int2");
491 bzerofast(trans_int2,nvir*ni);
492 tim_exit("bzerofast trans_int2");
493
494 tim_enter("2. quart. tr.");
495
496 for (q = 0; q < nbasis; q++) {
497 iars_ptr = trans_int2;
498 iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
499 c_qa = &scf_vector[q][nocc];
500
501 for (a = 0; a < nvir; a++) {
502
503 for (i=ni; i; i--) {
504 *iars_ptr++ += *c_qa * *iqrs++;
505 }
506
507 iqrs -= ni;
508 c_qa++;
509 }
510 } /* exit q loop */
511 tim_exit("2. quart. tr.");
512
513 /* begin third quarter transformation */
514
515 tim_enter("3. quart. tr.");
516
517 for (i=0; i<ni; i++) {
518 tmp_index = i*(i+1)/2 + i*i_offset;
519
520 for (a=0; a<nvir; a++) {
521 iars = trans_int2[a*ni + i];
522 c_sj = scf_vector[s];
523 iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)];
524
525 for (j=0; j<=i+i_offset; j++) {
526 *iajr_ptr++ += *c_sj++ * iars;
527 }
528 }
529 } /* exit i loop */
530 tim_exit("3. quart. tr.");
531
532 } /* exit bf4 loop */
533 } /* exit bf3 loop */
534
535 } /* exit S loop */
536 r_offset += nr;
537 } /* exit R loop */
538 tim_exit("RS loop");
539
540
541 /* begin fourth quarter transformation; *
542 * first tansform integrals with only s.o. indices; *
543 * these integrals are needed to compute the denominators *
544 * in the various terms contributing to the correlation energy *
545 * and must all be computed in the first pass; *
546 * the integrals are summed into the array socc_sum: *
547 * socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc) *
548 * (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
549 * the individual integrals are not saved here, only the sums are kept */
550
551
552 if (pass == 0) {
553 tim_enter("4. quart. tr.");
554 if (nsocc) bzerofast(socc_sum,nsocc);
555 for (isocc=0; isocc<nsocc; isocc++) {
556
557 for (index=0; index<nbf[me]; index++) {
558 i = 0;
559 sum = basis()->shell(myshells[i]).nfunction();
560 while (sum <= index) {
561 i++;
562 sum += basis()->shell(myshells[i]).nfunction();
563 }
564 sum -= basis()->shell(myshells[i]).nfunction();
565 r = basis()->shell_to_function(myshells[i]) + index - sum;
566
567 for (asocc=0; asocc<nsocc; asocc++) {
568 socc_sum[isocc] += scf_vector[r][nocc+asocc]*
569 trans_int3[isocc*(isocc+1)/2 + isocc*i_offset
570 + isocc + dim_ij*(asocc + nvir*index)];
571 }
572 }
573 } /* exit isocc loop */
574
575 tim_exit("4. quart. tr.");
576
577 /* sum socc_sum contributions from each node (only if nsocc > 0 *
578 * since gop1 will fail if nsocc = 0) */
579 if (nsocc > 0) {
580 tim_enter("global sum socc_sum");
581 msg_->sum(socc_sum,nsocc,socc_sum_tmp);
582 tim_exit("global sum socc_sum");
583 }
584
585 }
586
587 /* now we have all the sums of integrals involving s.o.'s (socc_sum); *
588 * begin fourth quarter transformation for all integrals (including *
589 * integrals with only s.o. indices); use restriction j <= (i_offset+i) *
590 * to save flops */
591
592 compute_index = 0;
593
594 for (i=0; i<ni; i++) {
595
596 for (j=0; j <= (i_offset+i); j++) {
597
598 tim_enter("4. quart. tr.");
599
600 bzerofast(trans_int4,nvir*nvir);
601
602 for (index=0; index<nbf[me]; index++) {
603 k = 0;
604 sum = basis()->shell(myshells[k]).nfunction();
605 while (sum <= index) {
606 k++;
607 sum += basis()->shell(myshells[k]).nfunction();
608 }
609 sum -= basis()->shell(myshells[k]).nfunction();
610 r = basis()->shell_to_function(myshells[k]) + index - sum;
611
612 for (a=0; a<nvir; a++) {
613 iajb = &trans_int4[a*nvir];
614 iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)];
615 c_rb = &scf_vector[r][nocc];
616
617 for (b=0; b<nvir; b++) {
618 *iajb++ += *c_rb++ * iajr;
619 }
620 }
621 }
622
623 tim_exit("4. quart. tr.");
624
625 tim_enter("global sum trans_int4");
626 msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
627 tim_exit("global sum trans_int4");
628
629 /* we now have the fully transformed integrals (ia|jb) *
630 * for one i, one j (j <= i_offset+i), and all a and b; *
631 * compute contribution to the OPT1 and OPT2 correlation *
632 * energies; use restriction b <= a to save flops */
633
634 tim_enter("compute ecorr");
635
636 for (a=0; a<nvir; a++) {
637 for (b=0; b<=a; b++) {
638 compute_index++;
639 if (compute_index%nproc != me) continue;
640
641 docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
642 + (j >= nsocc && j < nocc);
643 socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
644 vir_index = (a >= nsocc) + (b >= nsocc);
645
646 if (socc_index >= 3) continue; /* skip to next b value */
647
648 delta_ijab = evals_open[i_offset+i] + evals_open[j]
649 - evals_open[nocc+a] - evals_open[nocc+b];
650
651 /* determine integral type and compute energy contribution */
652 if (docc_index == 2 && vir_index == 2) {
653 if (i_offset+i == j && a == b) {
654 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
655 ecorr_opt2 += contrib1/delta_ijab;
656 ecorr_opt1 += contrib1/delta_ijab;
657 }
658 else if (i_offset+i == j || a == b) {
659 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
660 ecorr_opt2 += 2*contrib1/delta_ijab;
661 ecorr_opt1 += 2*contrib1/delta_ijab;
662 }
663 else {
664 contrib1 = trans_int4[a*nvir + b];
665 contrib2 = trans_int4[b*nvir + a];
666 ecorr_opt2 += 4*(contrib1*contrib1 + contrib2*contrib2
667 - contrib1*contrib2)/delta_ijab;
668 ecorr_opt1 += 4*(contrib1*contrib1 + contrib2*contrib2
669 - contrib1*contrib2)/delta_ijab;
670 }
671 }
672 else if (docc_index == 2 && socc_index == 2) {
673 contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
674 (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
675 ecorr_opt2 += contrib1/
676 (delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
677 ecorr_opt1 += contrib1/delta_ijab;
678 }
679 else if (socc_index == 2 && vir_index == 2) {
680 contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
681 (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
682 ecorr_opt2 += contrib1/
683 (delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
684 ecorr_opt1 += contrib1/delta_ijab;
685 }
686 else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
687 if (i_offset+i == j) {
688 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
689 ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
690 ecorr_opt1 += contrib1/delta_ijab;
691 }
692 else {
693 contrib1 = trans_int4[a*nvir + b];
694 contrib2 = trans_int4[b*nvir + a];
695 ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
696 - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[b]);
697 ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
698 - contrib1*contrib2)/delta_ijab;
699 }
700 }
701 else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
702 contrib1 = trans_int4[b*nvir+a]*trans_int4[b*nvir+a];
703 if (j == b) {
704 /* to compute the energy contribution from an integral of the *
705 * type (is1|s1a) (i=d.o., s1=s.o., a=unocc.), we need the *
706 * (is|sa) integrals for all s=s.o.; these integrals are *
707 * therefore stored here in the array mo_int_do_so_vir, and *
708 * the energy contribution is computed after exiting the loop *
709 * over i-batches (pass) */
710 mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
711 (i_offset+i-nsocc + ndocc*b)] =
712 trans_int4[b*nvir + a];
713 ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
714 ecorr_opt1 += 1.5*contrib1/delta_ijab;
715 ecorr_zapt2_contrib += contrib1/
716 (delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
717 + 0.5*contrib1/delta_ijab;
718 }
719 else {
720 ecorr_opt2 += contrib1/
721 (delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
722 ecorr_opt1 += contrib1/delta_ijab;
723 }
724 }
725 else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
726 if (a == b) {
727 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
728 ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
729 ecorr_opt1 += contrib1/delta_ijab;
730 }
731 else {
732 contrib1 = trans_int4[a*nvir + b];
733 contrib2 = trans_int4[b*nvir + a];
734 ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
735 - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[j]);
736 ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
737 - contrib1*contrib2)/delta_ijab;
738 }
739 }
740 } /* exit b loop */
741 } /* exit a loop */
742 tim_exit("compute ecorr");
743 } /* exit j loop */
744 } /* exit i loop */
745
746 if (nsocc == 0 && npass > 1 && pass < npass - 1) {
747 double passe = ecorr_opt2;
748 msg_->sum(passe);
749 ExEnv::out0() << indent
750 << "Partial correlation energy for pass " << pass << ":" << endl;
751 ExEnv::out0() << indent
752 << scprintf(" restart_ecorr = %14.10f", passe)
753 << endl;
754 ExEnv::out0() << indent
755 << scprintf(" restart_orbital_v2 = %d", ((pass+1) * ni))
756 << endl;
757 }
758 } /* exit loop over i-batches (pass) */
759
760
761
762 /* compute contribution from excitations of the type is1 -> s1a where *
763 * i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
764 * where i and a have the same spin, contribute to this term; *
765 * (Brillouin's theorem not satisfied for ROHF wave functions); *
766 * do this only if nsocc > 0 since gop1 will fail otherwise */
767
768 tim_enter("compute ecorr");
769
770 if (nsocc > 0) {
771 tim_enter("global sum mo_int_do_so_vir");
772 msg_->sum(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc),mo_int_tmp);
773 tim_exit("global sum mo_int_do_so_vir");
774 }
775
776 /* add extra contribution for triplet and higher spin multiplicities *
777 * contribution = sum over s1 and s2<s1 of (is1|s1a)*(is2|s2a)/delta */
778
779 if (me == 0 && nsocc) {
780 for (i=0; i<ndocc; i++) {
781
782 for (a=0; a<nvir-nsocc; a++) {
783 delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
784
785 for (s1=0; s1<nsocc; s1++) {
786
787 for (s2=0; s2<s1; s2++) {
788 contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s1)]*
789 mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s2)]/delta;
790 ecorr_opt2 += contrib1;
791 ecorr_opt1 += contrib1;
792 }
793 }
794 } /* exit a loop */
795 } /* exit i loop */
796 }
797
798 tim_exit("compute ecorr");
799
800 ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
801 ecorr_opt2 += ecorr_opt2_contrib;
802 msg_->sum(ecorr_opt1);
803 msg_->sum(ecorr_opt2);
804 msg_->sum(ecorr_zapt2);
805 msg_->sum(aoint_computed);
806
807 escf = reference_->energy();
808 hf_energy_ = escf;
809
810 if (me == 0) {
811 eopt2 = escf + ecorr_opt2;
812 eopt1 = escf + ecorr_opt1;
813 ezapt2 = escf + ecorr_zapt2;
814
815 /* print out various energies etc.*/
816
817 ExEnv::out0() << indent
818 << "Number of shell quartets for which AO integrals would" << endl
819 << indent << "have been computed without bounds checking: "
820 << npass*nshell*nshell*(nshell+1)*(nshell+1)/2 << endl;
821 ExEnv::out0() << indent
822 << "Number of shell quartets for which AO integrals" << endl
823 << indent << "were computed: " << aoint_computed << endl;
824
825 ExEnv::out0() << indent
826 << scprintf("ROHF energy [au]: %17.12lf\n", escf);
827 ExEnv::out0() << indent
828 << scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
829 ExEnv::out0() << indent
830 << scprintf("OPT2 second order correction [au]: %17.12lf\n",
831 ecorr_opt2);
832 ExEnv::out0() << indent
833 << scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
834 ExEnv::out0() << indent
835 << scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
836 ecorr_zapt2);
837 ExEnv::out0() << indent
838 << scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
839 ExEnv::out0().flush();
840 }
841
842 msg_->bcast(eopt1);
843 msg_->bcast(eopt2);
844 msg_->bcast(ezapt2);
845
846 if (method_ && !strcmp(method_,"opt1")) {
847 set_energy(eopt1);
848 set_actual_value_accuracy(reference_->actual_value_accuracy()
849 *ref_to_mp2_acc);
850 }
851 else if (method_ && !strcmp(method_,"opt2")) {
852 set_energy(eopt2);
853 set_actual_value_accuracy(reference_->actual_value_accuracy()
854 *ref_to_mp2_acc);
855 }
856 else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
857 set_energy(ezapt2);
858 set_actual_value_accuracy(reference_->actual_value_accuracy()
859 *ref_to_mp2_acc);
860 }
861 else {
862 if (!(!method_ || !strcmp(method_,"zapt"))) {
863 ExEnv::out0() << indent
864 << "MBPT2: bad method: " << method_ << ", using zapt" << endl;
865 }
866 set_energy(ezapt2);
867 set_actual_value_accuracy(reference_->actual_value_accuracy()
868 *ref_to_mp2_acc);
869 }
870
871 free(trans_int1);
872 free(trans_int2);
873 free(trans_int3);
874 free(trans_int4);
875 free(trans_int4_tmp);
876 if (nsocc) free(socc_sum);
877 if (nsocc) free(socc_sum_tmp);
878 if (nsocc) free(mo_int_do_so_vir);
879 if (nsocc) free(mo_int_tmp);
880 free(evals_open);
881 free(myshells);
882 free(shellsize);
883 free(sorted_shells);
884 free(nbf);
885 free(proc);
886
887 delete[] scf_vector;
888 delete[] scf_vector_dat;
889
890 }
891
892
893/* Do a quick sort (larger -> smaller) of the integer data in item *
894 * by the integer indices in index; *
895 * data in item remain unchanged */
896
897static void
898iquicksort(int *item,int *index,int n)
899{
900 int i;
901 if (n<=0) return;
902 for (i=0; i<n; i++) {
903 index[i] = i;
904 }
905 iqs(item,index,0,n-1);
906 }
907
908static void
909iqs(int *item,int *index,int left,int right)
910{
911 register int i,j;
912 int x,y;
913
914 i=left; j=right;
915 x=item[index[(left+right)/2]];
916
917 do {
918 while(item[index[i]]>x && i<right) i++;
919 while(x>item[index[j]] && j>left) j--;
920
921 if (i<=j) {
922 if (item[index[i]] != item[index[j]]) {
923 y=index[i];
924 index[i]=index[j];
925 index[j]=y;
926 }
927 i++; j--;
928 }
929 } while(i<=j);
930
931 if (left<j) iqs(item,index,left,j);
932 if (i<right) iqs(item,index,i,right);
933 }
934
935distsize_t
936MBPT2::compute_v2_memory(int ni,
937 int nfuncmax, int nbfme, int nshell,
938 int ndocc, int nsocc, int nvir, int nproc
939 )
940{
941 distsize_t result = 0;
942 distsize_t dsize = sizeof(double);
943 distsize_t isize = sizeof(int);
944 int dim_ij = nocc*ni - (ni*(ni - 1))/2;
945
946 result += nfuncmax*nfuncmax*(distsize_t)nbasis*(distsize_t)ni*dsize;
947 result += nvir*ni*dsize;
948 result += nbfme*nvir*(distsize_t)dim_ij*dsize;
949 result += nvir*nvir*dsize;
950 result += nvir*nvir*dsize;
951 result += nsocc*dsize;
952 result += nsocc*dsize;
953 result += ndocc*nsocc*(distsize_t)(nvir-nsocc)*dsize;
954 result += ndocc*nsocc*(distsize_t)(nvir-nsocc)*dsize;
955 result += (noso+nsocc-nfzc-nfzv)*dsize;
956 result += nshell*isize;
957 result += nshell*isize;
958 result += nproc*isize;
959 result += nshell*isize;
960
961 return result;
962}
963
964////////////////////////////////////////////////////////////////////////////
965
966// Local Variables:
967// mode: c++
968// c-file-style: "CLJ-CONDENSED"
969// End:
Note: See TracBrowser for help on using the repository browser.