1 | //
|
---|
2 | // hsosv2.cc
|
---|
3 | //
|
---|
4 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
5 | //
|
---|
6 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
7 | // Maintainer: LPS
|
---|
8 | //
|
---|
9 | // This file is part of the SC Toolkit.
|
---|
10 | //
|
---|
11 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
12 | // it under the terms of the GNU Library General Public License as published by
|
---|
13 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
14 | // any later version.
|
---|
15 | //
|
---|
16 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | // GNU Library General Public License for more details.
|
---|
20 | //
|
---|
21 | // You should have received a copy of the GNU Library General Public License
|
---|
22 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
24 | //
|
---|
25 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
26 | //
|
---|
27 |
|
---|
28 | #include <iostream>
|
---|
29 | #include <math.h>
|
---|
30 |
|
---|
31 | #include <util/misc/formio.h>
|
---|
32 | #include <util/misc/timer.h>
|
---|
33 | #include <util/group/message.h>
|
---|
34 | #include <math/scmat/matrix.h>
|
---|
35 | #include <chemistry/molecule/molecule.h>
|
---|
36 | #include <chemistry/qc/mbpt/mbpt.h>
|
---|
37 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
38 |
|
---|
39 | using namespace std;
|
---|
40 | using namespace sc;
|
---|
41 |
|
---|
42 | static void iqs(int *item,int *index,int left,int right); // NOTE(review): presumably the worker behind iquicksort acting on the range [left,right]; definition not visible here - confirm
|
---|
43 | static void iquicksort(int *item,int *index,int n); // index sort of the n entries of item into index; called below on shellsize to form sorted_shells (large -> small)
|
---|
44 |
|
---|
45 | void
|
---|
46 | MBPT2::compute_hsos_v2()
|
---|
47 | {
|
---|
48 | int i, j, k;
|
---|
49 | int s1, s2;
|
---|
50 | int a, b;
|
---|
51 | int isocc, asocc; /* indices running over singly occupied orbitals */
|
---|
52 | int nfuncmax = basis()->max_nfunction_in_shell();
|
---|
53 | int nvir;
|
---|
54 | int nshell;
|
---|
55 | int nocc=0,ndocc=0,nsocc=0;
|
---|
56 | int i_offset;
|
---|
57 | int npass, pass;
|
---|
58 | int ni;
|
---|
59 | int np, nq, nr, ns;
|
---|
60 | int P, Q, R, S;
|
---|
61 | int p, q, r, s;
|
---|
62 | int bf1, bf2, bf3, bf4;
|
---|
63 | int index;
|
---|
64 | int compute_index;
|
---|
65 | int col_index;
|
---|
66 | int tmp_index;
|
---|
67 | int dim_ij;
|
---|
68 | int docc_index, socc_index, vir_index;
|
---|
69 | int me;
|
---|
70 | int nproc;
|
---|
71 | int rest;
|
---|
72 | int r_offset;
|
---|
73 | int sum;
|
---|
74 | int min;
|
---|
75 | int iproc;
|
---|
76 | int nRshell;
|
---|
77 | int imyshell;
|
---|
78 | int *myshells; /* the R indices processed by node me */
|
---|
79 | int *shellsize; /* size of each shell */
|
---|
80 | int *sorted_shells; /* sorted shell indices: large shells->small shells */
|
---|
81 | int *nbf; /* number of basis functions processed by each node */
|
---|
82 | int *proc; /* element k: processor which will process shell k */
|
---|
83 | int aoint_computed = 0;
|
---|
84 | double *evals_open; /* reordered scf eigenvalues */
|
---|
85 | const double *intbuf; /* 2-electron AO integral buffer */
|
---|
86 | double *trans_int1; /* partially transformed integrals */
|
---|
87 | double *trans_int2; /* partially transformed integrals */
|
---|
88 | double *trans_int3; /* partially transformed integrals */
|
---|
89 | double *trans_int4; /* fully transformed integrals */
|
---|
90 | double *trans_int4_tmp; /* scratch array */
|
---|
91 | double *mo_int_do_so_vir=0;/*mo integral (is|sa); i:d.o.,s:s.o.,a:vir */
|
---|
92 | double *mo_int_tmp=0; /* scratch array used in global summations */
|
---|
93 | double *socc_sum=0; /* sum of 2-el integrals involving only s.o.'s */
|
---|
94 | double *socc_sum_tmp=0;/* scratch array */
|
---|
95 | double *iqrs, *iprs;
|
---|
96 | double *iars_ptr;
|
---|
97 | double iars;
|
---|
98 | double iajr;
|
---|
99 | double *iajr_ptr;
|
---|
100 | double *iajb;
|
---|
101 | double pqrs;
|
---|
102 | double *c_qa;
|
---|
103 | double *c_rb, *c_pi, *c_qi, *c_sj;
|
---|
104 | double delta_ijab;
|
---|
105 | double delta;
|
---|
106 | double contrib1, contrib2;
|
---|
107 | double ecorr_opt2=0,ecorr_opt1=0;
|
---|
108 | double ecorr_zapt2;
|
---|
109 | double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
|
---|
110 | double escf;
|
---|
111 | double eopt2,eopt1,ezapt2;
|
---|
112 | double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
|
---|
113 |
|
---|
114 | me = msg_->me();
|
---|
115 |
|
---|
116 | ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v2)" << endl;
|
---|
117 |
|
---|
118 | tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
|
---|
119 |
|
---|
120 | nproc = msg_->n();
|
---|
121 |
|
---|
122 | ndocc = nsocc = 0;
|
---|
123 | const double epsilon = 1.0e-4;
|
---|
124 | for (i=0; i<oso_dimension()->n(); i++) {
|
---|
125 | if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
|
---|
126 | else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
|
---|
127 | }
|
---|
128 |
|
---|
129 | /* do a few preliminary tests to make sure the desired calculation *
|
---|
130 | * can be done (and appears to be meaningful!) */
|
---|
131 |
|
---|
132 | if (ndocc == 0 && nsocc == 0) {
|
---|
133 | ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
|
---|
134 | abort();
|
---|
135 | }
|
---|
136 |
|
---|
137 | if (nfzc > ndocc) {
|
---|
138 | ExEnv::err0()
|
---|
139 | << "The number of frozen core orbitals exceeds the number" << endl
|
---|
140 | << "of doubly occupied orbitals; program exiting" << endl;
|
---|
141 | abort();
|
---|
142 | }
|
---|
143 |
|
---|
144 | if (nfzv > noso - ndocc - nsocc) {
|
---|
145 | ExEnv::err0()
|
---|
146 | << "The number of frozen virtual orbitals exceeds the number" << endl
|
---|
147 | << "of unoccupied orbitals; program exiting" << endl;
|
---|
148 | abort();
|
---|
149 | }
|
---|
150 |
|
---|
151 | ndocc = ndocc - nfzc;
|
---|
152 | /* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
|
---|
153 | nvir = noso - ndocc - nfzc - nfzv;
|
---|
154 | /* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
|
---|
155 | nocc = ndocc + nsocc;
|
---|
156 | nshell = basis()->nshell();
|
---|
157 |
|
---|
158 | /* allocate storage for some arrays used for keeping track of which R *
|
---|
159 | * indices are processed by each node */
|
---|
160 | shellsize = (int*) malloc(nshell*sizeof(int));
|
---|
161 | sorted_shells = (int*) malloc(nshell*sizeof(int));
|
---|
162 | nbf = (int*) malloc(nproc*sizeof(int));
|
---|
163 | proc = (int*) malloc(nshell*sizeof(int));
|
---|
164 |
|
---|
165 |
|
---|
166 | /******************************************************
|
---|
167 | * Begin distributing R shells between nodes so each *
|
---|
168 | * node gets ca. the same number of r basis functions *
|
---|
169 | ******************************************************/
|
---|
170 |
|
---|
171 | /* compute size of each shell */
|
---|
172 | for (i=0; i<nshell; i++) {
|
---|
173 | shellsize[i] = basis()->shell(i).nfunction();
|
---|
174 | }
|
---|
175 |
|
---|
176 | /* do an index sort (large -> small) of shellsize to form sorted_shells */
|
---|
177 | iquicksort(shellsize,sorted_shells,nshell);
|
---|
178 |
|
---|
179 | /* initialize nbf */
|
---|
180 | for (i=0; i<nproc; i++) nbf[i] = 0;
|
---|
181 |
|
---|
182 | for (i=0; i<nshell; i++) {
|
---|
183 | min = nbf[0];
|
---|
184 | iproc = 0;
|
---|
185 | for (j=1; j<nproc; j++) {
|
---|
186 | if (nbf[j] < min) {
|
---|
187 | iproc = j;
|
---|
188 | min = nbf[j];
|
---|
189 | }
|
---|
190 | }
|
---|
191 | proc[sorted_shells[i]] = iproc;
|
---|
192 | nbf[iproc] += shellsize[sorted_shells[i]];
|
---|
193 | }
|
---|
194 | if (me == 0) {
|
---|
195 | ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
|
---|
196 | for (i=0; i<nproc; i++) {
|
---|
197 | if (i%12 == 0) ExEnv::out0() << indent;
|
---|
198 | ExEnv::out0() << scprintf(" %4i",nbf[i]);
|
---|
199 | if ((i+1)%12 == 0) ExEnv::out0() << endl;
|
---|
200 | }
|
---|
201 | ExEnv::out0() << endl;
|
---|
202 | }
|
---|
203 |
|
---|
204 | /* determine which shells are to be processed by node me */
|
---|
205 | nRshell = 0;
|
---|
206 | for (i=0; i<nshell; i++) {
|
---|
207 | if (proc[i] == me) nRshell++;
|
---|
208 | }
|
---|
209 | myshells = (int*) malloc(nRshell*sizeof(int));
|
---|
210 | imyshell = 0;
|
---|
211 | for (i=0; i<nshell; i++) {
|
---|
212 | if (proc[i] == me) {
|
---|
213 | myshells[imyshell] = i;
|
---|
214 | imyshell++;
|
---|
215 | }
|
---|
216 | }
|
---|
217 |
|
---|
218 | /************************************************
|
---|
219 | * End of distribution of R shells between nodes *
|
---|
220 | ************************************************/
|
---|
221 |
|
---|
222 |
|
---|
223 | /* compute batch size ni for opt2 loops; *
|
---|
224 | * need to store the following arrays of type double : trans_int1-4, *
|
---|
225 | * trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp, *
|
---|
226 | * mo_int_do_so_vir, mo_int_tmp, *
|
---|
227 | * and the following arrays of type int: myshells, shellsize, *
|
---|
228 | * sorted_shells, nbf, and proc */
|
---|
229 |
|
---|
230 | size_t memused = 0;
|
---|
231 | ni = 0;
|
---|
232 | for (i=1; i<=nocc; i++) {
|
---|
233 | distsize_t tmpmem = compute_v2_memory(i,
|
---|
234 | nfuncmax, nbf[me], nshell,
|
---|
235 | ndocc, nsocc, nvir, nproc);
|
---|
236 | if (tmpmem > mem_alloc) break;
|
---|
237 | ni = i;
|
---|
238 | memused = distsize_to_size(tmpmem);
|
---|
239 | }
|
---|
240 |
|
---|
241 | size_t mem_remaining = mem_alloc - memused;
|
---|
242 |
|
---|
243 | /* set ni equal to the smallest batch size for any node */
|
---|
244 | msg_->min(ni);
|
---|
245 | msg_->bcast(ni);
|
---|
246 |
|
---|
247 | int nbfmax = nbf[me];
|
---|
248 | msg_->max(nbfmax);
|
---|
249 |
|
---|
250 | if (me == 0) {
|
---|
251 | ExEnv::out0() << indent << " nproc nbasis nshell nfuncmax"
|
---|
252 | " ndocc nsocc nvir nfzc nfzv" << endl;
|
---|
253 | ExEnv::out0() << indent << scprintf(" %-4i %-5i %-4i %-3i"
|
---|
254 | " %-3i %-3i %-3i %-3i %-3i\n",
|
---|
255 | nproc,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
|
---|
256 | }
|
---|
257 |
|
---|
258 |
|
---|
259 | ExEnv::out0() << indent
|
---|
260 | << "Memory available per node: " << mem_alloc << " Bytes"
|
---|
261 | << endl;
|
---|
262 | ExEnv::out0() << indent
|
---|
263 | << "Total memory used per node: " << memused << " Bytes"
|
---|
264 | << endl;
|
---|
265 | ExEnv::out0() << indent
|
---|
266 | << "Memory required for one pass: "
|
---|
267 | << compute_v2_memory(nocc,
|
---|
268 | nfuncmax, nbfmax, nshell,
|
---|
269 | ndocc, nsocc, nvir, nproc)
|
---|
270 | << " Bytes"
|
---|
271 | << endl;
|
---|
272 | ExEnv::out0() << indent
|
---|
273 | << "Minimum memory required: "
|
---|
274 | << compute_v2_memory(1,
|
---|
275 | nfuncmax, nbfmax, nshell,
|
---|
276 | ndocc, nsocc, nvir, nproc)
|
---|
277 | << " Bytes"
|
---|
278 | << endl;
|
---|
279 | ExEnv::out0() << indent
|
---|
280 | << "Batch size: " << ni
|
---|
281 | << endl;
|
---|
282 |
|
---|
283 | if (ni < nsocc) {
|
---|
284 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
285 | abort();
|
---|
286 | }
|
---|
287 |
|
---|
288 | if (ni < 1) { /* this applies only to a closed shell case */
|
---|
289 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
290 | abort();
|
---|
291 | }
|
---|
292 |
|
---|
293 | if (nocc == ni) {
|
---|
294 | npass = 1;
|
---|
295 | rest = 0;
|
---|
296 | }
|
---|
297 | else {
|
---|
298 | rest = nocc%ni;
|
---|
299 | npass = (nocc - rest)/ni + 1;
|
---|
300 | if (rest == 0) npass--;
|
---|
301 | }
|
---|
302 |
|
---|
303 | ExEnv::out0() << indent
|
---|
304 | << "npass = " << npass
|
---|
305 | << " rest = " << rest
|
---|
306 | << endl;
|
---|
307 |
|
---|
308 | /* the scf vector might be distributed between the nodes, but for OPT2 *
|
---|
309 | * each node needs its own copy of the vector; *
|
---|
310 | * therefore, put a copy of the scf vector on each node; *
|
---|
311 | * while doing this, duplicate columns corresponding to singly *
|
---|
312 | * occupied orbitals and order columns as [socc docc socc unocc] */
|
---|
313 | /* also rearrange scf eigenvalues as [socc docc socc unocc] *
|
---|
314 | * want socc first to get the socc's in the first batch *
|
---|
315 | * (need socc's to compute energy denominators - see *
|
---|
316 | * socc_sum comment below) */
|
---|
317 |
|
---|
318 | evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));
|
---|
319 |
|
---|
320 | RefDiagSCMatrix occ;
|
---|
321 | RefDiagSCMatrix evals;
|
---|
322 | RefSCMatrix Scf_Vec;
|
---|
323 | eigen(evals, Scf_Vec, occ);
|
---|
324 |
|
---|
325 | if (debug_) {
|
---|
326 | evals.print("eigenvalues");
|
---|
327 | Scf_Vec.print("eigenvectors");
|
---|
328 | }
|
---|
329 |
|
---|
330 | double *scf_vectort_dat = new double[nbasis*noso];
|
---|
331 | Scf_Vec->convert(scf_vectort_dat);
|
---|
332 |
|
---|
333 | double** scf_vectort = new double*[nocc + nvir];
|
---|
334 |
|
---|
335 | int idoc = 0, ivir = 0, isoc = 0;
|
---|
336 | for (i=nfzc; i<noso-nfzv; i++) {
|
---|
337 | if (occ(i) >= 2.0 - epsilon) {
|
---|
338 | evals_open[idoc+nsocc] = evals(i);
|
---|
339 | scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
340 | idoc++;
|
---|
341 | }
|
---|
342 | else if (occ(i) >= 1.0 - epsilon) {
|
---|
343 | evals_open[isoc] = evals(i);
|
---|
344 | scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
|
---|
345 | evals_open[isoc+nocc] = evals(i);
|
---|
346 | scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
|
---|
347 | isoc++;
|
---|
348 | }
|
---|
349 | else {
|
---|
350 | if (ivir < nvir) {
|
---|
351 | evals_open[ivir+nocc+nsocc] = evals(i);
|
---|
352 | scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
353 | }
|
---|
354 | ivir++;
|
---|
355 | }
|
---|
356 | }
|
---|
357 | // need the transpose of the vector
|
---|
358 | double **scf_vector = new double*[nbasis];
|
---|
359 | double *scf_vector_dat = new double[(nocc+nvir)*nbasis];
|
---|
360 | for (i=0; i<nbasis; i++) {
|
---|
361 | scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
|
---|
362 | for (j=0; j<nocc+nvir; j++) {
|
---|
363 | scf_vector[i][j] = scf_vectort[j][i];
|
---|
364 | }
|
---|
365 | }
|
---|
366 | delete[] scf_vectort;
|
---|
367 | delete[] scf_vectort_dat;
|
---|
368 |
|
---|
369 | /* allocate storage for various arrays */
|
---|
370 |
|
---|
371 | dim_ij = nocc*ni - (ni*(ni - 1))/2;
|
---|
372 |
|
---|
373 | trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
|
---|
374 | trans_int2 = (double*) malloc(nvir*ni*sizeof(double));
|
---|
375 | trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double));
|
---|
376 | trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
377 | trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
378 | if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));
|
---|
379 | if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double));
|
---|
380 | if (nsocc) mo_int_do_so_vir =
|
---|
381 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
382 | if (nsocc) mo_int_tmp =
|
---|
383 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
384 |
|
---|
385 | if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));
|
---|
386 |
|
---|
387 |
|
---|
388 | // create the integrals object
|
---|
389 | integral()->set_storage(mem_remaining);
|
---|
390 | tbint_ = integral()->electron_repulsion();
|
---|
391 | intbuf = tbint_->buffer();
|
---|
392 |
|
---|
393 | /**************************************************************************
|
---|
394 | * begin opt2 loops *
|
---|
395 | **************************************************************************/
|
---|
396 |
|
---|
397 |
|
---|
398 | for (pass=0; pass<npass; pass++) {
|
---|
399 | i_offset = pass*ni;
|
---|
400 | if ((pass == npass - 1) && (rest != 0)) ni = rest;
|
---|
401 |
|
---|
402 | r_offset = 0;
|
---|
403 | bzerofast(trans_int3,nbf[me]*nvir*dim_ij);
|
---|
404 |
|
---|
405 | tim_enter("RS loop");
|
---|
406 |
|
---|
407 | for (imyshell=0; imyshell<nRshell; imyshell++) {
|
---|
408 |
|
---|
409 | R = myshells[imyshell];
|
---|
410 | nr = basis()->shell(R).nfunction();
|
---|
411 |
|
---|
412 | for (S = 0; S < nshell; S++) {
|
---|
413 | ns = basis()->shell(S).nfunction();
|
---|
414 | tim_enter("bzerofast trans_int1");
|
---|
415 | bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);
|
---|
416 | tim_exit("bzerofast trans_int1");
|
---|
417 |
|
---|
418 | tim_enter("PQ loop");
|
---|
419 |
|
---|
420 | for (P = 0; P < nshell; P++) {
|
---|
421 | np = basis()->shell(P).nfunction();
|
---|
422 |
|
---|
423 | for (Q = 0; Q <= P; Q++) {
|
---|
424 | if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
|
---|
425 | continue; /* skip ereps less than tol */
|
---|
426 | }
|
---|
427 |
|
---|
428 | aoint_computed++;
|
---|
429 |
|
---|
430 | nq = basis()->shell(Q).nfunction();
|
---|
431 |
|
---|
432 | tim_enter("erep");
|
---|
433 | tbint_->compute_shell(P,Q,R,S);
|
---|
434 | tim_exit("erep");
|
---|
435 |
|
---|
436 | tim_enter("1. quart. tr.");
|
---|
437 |
|
---|
438 | index = 0;
|
---|
439 |
|
---|
440 | for (bf1 = 0; bf1 < np; bf1++) {
|
---|
441 | p = basis()->shell_to_function(P) + bf1;
|
---|
442 |
|
---|
443 | for (bf2 = 0; bf2 < nq; bf2++) {
|
---|
444 | q = basis()->shell_to_function(Q) + bf2;
|
---|
445 | if (q > p) {
|
---|
446 | /* if q > p: want to skip the loops over bf3-4 */
|
---|
447 | /* and larger bf2 values, so increment bf1 by 1 */
|
---|
448 | /* ("break") and adjust the value of index */
|
---|
449 | index = (bf1 + 1) * nq * nr * ns;
|
---|
450 | break;
|
---|
451 | }
|
---|
452 |
|
---|
453 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
454 |
|
---|
455 | for (bf4 = 0; bf4 < ns; bf4++,index++) {
|
---|
456 |
|
---|
457 | if (fabs(intbuf[index]) > 1.0e-15) {
|
---|
458 | pqrs = intbuf[index];
|
---|
459 |
|
---|
460 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
461 | iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni];
|
---|
462 |
|
---|
463 | if (p == q) pqrs *= 0.5;
|
---|
464 | col_index = i_offset;
|
---|
465 | c_pi = &scf_vector[p][col_index];
|
---|
466 | c_qi = &scf_vector[q][col_index];
|
---|
467 |
|
---|
468 | for (i=ni; i; i--) {
|
---|
469 | *iqrs++ += pqrs * *c_pi++;
|
---|
470 | *iprs++ += pqrs * *c_qi++;
|
---|
471 | }
|
---|
472 | }
|
---|
473 | } /* exit bf4 loop */
|
---|
474 | } /* exit bf3 loop */
|
---|
475 | } /* exit bf2 loop */
|
---|
476 | } /* exit bf1 loop */
|
---|
477 | tim_exit("1. quart. tr.");
|
---|
478 | } /* exit Q loop */
|
---|
479 | } /* exit P loop */
|
---|
480 | tim_exit("PQ loop");
|
---|
481 |
|
---|
482 | /* begin second and third quarter transformations */
|
---|
483 |
|
---|
484 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
485 | r = r_offset + bf3;
|
---|
486 |
|
---|
487 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
488 | s = basis()->shell_to_function(S) + bf4;
|
---|
489 |
|
---|
490 | tim_enter("bzerofast trans_int2");
|
---|
491 | bzerofast(trans_int2,nvir*ni);
|
---|
492 | tim_exit("bzerofast trans_int2");
|
---|
493 |
|
---|
494 | tim_enter("2. quart. tr.");
|
---|
495 |
|
---|
496 | for (q = 0; q < nbasis; q++) {
|
---|
497 | iars_ptr = trans_int2;
|
---|
498 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
499 | c_qa = &scf_vector[q][nocc];
|
---|
500 |
|
---|
501 | for (a = 0; a < nvir; a++) {
|
---|
502 |
|
---|
503 | for (i=ni; i; i--) {
|
---|
504 | *iars_ptr++ += *c_qa * *iqrs++;
|
---|
505 | }
|
---|
506 |
|
---|
507 | iqrs -= ni;
|
---|
508 | c_qa++;
|
---|
509 | }
|
---|
510 | } /* exit q loop */
|
---|
511 | tim_exit("2. quart. tr.");
|
---|
512 |
|
---|
513 | /* begin third quarter transformation */
|
---|
514 |
|
---|
515 | tim_enter("3. quart. tr.");
|
---|
516 |
|
---|
517 | for (i=0; i<ni; i++) {
|
---|
518 | tmp_index = i*(i+1)/2 + i*i_offset;
|
---|
519 |
|
---|
520 | for (a=0; a<nvir; a++) {
|
---|
521 | iars = trans_int2[a*ni + i];
|
---|
522 | c_sj = scf_vector[s];
|
---|
523 | iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)];
|
---|
524 |
|
---|
525 | for (j=0; j<=i+i_offset; j++) {
|
---|
526 | *iajr_ptr++ += *c_sj++ * iars;
|
---|
527 | }
|
---|
528 | }
|
---|
529 | } /* exit i loop */
|
---|
530 | tim_exit("3. quart. tr.");
|
---|
531 |
|
---|
532 | } /* exit bf4 loop */
|
---|
533 | } /* exit bf3 loop */
|
---|
534 |
|
---|
535 | } /* exit S loop */
|
---|
536 | r_offset += nr;
|
---|
537 | } /* exit R loop */
|
---|
538 | tim_exit("RS loop");
|
---|
539 |
|
---|
540 |
|
---|
541 | /* begin fourth quarter transformation; *
|
---|
542 | * first transform integrals with only s.o. indices; *
|
---|
543 | * these integrals are needed to compute the denominators *
|
---|
544 | * in the various terms contributing to the correlation energy *
|
---|
545 | * and must all be computed in the first pass; *
|
---|
546 | * the integrals are summed into the array socc_sum: *
|
---|
547 | * socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc) *
|
---|
548 | * (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
|
---|
549 | * the individual integrals are not saved here, only the sums are kept */
|
---|
550 |
|
---|
551 |
|
---|
552 | if (pass == 0) {
|
---|
553 | tim_enter("4. quart. tr.");
|
---|
554 | if (nsocc) bzerofast(socc_sum,nsocc);
|
---|
555 | for (isocc=0; isocc<nsocc; isocc++) {
|
---|
556 |
|
---|
557 | for (index=0; index<nbf[me]; index++) {
|
---|
558 | i = 0;
|
---|
559 | sum = basis()->shell(myshells[i]).nfunction();
|
---|
560 | while (sum <= index) {
|
---|
561 | i++;
|
---|
562 | sum += basis()->shell(myshells[i]).nfunction();
|
---|
563 | }
|
---|
564 | sum -= basis()->shell(myshells[i]).nfunction();
|
---|
565 | r = basis()->shell_to_function(myshells[i]) + index - sum;
|
---|
566 |
|
---|
567 | for (asocc=0; asocc<nsocc; asocc++) {
|
---|
568 | socc_sum[isocc] += scf_vector[r][nocc+asocc]*
|
---|
569 | trans_int3[isocc*(isocc+1)/2 + isocc*i_offset
|
---|
570 | + isocc + dim_ij*(asocc + nvir*index)];
|
---|
571 | }
|
---|
572 | }
|
---|
573 | } /* exit isocc loop */
|
---|
574 |
|
---|
575 | tim_exit("4. quart. tr.");
|
---|
576 |
|
---|
577 | /* sum socc_sum contributions from each node (only if nsocc > 0 *
|
---|
578 | * since gop1 will fail if nsocc = 0) */
|
---|
579 | if (nsocc > 0) {
|
---|
580 | tim_enter("global sum socc_sum");
|
---|
581 | msg_->sum(socc_sum,nsocc,socc_sum_tmp);
|
---|
582 | tim_exit("global sum socc_sum");
|
---|
583 | }
|
---|
584 |
|
---|
585 | }
|
---|
586 |
|
---|
587 | /* now we have all the sums of integrals involving s.o.'s (socc_sum); *
|
---|
588 | * begin fourth quarter transformation for all integrals (including *
|
---|
589 | * integrals with only s.o. indices); use restriction j <= (i_offset+i) *
|
---|
590 | * to save flops */
|
---|
591 |
|
---|
592 | compute_index = 0;
|
---|
593 |
|
---|
594 | for (i=0; i<ni; i++) {
|
---|
595 |
|
---|
596 | for (j=0; j <= (i_offset+i); j++) {
|
---|
597 |
|
---|
598 | tim_enter("4. quart. tr.");
|
---|
599 |
|
---|
600 | bzerofast(trans_int4,nvir*nvir);
|
---|
601 |
|
---|
602 | for (index=0; index<nbf[me]; index++) {
|
---|
603 | k = 0;
|
---|
604 | sum = basis()->shell(myshells[k]).nfunction();
|
---|
605 | while (sum <= index) {
|
---|
606 | k++;
|
---|
607 | sum += basis()->shell(myshells[k]).nfunction();
|
---|
608 | }
|
---|
609 | sum -= basis()->shell(myshells[k]).nfunction();
|
---|
610 | r = basis()->shell_to_function(myshells[k]) + index - sum;
|
---|
611 |
|
---|
612 | for (a=0; a<nvir; a++) {
|
---|
613 | iajb = &trans_int4[a*nvir];
|
---|
614 | iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)];
|
---|
615 | c_rb = &scf_vector[r][nocc];
|
---|
616 |
|
---|
617 | for (b=0; b<nvir; b++) {
|
---|
618 | *iajb++ += *c_rb++ * iajr;
|
---|
619 | }
|
---|
620 | }
|
---|
621 | }
|
---|
622 |
|
---|
623 | tim_exit("4. quart. tr.");
|
---|
624 |
|
---|
625 | tim_enter("global sum trans_int4");
|
---|
626 | msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
|
---|
627 | tim_exit("global sum trans_int4");
|
---|
628 |
|
---|
629 | /* we now have the fully transformed integrals (ia|jb) *
|
---|
630 | * for one i, one j (j <= i_offset+i), and all a and b; *
|
---|
631 | * compute contribution to the OPT1 and OPT2 correlation *
|
---|
632 | * energies; use restriction b <= a to save flops */
|
---|
633 |
|
---|
634 | tim_enter("compute ecorr");
|
---|
635 |
|
---|
636 | for (a=0; a<nvir; a++) {
|
---|
637 | for (b=0; b<=a; b++) {
|
---|
638 | compute_index++;
|
---|
639 | if (compute_index%nproc != me) continue;
|
---|
640 |
|
---|
641 | docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
|
---|
642 | + (j >= nsocc && j < nocc);
|
---|
643 | socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
|
---|
644 | vir_index = (a >= nsocc) + (b >= nsocc);
|
---|
645 |
|
---|
646 | if (socc_index >= 3) continue; /* skip to next b value */
|
---|
647 |
|
---|
648 | delta_ijab = evals_open[i_offset+i] + evals_open[j]
|
---|
649 | - evals_open[nocc+a] - evals_open[nocc+b];
|
---|
650 |
|
---|
651 | /* determine integral type and compute energy contribution */
|
---|
652 | if (docc_index == 2 && vir_index == 2) {
|
---|
653 | if (i_offset+i == j && a == b) {
|
---|
654 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
655 | ecorr_opt2 += contrib1/delta_ijab;
|
---|
656 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
657 | }
|
---|
658 | else if (i_offset+i == j || a == b) {
|
---|
659 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
660 | ecorr_opt2 += 2*contrib1/delta_ijab;
|
---|
661 | ecorr_opt1 += 2*contrib1/delta_ijab;
|
---|
662 | }
|
---|
663 | else {
|
---|
664 | contrib1 = trans_int4[a*nvir + b];
|
---|
665 | contrib2 = trans_int4[b*nvir + a];
|
---|
666 | ecorr_opt2 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
667 | - contrib1*contrib2)/delta_ijab;
|
---|
668 | ecorr_opt1 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
669 | - contrib1*contrib2)/delta_ijab;
|
---|
670 | }
|
---|
671 | }
|
---|
672 | else if (docc_index == 2 && socc_index == 2) {
|
---|
673 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
674 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
675 | ecorr_opt2 += contrib1/
|
---|
676 | (delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
|
---|
677 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
678 | }
|
---|
679 | else if (socc_index == 2 && vir_index == 2) {
|
---|
680 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
681 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
682 | ecorr_opt2 += contrib1/
|
---|
683 | (delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
|
---|
684 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
685 | }
|
---|
686 | else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
|
---|
687 | if (i_offset+i == j) {
|
---|
688 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
689 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
|
---|
690 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
691 | }
|
---|
692 | else {
|
---|
693 | contrib1 = trans_int4[a*nvir + b];
|
---|
694 | contrib2 = trans_int4[b*nvir + a];
|
---|
695 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
696 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[b]);
|
---|
697 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
698 | - contrib1*contrib2)/delta_ijab;
|
---|
699 | }
|
---|
700 | }
|
---|
701 | else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
|
---|
702 | contrib1 = trans_int4[b*nvir+a]*trans_int4[b*nvir+a];
|
---|
703 | if (j == b) {
|
---|
704 | /* to compute the energy contribution from an integral of the *
|
---|
705 | * type (is1|s1a) (i=d.o., s1=s.o., a=unocc.), we need the *
|
---|
706 | * (is|sa) integrals for all s=s.o.; these integrals are *
|
---|
707 | * therefore stored here in the array mo_int_do_so_vir, and *
|
---|
708 | * the energy contribution is computed after exiting the loop *
|
---|
709 | * over i-batches (pass) */
|
---|
710 | mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
|
---|
711 | (i_offset+i-nsocc + ndocc*b)] =
|
---|
712 | trans_int4[b*nvir + a];
|
---|
713 | ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
|
---|
714 | ecorr_opt1 += 1.5*contrib1/delta_ijab;
|
---|
715 | ecorr_zapt2_contrib += contrib1/
|
---|
716 | (delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
|
---|
717 | + 0.5*contrib1/delta_ijab;
|
---|
718 | }
|
---|
719 | else {
|
---|
720 | ecorr_opt2 += contrib1/
|
---|
721 | (delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
|
---|
722 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
723 | }
|
---|
724 | }
|
---|
725 | else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
|
---|
726 | if (a == b) {
|
---|
727 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
728 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
|
---|
729 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
730 | }
|
---|
731 | else {
|
---|
732 | contrib1 = trans_int4[a*nvir + b];
|
---|
733 | contrib2 = trans_int4[b*nvir + a];
|
---|
734 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
735 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[j]);
|
---|
736 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
737 | - contrib1*contrib2)/delta_ijab;
|
---|
738 | }
|
---|
739 | }
|
---|
740 | } /* exit b loop */
|
---|
741 | } /* exit a loop */
|
---|
742 | tim_exit("compute ecorr");
|
---|
743 | } /* exit j loop */
|
---|
744 | } /* exit i loop */
|
---|
745 |
|
---|
746 | if (nsocc == 0 && npass > 1 && pass < npass - 1) {
|
---|
747 | double passe = ecorr_opt2;
|
---|
748 | msg_->sum(passe);
|
---|
749 | ExEnv::out0() << indent
|
---|
750 | << "Partial correlation energy for pass " << pass << ":" << endl;
|
---|
751 | ExEnv::out0() << indent
|
---|
752 | << scprintf(" restart_ecorr = %14.10f", passe)
|
---|
753 | << endl;
|
---|
754 | ExEnv::out0() << indent
|
---|
755 | << scprintf(" restart_orbital_v2 = %d", ((pass+1) * ni))
|
---|
756 | << endl;
|
---|
757 | }
|
---|
758 | } /* exit loop over i-batches (pass) */
|
---|
759 |
|
---|
760 |
|
---|
761 |
|
---|
762 | /* compute contribution from excitations of the type is1 -> s1a where *
|
---|
763 | * i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
|
---|
764 | * where i and a have the same spin, contribute to this term; *
|
---|
765 | * (Brillouin's theorem not satisfied for ROHF wave functions); *
|
---|
766 | * do this only if nsocc > 0 since gop1 will fail otherwise */
|
---|
767 |
|
---|
768 | tim_enter("compute ecorr");
|
---|
769 |
|
---|
770 | if (nsocc > 0) {
|
---|
771 | tim_enter("global sum mo_int_do_so_vir");
|
---|
772 | msg_->sum(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc),mo_int_tmp);
|
---|
773 | tim_exit("global sum mo_int_do_so_vir");
|
---|
774 | }
|
---|
775 |
|
---|
776 | /* add extra contribution for triplet and higher spin multiplicities *
|
---|
777 | * contribution = sum over s1 and s2<s1 of (is1|s1a)*(is2|s2a)/delta */
|
---|
778 |
|
---|
779 | if (me == 0 && nsocc) {
|
---|
780 | for (i=0; i<ndocc; i++) {
|
---|
781 |
|
---|
782 | for (a=0; a<nvir-nsocc; a++) {
|
---|
783 | delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
|
---|
784 |
|
---|
785 | for (s1=0; s1<nsocc; s1++) {
|
---|
786 |
|
---|
787 | for (s2=0; s2<s1; s2++) {
|
---|
788 | contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s1)]*
|
---|
789 | mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s2)]/delta;
|
---|
790 | ecorr_opt2 += contrib1;
|
---|
791 | ecorr_opt1 += contrib1;
|
---|
792 | }
|
---|
793 | }
|
---|
794 | } /* exit a loop */
|
---|
795 | } /* exit i loop */
|
---|
796 | }
|
---|
797 |
|
---|
798 | tim_exit("compute ecorr");
|
---|
799 |
|
---|
800 | ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
|
---|
801 | ecorr_opt2 += ecorr_opt2_contrib;
|
---|
802 | msg_->sum(ecorr_opt1);
|
---|
803 | msg_->sum(ecorr_opt2);
|
---|
804 | msg_->sum(ecorr_zapt2);
|
---|
805 | msg_->sum(aoint_computed);
|
---|
806 |
|
---|
807 | escf = reference_->energy();
|
---|
808 | hf_energy_ = escf;
|
---|
809 |
|
---|
810 | if (me == 0) {
|
---|
811 | eopt2 = escf + ecorr_opt2;
|
---|
812 | eopt1 = escf + ecorr_opt1;
|
---|
813 | ezapt2 = escf + ecorr_zapt2;
|
---|
814 |
|
---|
815 | /* print out various energies etc.*/
|
---|
816 |
|
---|
817 | ExEnv::out0() << indent
|
---|
818 | << "Number of shell quartets for which AO integrals would" << endl
|
---|
819 | << indent << "have been computed without bounds checking: "
|
---|
820 | << npass*nshell*nshell*(nshell+1)*(nshell+1)/2 << endl;
|
---|
821 | ExEnv::out0() << indent
|
---|
822 | << "Number of shell quartets for which AO integrals" << endl
|
---|
823 | << indent << "were computed: " << aoint_computed << endl;
|
---|
824 |
|
---|
825 | ExEnv::out0() << indent
|
---|
826 | << scprintf("ROHF energy [au]: %17.12lf\n", escf);
|
---|
827 | ExEnv::out0() << indent
|
---|
828 | << scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
|
---|
829 | ExEnv::out0() << indent
|
---|
830 | << scprintf("OPT2 second order correction [au]: %17.12lf\n",
|
---|
831 | ecorr_opt2);
|
---|
832 | ExEnv::out0() << indent
|
---|
833 | << scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
|
---|
834 | ExEnv::out0() << indent
|
---|
835 | << scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
|
---|
836 | ecorr_zapt2);
|
---|
837 | ExEnv::out0() << indent
|
---|
838 | << scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
|
---|
839 | ExEnv::out0().flush();
|
---|
840 | }
|
---|
841 |
|
---|
842 | msg_->bcast(eopt1);
|
---|
843 | msg_->bcast(eopt2);
|
---|
844 | msg_->bcast(ezapt2);
|
---|
845 |
|
---|
846 | if (method_ && !strcmp(method_,"opt1")) {
|
---|
847 | set_energy(eopt1);
|
---|
848 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
849 | *ref_to_mp2_acc);
|
---|
850 | }
|
---|
851 | else if (method_ && !strcmp(method_,"opt2")) {
|
---|
852 | set_energy(eopt2);
|
---|
853 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
854 | *ref_to_mp2_acc);
|
---|
855 | }
|
---|
856 | else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
|
---|
857 | set_energy(ezapt2);
|
---|
858 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
859 | *ref_to_mp2_acc);
|
---|
860 | }
|
---|
861 | else {
|
---|
862 | if (!(!method_ || !strcmp(method_,"zapt"))) {
|
---|
863 | ExEnv::out0() << indent
|
---|
864 | << "MBPT2: bad method: " << method_ << ", using zapt" << endl;
|
---|
865 | }
|
---|
866 | set_energy(ezapt2);
|
---|
867 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
868 | *ref_to_mp2_acc);
|
---|
869 | }
|
---|
870 |
|
---|
871 | free(trans_int1);
|
---|
872 | free(trans_int2);
|
---|
873 | free(trans_int3);
|
---|
874 | free(trans_int4);
|
---|
875 | free(trans_int4_tmp);
|
---|
876 | if (nsocc) free(socc_sum);
|
---|
877 | if (nsocc) free(socc_sum_tmp);
|
---|
878 | if (nsocc) free(mo_int_do_so_vir);
|
---|
879 | if (nsocc) free(mo_int_tmp);
|
---|
880 | free(evals_open);
|
---|
881 | free(myshells);
|
---|
882 | free(shellsize);
|
---|
883 | free(sorted_shells);
|
---|
884 | free(nbf);
|
---|
885 | free(proc);
|
---|
886 |
|
---|
887 | delete[] scf_vector;
|
---|
888 | delete[] scf_vector_dat;
|
---|
889 |
|
---|
890 | }
|
---|
891 |
|
---|
892 |
|
---|
893 | /* Do a quick sort (larger -> smaller) of the integer data in item *
|
---|
894 | * by the integer indices in index; *
|
---|
895 | * data in item remain unchanged */
|
---|
896 |
|
---|
897 | static void
|
---|
898 | iquicksort(int *item,int *index,int n)
|
---|
899 | {
|
---|
900 | int i;
|
---|
901 | if (n<=0) return;
|
---|
902 | for (i=0; i<n; i++) {
|
---|
903 | index[i] = i;
|
---|
904 | }
|
---|
905 | iqs(item,index,0,n-1);
|
---|
906 | }
|
---|
907 |
|
---|
/* Recursive quicksort worker used by iquicksort.                         *
 * Permutes index[left..right] (both bounds inclusive) so that the keys   *
 * item[index[k]] are in non-increasing order over that range.            *
 *   item   keys to sort by; read only, never written                     *
 *   index  positions into item; this is the array that gets reordered    *
 *   left   first position of the range to sort                           *
 *   right  last position of the range to sort (left <= right expected)   */
static void
iqs(int *item,int *index,int left,int right)
{
  /* plain locals: the 'register' storage class was removed in C++17 */
  int i = left;
  int j = right;

  /* pivot key taken from the middle of the range; the midpoint is written *
   * as left + (right-left)/2 so the sum of the two bounds cannot overflow */
  const int x = item[index[left + (right-left)/2]];

  do {
    /* skip entries already on the correct side of the pivot */
    while (item[index[i]]>x && i<right) i++;
    while (x>item[index[j]] && j>left) j--;

    if (i<=j) {
      /* entries with equal keys are left where they are */
      if (item[index[i]] != item[index[j]]) {
        const int y = index[i];
        index[i] = index[j];
        index[j] = y;
        }
      i++; j--;
      }
    } while (i<=j);

  /* sort the two partitions that contain more than one entry */
  if (left<j) iqs(item,index,left,j);
  if (i<right) iqs(item,index,i,right);
}
|
---|
934 |
|
---|
935 | distsize_t
|
---|
936 | MBPT2::compute_v2_memory(int ni,
|
---|
937 | int nfuncmax, int nbfme, int nshell,
|
---|
938 | int ndocc, int nsocc, int nvir, int nproc
|
---|
939 | )
|
---|
940 | {
|
---|
941 | distsize_t result = 0;
|
---|
942 | distsize_t dsize = sizeof(double);
|
---|
943 | distsize_t isize = sizeof(int);
|
---|
944 | int dim_ij = nocc*ni - (ni*(ni - 1))/2;
|
---|
945 |
|
---|
946 | result += nfuncmax*nfuncmax*(distsize_t)nbasis*(distsize_t)ni*dsize;
|
---|
947 | result += nvir*ni*dsize;
|
---|
948 | result += nbfme*nvir*(distsize_t)dim_ij*dsize;
|
---|
949 | result += nvir*nvir*dsize;
|
---|
950 | result += nvir*nvir*dsize;
|
---|
951 | result += nsocc*dsize;
|
---|
952 | result += nsocc*dsize;
|
---|
953 | result += ndocc*nsocc*(distsize_t)(nvir-nsocc)*dsize;
|
---|
954 | result += ndocc*nsocc*(distsize_t)(nvir-nsocc)*dsize;
|
---|
955 | result += (noso+nsocc-nfzc-nfzv)*dsize;
|
---|
956 | result += nshell*isize;
|
---|
957 | result += nshell*isize;
|
---|
958 | result += nproc*isize;
|
---|
959 | result += nshell*isize;
|
---|
960 |
|
---|
961 | return result;
|
---|
962 | }
|
---|
963 |
|
---|
964 | ////////////////////////////////////////////////////////////////////////////
|
---|
965 |
|
---|
966 | // Local Variables:
|
---|
967 | // mode: c++
|
---|
968 | // c-file-style: "CLJ-CONDENSED"
|
---|
969 | // End:
|
---|