[0b990d] | 1 | //
|
---|
| 2 | // hsosv2.cc
|
---|
| 3 | //
|
---|
| 4 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
| 5 | //
|
---|
| 6 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
| 7 | // Maintainer: LPS
|
---|
| 8 | //
|
---|
| 9 | // This file is part of the SC Toolkit.
|
---|
| 10 | //
|
---|
| 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
| 12 | // it under the terms of the GNU Library General Public License as published by
|
---|
| 13 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
| 14 | // any later version.
|
---|
| 15 | //
|
---|
| 16 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
| 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 19 | // GNU Library General Public License for more details.
|
---|
| 20 | //
|
---|
| 21 | // You should have received a copy of the GNU Library General Public License
|
---|
| 22 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
| 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 24 | //
|
---|
| 25 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
| 26 | //
|
---|
| 27 |
|
---|
| 28 | #include <iostream>
|
---|
| 29 | #include <math.h>
|
---|
| 30 |
|
---|
| 31 | #include <util/misc/formio.h>
|
---|
| 32 | #include <util/misc/timer.h>
|
---|
| 33 | #include <util/group/message.h>
|
---|
| 34 | #include <math/scmat/matrix.h>
|
---|
| 35 | #include <chemistry/molecule/molecule.h>
|
---|
| 36 | #include <chemistry/qc/mbpt/mbpt.h>
|
---|
| 37 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
| 38 |
|
---|
| 39 | using namespace std;
|
---|
| 40 | using namespace sc;
|
---|
| 41 |
|
---|
| 42 | static void iqs(int *item,int *index,int left,int right);
|
---|
| 43 | static void iquicksort(int *item,int *index,int n);
|
---|
| 44 |
|
---|
| 45 | void
|
---|
| 46 | MBPT2::compute_hsos_v2()
|
---|
| 47 | {
|
---|
| 48 | int i, j, k;
|
---|
| 49 | int s1, s2;
|
---|
| 50 | int a, b;
|
---|
| 51 | int isocc, asocc; /* indices running over singly occupied orbitals */
|
---|
| 52 | int nfuncmax = basis()->max_nfunction_in_shell();
|
---|
| 53 | int nvir;
|
---|
| 54 | int nshell;
|
---|
| 55 | int nocc=0,ndocc=0,nsocc=0;
|
---|
| 56 | int i_offset;
|
---|
| 57 | int npass, pass;
|
---|
| 58 | int ni;
|
---|
| 59 | int np, nq, nr, ns;
|
---|
| 60 | int P, Q, R, S;
|
---|
| 61 | int p, q, r, s;
|
---|
| 62 | int bf1, bf2, bf3, bf4;
|
---|
| 63 | int index;
|
---|
| 64 | int compute_index;
|
---|
| 65 | int col_index;
|
---|
| 66 | int tmp_index;
|
---|
| 67 | int dim_ij;
|
---|
| 68 | int docc_index, socc_index, vir_index;
|
---|
| 69 | int me;
|
---|
| 70 | int nproc;
|
---|
| 71 | int rest;
|
---|
| 72 | int r_offset;
|
---|
| 73 | int sum;
|
---|
| 74 | int min;
|
---|
| 75 | int iproc;
|
---|
| 76 | int nRshell;
|
---|
| 77 | int imyshell;
|
---|
| 78 | int *myshells; /* the R indices processed by node me */
|
---|
| 79 | int *shellsize; /* size of each shell */
|
---|
| 80 | int *sorted_shells; /* sorted shell indices: large shells->small shells */
|
---|
| 81 | int *nbf; /* number of basis functions processed by each node */
|
---|
| 82 | int *proc; /* element k: processor which will process shell k */
|
---|
| 83 | int aoint_computed = 0;
|
---|
| 84 | double *evals_open; /* reordered scf eigenvalues */
|
---|
| 85 | const double *intbuf; /* 2-electron AO integral buffer */
|
---|
| 86 | double *trans_int1; /* partially transformed integrals */
|
---|
| 87 | double *trans_int2; /* partially transformed integrals */
|
---|
| 88 | double *trans_int3; /* partially transformed integrals */
|
---|
| 89 | double *trans_int4; /* fully transformed integrals */
|
---|
| 90 | double *trans_int4_tmp; /* scratch array */
|
---|
| 91 | double *mo_int_do_so_vir=0;/*mo integral (is|sa); i:d.o.,s:s.o.,a:vir */
|
---|
| 92 | double *mo_int_tmp=0; /* scratch array used in global summations */
|
---|
| 93 | double *socc_sum=0; /* sum of 2-el integrals involving only s.o.'s */
|
---|
| 94 | double *socc_sum_tmp=0;/* scratch array */
|
---|
| 95 | double *iqrs, *iprs;
|
---|
| 96 | double *iars_ptr;
|
---|
| 97 | double iars;
|
---|
| 98 | double iajr;
|
---|
| 99 | double *iajr_ptr;
|
---|
| 100 | double *iajb;
|
---|
| 101 | double pqrs;
|
---|
| 102 | double *c_qa;
|
---|
| 103 | double *c_rb, *c_pi, *c_qi, *c_sj;
|
---|
| 104 | double delta_ijab;
|
---|
| 105 | double delta;
|
---|
| 106 | double contrib1, contrib2;
|
---|
| 107 | double ecorr_opt2=0,ecorr_opt1=0;
|
---|
| 108 | double ecorr_zapt2;
|
---|
| 109 | double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
|
---|
| 110 | double escf;
|
---|
| 111 | double eopt2,eopt1,ezapt2;
|
---|
| 112 | double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
|
---|
| 113 |
|
---|
| 114 | me = msg_->me();
|
---|
| 115 |
|
---|
| 116 | ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v2)" << endl;
|
---|
| 117 |
|
---|
| 118 | tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
|
---|
| 119 |
|
---|
| 120 | nproc = msg_->n();
|
---|
| 121 |
|
---|
| 122 | ndocc = nsocc = 0;
|
---|
| 123 | const double epsilon = 1.0e-4;
|
---|
| 124 | for (i=0; i<oso_dimension()->n(); i++) {
|
---|
| 125 | if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
|
---|
| 126 | else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
|
---|
| 127 | }
|
---|
| 128 |
|
---|
| 129 | /* do a few preliminary tests to make sure the desired calculation *
|
---|
| 130 | * can be done (and appears to be meaningful!) */
|
---|
| 131 |
|
---|
| 132 | if (ndocc == 0 && nsocc == 0) {
|
---|
| 133 | ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
|
---|
| 134 | abort();
|
---|
| 135 | }
|
---|
| 136 |
|
---|
| 137 | if (nfzc > ndocc) {
|
---|
| 138 | ExEnv::err0()
|
---|
| 139 | << "The number of frozen core orbitals exceeds the number" << endl
|
---|
| 140 | << "of doubly occupied orbitals; program exiting" << endl;
|
---|
| 141 | abort();
|
---|
| 142 | }
|
---|
| 143 |
|
---|
| 144 | if (nfzv > noso - ndocc - nsocc) {
|
---|
| 145 | ExEnv::err0()
|
---|
| 146 | << "The number of frozen virtual orbitals exceeds the number" << endl
|
---|
| 147 | << "of unoccupied orbitals; program exiting" << endl;
|
---|
| 148 | abort();
|
---|
| 149 | }
|
---|
| 150 |
|
---|
| 151 | ndocc = ndocc - nfzc;
|
---|
| 152 | /* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
|
---|
| 153 | nvir = noso - ndocc - nfzc - nfzv;
|
---|
| 154 | /* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
|
---|
| 155 | nocc = ndocc + nsocc;
|
---|
| 156 | nshell = basis()->nshell();
|
---|
| 157 |
|
---|
| 158 | /* allocate storage for some arrays used for keeping track of which R *
|
---|
| 159 | * indices are processed by each node */
|
---|
| 160 | shellsize = (int*) malloc(nshell*sizeof(int));
|
---|
| 161 | sorted_shells = (int*) malloc(nshell*sizeof(int));
|
---|
| 162 | nbf = (int*) malloc(nproc*sizeof(int));
|
---|
| 163 | proc = (int*) malloc(nshell*sizeof(int));
|
---|
| 164 |
|
---|
| 165 |
|
---|
| 166 | /******************************************************
|
---|
| 167 | * Begin distributing R shells between nodes so each *
|
---|
| 168 | * node gets ca. the same number of r basis functions *
|
---|
| 169 | ******************************************************/
|
---|
| 170 |
|
---|
| 171 | /* compute size of each shell */
|
---|
| 172 | for (i=0; i<nshell; i++) {
|
---|
| 173 | shellsize[i] = basis()->shell(i).nfunction();
|
---|
| 174 | }
|
---|
| 175 |
|
---|
| 176 | /* do an index sort (large -> small) of shellsize to form sorted_shells */
|
---|
| 177 | iquicksort(shellsize,sorted_shells,nshell);
|
---|
| 178 |
|
---|
| 179 | /* initialize nbf */
|
---|
| 180 | for (i=0; i<nproc; i++) nbf[i] = 0;
|
---|
| 181 |
|
---|
| 182 | for (i=0; i<nshell; i++) {
|
---|
| 183 | min = nbf[0];
|
---|
| 184 | iproc = 0;
|
---|
| 185 | for (j=1; j<nproc; j++) {
|
---|
| 186 | if (nbf[j] < min) {
|
---|
| 187 | iproc = j;
|
---|
| 188 | min = nbf[j];
|
---|
| 189 | }
|
---|
| 190 | }
|
---|
| 191 | proc[sorted_shells[i]] = iproc;
|
---|
| 192 | nbf[iproc] += shellsize[sorted_shells[i]];
|
---|
| 193 | }
|
---|
| 194 | if (me == 0) {
|
---|
| 195 | ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
|
---|
| 196 | for (i=0; i<nproc; i++) {
|
---|
| 197 | if (i%12 == 0) ExEnv::out0() << indent;
|
---|
| 198 | ExEnv::out0() << scprintf(" %4i",nbf[i]);
|
---|
| 199 | if ((i+1)%12 == 0) ExEnv::out0() << endl;
|
---|
| 200 | }
|
---|
| 201 | ExEnv::out0() << endl;
|
---|
| 202 | }
|
---|
| 203 |
|
---|
| 204 | /* determine which shells are to be processed by node me */
|
---|
| 205 | nRshell = 0;
|
---|
| 206 | for (i=0; i<nshell; i++) {
|
---|
| 207 | if (proc[i] == me) nRshell++;
|
---|
| 208 | }
|
---|
| 209 | myshells = (int*) malloc(nRshell*sizeof(int));
|
---|
| 210 | imyshell = 0;
|
---|
| 211 | for (i=0; i<nshell; i++) {
|
---|
| 212 | if (proc[i] == me) {
|
---|
| 213 | myshells[imyshell] = i;
|
---|
| 214 | imyshell++;
|
---|
| 215 | }
|
---|
| 216 | }
|
---|
| 217 |
|
---|
| 218 | /************************************************
|
---|
| 219 | * End of distribution of R shells between nodes *
|
---|
| 220 | ************************************************/
|
---|
| 221 |
|
---|
| 222 |
|
---|
| 223 | /* compute batch size ni for opt2 loops; *
|
---|
| 224 | * need to store the following arrays of type double : trans_int1-4, *
|
---|
| 225 | * trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp, *
|
---|
| 226 | * mo_int_do_so_vir, mo_int_tmp, *
|
---|
| 227 | * and the following arrays of type int: myshells, shellsize, *
|
---|
| 228 | * sorted_shells, nbf, and proc */
|
---|
| 229 |
|
---|
| 230 | size_t memused = 0;
|
---|
| 231 | ni = 0;
|
---|
| 232 | for (i=1; i<=nocc; i++) {
|
---|
| 233 | distsize_t tmpmem = compute_v2_memory(i,
|
---|
| 234 | nfuncmax, nbf[me], nshell,
|
---|
| 235 | ndocc, nsocc, nvir, nproc);
|
---|
| 236 | if (tmpmem > mem_alloc) break;
|
---|
| 237 | ni = i;
|
---|
| 238 | memused = distsize_to_size(tmpmem);
|
---|
| 239 | }
|
---|
| 240 |
|
---|
| 241 | size_t mem_remaining = mem_alloc - memused;
|
---|
| 242 |
|
---|
| 243 | /* set ni equal to the smallest batch size for any node */
|
---|
| 244 | msg_->min(ni);
|
---|
| 245 | msg_->bcast(ni);
|
---|
| 246 |
|
---|
| 247 | int nbfmax = nbf[me];
|
---|
| 248 | msg_->max(nbfmax);
|
---|
| 249 |
|
---|
| 250 | if (me == 0) {
|
---|
| 251 | ExEnv::out0() << indent << " nproc nbasis nshell nfuncmax"
|
---|
| 252 | " ndocc nsocc nvir nfzc nfzv" << endl;
|
---|
| 253 | ExEnv::out0() << indent << scprintf(" %-4i %-5i %-4i %-3i"
|
---|
| 254 | " %-3i %-3i %-3i %-3i %-3i\n",
|
---|
| 255 | nproc,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 |
|
---|
| 259 | ExEnv::out0() << indent
|
---|
| 260 | << "Memory available per node: " << mem_alloc << " Bytes"
|
---|
| 261 | << endl;
|
---|
| 262 | ExEnv::out0() << indent
|
---|
| 263 | << "Total memory used per node: " << memused << " Bytes"
|
---|
| 264 | << endl;
|
---|
| 265 | ExEnv::out0() << indent
|
---|
| 266 | << "Memory required for one pass: "
|
---|
| 267 | << compute_v2_memory(nocc,
|
---|
| 268 | nfuncmax, nbfmax, nshell,
|
---|
| 269 | ndocc, nsocc, nvir, nproc)
|
---|
| 270 | << " Bytes"
|
---|
| 271 | << endl;
|
---|
| 272 | ExEnv::out0() << indent
|
---|
| 273 | << "Minimum memory required: "
|
---|
| 274 | << compute_v2_memory(1,
|
---|
| 275 | nfuncmax, nbfmax, nshell,
|
---|
| 276 | ndocc, nsocc, nvir, nproc)
|
---|
| 277 | << " Bytes"
|
---|
| 278 | << endl;
|
---|
| 279 | ExEnv::out0() << indent
|
---|
| 280 | << "Batch size: " << ni
|
---|
| 281 | << endl;
|
---|
| 282 |
|
---|
| 283 | if (ni < nsocc) {
|
---|
| 284 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
| 285 | abort();
|
---|
| 286 | }
|
---|
| 287 |
|
---|
| 288 | if (ni < 1) { /* this applies only to a closed shell case */
|
---|
| 289 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
| 290 | abort();
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | if (nocc == ni) {
|
---|
| 294 | npass = 1;
|
---|
| 295 | rest = 0;
|
---|
| 296 | }
|
---|
| 297 | else {
|
---|
| 298 | rest = nocc%ni;
|
---|
| 299 | npass = (nocc - rest)/ni + 1;
|
---|
| 300 | if (rest == 0) npass--;
|
---|
| 301 | }
|
---|
| 302 |
|
---|
| 303 | ExEnv::out0() << indent
|
---|
| 304 | << "npass = " << npass
|
---|
| 305 | << " rest = " << rest
|
---|
| 306 | << endl;
|
---|
| 307 |
|
---|
| 308 | /* the scf vector might be distributed between the nodes, but for OPT2 *
|
---|
| 309 | * each node needs its own copy of the vector; *
|
---|
| 310 | * therefore, put a copy of the scf vector on each node; *
|
---|
| 311 | * while doing this, duplicate columns corresponding to singly *
|
---|
| 312 | * occupied orbitals and order columns as [socc docc socc unocc] */
|
---|
| 313 | /* also rearrange scf eigenvalues as [socc docc socc unocc] *
|
---|
| 314 | * want socc first to get the socc's in the first batch *
|
---|
| 315 | * (need socc's to compute energy denominators - see *
|
---|
| 316 | * socc_sum comment below) */
|
---|
| 317 |
|
---|
| 318 | evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));
|
---|
| 319 |
|
---|
| 320 | RefDiagSCMatrix occ;
|
---|
| 321 | RefDiagSCMatrix evals;
|
---|
| 322 | RefSCMatrix Scf_Vec;
|
---|
| 323 | eigen(evals, Scf_Vec, occ);
|
---|
| 324 |
|
---|
| 325 | if (debug_) {
|
---|
| 326 | evals.print("eigenvalues");
|
---|
| 327 | Scf_Vec.print("eigenvectors");
|
---|
| 328 | }
|
---|
| 329 |
|
---|
| 330 | double *scf_vectort_dat = new double[nbasis*noso];
|
---|
| 331 | Scf_Vec->convert(scf_vectort_dat);
|
---|
| 332 |
|
---|
| 333 | double** scf_vectort = new double*[nocc + nvir];
|
---|
| 334 |
|
---|
| 335 | int idoc = 0, ivir = 0, isoc = 0;
|
---|
| 336 | for (i=nfzc; i<noso-nfzv; i++) {
|
---|
| 337 | if (occ(i) >= 2.0 - epsilon) {
|
---|
| 338 | evals_open[idoc+nsocc] = evals(i);
|
---|
| 339 | scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
| 340 | idoc++;
|
---|
| 341 | }
|
---|
| 342 | else if (occ(i) >= 1.0 - epsilon) {
|
---|
| 343 | evals_open[isoc] = evals(i);
|
---|
| 344 | scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
|
---|
| 345 | evals_open[isoc+nocc] = evals(i);
|
---|
| 346 | scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
|
---|
| 347 | isoc++;
|
---|
| 348 | }
|
---|
| 349 | else {
|
---|
| 350 | if (ivir < nvir) {
|
---|
| 351 | evals_open[ivir+nocc+nsocc] = evals(i);
|
---|
| 352 | scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
| 353 | }
|
---|
| 354 | ivir++;
|
---|
| 355 | }
|
---|
| 356 | }
|
---|
| 357 | // need the transpose of the vector
|
---|
| 358 | double **scf_vector = new double*[nbasis];
|
---|
| 359 | double *scf_vector_dat = new double[(nocc+nvir)*nbasis];
|
---|
| 360 | for (i=0; i<nbasis; i++) {
|
---|
| 361 | scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
|
---|
| 362 | for (j=0; j<nocc+nvir; j++) {
|
---|
| 363 | scf_vector[i][j] = scf_vectort[j][i];
|
---|
| 364 | }
|
---|
| 365 | }
|
---|
| 366 | delete[] scf_vectort;
|
---|
| 367 | delete[] scf_vectort_dat;
|
---|
| 368 |
|
---|
| 369 | /* allocate storage for various arrays */
|
---|
| 370 |
|
---|
| 371 | dim_ij = nocc*ni - (ni*(ni - 1))/2;
|
---|
| 372 |
|
---|
| 373 | trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
|
---|
| 374 | trans_int2 = (double*) malloc(nvir*ni*sizeof(double));
|
---|
| 375 | trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double));
|
---|
| 376 | trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
| 377 | trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
| 378 | if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));
|
---|
| 379 | if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double));
|
---|
| 380 | if (nsocc) mo_int_do_so_vir =
|
---|
| 381 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
| 382 | if (nsocc) mo_int_tmp =
|
---|
| 383 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
| 384 |
|
---|
| 385 | if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));
|
---|
| 386 |
|
---|
| 387 |
|
---|
| 388 | // create the integrals object
|
---|
| 389 | integral()->set_storage(mem_remaining);
|
---|
| 390 | tbint_ = integral()->electron_repulsion();
|
---|
| 391 | intbuf = tbint_->buffer();
|
---|
| 392 |
|
---|
| 393 | /**************************************************************************
|
---|
| 394 | * begin opt2 loops *
|
---|
| 395 | **************************************************************************/
|
---|
| 396 |
|
---|
| 397 |
|
---|
| 398 | for (pass=0; pass<npass; pass++) {
|
---|
| 399 | i_offset = pass*ni;
|
---|
| 400 | if ((pass == npass - 1) && (rest != 0)) ni = rest;
|
---|
| 401 |
|
---|
| 402 | r_offset = 0;
|
---|
| 403 | bzerofast(trans_int3,nbf[me]*nvir*dim_ij);
|
---|
| 404 |
|
---|
| 405 | tim_enter("RS loop");
|
---|
| 406 |
|
---|
| 407 | for (imyshell=0; imyshell<nRshell; imyshell++) {
|
---|
| 408 |
|
---|
| 409 | R = myshells[imyshell];
|
---|
| 410 | nr = basis()->shell(R).nfunction();
|
---|
| 411 |
|
---|
| 412 | for (S = 0; S < nshell; S++) {
|
---|
| 413 | ns = basis()->shell(S).nfunction();
|
---|
| 414 | tim_enter("bzerofast trans_int1");
|
---|
| 415 | bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);
|
---|
| 416 | tim_exit("bzerofast trans_int1");
|
---|
| 417 |
|
---|
| 418 | tim_enter("PQ loop");
|
---|
| 419 |
|
---|
| 420 | for (P = 0; P < nshell; P++) {
|
---|
| 421 | np = basis()->shell(P).nfunction();
|
---|
| 422 |
|
---|
| 423 | for (Q = 0; Q <= P; Q++) {
|
---|
| 424 | if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
|
---|
| 425 | continue; /* skip ereps less than tol */
|
---|
| 426 | }
|
---|
| 427 |
|
---|
| 428 | aoint_computed++;
|
---|
| 429 |
|
---|
| 430 | nq = basis()->shell(Q).nfunction();
|
---|
| 431 |
|
---|
| 432 | tim_enter("erep");
|
---|
| 433 | tbint_->compute_shell(P,Q,R,S);
|
---|
| 434 | tim_exit("erep");
|
---|
| 435 |
|
---|
| 436 | tim_enter("1. quart. tr.");
|
---|
| 437 |
|
---|
| 438 | index = 0;
|
---|
| 439 |
|
---|
| 440 | for (bf1 = 0; bf1 < np; bf1++) {
|
---|
| 441 | p = basis()->shell_to_function(P) + bf1;
|
---|
| 442 |
|
---|
| 443 | for (bf2 = 0; bf2 < nq; bf2++) {
|
---|
| 444 | q = basis()->shell_to_function(Q) + bf2;
|
---|
| 445 | if (q > p) {
|
---|
| 446 | /* if q > p: want to skip the loops over bf3-4 */
|
---|
| 447 | /* and larger bf2 values, so increment bf1 by 1 */
|
---|
| 448 | /* ("break") and adjust the value of index */
|
---|
| 449 | index = (bf1 + 1) * nq * nr * ns;
|
---|
| 450 | break;
|
---|
| 451 | }
|
---|
| 452 |
|
---|
| 453 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
| 454 |
|
---|
| 455 | for (bf4 = 0; bf4 < ns; bf4++,index++) {
|
---|
| 456 |
|
---|
| 457 | if (fabs(intbuf[index]) > 1.0e-15) {
|
---|
| 458 | pqrs = intbuf[index];
|
---|
| 459 |
|
---|
| 460 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
| 461 | iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni];
|
---|
| 462 |
|
---|
| 463 | if (p == q) pqrs *= 0.5;
|
---|
| 464 | col_index = i_offset;
|
---|
| 465 | c_pi = &scf_vector[p][col_index];
|
---|
| 466 | c_qi = &scf_vector[q][col_index];
|
---|
| 467 |
|
---|
| 468 | for (i=ni; i; i--) {
|
---|
| 469 | *iqrs++ += pqrs * *c_pi++;
|
---|
| 470 | *iprs++ += pqrs * *c_qi++;
|
---|
| 471 | }
|
---|
| 472 | }
|
---|
| 473 | } /* exit bf4 loop */
|
---|
| 474 | } /* exit bf3 loop */
|
---|
| 475 | } /* exit bf2 loop */
|
---|
| 476 | } /* exit bf1 loop */
|
---|
| 477 | tim_exit("1. quart. tr.");
|
---|
| 478 | } /* exit Q loop */
|
---|
| 479 | } /* exit P loop */
|
---|
| 480 | tim_exit("PQ loop");
|
---|
| 481 |
|
---|
| 482 | /* begin second and third quarter transformations */
|
---|
| 483 |
|
---|
| 484 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
| 485 | r = r_offset + bf3;
|
---|
| 486 |
|
---|
| 487 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
| 488 | s = basis()->shell_to_function(S) + bf4;
|
---|
| 489 |
|
---|
| 490 | tim_enter("bzerofast trans_int2");
|
---|
| 491 | bzerofast(trans_int2,nvir*ni);
|
---|
| 492 | tim_exit("bzerofast trans_int2");
|
---|
| 493 |
|
---|
| 494 | tim_enter("2. quart. tr.");
|
---|
| 495 |
|
---|
| 496 | for (q = 0; q < nbasis; q++) {
|
---|
| 497 | iars_ptr = trans_int2;
|
---|
| 498 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
| 499 | c_qa = &scf_vector[q][nocc];
|
---|
| 500 |
|
---|
| 501 | for (a = 0; a < nvir; a++) {
|
---|
| 502 |
|
---|
| 503 | for (i=ni; i; i--) {
|
---|
| 504 | *iars_ptr++ += *c_qa * *iqrs++;
|
---|
| 505 | }
|
---|
| 506 |
|
---|
| 507 | iqrs -= ni;
|
---|
| 508 | c_qa++;
|
---|
| 509 | }
|
---|
| 510 | } /* exit q loop */
|
---|
| 511 | tim_exit("2. quart. tr.");
|
---|
| 512 |
|
---|
| 513 | /* begin third quarter transformation */
|
---|
| 514 |
|
---|
| 515 | tim_enter("3. quart. tr.");
|
---|
| 516 |
|
---|
| 517 | for (i=0; i<ni; i++) {
|
---|
| 518 | tmp_index = i*(i+1)/2 + i*i_offset;
|
---|
| 519 |
|
---|
| 520 | for (a=0; a<nvir; a++) {
|
---|
| 521 | iars = trans_int2[a*ni + i];
|
---|
| 522 | c_sj = scf_vector[s];
|
---|
| 523 | iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)];
|
---|
| 524 |
|
---|
| 525 | for (j=0; j<=i+i_offset; j++) {
|
---|
| 526 | *iajr_ptr++ += *c_sj++ * iars;
|
---|
| 527 | }
|
---|
| 528 | }
|
---|
| 529 | } /* exit i loop */
|
---|
| 530 | tim_exit("3. quart. tr.");
|
---|
| 531 |
|
---|
| 532 | } /* exit bf4 loop */
|
---|
| 533 | } /* exit bf3 loop */
|
---|
| 534 |
|
---|
| 535 | } /* exit S loop */
|
---|
| 536 | r_offset += nr;
|
---|
| 537 | } /* exit R loop */
|
---|
| 538 | tim_exit("RS loop");
|
---|
| 539 |
|
---|
| 540 |
|
---|
| 541 | /* begin fourth quarter transformation; *
|
---|
| 542 | * first tansform integrals with only s.o. indices; *
|
---|
| 543 | * these integrals are needed to compute the denominators *
|
---|
| 544 | * in the various terms contributing to the correlation energy *
|
---|
| 545 | * and must all be computed in the first pass; *
|
---|
| 546 | * the integrals are summed into the array socc_sum: *
|
---|
| 547 | * socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc) *
|
---|
| 548 | * (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
|
---|
| 549 | * the individual integrals are not saved here, only the sums are kept */
|
---|
| 550 |
|
---|
| 551 |
|
---|
| 552 | if (pass == 0) {
|
---|
| 553 | tim_enter("4. quart. tr.");
|
---|
| 554 | if (nsocc) bzerofast(socc_sum,nsocc);
|
---|
| 555 | for (isocc=0; isocc<nsocc; isocc++) {
|
---|
| 556 |
|
---|
| 557 | for (index=0; index<nbf[me]; index++) {
|
---|
| 558 | i = 0;
|
---|
| 559 | sum = basis()->shell(myshells[i]).nfunction();
|
---|
| 560 | while (sum <= index) {
|
---|
| 561 | i++;
|
---|
| 562 | sum += basis()->shell(myshells[i]).nfunction();
|
---|
| 563 | }
|
---|
| 564 | sum -= basis()->shell(myshells[i]).nfunction();
|
---|
| 565 | r = basis()->shell_to_function(myshells[i]) + index - sum;
|
---|
| 566 |
|
---|
| 567 | for (asocc=0; asocc<nsocc; asocc++) {
|
---|
| 568 | socc_sum[isocc] += scf_vector[r][nocc+asocc]*
|
---|
| 569 | trans_int3[isocc*(isocc+1)/2 + isocc*i_offset
|
---|
| 570 | + isocc + dim_ij*(asocc + nvir*index)];
|
---|
| 571 | }
|
---|
| 572 | }
|
---|
| 573 | } /* exit isocc loop */
|
---|
| 574 |
|
---|
| 575 | tim_exit("4. quart. tr.");
|
---|
| 576 |
|
---|
| 577 | /* sum socc_sum contributions from each node (only if nsocc > 0 *
|
---|
| 578 | * since gop1 will fail if nsocc = 0) */
|
---|
| 579 | if (nsocc > 0) {
|
---|
| 580 | tim_enter("global sum socc_sum");
|
---|
| 581 | msg_->sum(socc_sum,nsocc,socc_sum_tmp);
|
---|
| 582 | tim_exit("global sum socc_sum");
|
---|
| 583 | }
|
---|
| 584 |
|
---|
| 585 | }
|
---|
| 586 |
|
---|
| 587 | /* now we have all the sums of integrals involving s.o.'s (socc_sum); *
|
---|
| 588 | * begin fourth quarter transformation for all integrals (including *
|
---|
| 589 | * integrals with only s.o. indices); use restriction j <= (i_offset+i) *
|
---|
| 590 | * to save flops */
|
---|
| 591 |
|
---|
| 592 | compute_index = 0;
|
---|
| 593 |
|
---|
| 594 | for (i=0; i<ni; i++) {
|
---|
| 595 |
|
---|
| 596 | for (j=0; j <= (i_offset+i); j++) {
|
---|
| 597 |
|
---|
| 598 | tim_enter("4. quart. tr.");
|
---|
| 599 |
|
---|
| 600 | bzerofast(trans_int4,nvir*nvir);
|
---|
| 601 |
|
---|
| 602 | for (index=0; index<nbf[me]; index++) {
|
---|
| 603 | k = 0;
|
---|
| 604 | sum = basis()->shell(myshells[k]).nfunction();
|
---|
| 605 | while (sum <= index) {
|
---|
| 606 | k++;
|
---|
| 607 | sum += basis()->shell(myshells[k]).nfunction();
|
---|
| 608 | }
|
---|
| 609 | sum -= basis()->shell(myshells[k]).nfunction();
|
---|
| 610 | r = basis()->shell_to_function(myshells[k]) + index - sum;
|
---|
| 611 |
|
---|
| 612 | for (a=0; a<nvir; a++) {
|
---|
| 613 | iajb = &trans_int4[a*nvir];
|
---|
| 614 | iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)];
|
---|
| 615 | c_rb = &scf_vector[r][nocc];
|
---|
| 616 |
|
---|
| 617 | for (b=0; b<nvir; b++) {
|
---|
| 618 | *iajb++ += *c_rb++ * iajr;
|
---|
| 619 | }
|
---|
| 620 | }
|
---|
| 621 | }
|
---|
| 622 |
|
---|
| 623 | tim_exit("4. quart. tr.");
|
---|
| 624 |
|
---|
| 625 | tim_enter("global sum trans_int4");
|
---|
| 626 | msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
|
---|
| 627 | tim_exit("global sum trans_int4");
|
---|
| 628 |
|
---|
| 629 | /* we now have the fully transformed integrals (ia|jb) *
|
---|
| 630 | * for one i, one j (j <= i_offset+i), and all a and b; *
|
---|
| 631 | * compute contribution to the OPT1 and OPT2 correlation *
|
---|
| 632 | * energies; use restriction b <= a to save flops */
|
---|
| 633 |
|
---|
| 634 | tim_enter("compute ecorr");
|
---|
| 635 |
|
---|
| 636 | for (a=0; a<nvir; a++) {
|
---|
| 637 | for (b=0; b<=a; b++) {
|
---|
| 638 | compute_index++;
|
---|
| 639 | if (compute_index%nproc != me) continue;
|
---|
| 640 |
|
---|
| 641 | docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
|
---|
| 642 | + (j >= nsocc && j < nocc);
|
---|
| 643 | socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
|
---|
| 644 | vir_index = (a >= nsocc) + (b >= nsocc);
|
---|
| 645 |
|
---|
| 646 | if (socc_index >= 3) continue; /* skip to next b value */
|
---|
| 647 |
|
---|
| 648 | delta_ijab = evals_open[i_offset+i] + evals_open[j]
|
---|
| 649 | - evals_open[nocc+a] - evals_open[nocc+b];
|
---|
| 650 |
|
---|
| 651 | /* determine integral type and compute energy contribution */
|
---|
| 652 | if (docc_index == 2 && vir_index == 2) {
|
---|
| 653 | if (i_offset+i == j && a == b) {
|
---|
| 654 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 655 | ecorr_opt2 += contrib1/delta_ijab;
|
---|
| 656 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 657 | }
|
---|
| 658 | else if (i_offset+i == j || a == b) {
|
---|
| 659 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 660 | ecorr_opt2 += 2*contrib1/delta_ijab;
|
---|
| 661 | ecorr_opt1 += 2*contrib1/delta_ijab;
|
---|
| 662 | }
|
---|
| 663 | else {
|
---|
| 664 | contrib1 = trans_int4[a*nvir + b];
|
---|
| 665 | contrib2 = trans_int4[b*nvir + a];
|
---|
| 666 | ecorr_opt2 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 667 | - contrib1*contrib2)/delta_ijab;
|
---|
| 668 | ecorr_opt1 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 669 | - contrib1*contrib2)/delta_ijab;
|
---|
| 670 | }
|
---|
| 671 | }
|
---|
| 672 | else if (docc_index == 2 && socc_index == 2) {
|
---|
| 673 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
| 674 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
| 675 | ecorr_opt2 += contrib1/
|
---|
| 676 | (delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
|
---|
| 677 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 678 | }
|
---|
| 679 | else if (socc_index == 2 && vir_index == 2) {
|
---|
| 680 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
| 681 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
| 682 | ecorr_opt2 += contrib1/
|
---|
| 683 | (delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
|
---|
| 684 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 685 | }
|
---|
| 686 | else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
|
---|
| 687 | if (i_offset+i == j) {
|
---|
| 688 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 689 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
|
---|
| 690 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 691 | }
|
---|
| 692 | else {
|
---|
| 693 | contrib1 = trans_int4[a*nvir + b];
|
---|
| 694 | contrib2 = trans_int4[b*nvir + a];
|
---|
| 695 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 696 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[b]);
|
---|
| 697 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 698 | - contrib1*contrib2)/delta_ijab;
|
---|
| 699 | }
|
---|
| 700 | }
|
---|
| 701 | else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
|
---|
| 702 | contrib1 = trans_int4[b*nvir+a]*trans_int4[b*nvir+a];
|
---|
| 703 | if (j == b) {
|
---|
| 704 | /* to compute the energy contribution from an integral of the *
|
---|
| 705 | * type (is1|s1a) (i=d.o., s1=s.o., a=unocc.), we need the *
|
---|
| 706 | * (is|sa) integrals for all s=s.o.; these integrals are *
|
---|
| 707 | * therefore stored here in the array mo_int_do_so_vir, and *
|
---|
| 708 | * the energy contribution is computed after exiting the loop *
|
---|
| 709 | * over i-batches (pass) */
|
---|
| 710 | mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
|
---|
| 711 | (i_offset+i-nsocc + ndocc*b)] =
|
---|
| 712 | trans_int4[b*nvir + a];
|
---|
| 713 | ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
|
---|
| 714 | ecorr_opt1 += 1.5*contrib1/delta_ijab;
|
---|
| 715 | ecorr_zapt2_contrib += contrib1/
|
---|
| 716 | (delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
|
---|
| 717 | + 0.5*contrib1/delta_ijab;
|
---|
| 718 | }
|
---|
| 719 | else {
|
---|
| 720 | ecorr_opt2 += contrib1/
|
---|
| 721 | (delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
|
---|
| 722 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 723 | }
|
---|
| 724 | }
|
---|
| 725 | else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
|
---|
| 726 | if (a == b) {
|
---|
| 727 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 728 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
|
---|
| 729 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 730 | }
|
---|
| 731 | else {
|
---|
| 732 | contrib1 = trans_int4[a*nvir + b];
|
---|
| 733 | contrib2 = trans_int4[b*nvir + a];
|
---|
| 734 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 735 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[j]);
|
---|
| 736 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 737 | - contrib1*contrib2)/delta_ijab;
|
---|
| 738 | }
|
---|
| 739 | }
|
---|
| 740 | } /* exit b loop */
|
---|
| 741 | } /* exit a loop */
|
---|
| 742 | tim_exit("compute ecorr");
|
---|
| 743 | } /* exit j loop */
|
---|
| 744 | } /* exit i loop */
|
---|
| 745 |
|
---|
| 746 | if (nsocc == 0 && npass > 1 && pass < npass - 1) {
|
---|
| 747 | double passe = ecorr_opt2;
|
---|
| 748 | msg_->sum(passe);
|
---|
| 749 | ExEnv::out0() << indent
|
---|
| 750 | << "Partial correlation energy for pass " << pass << ":" << endl;
|
---|
| 751 | ExEnv::out0() << indent
|
---|
| 752 | << scprintf(" restart_ecorr = %14.10f", passe)
|
---|
| 753 | << endl;
|
---|
| 754 | ExEnv::out0() << indent
|
---|
| 755 | << scprintf(" restart_orbital_v2 = %d", ((pass+1) * ni))
|
---|
| 756 | << endl;
|
---|
| 757 | }
|
---|
| 758 | } /* exit loop over i-batches (pass) */
|
---|
| 759 |
|
---|
| 760 |
|
---|
| 761 |
|
---|
| 762 | /* compute contribution from excitations of the type is1 -> s1a where *
|
---|
| 763 | * i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
|
---|
| 764 | * where i and a have the same spin, contribute to this term; *
|
---|
| 765 | * (Brillouin's theorem not satisfied for ROHF wave functions); *
|
---|
| 766 | * do this only if nsocc > 0 since gop1 will fail otherwise */
|
---|
| 767 |
|
---|
| 768 | tim_enter("compute ecorr");
|
---|
| 769 |
|
---|
| 770 | if (nsocc > 0) {
|
---|
| 771 | tim_enter("global sum mo_int_do_so_vir");
|
---|
| 772 | msg_->sum(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc),mo_int_tmp);
|
---|
| 773 | tim_exit("global sum mo_int_do_so_vir");
|
---|
| 774 | }
|
---|
| 775 |
|
---|
| 776 | /* add extra contribution for triplet and higher spin multiplicities *
|
---|
| 777 | * contribution = sum over s1 and s2<s1 of (is1|s1a)*(is2|s2a)/delta */
|
---|
| 778 |
|
---|
| 779 | if (me == 0 && nsocc) {
|
---|
| 780 | for (i=0; i<ndocc; i++) {
|
---|
| 781 |
|
---|
| 782 | for (a=0; a<nvir-nsocc; a++) {
|
---|
| 783 | delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
|
---|
| 784 |
|
---|
| 785 | for (s1=0; s1<nsocc; s1++) {
|
---|
| 786 |
|
---|
| 787 | for (s2=0; s2<s1; s2++) {
|
---|
| 788 | contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s1)]*
|
---|
| 789 | mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s2)]/delta;
|
---|
| 790 | ecorr_opt2 += contrib1;
|
---|
| 791 | ecorr_opt1 += contrib1;
|
---|
| 792 | }
|
---|
| 793 | }
|
---|
| 794 | } /* exit a loop */
|
---|
| 795 | } /* exit i loop */
|
---|
| 796 | }
|
---|
| 797 |
|
---|
| 798 | tim_exit("compute ecorr");
|
---|
| 799 |
|
---|
| 800 | ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
|
---|
| 801 | ecorr_opt2 += ecorr_opt2_contrib;
|
---|
| 802 | msg_->sum(ecorr_opt1);
|
---|
| 803 | msg_->sum(ecorr_opt2);
|
---|
| 804 | msg_->sum(ecorr_zapt2);
|
---|
| 805 | msg_->sum(aoint_computed);
|
---|
| 806 |
|
---|
| 807 | escf = reference_->energy();
|
---|
| 808 | hf_energy_ = escf;
|
---|
| 809 |
|
---|
| 810 | if (me == 0) {
|
---|
| 811 | eopt2 = escf + ecorr_opt2;
|
---|
| 812 | eopt1 = escf + ecorr_opt1;
|
---|
| 813 | ezapt2 = escf + ecorr_zapt2;
|
---|
| 814 |
|
---|
| 815 | /* print out various energies etc.*/
|
---|
| 816 |
|
---|
| 817 | ExEnv::out0() << indent
|
---|
| 818 | << "Number of shell quartets for which AO integrals would" << endl
|
---|
| 819 | << indent << "have been computed without bounds checking: "
|
---|
| 820 | << npass*nshell*nshell*(nshell+1)*(nshell+1)/2 << endl;
|
---|
| 821 | ExEnv::out0() << indent
|
---|
| 822 | << "Number of shell quartets for which AO integrals" << endl
|
---|
| 823 | << indent << "were computed: " << aoint_computed << endl;
|
---|
| 824 |
|
---|
| 825 | ExEnv::out0() << indent
|
---|
| 826 | << scprintf("ROHF energy [au]: %17.12lf\n", escf);
|
---|
| 827 | ExEnv::out0() << indent
|
---|
| 828 | << scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
|
---|
| 829 | ExEnv::out0() << indent
|
---|
| 830 | << scprintf("OPT2 second order correction [au]: %17.12lf\n",
|
---|
| 831 | ecorr_opt2);
|
---|
| 832 | ExEnv::out0() << indent
|
---|
| 833 | << scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
|
---|
| 834 | ExEnv::out0() << indent
|
---|
| 835 | << scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
|
---|
| 836 | ecorr_zapt2);
|
---|
| 837 | ExEnv::out0() << indent
|
---|
| 838 | << scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
|
---|
| 839 | ExEnv::out0().flush();
|
---|
| 840 | }
|
---|
| 841 |
|
---|
| 842 | msg_->bcast(eopt1);
|
---|
| 843 | msg_->bcast(eopt2);
|
---|
| 844 | msg_->bcast(ezapt2);
|
---|
| 845 |
|
---|
| 846 | if (method_ && !strcmp(method_,"opt1")) {
|
---|
| 847 | set_energy(eopt1);
|
---|
| 848 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 849 | *ref_to_mp2_acc);
|
---|
| 850 | }
|
---|
| 851 | else if (method_ && !strcmp(method_,"opt2")) {
|
---|
| 852 | set_energy(eopt2);
|
---|
| 853 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 854 | *ref_to_mp2_acc);
|
---|
| 855 | }
|
---|
| 856 | else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
|
---|
| 857 | set_energy(ezapt2);
|
---|
| 858 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 859 | *ref_to_mp2_acc);
|
---|
| 860 | }
|
---|
| 861 | else {
|
---|
| 862 | if (!(!method_ || !strcmp(method_,"zapt"))) {
|
---|
| 863 | ExEnv::out0() << indent
|
---|
| 864 | << "MBPT2: bad method: " << method_ << ", using zapt" << endl;
|
---|
| 865 | }
|
---|
| 866 | set_energy(ezapt2);
|
---|
| 867 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 868 | *ref_to_mp2_acc);
|
---|
| 869 | }
|
---|
| 870 |
|
---|
| 871 | free(trans_int1);
|
---|
| 872 | free(trans_int2);
|
---|
| 873 | free(trans_int3);
|
---|
| 874 | free(trans_int4);
|
---|
| 875 | free(trans_int4_tmp);
|
---|
| 876 | if (nsocc) free(socc_sum);
|
---|
| 877 | if (nsocc) free(socc_sum_tmp);
|
---|
| 878 | if (nsocc) free(mo_int_do_so_vir);
|
---|
| 879 | if (nsocc) free(mo_int_tmp);
|
---|
| 880 | free(evals_open);
|
---|
| 881 | free(myshells);
|
---|
| 882 | free(shellsize);
|
---|
| 883 | free(sorted_shells);
|
---|
| 884 | free(nbf);
|
---|
| 885 | free(proc);
|
---|
| 886 |
|
---|
| 887 | delete[] scf_vector;
|
---|
| 888 | delete[] scf_vector_dat;
|
---|
| 889 |
|
---|
| 890 | }
|
---|
| 891 |
|
---|
| 892 |
|
---|
| 893 | /* Do a quick sort (larger -> smaller) of the integer data in item *
|
---|
| 894 | * by the integer indices in index; *
|
---|
| 895 | * data in item remain unchanged */
|
---|
| 896 |
|
---|
| 897 | static void
|
---|
| 898 | iquicksort(int *item,int *index,int n)
|
---|
| 899 | {
|
---|
| 900 | int i;
|
---|
| 901 | if (n<=0) return;
|
---|
| 902 | for (i=0; i<n; i++) {
|
---|
| 903 | index[i] = i;
|
---|
| 904 | }
|
---|
| 905 | iqs(item,index,0,n-1);
|
---|
| 906 | }
|
---|
| 907 |
|
---|
| 908 | static void
|
---|
| 909 | iqs(int *item,int *index,int left,int right)
|
---|
| 910 | {
|
---|
| 911 | register int i,j;
|
---|
| 912 | int x,y;
|
---|
| 913 |
|
---|
| 914 | i=left; j=right;
|
---|
| 915 | x=item[index[(left+right)/2]];
|
---|
| 916 |
|
---|
| 917 | do {
|
---|
| 918 | while(item[index[i]]>x && i<right) i++;
|
---|
| 919 | while(x>item[index[j]] && j>left) j--;
|
---|
| 920 |
|
---|
| 921 | if (i<=j) {
|
---|
| 922 | if (item[index[i]] != item[index[j]]) {
|
---|
| 923 | y=index[i];
|
---|
| 924 | index[i]=index[j];
|
---|
| 925 | index[j]=y;
|
---|
| 926 | }
|
---|
| 927 | i++; j--;
|
---|
| 928 | }
|
---|
| 929 | } while(i<=j);
|
---|
| 930 |
|
---|
| 931 | if (left<j) iqs(item,index,left,j);
|
---|
| 932 | if (i<right) iqs(item,index,i,right);
|
---|
| 933 | }
|
---|
| 934 |
|
---|
| 935 | distsize_t
|
---|
| 936 | MBPT2::compute_v2_memory(int ni,
|
---|
| 937 | int nfuncmax, int nbfme, int nshell,
|
---|
| 938 | int ndocc, int nsocc, int nvir, int nproc
|
---|
| 939 | )
|
---|
| 940 | {
|
---|
| 941 | distsize_t result = 0;
|
---|
| 942 | distsize_t dsize = sizeof(double);
|
---|
| 943 | distsize_t isize = sizeof(int);
|
---|
| 944 | int dim_ij = nocc*ni - (ni*(ni - 1))/2;
|
---|
| 945 |
|
---|
| 946 | result += nfuncmax*nfuncmax*(distsize_t)nbasis*(distsize_t)ni*dsize;
|
---|
| 947 | result += nvir*ni*dsize;
|
---|
| 948 | result += nbfme*nvir*(distsize_t)dim_ij*dsize;
|
---|
| 949 | result += nvir*nvir*dsize;
|
---|
| 950 | result += nvir*nvir*dsize;
|
---|
| 951 | result += nsocc*dsize;
|
---|
| 952 | result += nsocc*dsize;
|
---|
| 953 | result += ndocc*nsocc*(distsize_t)(nvir-nsocc)*dsize;
|
---|
| 954 | result += ndocc*nsocc*(distsize_t)(nvir-nsocc)*dsize;
|
---|
| 955 | result += (noso+nsocc-nfzc-nfzv)*dsize;
|
---|
| 956 | result += nshell*isize;
|
---|
| 957 | result += nshell*isize;
|
---|
| 958 | result += nproc*isize;
|
---|
| 959 | result += nshell*isize;
|
---|
| 960 |
|
---|
| 961 | return result;
|
---|
| 962 | }
|
---|
| 963 |
|
---|
| 964 | ////////////////////////////////////////////////////////////////////////////
|
---|
| 965 |
|
---|
| 966 | // Local Variables:
|
---|
| 967 | // mode: c++
|
---|
| 968 | // c-file-style: "CLJ-CONDENSED"
|
---|
| 969 | // End:
|
---|