[0b990d] | 1 | //
|
---|
| 2 | // hsosv2lb.cc
|
---|
| 3 | //
|
---|
| 4 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
| 5 | //
|
---|
| 6 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
| 7 | // Maintainer: LPS
|
---|
| 8 | //
|
---|
| 9 | // This file is part of the SC Toolkit.
|
---|
| 10 | //
|
---|
| 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
| 12 | // it under the terms of the GNU Library General Public License as published by
|
---|
| 13 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
| 14 | // any later version.
|
---|
| 15 | //
|
---|
| 16 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
| 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 19 | // GNU Library General Public License for more details.
|
---|
| 20 | //
|
---|
| 21 | // You should have received a copy of the GNU Library General Public License
|
---|
| 22 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
| 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 24 | //
|
---|
| 25 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
| 26 | //
|
---|
| 27 |
|
---|
| 28 | #include <math.h>
|
---|
| 29 |
|
---|
| 30 | #include <util/misc/timer.h>
|
---|
| 31 | #include <util/misc/formio.h>
|
---|
| 32 | #include <chemistry/molecule/molecule.h>
|
---|
| 33 | #include <chemistry/qc/mbpt/mbpt.h>
|
---|
| 34 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
| 35 |
|
---|
| 36 | using namespace std;
|
---|
| 37 | using namespace sc;
|
---|
| 38 |
|
---|
| 39 | static void iqs(int *item,int *index,int left,int right);
|
---|
| 40 | static void iquicksort(int *item,int *index,int n);
|
---|
| 41 | static void findprocminmax(int *nbf, int nproc,
|
---|
| 42 | int *procmin, int *procmax, int *minbf, int *maxbf);
|
---|
| 43 | static void findshellmax(int *myshellsizes, int nRshell, int *shellmax,
|
---|
| 44 | int *shellmaxindex);
|
---|
| 45 | static void expandintarray(int *&a, int dim);
|
---|
| 46 |
|
---|
| 47 | void
|
---|
| 48 | MBPT2::compute_hsos_v2_lb()
|
---|
| 49 | {
|
---|
| 50 | int i, j, k, l;
|
---|
| 51 | int s1, s2;
|
---|
| 52 | int a, b;
|
---|
| 53 | int isocc, asocc; // indices running over singly occupied orbitals
|
---|
| 54 | int nfuncmax = basis()->max_nfunction_in_shell();
|
---|
| 55 | int nvir;
|
---|
| 56 | int nshell;
|
---|
| 57 | int shellmax;
|
---|
| 58 | int shellmaxindex;
|
---|
| 59 | int nocc=0,ndocc=0,nsocc=0;
|
---|
| 60 | int i_offset;
|
---|
| 61 | int npass, pass;
|
---|
| 62 | int ni;
|
---|
| 63 | int np, nq, nr, ns;
|
---|
| 64 | int P, Q, R, S;
|
---|
| 65 | int p, q, r, s;
|
---|
| 66 | int bf1, bf2, bf3, bf4;
|
---|
| 67 | int bf3_offset;
|
---|
| 68 | int nbfmoved;
|
---|
| 69 | int nbfav; // average number of r basis functions per node
|
---|
| 70 | int minbf, maxbf; // max/min number of (r) basis functions on a node
|
---|
| 71 | int index;
|
---|
| 72 | int compute_index;
|
---|
| 73 | int col_index;
|
---|
| 74 | int tmp_index;
|
---|
| 75 | int dim_ij;
|
---|
| 76 | int docc_index, socc_index, vir_index;
|
---|
| 77 | int me;
|
---|
| 78 | int nproc;
|
---|
| 79 | int procmin, procmax; // processor with most/fewest basis functions
|
---|
| 80 | int rest;
|
---|
| 81 | int r_offset;
|
---|
| 82 | int min;
|
---|
| 83 | int iproc;
|
---|
| 84 | int nRshell;
|
---|
| 85 | int imyshell;
|
---|
| 86 | int *myshells; // the R indices processed by node me
|
---|
| 87 | int *myshellsizes; // sizes of the shells (after split) on node me
|
---|
| 88 | int *split_info; // on each node: offset for each shell; -1 if shell not split
|
---|
| 89 | int *shellsize; // size of each shell
|
---|
| 90 | int *sorted_shells; // sorted shell indices: large shells->small shells
|
---|
| 91 | int *nbf; // number of basis functions processed by each node
|
---|
| 92 | int *proc; // element k: processor which will process shell k
|
---|
| 93 | int aoint_computed = 0;
|
---|
| 94 | double A, B, C, ni_top, max, ni_double; // variables used to compute ni
|
---|
| 95 | double *evals_open; // reordered scf eigenvalues
|
---|
| 96 | const double *intbuf; // 2-electron AO integral buffer
|
---|
| 97 | double *trans_int1; // partially transformed integrals
|
---|
| 98 | double *trans_int2; // partially transformed integrals
|
---|
| 99 | double *trans_int3; // partially transformed integrals
|
---|
| 100 | double *trans_int4; // fully transformed integrals
|
---|
| 101 | double *trans_int4_tmp; // scratch array
|
---|
| 102 | double *mo_int_do_so_vir=0;//mo integral (is|sa); i:d.o.,s:s.o.,a:vir
|
---|
| 103 | double *mo_int_tmp=0; // scratch array used in global summations
|
---|
| 104 | double *socc_sum=0; // sum of 2-el integrals involving only s.o.'s
|
---|
| 105 | double *socc_sum_tmp=0;// scratch array
|
---|
| 106 | double *iqrs, *iprs;
|
---|
| 107 | double *iars_ptr;
|
---|
| 108 | double iars;
|
---|
| 109 | double iajr;
|
---|
| 110 | double *iajr_ptr;
|
---|
| 111 | double *iajb;
|
---|
| 112 | double pqrs;
|
---|
| 113 | double *c_qa;
|
---|
| 114 | double *c_rb, *c_pi, *c_qi, *c_sj;
|
---|
| 115 | double delta_ijab;
|
---|
| 116 | double delta;
|
---|
| 117 | double contrib1, contrib2;
|
---|
| 118 | double ecorr_opt2=0,ecorr_opt1=0;
|
---|
| 119 | double ecorr_zapt2;
|
---|
| 120 | double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
|
---|
| 121 | double escf;
|
---|
| 122 | double eopt2,eopt1,ezapt2;
|
---|
| 123 | double tol; // log2 of the erep tolerance (erep < 2^tol => discard)
|
---|
| 124 |
|
---|
| 125 | me = msg_->me();
|
---|
| 126 |
|
---|
| 127 | ExEnv::out0() << indent << "Just entered OPT2 program (opt2v2lb)" << endl;
|
---|
| 128 |
|
---|
| 129 | tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
|
---|
| 130 |
|
---|
| 131 | nproc = msg_->n();
|
---|
| 132 | ExEnv::out0() << indent << "nproc = " << nproc << endl;
|
---|
| 133 |
|
---|
| 134 | ndocc = nsocc = 0;
|
---|
| 135 | const double epsilon = 1.0e-4;
|
---|
| 136 | for (i=0; i<oso_dimension()->n(); i++) {
|
---|
| 137 | if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
|
---|
| 138 | else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
|
---|
| 139 | }
|
---|
| 140 |
|
---|
| 141 | // Do a few preliminary tests to make sure the desired calculation
|
---|
| 142 | // can be done (and appears to be meaningful!)
|
---|
| 143 |
|
---|
| 144 | if (ndocc == 0 && nsocc == 0) {
|
---|
| 145 | ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
|
---|
| 146 | abort();
|
---|
| 147 | }
|
---|
| 148 |
|
---|
| 149 | if (nfzc > ndocc) {
|
---|
| 150 | ExEnv::err0()
|
---|
| 151 | << "The number of frozen core orbitals exceeds the number" << endl
|
---|
| 152 | << "of doubly occupied orbitals; program exiting" << endl;
|
---|
| 153 | abort();
|
---|
| 154 | }
|
---|
| 155 |
|
---|
| 156 | if (nfzv > noso - ndocc - nsocc) {
|
---|
| 157 | ExEnv::err0()
|
---|
| 158 | << "The number of frozen virtual orbitals exceeds the number" << endl
|
---|
| 159 | << "of unoccupied orbitals; program exiting" << endl;
|
---|
| 160 | abort();
|
---|
| 161 | }
|
---|
| 162 |
|
---|
| 163 | ndocc = ndocc - nfzc;
|
---|
| 164 | // nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb.
|
---|
| 165 | nvir = noso - ndocc - nfzc - nfzv;
|
---|
| 166 | // nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb.
|
---|
| 167 | nocc = ndocc + nsocc;
|
---|
| 168 | nshell = basis()->nshell();
|
---|
| 169 |
|
---|
| 170 | // Allocate storage for some arrays used for keeping track of which R
|
---|
| 171 | // indices are processed by each node
|
---|
| 172 | shellsize = (int*) malloc(nshell*sizeof(int));
|
---|
| 173 | sorted_shells = (int*) malloc(nshell*sizeof(int));
|
---|
| 174 | nbf = (int*) malloc(nproc*sizeof(int));
|
---|
| 175 | proc = (int*) malloc(nshell*sizeof(int));
|
---|
| 176 |
|
---|
| 177 |
|
---|
| 178 | ///////////////////////////////////////////////////////
|
---|
| 179 | // Begin distributing R shells between nodes so all
|
---|
| 180 | // nodes get ca. the same number of r basis functions
|
---|
| 181 | ///////////////////////////////////////////////////////
|
---|
| 182 |
|
---|
| 183 | // Compute the size of each shell
|
---|
| 184 | for (i=0; i<nshell; i++) {
|
---|
| 185 | shellsize[i] = basis()->shell(i).nfunction();
|
---|
| 186 | }
|
---|
| 187 |
|
---|
| 188 | // Do an index sort (large -> small) of shellsize to form sorted_shells
|
---|
| 189 | iquicksort(shellsize,sorted_shells,nshell);
|
---|
| 190 |
|
---|
| 191 | // Initialize nbf
|
---|
| 192 | for (i=0; i<nproc; i++) nbf[i] = 0;
|
---|
| 193 |
|
---|
| 194 | for (i=0; i<nshell; i++) {
|
---|
| 195 | min = nbf[0];
|
---|
| 196 | iproc = 0;
|
---|
| 197 | for (j=1; j<nproc; j++) {
|
---|
| 198 | if (nbf[j] < min) {
|
---|
| 199 | iproc = j;
|
---|
| 200 | min = nbf[j];
|
---|
| 201 | }
|
---|
| 202 | }
|
---|
| 203 | proc[sorted_shells[i]] = iproc;
|
---|
| 204 | nbf[iproc] += shellsize[sorted_shells[i]];
|
---|
| 205 | }
|
---|
| 206 | if (me == 0) {
|
---|
| 207 | ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
|
---|
| 208 | for (i=0; i<nproc; i++) {
|
---|
| 209 | if (i%12 == 0) ExEnv::out0() << indent;
|
---|
| 210 | ExEnv::out0() << scprintf(" %4i",nbf[i]);
|
---|
| 211 | if ((i+1)%12 == 0) ExEnv::out0() << endl;
|
---|
| 212 | }
|
---|
| 213 | ExEnv::out0() << endl;
|
---|
| 214 | }
|
---|
| 215 |
|
---|
| 216 | // Determine which shells are to be processed by node me
|
---|
| 217 | nRshell = 0;
|
---|
| 218 | for (i=0; i<nshell; i++) {
|
---|
| 219 | if (proc[i] == me) nRshell++;
|
---|
| 220 | }
|
---|
| 221 | myshells = (int*) malloc(nRshell*sizeof(int));
|
---|
| 222 | imyshell = 0;
|
---|
| 223 | for (i=0; i<nshell; i++) {
|
---|
| 224 | if (proc[i] == me) {
|
---|
| 225 | myshells[imyshell] = i;
|
---|
| 226 | imyshell++;
|
---|
| 227 | }
|
---|
| 228 | }
|
---|
| 229 |
|
---|
| 230 | /////////////////////////////////////////////////////////////
|
---|
| 231 | // End of preliminary distribution of R shells between nodes
|
---|
| 232 | /////////////////////////////////////////////////////////////
|
---|
| 233 |
|
---|
| 234 | // Compute the average number of basis functions per node
|
---|
| 235 | nbfav = nbasis/nproc;
|
---|
| 236 | if (nbasis%nproc) nbfav++;
|
---|
| 237 |
|
---|
| 238 | myshellsizes = (int*) malloc(nRshell*sizeof(int));
|
---|
| 239 | split_info = (int*) malloc(nRshell*sizeof(int));
|
---|
| 240 | for (j=0; j<nRshell; j++) {
|
---|
| 241 | myshellsizes[j] = basis()->shell(myshells[j]).nfunction();
|
---|
| 242 | split_info[j] = -1;
|
---|
| 243 | }
|
---|
| 244 |
|
---|
| 245 | // Find the processor with the most/fewest basis functions
|
---|
| 246 | findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
|
---|
| 247 | if (maxbf > nbfav) {
|
---|
| 248 | ExEnv::out0() << indent << "Redistributing basis functions" << endl;
|
---|
| 249 | }
|
---|
| 250 |
|
---|
| 251 | while (maxbf > nbfav) {
|
---|
| 252 | msg_->sync();
|
---|
| 253 | if (me == procmax) {
|
---|
| 254 |
|
---|
| 255 | findshellmax(myshellsizes, nRshell, &shellmax, &shellmaxindex);
|
---|
| 256 | nbfmoved = 0;
|
---|
| 257 | while (maxbf>nbfav && minbf<nbfav && shellmax>1) {
|
---|
| 258 | shellmax--;
|
---|
| 259 | nbfmoved++;
|
---|
| 260 | maxbf--;
|
---|
| 261 | minbf++;
|
---|
| 262 | }
|
---|
| 263 | myshellsizes[shellmaxindex] = shellmax;
|
---|
| 264 | if (split_info[shellmaxindex] == -1) split_info[shellmaxindex] = 0;
|
---|
| 265 | shellmax += nbfmoved;
|
---|
| 266 |
|
---|
| 267 | // Send nbfmoved from procmax to all other nodes
|
---|
| 268 | msg_->bcast(nbfmoved,procmax);
|
---|
| 269 |
|
---|
| 270 | // Send variables to node procmin
|
---|
| 271 | msg_->send(procmin,&myshells[shellmaxindex],1);
|
---|
| 272 | msg_->send(procmin,&shellmax,1);
|
---|
| 273 |
|
---|
| 274 | }
|
---|
| 275 | else {
|
---|
| 276 | // Receive nbfmoved from procmax
|
---|
| 277 | msg_->bcast(nbfmoved,procmax);
|
---|
| 278 | }
|
---|
| 279 |
|
---|
| 280 | nbf[procmax] -= nbfmoved;
|
---|
| 281 |
|
---|
| 282 | if (me == procmin) {
|
---|
| 283 | expandintarray(myshellsizes,nRshell);
|
---|
| 284 | expandintarray(myshells,nRshell);
|
---|
| 285 | expandintarray(split_info,nRshell);
|
---|
| 286 | nRshell++;
|
---|
| 287 | myshellsizes[nRshell-1] = nbfmoved;
|
---|
| 288 | msg_->recv(procmax,&myshells[nRshell-1],1);
|
---|
| 289 | msg_->recv(procmax,&split_info[nRshell-1],1);
|
---|
| 290 | split_info[nRshell-1] -= myshellsizes[nRshell-1];
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | nbf[procmin] += nbfmoved;
|
---|
| 294 | msg_->sync();
|
---|
| 295 | findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
|
---|
| 296 |
|
---|
| 297 | }
|
---|
| 298 |
|
---|
| 299 | if (me == 0) {
|
---|
| 300 | ExEnv::out0() << indent
|
---|
| 301 | << "New distribution of basis functions between nodes:" << endl;
|
---|
| 302 | for (i=0; i<nproc; i++) {
|
---|
| 303 | if (i%12 == 0) ExEnv::out0() << indent;
|
---|
| 304 | ExEnv::out0() << scprintf(" %4i",nbf[i]);
|
---|
| 305 | if ((i+1)%12 == 0) ExEnv::out0() << endl;
|
---|
| 306 | }
|
---|
| 307 | ExEnv::out0() << endl;
|
---|
| 308 | }
|
---|
| 309 |
|
---|
| 310 |
|
---|
| 311 | //////////////////////////////////////////////////////////
|
---|
| 312 | // End of distribution of R shells and r basis functions
|
---|
| 313 | //////////////////////////////////////////////////////////
|
---|
| 314 |
|
---|
| 315 | // Compute batch size ni for opt2 loops;
|
---|
| 316 | // need to store the following arrays of type double : trans_int1-4,
|
---|
| 317 | // trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp,
|
---|
| 318 | // mo_int_do_so_vir, mo_int_tmp,
|
---|
| 319 | // and the following arrays of type int: myshells, shellsize,
|
---|
| 320 | // sorted_shells, nbf, and proc
|
---|
| 321 | A = -0.5*sizeof(double)*nbf[me]*nvir;
|
---|
| 322 | B = sizeof(double)*(nfuncmax*nfuncmax*nbasis + nvir + nocc*nbf[me]*nvir
|
---|
| 323 | + nbf[me]*nvir*0.5);
|
---|
| 324 | C = sizeof(double)*(2*nvir*nvir + (nbasis+1)*(nvir+nocc) + 2*nsocc
|
---|
| 325 | + 2*ndocc*nsocc*(nvir-nsocc))
|
---|
| 326 | + sizeof(int)*(3*nshell + nproc + nRshell);
|
---|
| 327 | ni_top = -B/(2*A);
|
---|
| 328 | max = A*ni_top*ni_top + B*ni_top +C;
|
---|
| 329 | if (max <= mem_alloc) {
|
---|
| 330 | ni = nocc;
|
---|
| 331 | }
|
---|
| 332 | else {
|
---|
| 333 | ni_double = (-B + sqrt((double)(B*B - 4*A*(C-mem_alloc))))/(2*A);
|
---|
| 334 | ni = (int) ni_double;
|
---|
| 335 | if (ni > nocc) ni = nocc;
|
---|
| 336 | max = mem_alloc;
|
---|
| 337 | }
|
---|
| 338 |
|
---|
| 339 | size_t mem_remaining = mem_alloc - (size_t)max;
|
---|
| 340 |
|
---|
| 341 | // Set ni equal to the smallest batch size for any node
|
---|
| 342 | msg_->min(ni);
|
---|
| 343 | msg_->bcast(ni);
|
---|
| 344 |
|
---|
| 345 | if (ni < nsocc) {
|
---|
| 346 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
| 347 | abort();
|
---|
| 348 | }
|
---|
| 349 |
|
---|
| 350 | if (ni < 1) { // this applies only to a closed shell case
|
---|
| 351 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
| 352 | abort();
|
---|
| 353 | }
|
---|
| 354 |
|
---|
| 355 | ExEnv::out0() << indent << "Computed batchsize: " << ni << endl;
|
---|
| 356 |
|
---|
| 357 | if (nocc == ni) {
|
---|
| 358 | npass = 1;
|
---|
| 359 | rest = 0;
|
---|
| 360 | }
|
---|
| 361 | else {
|
---|
| 362 | rest = nocc%ni;
|
---|
| 363 | npass = (nocc - rest)/ni + 1;
|
---|
| 364 | if (rest == 0) npass--;
|
---|
| 365 | }
|
---|
| 366 |
|
---|
| 367 | if (me == 0) {
|
---|
| 368 | ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
|
---|
| 369 | " ndocc nsocc nvir nfzc nfzv" << endl;
|
---|
| 370 | ExEnv::out0() << indent
|
---|
| 371 | << scprintf(" %-4i %-3i %-5i %-4i %-3i"
|
---|
| 372 | " %-3i %-3i %-3i %-3i %-3i\n",
|
---|
| 373 | npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
|
---|
| 374 | ExEnv::out0() << indent
|
---|
| 375 | << scprintf("Using %i bytes of memory",mem_alloc) << endl;
|
---|
| 376 | }
|
---|
| 377 |
|
---|
| 378 | //////////////////////
|
---|
| 379 | // Test that ni is OK
|
---|
| 380 | //////////////////////
|
---|
| 381 | if (me == 0) {
|
---|
| 382 | ExEnv::out0() << indent
|
---|
| 383 | << scprintf("Memory allocated: %i", mem_alloc) << endl;
|
---|
| 384 | ExEnv::out0() << indent
|
---|
| 385 | << scprintf("Memory used : %lf", A*ni*ni+B*ni+C) << endl;
|
---|
| 386 | if (A*ni*ni + B*ni +C > mem_alloc) {
|
---|
| 387 | ExEnv::err0() << "Problems with memory allocation: "
|
---|
| 388 | << "Using more memory than allocated" << endl;
|
---|
| 389 | abort();
|
---|
| 390 | }
|
---|
| 391 | }
|
---|
| 392 |
|
---|
| 393 | //////////////////////////////////////////////////////////////////
|
---|
| 394 | // The scf vector might be distributed between the nodes,
|
---|
| 395 | // but for OPT2 each node needs its own copy of the vector;
|
---|
| 396 | // therefore, put a copy of the scf vector on each node;
|
---|
| 397 | // while doing this, duplicate columns corresponding to singly
|
---|
| 398 | // occupied orbitals and order columns as [socc docc socc unocc]
|
---|
| 399 | // Also rearrange scf eigenvalues as [socc docc socc unocc]
|
---|
| 400 | // want socc first to get the socc's in the first batch
|
---|
| 401 | // (need socc's to compute energy denominators - see
|
---|
| 402 | // socc_sum comment below)
|
---|
| 403 | /////////////////////////////////////////////////////////
|
---|
| 404 | evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));
|
---|
| 405 |
|
---|
| 406 | RefDiagSCMatrix occ;
|
---|
| 407 | RefDiagSCMatrix evals;
|
---|
| 408 | RefSCMatrix Scf_Vec;
|
---|
| 409 | eigen(evals, Scf_Vec, occ);
|
---|
| 410 |
|
---|
| 411 | if (debug_) {
|
---|
| 412 | evals.print("eigenvalues");
|
---|
| 413 | Scf_Vec.print("eigenvectors");
|
---|
| 414 | }
|
---|
| 415 |
|
---|
| 416 | double *scf_vectort_dat = new double[nbasis*noso];
|
---|
| 417 | Scf_Vec->convert(scf_vectort_dat);
|
---|
| 418 |
|
---|
| 419 | double** scf_vectort = new double*[nocc + nvir];
|
---|
| 420 |
|
---|
| 421 | int idoc = 0, ivir = 0, isoc = 0;
|
---|
| 422 | for (i=nfzc; i<noso-nfzv; i++) {
|
---|
| 423 | if (occ(i) >= 2.0 - epsilon) {
|
---|
| 424 | evals_open[idoc+nsocc] = evals(i);
|
---|
| 425 | scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
| 426 | idoc++;
|
---|
| 427 | }
|
---|
| 428 | else if (occ(i) >= 1.0 - epsilon) {
|
---|
| 429 | evals_open[isoc] = evals(i);
|
---|
| 430 | scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
|
---|
| 431 | evals_open[isoc+nocc] = evals(i);
|
---|
| 432 | scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
|
---|
| 433 | isoc++;
|
---|
| 434 | }
|
---|
| 435 | else {
|
---|
| 436 | if (ivir < nvir) {
|
---|
| 437 | evals_open[ivir+nocc+nsocc] = evals(i);
|
---|
| 438 | scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
| 439 | }
|
---|
| 440 | ivir++;
|
---|
| 441 | }
|
---|
| 442 | }
|
---|
| 443 | // need the transpose of the vector
|
---|
| 444 | double **scf_vector = new double*[nbasis];
|
---|
| 445 | double *scf_vector_dat = new double[(nocc+nvir)*nbasis];
|
---|
| 446 | for (i=0; i<nbasis; i++) {
|
---|
| 447 | scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
|
---|
| 448 | for (j=0; j<nocc+nvir; j++) {
|
---|
| 449 | scf_vector[i][j] = scf_vectort[j][i];
|
---|
| 450 | }
|
---|
| 451 | }
|
---|
| 452 | delete[] scf_vectort;
|
---|
| 453 | delete[] scf_vectort_dat;
|
---|
| 454 |
|
---|
| 455 | ////////////////////////////////////////
|
---|
| 456 | // Allocate storage for various arrays
|
---|
| 457 | ////////////////////////////////////////
|
---|
| 458 |
|
---|
| 459 | dim_ij = nocc*ni - ni*(ni - 1)/2;
|
---|
| 460 |
|
---|
| 461 | trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
|
---|
| 462 | trans_int2 = (double*) malloc(nvir*ni*sizeof(double));
|
---|
| 463 | trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double));
|
---|
| 464 | trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
| 465 | trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
| 466 | if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));
|
---|
| 467 | if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double));
|
---|
| 468 | if (nsocc) mo_int_do_so_vir =
|
---|
| 469 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
| 470 | if (nsocc) mo_int_tmp =
|
---|
| 471 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
| 472 |
|
---|
| 473 | if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));
|
---|
| 474 |
|
---|
| 475 | // create the integrals object
|
---|
| 476 | integral()->set_storage(mem_remaining);
|
---|
| 477 | tbint_ = integral()->electron_repulsion();
|
---|
| 478 | intbuf = tbint_->buffer();
|
---|
| 479 |
|
---|
| 480 | /////////////////////////////////////
|
---|
| 481 | // Begin opt2 loops
|
---|
| 482 | /////////////////////////////////////
|
---|
| 483 |
|
---|
| 484 |
|
---|
| 485 | for (pass=0; pass<npass; pass++) {
|
---|
| 486 | i_offset = pass*ni;
|
---|
| 487 | if ((pass == npass - 1) && (rest != 0)) ni = rest;
|
---|
| 488 |
|
---|
| 489 | r_offset = 0;
|
---|
| 490 | bzerofast(trans_int3,nbf[me]*nvir*dim_ij);
|
---|
| 491 |
|
---|
| 492 | tim_enter("RS loop");
|
---|
| 493 |
|
---|
| 494 | for (imyshell=0; imyshell<nRshell; imyshell++) {
|
---|
| 495 |
|
---|
| 496 | R = myshells[imyshell];
|
---|
| 497 | nr = myshellsizes[imyshell];
|
---|
| 498 |
|
---|
| 499 | for (S = 0; S < nshell; S++) {
|
---|
| 500 | ns = basis()->shell(S).nfunction();
|
---|
| 501 | tim_enter("bzerofast trans_int1");
|
---|
| 502 | bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);
|
---|
| 503 | tim_exit("bzerofast trans_int1");
|
---|
| 504 |
|
---|
| 505 | tim_enter("PQ loop");
|
---|
| 506 |
|
---|
| 507 | for (P = 0; P < nshell; P++) {
|
---|
| 508 | np = basis()->shell(P).nfunction();
|
---|
| 509 |
|
---|
| 510 | for (Q = 0; Q <= P; Q++) {
|
---|
| 511 | if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
|
---|
| 512 | continue; // skip ereps less than tol
|
---|
| 513 | }
|
---|
| 514 |
|
---|
| 515 | aoint_computed++;
|
---|
| 516 |
|
---|
| 517 | nq = basis()->shell(Q).nfunction();
|
---|
| 518 |
|
---|
| 519 | tim_enter("erep");
|
---|
| 520 | tbint_->compute_shell(P,Q,R,S);
|
---|
| 521 | tim_exit("erep");
|
---|
| 522 |
|
---|
| 523 | tim_enter("1. quart. tr.");
|
---|
| 524 |
|
---|
| 525 | for (bf1 = 0; bf1 < np; bf1++) {
|
---|
| 526 | p = basis()->shell_to_function(P) + bf1;
|
---|
| 527 |
|
---|
| 528 | for (bf2 = 0; bf2 < nq; bf2++) {
|
---|
| 529 | q = basis()->shell_to_function(Q) + bf2;
|
---|
| 530 | if (q > p) {
|
---|
| 531 | // if q > p: want to skip the loops over bf3-4
|
---|
| 532 | // and larger bf2 values, so increment bf1 by 1
|
---|
| 533 | // ("break")
|
---|
| 534 | break;
|
---|
| 535 | }
|
---|
| 536 |
|
---|
| 537 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
| 538 | bf3_offset = 0;
|
---|
| 539 | if (split_info[imyshell] != -1) bf3_offset = split_info[imyshell];
|
---|
| 540 |
|
---|
| 541 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
| 542 |
|
---|
| 543 | index = bf4 + ns*(bf3+bf3_offset +
|
---|
| 544 | basis()->shell(R).nfunction()*(bf2 + nq*bf1));
|
---|
| 545 |
|
---|
| 546 | if (fabs(intbuf[index]) > 1.0e-15) {
|
---|
| 547 | pqrs = intbuf[index];
|
---|
| 548 |
|
---|
| 549 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
| 550 | iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni];
|
---|
| 551 |
|
---|
| 552 | if (p == q) pqrs *= 0.5;
|
---|
| 553 | col_index = i_offset;
|
---|
| 554 | c_pi = &scf_vector[p][col_index];
|
---|
| 555 | c_qi = &scf_vector[q][col_index];
|
---|
| 556 |
|
---|
| 557 | for (i=ni; i; i--) {
|
---|
| 558 | *iqrs++ += pqrs * *c_pi++;
|
---|
| 559 | *iprs++ += pqrs * *c_qi++;
|
---|
| 560 | }
|
---|
| 561 | }
|
---|
| 562 | } // exit bf4 loop
|
---|
| 563 | } // exit bf3 loop
|
---|
| 564 | } // exit bf2 loop
|
---|
| 565 | } // exit bf1 loop
|
---|
| 566 | tim_exit("1. quart. tr.");
|
---|
| 567 | } // exit Q loop
|
---|
| 568 | } // exit P loop
|
---|
| 569 | tim_exit("PQ loop");
|
---|
| 570 |
|
---|
| 571 | // Begin second and third quarter transformations
|
---|
| 572 |
|
---|
| 573 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
| 574 | r = r_offset + bf3;
|
---|
| 575 |
|
---|
| 576 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
| 577 | s = basis()->shell_to_function(S) + bf4;
|
---|
| 578 |
|
---|
| 579 | tim_enter("bzerofast trans_int2");
|
---|
| 580 | bzerofast(trans_int2,nvir*ni);
|
---|
| 581 | tim_exit("bzerofast trans_int2");
|
---|
| 582 |
|
---|
| 583 | tim_enter("2. quart. tr.");
|
---|
| 584 |
|
---|
| 585 | for (q = 0; q < nbasis; q++) {
|
---|
| 586 | iars_ptr = trans_int2;
|
---|
| 587 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
| 588 | c_qa = &scf_vector[q][nocc];
|
---|
| 589 |
|
---|
| 590 | for (a = 0; a < nvir; a++) {
|
---|
| 591 |
|
---|
| 592 | for (i=ni; i; i--) {
|
---|
| 593 | *iars_ptr++ += *c_qa * *iqrs++;
|
---|
| 594 | }
|
---|
| 595 |
|
---|
| 596 | iqrs -= ni;
|
---|
| 597 | c_qa++;
|
---|
| 598 | }
|
---|
| 599 | } // exit q loop
|
---|
| 600 | tim_exit("2. quart. tr.");
|
---|
| 601 |
|
---|
| 602 | // Begin third quarter transformation
|
---|
| 603 |
|
---|
| 604 | tim_enter("3. quart. tr.");
|
---|
| 605 |
|
---|
| 606 | for (i=0; i<ni; i++) {
|
---|
| 607 | tmp_index = i*(i+1)/2 + i*i_offset;
|
---|
| 608 |
|
---|
| 609 | for (a=0; a<nvir; a++) {
|
---|
| 610 | iars = trans_int2[a*ni + i];
|
---|
| 611 | c_sj = scf_vector[s];
|
---|
| 612 | iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)];
|
---|
| 613 |
|
---|
| 614 | for (j=0; j<=i+i_offset; j++) {
|
---|
| 615 | *iajr_ptr++ += *c_sj++ * iars;
|
---|
| 616 | }
|
---|
| 617 | }
|
---|
| 618 | } // exit i loop
|
---|
| 619 | tim_exit("3. quart. tr.");
|
---|
| 620 |
|
---|
| 621 | } // exit bf4 loop
|
---|
| 622 | } // exit bf3 loop
|
---|
| 623 |
|
---|
| 624 | } // exit S loop
|
---|
| 625 | r_offset += nr;
|
---|
| 626 | } // exit R loop
|
---|
| 627 | tim_exit("RS loop");
|
---|
| 628 |
|
---|
| 629 | // Begin fourth quarter transformation;
|
---|
| 630 | // first tansform integrals with only s.o. indices;
|
---|
| 631 | // these integrals are needed to compute the denominators
|
---|
| 632 | // in the various terms contributing to the correlation energy
|
---|
| 633 | // and must all be computed in the first pass;
|
---|
| 634 | // the integrals are summed into the array socc_sum:
|
---|
| 635 | // socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc)
|
---|
| 636 | // (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s)
|
---|
| 637 | // the individual integrals are not saved here, only the sums are kept
|
---|
| 638 |
|
---|
| 639 | if (pass == 0) {
|
---|
| 640 | tim_enter("4. quart. tr.");
|
---|
| 641 | if (nsocc) bzerofast(socc_sum,nsocc);
|
---|
| 642 | for (isocc=0; isocc<nsocc; isocc++) {
|
---|
| 643 |
|
---|
| 644 | index = 0;
|
---|
| 645 | for (i=0; i<nRshell; i++) {
|
---|
| 646 | for (j=0; j<myshellsizes[i]; j++) {
|
---|
| 647 | r = basis()->shell_to_function(myshells[i]) + j;
|
---|
| 648 | if (split_info[i] != -1) r += split_info[i];
|
---|
| 649 |
|
---|
| 650 | for (asocc=0; asocc<nsocc; asocc++) {
|
---|
| 651 | socc_sum[isocc] += scf_vector[r][nocc+asocc]*
|
---|
| 652 | trans_int3[isocc*(isocc+1)/2 + isocc*i_offset
|
---|
| 653 | + isocc + dim_ij*(asocc + nvir*index)];
|
---|
| 654 | }
|
---|
| 655 | index++;
|
---|
| 656 | }
|
---|
| 657 | }
|
---|
| 658 | } // exit i loop
|
---|
| 659 |
|
---|
| 660 | tim_exit("4. quart. tr.");
|
---|
| 661 |
|
---|
| 662 | // Sum socc_sum contributions from each node (only if nsocc > 0
|
---|
| 663 | // since gop1 will fail if nsocc = 0)
|
---|
| 664 | if (nsocc > 0) {
|
---|
| 665 | tim_enter("global sum socc_sum");
|
---|
| 666 | msg_->sum(socc_sum,nsocc,socc_sum_tmp);
|
---|
| 667 | tim_exit("global sum socc_sum");
|
---|
| 668 | }
|
---|
| 669 |
|
---|
| 670 | }
|
---|
| 671 |
|
---|
| 672 | // Now we have all the sums of integrals involving s.o.'s (socc_sum);
|
---|
| 673 | // begin fourth quarter transformation for all integrals (including
|
---|
| 674 | // integrals with only s.o. indices); use restriction j <= (i_offset+i)
|
---|
| 675 | // to save flops
|
---|
| 676 |
|
---|
| 677 | compute_index = 0;
|
---|
| 678 |
|
---|
| 679 | for (i=0; i<ni; i++) {
|
---|
| 680 |
|
---|
| 681 | for (j=0; j <= (i_offset+i); j++) {
|
---|
| 682 |
|
---|
| 683 | tim_enter("4. quart. tr.");
|
---|
| 684 |
|
---|
| 685 | bzerofast(trans_int4,nvir*nvir);
|
---|
| 686 |
|
---|
| 687 | index = 0;
|
---|
| 688 | for (k=0; k<nRshell; k++) {
|
---|
| 689 | for (l=0; l<myshellsizes[k]; l++) {
|
---|
| 690 | r = basis()->shell_to_function(myshells[k]) + l;
|
---|
| 691 | if (split_info[k] != -1) r += split_info[k];
|
---|
| 692 |
|
---|
| 693 | for (a=0; a<nvir; a++) {
|
---|
| 694 | iajb = &trans_int4[a*nvir];
|
---|
| 695 | iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)];
|
---|
| 696 | c_rb = &scf_vector[r][nocc];
|
---|
| 697 |
|
---|
| 698 | for (b=0; b<nvir; b++) {
|
---|
| 699 | *iajb++ += *c_rb++ * iajr;
|
---|
| 700 | }
|
---|
| 701 | }
|
---|
| 702 | index++;
|
---|
| 703 | }
|
---|
| 704 | } // end of k loop
|
---|
| 705 |
|
---|
| 706 | tim_exit("4. quart. tr.");
|
---|
| 707 |
|
---|
| 708 | tim_enter("global sum trans_int4");
|
---|
| 709 | msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
|
---|
| 710 | tim_exit("global sum trans_int4");
|
---|
| 711 |
|
---|
| 712 | // We now have the fully transformed integrals (ia|jb)
|
---|
| 713 | // for one i, one j (j <= i_offset+i), and all a and b;
|
---|
| 714 | // compute contribution to the OPT1 and OPT2 correlation
|
---|
| 715 | // energies; use restriction b <= a to save flops
|
---|
| 716 |
|
---|
| 717 | tim_enter("compute ecorr");
|
---|
| 718 |
|
---|
| 719 | for (a=0; a<nvir; a++) {
|
---|
| 720 | for (b=0; b<=a; b++) {
|
---|
| 721 | compute_index++;
|
---|
| 722 | if (compute_index%nproc != me) continue;
|
---|
| 723 |
|
---|
| 724 | docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
|
---|
| 725 | + (j >= nsocc && j < nocc);
|
---|
| 726 | socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
|
---|
| 727 | vir_index = (a >= nsocc) + (b >= nsocc);
|
---|
| 728 |
|
---|
| 729 | if (socc_index >= 3) continue; // skip to next b value
|
---|
| 730 |
|
---|
| 731 | delta_ijab = evals_open[i_offset+i] + evals_open[j]
|
---|
| 732 | - evals_open[nocc+a] - evals_open[nocc+b];
|
---|
| 733 |
|
---|
| 734 | // Determine integral type and compute energy contribution
|
---|
| 735 | if (docc_index == 2 && vir_index == 2) {
|
---|
| 736 | if (i_offset+i == j && a == b) {
|
---|
| 737 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 738 | ecorr_opt2 += contrib1/delta_ijab;
|
---|
| 739 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 740 | }
|
---|
| 741 | else if (i_offset+i == j || a == b) {
|
---|
| 742 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 743 | ecorr_opt2 += 2*contrib1/delta_ijab;
|
---|
| 744 | ecorr_opt1 += 2*contrib1/delta_ijab;
|
---|
| 745 | }
|
---|
| 746 | else {
|
---|
| 747 | contrib1 = trans_int4[a*nvir + b];
|
---|
| 748 | contrib2 = trans_int4[b*nvir + a];
|
---|
| 749 | ecorr_opt2 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 750 | - contrib1*contrib2)/delta_ijab;
|
---|
| 751 | ecorr_opt1 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 752 | - contrib1*contrib2)/delta_ijab;
|
---|
| 753 | }
|
---|
| 754 | }
|
---|
| 755 | else if (docc_index == 2 && socc_index == 2) {
|
---|
| 756 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
| 757 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
| 758 | ecorr_opt2 += contrib1/
|
---|
| 759 | (delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
|
---|
| 760 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 761 | }
|
---|
| 762 | else if (socc_index == 2 && vir_index == 2) {
|
---|
| 763 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
| 764 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
| 765 | ecorr_opt2 += contrib1/
|
---|
| 766 | (delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
|
---|
| 767 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 768 | }
|
---|
| 769 | else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
|
---|
| 770 | if (i_offset+i == j) {
|
---|
| 771 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 772 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
|
---|
| 773 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 774 | }
|
---|
| 775 | else {
|
---|
| 776 | contrib1 = trans_int4[a*nvir + b];
|
---|
| 777 | contrib2 = trans_int4[b*nvir + a];
|
---|
| 778 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 779 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[b]);
|
---|
| 780 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 781 | - contrib1*contrib2)/delta_ijab;
|
---|
| 782 | }
|
---|
| 783 | }
|
---|
| 784 | else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
|
---|
| 785 | contrib1 = trans_int4[b*nvir+a]*trans_int4[b*nvir+a];
|
---|
| 786 | if (j == b) {
|
---|
| 787 | // To compute the energy contribution from an integral of the
|
---|
| 788 | // type (is1|s1a) (i=d.o., s1=s.o., a=unocc.), we need the
|
---|
| 789 | // (is|sa) integrals for all s=s.o.; these integrals are
|
---|
| 790 | // therefore stored here in the array mo_int_do_so_vir, and
|
---|
| 791 | // the energy contribution is computed after exiting the loop
|
---|
| 792 | // over i-batches (pass)
|
---|
| 793 | mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
|
---|
| 794 | (i_offset+i-nsocc + ndocc*b)] =
|
---|
| 795 | trans_int4[b*nvir + a];
|
---|
| 796 | ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
|
---|
| 797 | ecorr_opt1 += 1.5*contrib1/delta_ijab;
|
---|
| 798 | ecorr_zapt2_contrib += contrib1/
|
---|
| 799 | (delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
|
---|
| 800 | + 0.5*contrib1/delta_ijab;
|
---|
| 801 | }
|
---|
| 802 | else {
|
---|
| 803 | ecorr_opt2 += contrib1/
|
---|
| 804 | (delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
|
---|
| 805 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 806 | }
|
---|
| 807 | }
|
---|
| 808 | else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
|
---|
| 809 | if (a == b) {
|
---|
| 810 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
| 811 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
|
---|
| 812 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
| 813 | }
|
---|
| 814 | else {
|
---|
| 815 | contrib1 = trans_int4[a*nvir + b];
|
---|
| 816 | contrib2 = trans_int4[b*nvir + a];
|
---|
| 817 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 818 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[j]);
|
---|
| 819 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
| 820 | - contrib1*contrib2)/delta_ijab;
|
---|
| 821 | }
|
---|
| 822 | }
|
---|
| 823 | } // exit b loop
|
---|
| 824 | } // exit a loop
|
---|
| 825 | tim_exit("compute ecorr");
|
---|
| 826 | } // exit j loop
|
---|
| 827 | } // exit i loop
|
---|
| 828 |
|
---|
| 829 | if (nsocc == 0 && npass > 1 && pass < npass - 1) {
|
---|
| 830 | double passe = ecorr_opt2;
|
---|
| 831 | msg_->sum(passe);
|
---|
| 832 | ExEnv::out0() << indent
|
---|
| 833 | << "Partial correlation energy for pass " << pass << ":" << endl;
|
---|
| 834 | ExEnv::out0() << indent
|
---|
| 835 | << scprintf(" restart_ecorr = %14.10f", passe)
|
---|
| 836 | << endl;
|
---|
| 837 | ExEnv::out0() << indent
|
---|
| 838 | << scprintf(" restart_orbital_v2lb = %d", ((pass+1) * ni))
|
---|
| 839 | << endl;
|
---|
| 840 | }
|
---|
| 841 | } // exit loop over i-batches (pass)
|
---|
| 842 |
|
---|
| 843 |
|
---|
| 844 |
|
---|
| 845 | // Compute contribution from excitations of the type is1 -> s1a where
|
---|
| 846 | // i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a,
|
---|
| 847 | // where i and a have the same spin, contribute to this term;
|
---|
| 848 | // (Brillouin's theorem not satisfied for ROHF wave functions);
|
---|
| 849 | // do this only if nsocc > 0 since gop1 will fail otherwise
|
---|
| 850 |
|
---|
| 851 | tim_enter("compute ecorr");
|
---|
| 852 |
|
---|
| 853 | if (nsocc > 0) {
|
---|
| 854 | tim_enter("global sum mo_int_do_so_vir");
|
---|
| 855 | msg_->sum(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc),mo_int_tmp);
|
---|
| 856 | tim_exit("global sum mo_int_do_so_vir");
|
---|
| 857 | }
|
---|
| 858 |
|
---|
| 859 | // Add extra contribution for triplet and higher spin multiplicities
|
---|
| 860 | // contribution = sum over s1 and s2<s1 of (is1|s1a)*(is2|s2a)/delta
|
---|
| 861 |
|
---|
| 862 | if (me == 0 && nsocc) {
|
---|
| 863 | for (i=0; i<ndocc; i++) {
|
---|
| 864 |
|
---|
| 865 | for (a=0; a<nvir-nsocc; a++) {
|
---|
| 866 | delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
|
---|
| 867 |
|
---|
| 868 | for (s1=0; s1<nsocc; s1++) {
|
---|
| 869 |
|
---|
| 870 | for (s2=0; s2<s1; s2++) {
|
---|
| 871 | contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s1)]*
|
---|
| 872 | mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s2)]/delta;
|
---|
| 873 | ecorr_opt2 += contrib1;
|
---|
| 874 | ecorr_opt1 += contrib1;
|
---|
| 875 | }
|
---|
| 876 | }
|
---|
| 877 | } // exit a loop
|
---|
| 878 | } // exit i loop
|
---|
| 879 | }
|
---|
| 880 |
|
---|
| 881 | tim_exit("compute ecorr");
|
---|
| 882 |
|
---|
| 883 | ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
|
---|
| 884 | ecorr_opt2 += ecorr_opt2_contrib;
|
---|
| 885 | msg_->sum(ecorr_opt1);
|
---|
| 886 | msg_->sum(ecorr_opt2);
|
---|
| 887 | msg_->sum(ecorr_zapt2);
|
---|
| 888 | msg_->sum(aoint_computed);
|
---|
| 889 |
|
---|
| 890 | escf = reference_->energy();
|
---|
| 891 | hf_energy_ = escf;
|
---|
| 892 |
|
---|
| 893 | if (me == 0) {
|
---|
| 894 | eopt2 = escf + ecorr_opt2;
|
---|
| 895 | eopt1 = escf + ecorr_opt1;
|
---|
| 896 | ezapt2 = escf + ecorr_zapt2;
|
---|
| 897 |
|
---|
| 898 | // Print out various energies etc.
|
---|
| 899 | ExEnv::out0() << indent
|
---|
| 900 | << "Number of shell quartets for which AO integrals would" << endl
|
---|
| 901 | << indent << "have been computed without bounds checking: "
|
---|
| 902 | << npass*nshell*nshell*(nshell+1)*(nshell+1)/2 << endl;
|
---|
| 903 | ExEnv::out0() << indent
|
---|
| 904 | << "Number of shell quartets for which AO integrals" << endl
|
---|
| 905 | << indent << "were computed: " << aoint_computed << endl;
|
---|
| 906 |
|
---|
| 907 | ExEnv::out0() << indent
|
---|
| 908 | << scprintf("ROHF energy [au]: %17.12lf\n", escf);
|
---|
| 909 | ExEnv::out0() << indent
|
---|
| 910 | << scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
|
---|
| 911 | ExEnv::out0() << indent
|
---|
| 912 | << scprintf("OPT2 second order correction [au]: %17.12lf\n",
|
---|
| 913 | ecorr_opt2);
|
---|
| 914 | ExEnv::out0() << indent
|
---|
| 915 | << scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
|
---|
| 916 | ExEnv::out0() << indent
|
---|
| 917 | << scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
|
---|
| 918 | ecorr_zapt2);
|
---|
| 919 | ExEnv::out0() << indent
|
---|
| 920 | << scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
|
---|
| 921 | ExEnv::out0().flush();
|
---|
| 922 | }
|
---|
| 923 |
|
---|
| 924 | msg_->bcast(eopt1);
|
---|
| 925 | msg_->bcast(eopt2);
|
---|
| 926 | msg_->bcast(ezapt2);
|
---|
| 927 |
|
---|
| 928 | if (method_ && !strcmp(method_,"opt1")) {
|
---|
| 929 | set_energy(eopt1);
|
---|
| 930 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 931 | *ref_to_mp2_acc);
|
---|
| 932 | }
|
---|
| 933 | else if (method_ && !strcmp(method_,"opt2")) {
|
---|
| 934 | set_energy(eopt2);
|
---|
| 935 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 936 | *ref_to_mp2_acc);
|
---|
| 937 | }
|
---|
| 938 | else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
|
---|
| 939 | set_energy(ezapt2);
|
---|
| 940 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 941 | *ref_to_mp2_acc);
|
---|
| 942 | }
|
---|
| 943 | else {
|
---|
| 944 | if (!(!method_ || !strcmp(method_,"zapt"))) {
|
---|
| 945 | ExEnv::out0() << indent
|
---|
| 946 | << "MBPT2: bad method: " << method_ << ", using zapt" << endl;
|
---|
| 947 | }
|
---|
| 948 | set_energy(ezapt2);
|
---|
| 949 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 950 | *ref_to_mp2_acc);
|
---|
| 951 | }
|
---|
| 952 |
|
---|
| 953 | free(trans_int1);
|
---|
| 954 | free(trans_int2);
|
---|
| 955 | free(trans_int3);
|
---|
| 956 | free(trans_int4);
|
---|
| 957 | free(trans_int4_tmp);
|
---|
| 958 | if (nsocc) free(socc_sum);
|
---|
| 959 | if (nsocc) free(socc_sum_tmp);
|
---|
| 960 | if (nsocc) free(mo_int_do_so_vir);
|
---|
| 961 | if (nsocc) free(mo_int_tmp);
|
---|
| 962 | free(evals_open);
|
---|
| 963 | free(myshells);
|
---|
| 964 | free(shellsize);
|
---|
| 965 | free (myshellsizes);
|
---|
| 966 | free (split_info);
|
---|
| 967 | free(sorted_shells);
|
---|
| 968 | free(nbf);
|
---|
| 969 | free(proc);
|
---|
| 970 |
|
---|
| 971 | delete[] scf_vector;
|
---|
| 972 | delete[] scf_vector_dat;
|
---|
| 973 |
|
---|
| 974 | }
|
---|
| 975 |
|
---|
| 976 | /////////////////////////////////////////////////////////////////
|
---|
| 977 | // Function iquicksort performs a quick sort (larger -> smaller)
|
---|
| 978 | // of the integer data in item by the integer indices in index;
|
---|
| 979 | // data in item remain unchanged
|
---|
| 980 | /////////////////////////////////////////////////////////////////
|
---|
| 981 | static void
|
---|
| 982 | iquicksort(int *item,int *index,int n)
|
---|
| 983 | {
|
---|
| 984 | int i;
|
---|
| 985 | if (n<=0) return;
|
---|
| 986 | for (i=0; i<n; i++) {
|
---|
| 987 | index[i] = i;
|
---|
| 988 | }
|
---|
| 989 | iqs(item,index,0,n-1);
|
---|
| 990 | }
|
---|
| 991 |
|
---|
| 992 | static void
|
---|
| 993 | iqs(int *item,int *index,int left,int right)
|
---|
| 994 | {
|
---|
| 995 | register int i,j;
|
---|
| 996 | int x,y;
|
---|
| 997 |
|
---|
| 998 | i=left; j=right;
|
---|
| 999 | x=item[index[(left+right)/2]];
|
---|
| 1000 |
|
---|
| 1001 | do {
|
---|
| 1002 | while(item[index[i]]>x && i<right) i++;
|
---|
| 1003 | while(x>item[index[j]] && j>left) j--;
|
---|
| 1004 |
|
---|
| 1005 | if (i<=j) {
|
---|
| 1006 | if (item[index[i]] != item[index[j]]) {
|
---|
| 1007 | y=index[i];
|
---|
| 1008 | index[i]=index[j];
|
---|
| 1009 | index[j]=y;
|
---|
| 1010 | }
|
---|
| 1011 | i++; j--;
|
---|
| 1012 | }
|
---|
| 1013 | } while(i<=j);
|
---|
| 1014 |
|
---|
| 1015 | if (left<j) iqs(item,index,left,j);
|
---|
| 1016 | if (i<right) iqs(item,index,i,right);
|
---|
| 1017 | }
|
---|
| 1018 |
|
---|
| 1019 | ////////////////////////////////////////////////////////////////////
|
---|
| 1020 | // Function findprocminmax finds the processor with the most/fewest
|
---|
| 1021 | // basis functions and the corresponding number of basis functions
|
---|
| 1022 | ////////////////////////////////////////////////////////////////////
|
---|
| 1023 | static void
|
---|
| 1024 | findprocminmax(int *nbf, int nproc,
|
---|
| 1025 | int *procmin, int *procmax, int *minbf, int *maxbf)
|
---|
| 1026 | {
|
---|
| 1027 | int i;
|
---|
| 1028 |
|
---|
| 1029 | *procmax = *procmin = 0;
|
---|
| 1030 | *maxbf = nbf[0];
|
---|
| 1031 | *minbf = nbf[0];
|
---|
| 1032 |
|
---|
| 1033 | for (i=1; i<nproc; i++) {
|
---|
| 1034 | if (nbf[i] > *maxbf) {
|
---|
| 1035 | *maxbf = nbf[i];
|
---|
| 1036 | *procmax = i;
|
---|
| 1037 | }
|
---|
| 1038 | if (nbf[i] < *minbf) {
|
---|
| 1039 | *minbf = nbf[i];
|
---|
| 1040 | *procmin = i;
|
---|
| 1041 | }
|
---|
| 1042 | }
|
---|
| 1043 | }
|
---|
| 1044 |
|
---|
| 1045 | /////////////////////////////////////////////////////////////////
|
---|
| 1046 | // Function findshellmax finds the largest shell on a processor
|
---|
| 1047 | /////////////////////////////////////////////////////////////////
|
---|
| 1048 | static void
|
---|
| 1049 | findshellmax(int *myshellsizes, int nRshell, int *shellmax, int *shellmaxindex)
|
---|
| 1050 | {
|
---|
| 1051 | int i;
|
---|
| 1052 |
|
---|
| 1053 | *shellmax = myshellsizes[0];
|
---|
| 1054 | *shellmaxindex = 0;
|
---|
| 1055 |
|
---|
| 1056 | for (i=1; i<nRshell; i++) {
|
---|
| 1057 | if (myshellsizes[i] > *shellmax) {
|
---|
| 1058 | *shellmax = myshellsizes[i];
|
---|
| 1059 | *shellmaxindex = i;
|
---|
| 1060 | }
|
---|
| 1061 | }
|
---|
| 1062 | }
|
---|
| 1063 |
|
---|
| 1064 | //////////////////////////////////////////////////////////////
|
---|
| 1065 | // Function expand_array expands the dimension of an array of
|
---|
| 1066 | // doubles by 1;
|
---|
| 1067 | // NB: THE ARRAY MUST HAVE BEEN ALLOCATED WITH MALLOC
|
---|
| 1068 | //////////////////////////////////////////////////////////////
|
---|
| 1069 | static void
|
---|
| 1070 | expandintarray(int *&a, int olddim)
|
---|
| 1071 | {
|
---|
| 1072 | int i;
|
---|
| 1073 | int *tmp;
|
---|
| 1074 |
|
---|
| 1075 | tmp = (int*) malloc((olddim+1)*sizeof(int));
|
---|
| 1076 |
|
---|
| 1077 | for (i=0; i<olddim; i++) {
|
---|
| 1078 | tmp[i] = a[i];
|
---|
| 1079 | }
|
---|
| 1080 | tmp[olddim] = 0;
|
---|
| 1081 |
|
---|
| 1082 | free(a);
|
---|
| 1083 |
|
---|
| 1084 | a = tmp;
|
---|
| 1085 | }
|
---|
| 1086 |
|
---|
| 1087 | ////////////////////////////////////////////////////////////////////////////
|
---|
| 1088 |
|
---|
| 1089 | // Local Variables:
|
---|
| 1090 | // mode: c++
|
---|
| 1091 | // c-file-style: "CLJ-CONDENSED"
|
---|
| 1092 | // End:
|
---|