[0b990d] | 1 | //
|
---|
| 2 | // csgrad.cc
|
---|
| 3 | //
|
---|
| 4 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
| 5 | //
|
---|
| 6 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
| 7 | // Maintainer: LPS
|
---|
| 8 | //
|
---|
| 9 | // This file is part of the SC Toolkit.
|
---|
| 10 | //
|
---|
| 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
| 12 | // it under the terms of the GNU Library General Public License as published by
|
---|
| 13 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
| 14 | // any later version.
|
---|
| 15 | //
|
---|
| 16 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
| 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 19 | // GNU Library General Public License for more details.
|
---|
| 20 | //
|
---|
| 21 | // You should have received a copy of the GNU Library General Public License
|
---|
| 22 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
| 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 24 | //
|
---|
| 25 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
| 26 | //
|
---|
| 27 |
|
---|
| 28 | #include <stdlib.h>
|
---|
| 29 | #include <math.h>
|
---|
| 30 | #include <limits.h>
|
---|
| 31 |
|
---|
| 32 | #include <util/misc/formio.h>
|
---|
| 33 | #include <util/misc/timer.h>
|
---|
| 34 | #include <util/group/memory.h>
|
---|
| 35 | #include <util/group/message.h>
|
---|
| 36 | #include <util/class/class.h>
|
---|
| 37 | #include <util/state/state.h>
|
---|
| 38 | #include <math/scmat/matrix.h>
|
---|
| 39 | #include <math/scmat/blocked.h>
|
---|
| 40 | #include <chemistry/molecule/molecule.h>
|
---|
| 41 | #include <chemistry/qc/basis/integral.h>
|
---|
| 42 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
| 43 | #include <chemistry/qc/mbpt/mbpt.h>
|
---|
| 44 | #include <chemistry/qc/mbpt/util.h>
|
---|
| 45 | #include <chemistry/qc/mbpt/csgrade12.h>
|
---|
| 46 | #include <chemistry/qc/mbpt/csgrad34qb.h>
|
---|
| 47 | #include <chemistry/qc/mbpt/csgrads2pdm.h>
|
---|
| 48 |
|
---|
| 49 | using namespace std;
|
---|
| 50 | using namespace sc;
|
---|
| 51 |
|
---|
| 52 | #define SINGLE_THREAD_E12 0
|
---|
| 53 | #define SINGLE_THREAD_QBT34 0
|
---|
| 54 | #define SINGLE_THREAD_S2PDM 0
|
---|
| 55 |
|
---|
| 56 | #define PRINT2Q 0
|
---|
| 57 | #define PRINT3Q 0
|
---|
| 58 | #define PRINT4Q 0
|
---|
| 59 | #if PRINT_BIGGEST_INTS
|
---|
| 60 | BiggestContribs biggest_ints_1(4,40);
|
---|
| 61 | #endif
|
---|
| 62 |
|
---|
| 63 | #define WRITE_DOUBLES 0
|
---|
| 64 |
|
---|
| 65 | static void sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2);
|
---|
| 66 | static void zero_gradients(double **f, int n1, int n2);
|
---|
| 67 | static void accum_gradients(double **g, double **f, int n1, int n2);
|
---|
| 68 |
|
---|
| 69 | #define PRINT1Q 0
|
---|
| 70 |
|
---|
| 71 | #if PRINT_CONTRIB
|
---|
| 72 | static void
|
---|
| 73 | sw(int&i,int&j)
|
---|
| 74 | {
|
---|
| 75 | int tmp = i;
|
---|
| 76 | i = j;
|
---|
| 77 | j = tmp;
|
---|
| 78 | }
|
---|
| 79 |
|
---|
| 80 | static void
|
---|
| 81 | print_contrib(double tmpval, int num, int onum,
|
---|
| 82 | int P,int Q,int R,int S, int p,int q,int r,int s)
|
---|
| 83 | {
|
---|
| 84 |
|
---|
| 85 | printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
| 86 | num, P, Q, R, S, p, q, r, s, tmpval);
|
---|
| 87 | printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
| 88 | onum, P, Q, R, S, p, q, r, s, -tmpval);
|
---|
| 89 |
|
---|
| 90 | if (p < q) {
|
---|
| 91 | sw(p,q); sw(P,Q);
|
---|
| 92 | }
|
---|
| 93 | if (r < s) {
|
---|
| 94 | sw(r,s); sw(R,S);
|
---|
| 95 | }
|
---|
| 96 | if (p < r || (p == r && q < s)) {
|
---|
| 97 | sw(P,R); sw(p,r);
|
---|
| 98 | sw(Q,S); sw(q,s);
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
| 102 | num, P, Q, R, S, p, q, r, s, tmpval);
|
---|
| 103 | printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
|
---|
| 104 | onum, P, Q, R, S, p, q, r, s, -tmpval);
|
---|
| 105 | }
|
---|
| 106 | #endif
|
---|
| 107 |
|
---|
| 108 | void
|
---|
| 109 | MBPT2::compute_cs_grad()
|
---|
| 110 | {
|
---|
| 111 |
|
---|
| 112 | // New version of MP2 gradient program which uses the full
|
---|
| 113 | // permutational symmetry of the two-electron integral derivatives
|
---|
| 114 |
|
---|
| 115 | Ref<SCMatrixKit> kit = basis()->matrixkit();
|
---|
| 116 |
|
---|
| 117 | int do_d2_ = 1; // if true, compute d2 diagnostic
|
---|
| 118 |
|
---|
| 119 | int nij; // number of i,j pairs on a node (for e.g., mo_int)
|
---|
| 120 | double *mo_int; // MO integrals of type (ov|ov)
|
---|
| 121 | // (and these integrals divided by
|
---|
| 122 | // orbital energy denominators)
|
---|
| 123 | double *integral_iqjs; // half-transformed integrals
|
---|
| 124 |
|
---|
| 125 | int nocc_act, nvir_act;
|
---|
| 126 | int i, j, k;
|
---|
| 127 | int ii, bb;
|
---|
| 128 | int x, y;
|
---|
| 129 | int a, b, c;
|
---|
| 130 | int nshell;
|
---|
| 131 | int offset;
|
---|
| 132 | int ik_offset;
|
---|
| 133 | int i_offset;
|
---|
| 134 | int npass, pass;
|
---|
| 135 | int tmpint;
|
---|
| 136 | int np, nq, nr, ns;
|
---|
| 137 | int P, Q, R, S;
|
---|
| 138 | int p, q, r, s;
|
---|
| 139 | int bf1, bf2, bf3, bf4;
|
---|
| 140 | int index;
|
---|
| 141 | int me;
|
---|
| 142 | int nproc;
|
---|
| 143 | int rest;
|
---|
| 144 | int p_offset, q_offset, r_offset, s_offset;
|
---|
| 145 |
|
---|
| 146 | int aoint_computed = 0;
|
---|
| 147 | int aointder_computed = 0;
|
---|
| 148 | int xyz;
|
---|
| 149 | int natom = molecule()->natom(); // the number of atoms
|
---|
| 150 | int int_index;
|
---|
| 151 | size_t mem_static; // static memory in bytes
|
---|
| 152 | int ij_proc; // the processor which has ij pair
|
---|
| 153 | int ij_index; // of the ij pairs on a proc, this ij pair is number ij_index
|
---|
| 154 | // (i.e., ij_index < nij)
|
---|
| 155 | int ik_proc; // the processor which has ik pair
|
---|
| 156 | int ik_index;
|
---|
| 157 | int jloop, kloop;
|
---|
| 158 |
|
---|
| 159 | int ni;
|
---|
| 160 |
|
---|
| 161 | double *evals; // scf eigenvalues
|
---|
| 162 | double *iajb_ptr, *ibja_ptr, *iakb_ptr, *ibka_ptr;
|
---|
| 163 | double *iajc_ptr, *ibjc_ptr, *icjb_ptr, *icja_ptr;
|
---|
| 164 | double *ijkb_ptr, *ibkj_ptr;
|
---|
| 165 | double pqrs;
|
---|
| 166 | double *c_sa, c_rj;
|
---|
| 167 | double *c_pi, *c_qi, *c_sj;
|
---|
| 168 | double *c_qx, *c_qa, *c_sb, *c_pa, *c_pq, *c_sy;
|
---|
| 169 | double delta_ijab, delta_ijbc, delta_ijac;
|
---|
| 170 | double ecorr_mp2 = 0.0;
|
---|
| 171 | double escf;
|
---|
| 172 | double emp2=0.0;
|
---|
| 173 | int tol; // log2 of the erep tolerance
|
---|
| 174 | // (erep < 2^tol => discard)
|
---|
| 175 | double *Wkj=0,*Wab=0,*Waj=0;// occ-occ, vir-vir and vir-occ parts of
|
---|
| 176 | // second order correction to MP2
|
---|
| 177 | // energy weighted density matrix
|
---|
| 178 | double *Pkj=0,*Pab=0; // occ-occ and vir-vir parts of second order
|
---|
| 179 | // correction to MP2 density matrix
|
---|
| 180 | double *d2occ_mat, *d2vir_mat; // matrices for computation of D2 diagnostic
|
---|
| 181 | double *Laj=0; // MP2 Lagrangian
|
---|
| 182 | double *Lpi; // contrib to MP2 Lagrangian partially in AO basis
|
---|
| 183 | double *pkj_ptr=0, *pab_ptr;
|
---|
| 184 | double *d2occ_mat_ptr;
|
---|
| 185 | double *d2vir_mat_ptr;
|
---|
| 186 | double *wkj_ptr, *wjk_ptr, *wab_ptr, *wba_ptr, *waj_ptr=0;
|
---|
| 187 | double *laj_ptr, *lpi_ptr, *lqi_ptr;
|
---|
| 188 | double *gamma_iajs, *gamma_iajs_tmp;
|
---|
| 189 | // partially back-transformed non-sep 2PDM's
|
---|
| 190 | double *gamma_iqjs_tmp;
|
---|
| 191 | double *gamma_iajs_ptr;
|
---|
| 192 | double *gamma_iqjs_ptr;
|
---|
| 193 | double *gammabuf; // buffer used for sending elements of gamma_iqjs
|
---|
| 194 | double *mo_intbuf; // buffer used for sending mo integrals
|
---|
| 195 | double tmpval, tmpval1;
|
---|
| 196 | double *P2AO, *W2AO;
|
---|
| 197 | double *p2ao_ptr, *w2ao_ptr;
|
---|
| 198 | double *PHF, *WHF;
|
---|
| 199 | double *phf_ptr, *whf_ptr;
|
---|
| 200 | double *PMP2, *WMP2;
|
---|
| 201 | double *pmp2_ptr, *wmp2_ptr;
|
---|
| 202 |
|
---|
| 203 | double *ixjs_tmp; // three-quarter transformed two-el integrals
|
---|
| 204 | double *integral_ixjs; // all three-quarter transformed two-el integrals
|
---|
| 205 | double *integral_iajy; // mo integrals (y = any MO)
|
---|
| 206 | double *integral_ikja; // mo integrals
|
---|
| 207 | double *integral_iqjs_ptr;
|
---|
| 208 | double *iajy_ptr;
|
---|
| 209 | double *ixjs_ptr;
|
---|
| 210 | double *ikja_ptr;
|
---|
| 211 | double *iajs_ptr, *ikjs_ptr;
|
---|
| 212 |
|
---|
| 213 | double **gradient=0, *gradient_dat=0; // The MP2 gradient
|
---|
| 214 | double **hf_gradient=0, *hf_gradient_dat=0; // The HF gradient
|
---|
| 215 | double **ginter=0; // Intermediates for the MP2 gradient
|
---|
| 216 | double **hf_ginter=0; // Intermediates for the HF gradient
|
---|
| 217 | double d2o, d2v, d2_diag;
|
---|
| 218 |
|
---|
| 219 | BiggestContribs biggest_coefs(5,10);
|
---|
| 220 | CharacterTable ct = molecule()->point_group()->char_table();
|
---|
| 221 |
|
---|
| 222 | #if PRINT_BIGGEST_INTS
|
---|
| 223 | BiggestContribs biggest_ints_2(4,40);
|
---|
| 224 | BiggestContribs biggest_ints_2s(4,40);
|
---|
| 225 | BiggestContribs biggest_ints_3a(4,40);
|
---|
| 226 | BiggestContribs biggest_ints_3(4,40);
|
---|
| 227 | #endif
|
---|
| 228 |
|
---|
| 229 | int dograd = gradient_needed();
|
---|
| 230 |
|
---|
| 231 | tim_enter("mp2-mem");
|
---|
| 232 |
|
---|
| 233 | nfuncmax = basis()->max_nfunction_in_shell();
|
---|
| 234 |
|
---|
| 235 | nshell = basis()->nshell();
|
---|
| 236 |
|
---|
| 237 | me = msg_->me();
|
---|
| 238 |
|
---|
| 239 | if (me == 0) {
|
---|
| 240 | ExEnv::out0() << endl << indent
|
---|
| 241 | << "Entered memgrp based MP2 routine" << endl;
|
---|
| 242 | }
|
---|
| 243 |
|
---|
| 244 | nproc = msg_->n();
|
---|
| 245 | if (me == 0)
|
---|
| 246 | ExEnv::out0() << indent << scprintf("nproc = %i", nproc) << endl;
|
---|
| 247 |
|
---|
| 248 | tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
|
---|
| 249 |
|
---|
| 250 | nocc = 0;
|
---|
| 251 | for (i=0; i<oso_dimension()->n(); i++) {
|
---|
| 252 | if (reference_->occupation(i) == 2.0) nocc++;
|
---|
| 253 | }
|
---|
| 254 |
|
---|
| 255 | nocc_act = nocc - nfzc;
|
---|
| 256 | nvir = noso - nocc;
|
---|
| 257 | nvir_act = nvir - nfzv;
|
---|
| 258 |
|
---|
| 259 | // Do a few preliminary tests to make sure the desired calculation
|
---|
| 260 | // can be done (and appears to be meaningful!)
|
---|
| 261 |
|
---|
| 262 | if (nocc_act <= 0) {
|
---|
| 263 | if (me == 0) {
|
---|
| 264 | ExEnv::err0() << "There are no active occupied orbitals; program exiting" << endl;
|
---|
| 265 | }
|
---|
| 266 | abort();
|
---|
| 267 | }
|
---|
| 268 |
|
---|
| 269 | if (nvir_act <= 0) {
|
---|
| 270 | if (me == 0) {
|
---|
| 271 | ExEnv::err0() << "There are no active virtual orbitals; program exiting" << endl;
|
---|
| 272 | }
|
---|
| 273 | abort();
|
---|
| 274 | }
|
---|
| 275 |
|
---|
| 276 | if (restart_orbital_memgrp_) {
|
---|
| 277 | if (!dograd && !do_d1_ && !do_d2_) {
|
---|
| 278 | ExEnv::out0() << indent
|
---|
| 279 | << scprintf("Restarting at orbital %d with partial energy %18.14f",
|
---|
| 280 | restart_orbital_memgrp_, restart_ecorr_)
|
---|
| 281 | << endl;
|
---|
| 282 | ecorr_mp2 = restart_ecorr_;
|
---|
| 283 | }
|
---|
| 284 | else {
|
---|
| 285 | ExEnv::out0() << indent
|
---|
| 286 | << "Restart requested but not possible with gradients, D1, or D2"
|
---|
| 287 | << endl;
|
---|
| 288 | restart_ecorr_ = 0.0;
|
---|
| 289 | restart_orbital_memgrp_ = 0;
|
---|
| 290 | }
|
---|
| 291 | }
|
---|
| 292 | else {
|
---|
| 293 | restart_ecorr_ = 0.0;
|
---|
| 294 | }
|
---|
| 295 |
|
---|
| 296 | ////////////////////////////////////////////////////////
|
---|
| 297 | // Compute batch size ni for mp2 loops;
|
---|
| 298 | //
|
---|
| 299 | // The following arrays are kept throughout (all of type double):
|
---|
| 300 | // scf_vector, gradient, ginter, Pkj, Pab, Wkj, Wab, Waj, Laj
|
---|
| 301 | // and memory allocated for these arrays and integral evaluators
|
---|
| 302 | // is called mem_static
|
---|
| 303 | //
|
---|
| 304 | ////////////////////////////////////////////////////////
|
---|
| 305 | if (me == 0) {
|
---|
| 306 | mem_static = nbasis*noso; // scf vector
|
---|
| 307 | mem_static += 2*nbasis*nfuncmax; // iqjs & iqjr
|
---|
| 308 | if (dograd) {
|
---|
| 309 | mem_static += 9*natom; // gradient & ginter & hf_ginter
|
---|
| 310 | mem_static += (nocc*(nocc+1))/2; // Pkj
|
---|
| 311 | mem_static += (nvir*(nvir+1))/2; // Pab
|
---|
| 312 | mem_static += nocc*nocc; // Wkj
|
---|
| 313 | mem_static += nvir*nvir; // Wab
|
---|
| 314 | mem_static += 2*nocc*nvir; // Waj & Laj
|
---|
| 315 | if (do_d2_) {
|
---|
| 316 | mem_static += (nocc_act*(nocc_act+1))/2; // d2occ_mat
|
---|
| 317 | mem_static += (nvir_act*(nvir_act+1))/2; // d2vir_mat
|
---|
| 318 | }
|
---|
| 319 | }
|
---|
| 320 | else if (do_d1_) {
|
---|
| 321 | mem_static += nocc*nvir; // partial Laj
|
---|
| 322 | }
|
---|
| 323 | mem_static *= sizeof(double);
|
---|
| 324 | int nthreads = thr_->nthread();
|
---|
| 325 | mem_static += nthreads * integral()->storage_required_eri(basis()); // integral evaluators
|
---|
| 326 | ni = compute_cs_batchsize(mem_static, nocc_act-restart_orbital_memgrp_);
|
---|
| 327 | }
|
---|
| 328 |
|
---|
| 329 | if (max_norb_ > 0 && ni > max_norb_) {
|
---|
| 330 | ExEnv::out0() << indent
|
---|
| 331 | << "\"max_norb\" set: could have done "
|
---|
| 332 | << ni << " orbitals per pass otherwise."
|
---|
| 333 | << endl;
|
---|
| 334 | ni = max_norb_;
|
---|
| 335 | }
|
---|
| 336 |
|
---|
| 337 | // Send value of ni and mem_static to other nodes
|
---|
| 338 | msg_->bcast(ni);
|
---|
| 339 | double dmem_static = mem_static;
|
---|
| 340 | msg_->bcast(dmem_static);
|
---|
| 341 | mem_static = size_t(dmem_static);
|
---|
| 342 |
|
---|
| 343 | // Compute the storage to be used by the integral routines (required plus optional)
|
---|
| 344 | size_t dyn_mem = distsize_to_size(compute_cs_dynamic_memory(ni,nocc_act));
|
---|
| 345 | int mem_remaining;
|
---|
| 346 | if (mem_alloc <= (dyn_mem + mem_static)) mem_remaining = 0;
|
---|
| 347 | else mem_remaining = mem_alloc - dyn_mem - mem_static;
|
---|
| 348 | mem_remaining += thr_->nthread() * integral()->storage_required_eri(basis());
|
---|
| 349 |
|
---|
| 350 | ExEnv::out0() << indent
|
---|
| 351 | << "Memory available per node: " << mem_alloc << " Bytes"
|
---|
| 352 | << endl;
|
---|
| 353 | ExEnv::out0() << indent
|
---|
| 354 | << "Static memory used per node: " << mem_static << " Bytes"
|
---|
| 355 | << endl;
|
---|
| 356 | ExEnv::out0() << indent
|
---|
| 357 | << "Total memory used per node: " << dyn_mem+mem_static << " Bytes"
|
---|
| 358 | << endl;
|
---|
| 359 | ExEnv::out0() << indent
|
---|
| 360 | << "Memory required for one pass: "
|
---|
| 361 | << compute_cs_dynamic_memory(nocc_act,nocc_act)+mem_static
|
---|
| 362 | << " Bytes"
|
---|
| 363 | << endl;
|
---|
| 364 | ExEnv::out0() << indent
|
---|
| 365 | << "Minimum memory required: "
|
---|
| 366 | << compute_cs_dynamic_memory(1,nocc_act)+mem_static
|
---|
| 367 | << " Bytes"
|
---|
| 368 | << endl;
|
---|
| 369 | ExEnv::out0() << indent
|
---|
| 370 | << "Batch size: " << ni
|
---|
| 371 | << endl;
|
---|
| 372 |
|
---|
| 373 | if (ni == 0) {
|
---|
| 374 | ExEnv::err0() << "Batch size is 0: more memory or processors are needed"
|
---|
| 375 | << endl;
|
---|
| 376 | abort();
|
---|
| 377 | }
|
---|
| 378 |
|
---|
| 379 | if (dynamic_) {
|
---|
| 380 | ExEnv::out0() << indent << "Using dynamic load balancing." << endl;
|
---|
| 381 | }
|
---|
| 382 |
|
---|
| 383 | if (ni == nocc_act-restart_orbital_memgrp_) {
|
---|
| 384 | npass = 1;
|
---|
| 385 | rest = 0;
|
---|
| 386 | }
|
---|
| 387 | else {
|
---|
| 388 | rest = (nocc_act-restart_orbital_memgrp_)%ni;
|
---|
| 389 | npass = (nocc_act-restart_orbital_memgrp_ - rest)/ni + 1;
|
---|
| 390 | if (rest == 0) npass--;
|
---|
| 391 | }
|
---|
| 392 |
|
---|
| 393 | if (me == 0) {
|
---|
| 394 | ExEnv::out0() << indent
|
---|
| 395 | << scprintf(" npass rest nbasis nshell nfuncmax") << endl;
|
---|
| 396 | ExEnv::out0() << indent
|
---|
| 397 | << scprintf(" %-4i %-3i %-5i %-4i %-3i",
|
---|
| 398 | npass,rest,nbasis,nshell,nfuncmax)
|
---|
| 399 | << endl;
|
---|
| 400 | ExEnv::out0() << indent
|
---|
| 401 | << scprintf(" nocc nvir nfzc nfzv") << endl;
|
---|
| 402 | ExEnv::out0() << indent
|
---|
| 403 | << scprintf(" %-4i %-4i %-4i %-4i",
|
---|
| 404 | nocc,nvir,nfzc,nfzv)
|
---|
| 405 | << endl;
|
---|
| 406 | }
|
---|
| 407 |
|
---|
| 408 | int nijmax = 0;
|
---|
| 409 | index = 0;
|
---|
| 410 | for (i=0; i<ni; i++) {
|
---|
| 411 | for (j=0; j<nocc; j++) {
|
---|
| 412 | if (index++ % nproc == me) nijmax++;
|
---|
| 413 | }
|
---|
| 414 | }
|
---|
| 415 |
|
---|
| 416 | ////////////////////////////////////////////////
|
---|
| 417 | // The scf vector is distributed between nodes;
|
---|
| 418 | // put a copy of the scf vector on each node;
|
---|
| 419 | ////////////////////////////////////////////////
|
---|
| 420 |
|
---|
| 421 | escf = reference_->energy();
|
---|
| 422 | hf_energy_ = escf;
|
---|
| 423 |
|
---|
| 424 | RefDiagSCMatrix occ;
|
---|
| 425 | RefSCMatrix Scf_Vec;
|
---|
| 426 | RefDiagSCMatrix evalmat;
|
---|
| 427 | eigen(evalmat, Scf_Vec, occ);
|
---|
| 428 |
|
---|
| 429 | if (debug_ > 1) {
|
---|
| 430 | evalmat.print("eigenvalues");
|
---|
| 431 | Scf_Vec.print("eigenvectors");
|
---|
| 432 | }
|
---|
| 433 |
|
---|
| 434 | double *scf_vector_dat = new double[nbasis*noso];
|
---|
| 435 | Scf_Vec.t()->convert(scf_vector_dat);
|
---|
| 436 |
|
---|
| 437 | evals = new double[noso];
|
---|
| 438 | double** scf_vector = new double*[nbasis];
|
---|
| 439 | for (i=0; i<nbasis; i++) {
|
---|
| 440 | scf_vector[i] = &scf_vector_dat[i*noso];
|
---|
| 441 | }
|
---|
| 442 | for (i=0; i<noso; i++) {
|
---|
| 443 | evals[i] = evalmat(i);
|
---|
| 444 | }
|
---|
| 445 |
|
---|
| 446 | Scf_Vec = 0;
|
---|
| 447 | evalmat = 0;
|
---|
| 448 |
|
---|
| 449 | //////////////////////////////////////////////////////////////
|
---|
| 450 | // Allocate storage for various arrays needed for gradients
|
---|
| 451 | // (Pkj and Pab are symmetric, so store only lower triangle)
|
---|
| 452 | //////////////////////////////////////////////////////////////
|
---|
| 453 |
|
---|
| 454 | if (dograd) {
|
---|
| 455 | Pkj = (double*) malloc((nocc*(nocc+1)/2)*sizeof(double));
|
---|
| 456 | Pab = (double*) malloc((nvir*(nvir+1)/2)*sizeof(double));
|
---|
| 457 | Wkj = (double*) malloc(nocc*nocc*sizeof(double));
|
---|
| 458 | Wab = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
| 459 | Waj = (double*) malloc(nvir*nocc*sizeof(double));
|
---|
| 460 | if (do_d2_) {
|
---|
| 461 | d2occ_mat = new double[nocc_act*(nocc_act+1)/2];
|
---|
| 462 | d2vir_mat = new double[nvir_act*(nvir_act+1)/2];
|
---|
| 463 | }
|
---|
| 464 |
|
---|
| 465 | gradient_dat = new double[natom*3];
|
---|
| 466 | gradient = new double*[natom];
|
---|
| 467 | for (i=0; i<natom; i++) {
|
---|
| 468 | gradient[i] = &gradient_dat[i*3];
|
---|
| 469 | }
|
---|
| 470 |
|
---|
| 471 | hf_gradient_dat = new double[natom*3];
|
---|
| 472 | hf_gradient = new double*[natom];
|
---|
| 473 | for (i=0; i<natom; i++) {
|
---|
| 474 | hf_gradient[i] = &hf_gradient_dat[i*3];
|
---|
| 475 | }
|
---|
| 476 |
|
---|
| 477 | ginter = new double*[natom];
|
---|
| 478 | for (i=0; i<natom; i++) {
|
---|
| 479 | ginter[i] = new double[3];
|
---|
| 480 | for (xyz=0; xyz<3; xyz++) ginter[i][xyz] = 0;
|
---|
| 481 | }
|
---|
| 482 |
|
---|
| 483 | hf_ginter = new double*[natom];
|
---|
| 484 | for (i=0; i<natom; i++) {
|
---|
| 485 | hf_ginter[i] = new double[3];
|
---|
| 486 | for (xyz=0; xyz<3; xyz++) hf_ginter[i][xyz] = 0;
|
---|
| 487 | }
|
---|
| 488 |
|
---|
| 489 | //////////////////////////////
|
---|
| 490 | // Initialize various arrays
|
---|
| 491 | //////////////////////////////
|
---|
| 492 |
|
---|
| 493 | bzerofast(Pkj,nocc*(nocc+1)/2);
|
---|
| 494 | bzerofast(Wkj,nocc*nocc);
|
---|
| 495 | bzerofast(Pab,nvir*(nvir+1)/2);
|
---|
| 496 | bzerofast(Wab,nvir*nvir);
|
---|
| 497 | bzerofast(Waj,nvir*nocc);
|
---|
| 498 | if (do_d2_) {
|
---|
| 499 | bzerofast(d2occ_mat,nocc_act*(nocc_act+1)/2);
|
---|
| 500 | bzerofast(d2vir_mat,nvir_act*(nvir_act+1)/2);
|
---|
| 501 | }
|
---|
| 502 |
|
---|
| 503 | if (me == 0) zero_gradients(gradient, natom, 3);
|
---|
| 504 | if (me == 0) zero_gradients(hf_gradient, natom, 3);
|
---|
| 505 | }
|
---|
| 506 |
|
---|
| 507 | if (dograd || do_d1_) {
|
---|
| 508 | Laj = (double*) malloc(nvir*nocc*sizeof(double));
|
---|
| 509 | bzerofast(Laj,nvir*nocc);
|
---|
| 510 | }
|
---|
| 511 |
|
---|
| 512 | if (debug_ > 2 && me == 0) {
|
---|
| 513 | for (j=0; j<noso; j++) {
|
---|
| 514 | ExEnv::out0() << indent
|
---|
| 515 | << scprintf("eigenvalue[%3d] = %15.10lf", j, evals[j]);
|
---|
| 516 | if (j < nfzc) ExEnv::out0() << " (frozen docc)";
|
---|
| 517 | else if (j < nocc_act + nfzc) ExEnv::out0() << " (active docc)";
|
---|
| 518 | else if (j < nvir_act + nocc_act + nfzc) ExEnv::out0() << " (active uocc)";
|
---|
| 519 | else ExEnv::out0() << " (frozen uocc)";
|
---|
| 520 | ExEnv::out0() << endl;
|
---|
| 521 | }
|
---|
| 522 | }
|
---|
| 523 |
|
---|
| 524 | /////////////////////////////////////
|
---|
| 525 | // Begin MP2 loops
|
---|
| 526 | /////////////////////////////////////
|
---|
| 527 |
|
---|
| 528 | // debug print
|
---|
| 529 | if (debug_ && me == 0) {
|
---|
| 530 | ExEnv::out0() << indent
|
---|
| 531 | << scprintf("node %i, begin loop over i-batches",me) << endl;
|
---|
| 532 | }
|
---|
| 533 | // end of debug print
|
---|
| 534 |
|
---|
| 535 | // Initialize the integrals
|
---|
| 536 | integral()->set_storage(mem_remaining);
|
---|
| 537 | tbints_ = new Ref<TwoBodyInt>[thr_->nthread()];
|
---|
| 538 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 539 | tbints_[i] = integral()->electron_repulsion();
|
---|
| 540 | }
|
---|
| 541 | if (dograd || do_d1_) {
|
---|
| 542 | tbintder_ = new Ref<TwoBodyDerivInt>[thr_->nthread()];
|
---|
| 543 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 544 | tbintder_[i] = integral()->electron_repulsion_deriv();
|
---|
| 545 | }
|
---|
| 546 | }
|
---|
| 547 |
|
---|
| 548 | int mem_integral_intermediates = integral()->storage_used();
|
---|
| 549 | int mem_integral_storage = (mem_remaining - mem_integral_intermediates) / thr_->nthread();
|
---|
| 550 | if (mem_integral_storage<0) mem_integral_storage = 0;
|
---|
| 551 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 552 | tbints_[i]->set_integral_storage(mem_integral_storage);
|
---|
| 553 | }
|
---|
| 554 |
|
---|
| 555 | ExEnv::out0() << endl << indent
|
---|
| 556 | << scprintf("Memory used for integral intermediates: %i Bytes",
|
---|
| 557 | mem_integral_intermediates)
|
---|
| 558 | << endl;
|
---|
| 559 | ExEnv::out0() << indent
|
---|
| 560 | << scprintf("Memory used for integral storage: %i Bytes",
|
---|
| 561 | mem_integral_storage)
|
---|
| 562 | << endl;
|
---|
| 563 |
|
---|
| 564 | if (mem.null()) {
|
---|
| 565 | ExEnv::errn() << "MBPT2: memory group not initialized" << endl;
|
---|
| 566 | abort();
|
---|
| 567 | }
|
---|
| 568 |
|
---|
| 569 | mem->set_localsize(size_t(nijmax)*nbasis*nbasis*sizeof(double));
|
---|
| 570 | ExEnv::out0() << indent
|
---|
| 571 | << "Size of global distributed array: "
|
---|
| 572 | << mem->totalsize()
|
---|
| 573 | << " Bytes"
|
---|
| 574 | << endl;
|
---|
| 575 |
|
---|
| 576 | MemoryGrpBuf<double> membuf_remote(mem);
|
---|
| 577 |
|
---|
| 578 | int usep4 = !dograd;
|
---|
| 579 |
|
---|
| 580 | Ref<ThreadLock> lock = thr_->new_lock();
|
---|
| 581 | CSGradErep12Qtr** e12thread = new CSGradErep12Qtr*[thr_->nthread()];
|
---|
| 582 | DistShellPair::SharedData sp_e_data, sp_g_data;
|
---|
| 583 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 584 | e12thread[i] = new CSGradErep12Qtr(i, thr_->nthread(), me, nproc,
|
---|
| 585 | mem, msg_, lock, basis(), tbints_[i],
|
---|
| 586 | nocc, scf_vector, tol, debug_,
|
---|
| 587 | dynamic_, print_percent_,
|
---|
| 588 | &sp_e_data, usep4);
|
---|
| 589 | }
|
---|
| 590 |
|
---|
| 591 | CSGrad34Qbtr** qbt34thread;
|
---|
| 592 | if (dograd || do_d1_) {
|
---|
| 593 | qbt34thread = new CSGrad34Qbtr*[thr_->nthread()];
|
---|
| 594 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 595 | qbt34thread[i] = new CSGrad34Qbtr(i, thr_->nthread(), me, nproc,
|
---|
| 596 | mem, msg_, lock, basis(), tbints_[i],
|
---|
| 597 | tbintder_[i], nocc, nfzc, scf_vector,
|
---|
| 598 | tol, debug_, dynamic_, print_percent_,
|
---|
| 599 | &sp_g_data, dograd, natom);
|
---|
| 600 | }
|
---|
| 601 | }
|
---|
| 602 |
|
---|
| 603 | tim_enter("mp2 passes");
|
---|
| 604 | for (pass=0; pass<npass; pass++) {
|
---|
| 605 |
|
---|
| 606 | if (me == 0) {
|
---|
| 607 | ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
|
---|
| 608 | }
|
---|
| 609 |
|
---|
| 610 | i_offset = restart_orbital_memgrp_ + pass*ni + nfzc;
|
---|
| 611 | if ((pass == npass - 1) && (rest != 0)) ni = rest;
|
---|
| 612 |
|
---|
| 613 | // Compute number of of i,j pairs on each node for
|
---|
| 614 | // two-el integrals and non-sep 2PDM elements
|
---|
| 615 | index = 0;
|
---|
| 616 | nij = 0;
|
---|
| 617 | for (i=0; i<ni; i++) {
|
---|
| 618 | for (j=0; j<nocc; j++) {
|
---|
| 619 | if (index++ % nproc == me) nij++;
|
---|
| 620 | }
|
---|
| 621 | }
|
---|
| 622 |
|
---|
| 623 | // debug print
|
---|
| 624 | if (debug_)
|
---|
| 625 | ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
|
---|
| 626 | // end of debug print
|
---|
| 627 |
|
---|
| 628 | mem->sync(); // This must be here or gamma non-sep will be wrong when running
|
---|
| 629 | // on multiple processors with more than one pass
|
---|
| 630 |
|
---|
| 631 | r_offset = 0;
|
---|
| 632 |
|
---|
| 633 | // Allocate and initialize some arrays
|
---|
| 634 | // (done here to avoid having these arrays
|
---|
| 635 | // overlap with arrays allocated later)
|
---|
| 636 |
|
---|
| 637 | // Allocate (and initialize) some arrays
|
---|
| 638 |
|
---|
| 639 | integral_iqjs = (double*) mem->localdata();
|
---|
| 640 |
|
---|
| 641 | bzerofast(integral_iqjs, nij*nbasis*nbasis);
|
---|
| 642 |
|
---|
| 643 | integral_iqjs = 0;
|
---|
| 644 |
|
---|
| 645 | mem->sync();
|
---|
| 646 |
|
---|
| 647 | index = 0;
|
---|
| 648 |
|
---|
| 649 | if (me == 0) {
|
---|
| 650 | ExEnv::out0() << indent
|
---|
| 651 | << scprintf("Begin loop over shells (erep, 1.+2. q.t.)") << endl;
|
---|
| 652 | }
|
---|
| 653 |
|
---|
| 654 | // Do the two eletron integrals and the first two quarter transformations
|
---|
| 655 | tim_enter("erep+1.qt+2.qt");
|
---|
| 656 | sp_e_data.init();
|
---|
| 657 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 658 | e12thread[i]->set_i_offset(i_offset);
|
---|
| 659 | e12thread[i]->set_ni(ni);
|
---|
| 660 | thr_->add_thread(i,e12thread[i]);
|
---|
| 661 | # if SINGLE_THREAD_E12
|
---|
| 662 | e12thread[i]->run();
|
---|
| 663 | # endif
|
---|
| 664 | }
|
---|
| 665 | # if !SINGLE_THREAD_E12
|
---|
| 666 | thr_->start_threads();
|
---|
| 667 | thr_->wait_threads();
|
---|
| 668 | # endif
|
---|
| 669 | tim_exit("erep+1.qt+2.qt");
|
---|
| 670 |
|
---|
| 671 | if (me == 0) {
|
---|
| 672 | ExEnv::out0() << indent << "End of loop over shells" << endl;
|
---|
| 673 | }
|
---|
| 674 |
|
---|
| 675 | mem->sync(); // Make sure iqjs is complete on each node before continuing
|
---|
| 676 |
|
---|
| 677 | integral_iqjs = (double*) mem->localdata();
|
---|
| 678 |
|
---|
| 679 | #if PRINT2Q
|
---|
| 680 | if (me == 0) {
|
---|
| 681 | int index = 0;
|
---|
| 682 | int ij_index = 0;
|
---|
| 683 | for (int i = 0; i<ni; i++) {
|
---|
| 684 | for (int j = 0; j<nocc; j++) {
|
---|
| 685 | if (index++ % nproc == me) {
|
---|
| 686 | if (j >= nfzc) {
|
---|
| 687 | double *integral_ij_offset = integral_iqjs + nbasis*nbasis*ij_index;
|
---|
| 688 | for (int s = 0; s<nbasis; s++) {
|
---|
| 689 | double *integral_ijsq_ptr = integral_ij_offset + s*nbasis;
|
---|
| 690 | for (int q = 0; q<nbasis; q++) {
|
---|
| 691 | printf("2Q: (%d %d|%d %d) = %12.8f\n",
|
---|
| 692 | i,q,j,s,*integral_ijsq_ptr);
|
---|
| 693 | *integral_ijsq_ptr++;
|
---|
| 694 | }
|
---|
| 695 | }
|
---|
| 696 | }
|
---|
| 697 | ij_index++;
|
---|
| 698 | }
|
---|
| 699 | }
|
---|
| 700 | }
|
---|
| 701 | }
|
---|
| 702 | #endif
|
---|
| 703 |
|
---|
| 704 | // Allocate and initialize some arrays
|
---|
| 705 | ixjs_tmp = new double[nbasis];
|
---|
| 706 |
|
---|
| 707 | if (me == 0) {
|
---|
| 708 | ExEnv::out0() << indent << "Begin third q.t." << endl;
|
---|
| 709 | }
|
---|
| 710 |
|
---|
| 711 | tim_enter("3. q.t.");
|
---|
| 712 | // Begin third quarter transformation;
|
---|
| 713 | // generate (ix|js) for i act, j act or frz, and x any MO (act or frz)
|
---|
| 714 | index = 0;
|
---|
| 715 | ij_index = 0;
|
---|
| 716 | for (i=0; i<ni; i++) {
|
---|
| 717 | for (j=0; j<nocc; j++) {
|
---|
| 718 | if (index++ % nproc == me) {
|
---|
| 719 |
|
---|
| 720 | for (s=0; s<nbasis; s++) {
|
---|
| 721 |
|
---|
| 722 | bzerofast(ixjs_tmp, nbasis);
|
---|
| 723 | for (q=0; q<nbasis; q++) {
|
---|
| 724 | integral_iqjs_ptr = &integral_iqjs[q + nbasis*(s + nbasis*ij_index)];
|
---|
| 725 | ixjs_ptr = ixjs_tmp;
|
---|
| 726 | c_qx = scf_vector[q];
|
---|
| 727 | tmpval = *integral_iqjs_ptr;
|
---|
| 728 | #if PRINT_BIGGEST_INTS
|
---|
| 729 | biggest_ints_2.insert(tmpval,i+i_offset,j,s,q);
|
---|
| 730 | if ((i+i_offset==104 && j == 1)
|
---|
| 731 | ||(i+i_offset==104 && j == 2)) {
|
---|
| 732 | biggest_ints_2s.insert(tmpval,i+i_offset,j,s,q);
|
---|
| 733 | }
|
---|
| 734 | #endif
|
---|
| 735 | for (x=0; x<noso; x++) {
|
---|
| 736 | *ixjs_ptr++ += *c_qx++ * tmpval;
|
---|
| 737 | }
|
---|
| 738 | } // exit q loop
|
---|
| 739 |
|
---|
| 740 | // Put ixjs into integral_iqjs, while overwriting what was there;
|
---|
| 741 | // i.e., integral_iqjs will now contain three-quarter transformed
|
---|
| 742 | // integrals ixjs
|
---|
| 743 | integral_iqjs_ptr = &integral_iqjs[nbasis*(s + nbasis*ij_index)];
|
---|
| 744 | ixjs_ptr = ixjs_tmp;
|
---|
| 745 | for (x=0; x<noso; x++) {
|
---|
| 746 | #if PRINT_BIGGEST_INTS
|
---|
| 747 | if (x>=nocc) {
|
---|
| 748 | biggest_ints_3a.insert(*ixjs_ptr,i+i_offset,j,s,x-nocc);
|
---|
| 749 | }
|
---|
| 750 | #endif
|
---|
| 751 | *integral_iqjs_ptr++ = *ixjs_ptr++;
|
---|
| 752 | }
|
---|
| 753 | } // exit s loop
|
---|
| 754 | ij_index++;
|
---|
| 755 | } // endif
|
---|
| 756 | } // exit j loop
|
---|
| 757 | } // exit i loop
|
---|
| 758 | // end of third quarter transformation
|
---|
| 759 | tim_exit("3. q.t.");
|
---|
| 760 |
|
---|
| 761 | if (me == 0) {
|
---|
| 762 | ExEnv::out0() << indent << "End of third q.t." << endl;
|
---|
| 763 | }
|
---|
| 764 |
|
---|
| 765 | delete[] ixjs_tmp;
|
---|
| 766 |
|
---|
| 767 | // The array of half-transformed integrals integral_iqjs has now
|
---|
| 768 | // been overwritten by three-quarter transformed integrals ixjs;
|
---|
| 769 | // rename the array integral_ixjs, where x = any MO
|
---|
| 770 | integral_ixjs = integral_iqjs;
|
---|
| 771 |
|
---|
| 772 | #if PRINT3Q
|
---|
| 773 | if (me == 0) {
|
---|
| 774 | int index = 0;
|
---|
| 775 | int ij_index = 0;
|
---|
| 776 | for (int i = 0; i<ni; i++) {
|
---|
| 777 | for (int j = 0; j<nocc; j++) {
|
---|
| 778 | if (index++ % nproc == me) {
|
---|
| 779 | if (j >= nfzc) {
|
---|
| 780 | double *integral_ij_offset = integral_ixjs + nbasis*nbasis*ij_index;
|
---|
| 781 | for (int s = 0; s<nbasis; s++) {
|
---|
| 782 | double *integral_ijsx_ptr = integral_ij_offset + s*nbasis;
|
---|
| 783 | for (int x = 0; x<noso; x++) {
|
---|
| 784 | printf("3Q: (%d %d|%d %d) = %12.8f\n",
|
---|
| 785 | i,x,j,s,*integral_ijsx_ptr);
|
---|
| 786 | *integral_ijsx_ptr++;
|
---|
| 787 | }
|
---|
| 788 | }
|
---|
| 789 | }
|
---|
| 790 | ij_index++;
|
---|
| 791 | }
|
---|
| 792 | }
|
---|
| 793 | }
|
---|
| 794 | }
|
---|
| 795 | #endif
|
---|
| 796 |
|
---|
| 797 | integral_iajy = new double[noso];
|
---|
| 798 | // in iajy: i act; a,j act or frz; y act or frz occ or virt.
|
---|
| 799 | integral_ikja = new double[nvir_act];
|
---|
| 800 | // in ikja: i,j act; k act or frz; a act.
|
---|
| 801 |
|
---|
| 802 | if (me == 0) {
|
---|
| 803 | ExEnv::out0() << indent << "Begin fourth q.t." << endl;
|
---|
| 804 | }
|
---|
| 805 |
|
---|
| 806 | // Begin fourth quarter transformation
|
---|
| 807 | // generating MO integrals (ov|ov), (ov|oo) and (oo|ov)
|
---|
| 808 | tim_enter("4. q.t.");
|
---|
| 809 | index = 0;
|
---|
| 810 | ij_index = 0;
|
---|
| 811 | for (i=0; i<ni; i++) {
|
---|
| 812 | for (j=0; j<nocc; j++) {
|
---|
| 813 | if (index++ % nproc == me) {
|
---|
| 814 |
|
---|
| 815 | for (a=0; a<nvir; a++) {
|
---|
| 816 | bzerofast(integral_iajy, noso);
|
---|
| 817 | iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
|
---|
| 818 | for (s=0; s<nbasis; s++) {
|
---|
| 819 | c_sy = scf_vector[s];
|
---|
| 820 | iajy_ptr = integral_iajy;
|
---|
| 821 | tmpval = *iajs_ptr;
|
---|
| 822 | #if PRINT_BIGGEST_INTS
|
---|
| 823 | biggest_ints_3.insert(tmpval,i+i_offset,j,s,a);
|
---|
| 824 | if ((i+i_offset==105 && j == 2 && s == 170 && a == 3)
|
---|
| 825 | ||(i+i_offset==102 && j == 2 && s == 170 && a == 2)) {
|
---|
| 826 | ExEnv::outn() << scprintf("3/4: %3d %3d %3d %3d: %16.10f",
|
---|
| 827 | i+i_offset, j, s, x-nocc)
|
---|
| 828 | << endl;
|
---|
| 829 | }
|
---|
| 830 | #endif
|
---|
| 831 | for (y=0; y<noso; y++) {
|
---|
| 832 | *iajy_ptr++ += *c_sy++ * tmpval;
|
---|
| 833 | } // exit y loop
|
---|
| 834 | iajs_ptr += nbasis;
|
---|
| 835 | } // exit s loop
|
---|
| 836 | // Put integral_iajy into ixjs for one i,a,j while
|
---|
| 837 | // overwriting elements of ixjs
|
---|
| 838 | iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
|
---|
| 839 | iajy_ptr = integral_iajy;
|
---|
| 840 | for (y=0; y<noso; y++) {
|
---|
| 841 | *iajs_ptr = *iajy_ptr++;
|
---|
| 842 | iajs_ptr += nbasis;
|
---|
| 843 | } // exit y loop
|
---|
| 844 | } // exit a loop
|
---|
| 845 |
|
---|
| 846 | // this is only needed for gradients
|
---|
| 847 | if ((dograd || do_d1_) && j >= nfzc) {
|
---|
| 848 | for (k=0; k<nocc; k++) {
|
---|
| 849 | bzerofast(integral_ikja, nvir_act);
|
---|
| 850 | ikjs_ptr = &integral_ixjs[k + nbasis*nbasis*ij_index];
|
---|
| 851 | for (s=0; s<nbasis; s++) {
|
---|
| 852 | c_sa = &scf_vector[s][nocc];
|
---|
| 853 | ikja_ptr = integral_ikja;
|
---|
| 854 | tmpval = *ikjs_ptr;
|
---|
| 855 | for (a=0; a<nvir_act; a++) {
|
---|
| 856 | *ikja_ptr++ += *c_sa++ * tmpval;
|
---|
| 857 | } // exit a loop
|
---|
| 858 | ikjs_ptr += nbasis;
|
---|
| 859 | } // exit s loop
|
---|
| 860 | // Put integral_ikja into ixjs for one i,k,j while
|
---|
| 861 | // overwriting elements of ixjs
|
---|
| 862 | ikjs_ptr = &integral_ixjs[k + nbasis*(nocc + nbasis*ij_index)];
|
---|
| 863 | ikja_ptr = integral_ikja;
|
---|
| 864 | for (a=0; a<nvir_act; a++) {
|
---|
| 865 | *ikjs_ptr = *ikja_ptr++;
|
---|
| 866 | ikjs_ptr += nbasis;
|
---|
| 867 | } // exit a loop
|
---|
| 868 | } // exit k loop
|
---|
| 869 | } //endif
|
---|
| 870 |
|
---|
| 871 | ij_index++;
|
---|
| 872 | } // endif
|
---|
| 873 | } // exit j loop
|
---|
| 874 | } // exit i loop
|
---|
| 875 | // end of fourth quarter transformation
|
---|
| 876 | tim_exit("4. q.t.");
|
---|
| 877 |
|
---|
| 878 | if (me == 0) {
|
---|
| 879 | ExEnv::out0() << indent << "End of fourth q.t." << endl;
|
---|
| 880 | }
|
---|
| 881 |
|
---|
| 882 | // The array integral_ixjs has now been overwritten by MO integrals
|
---|
| 883 | // iajy and ikja, so rename the array mo_int
|
---|
| 884 | mo_int = integral_ixjs;
|
---|
| 885 |
|
---|
| 886 | delete[] integral_iajy;
|
---|
| 887 | delete[] integral_ikja;
|
---|
| 888 |
|
---|
| 889 | // Divide the (ia|jb) MO integrals by the term
|
---|
| 890 | // evals[i]+evals[j]-evals[a]-evals[b]
|
---|
| 891 | // and keep these integrals in mo_int;
|
---|
| 892 | // do this only for active i, j, a, and b
|
---|
| 893 | tim_enter("divide (ia|jb)'s");
|
---|
| 894 |
|
---|
| 895 | index = 0;
|
---|
| 896 | ij_index = 0;
|
---|
| 897 | for (i=0; i<ni; i++) {
|
---|
| 898 | ii = i+i_offset;
|
---|
| 899 | for (j=0; j<nocc; j++) {
|
---|
| 900 | if (index++ % nproc == me) {
|
---|
| 901 | if (j>=nfzc) {
|
---|
| 902 | for (b=0; b<nvir_act; b++) {
|
---|
| 903 | iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
|
---|
| 904 | bb = b+nocc;
|
---|
| 905 | for (a=0; a<nvir_act; a++) {
|
---|
| 906 | #if PRINT4Q
|
---|
| 907 | printf("4Q: (%d %d|%d %d) = %12.8f\n",
|
---|
| 908 | i,a+nocc,j,b+nocc,*iajb_ptr);
|
---|
| 909 | #endif
|
---|
| 910 | // Zero out nonsymmetric integral, else divide by denominators
|
---|
| 911 | if (usep4 && ( symorb_irrep_[ii] ^
|
---|
| 912 | symorb_irrep_[j] ^
|
---|
| 913 | symorb_irrep_[a+nocc] ^
|
---|
| 914 | symorb_irrep_[bb]) ) {
|
---|
| 915 | *iajb_ptr++ = 0.0;
|
---|
| 916 | }
|
---|
| 917 | else
|
---|
| 918 | *iajb_ptr++ /= evals[ii]+evals[j]-evals[a+nocc]-evals[bb];
|
---|
| 919 | } // exit a loop
|
---|
| 920 | } // exit b loop
|
---|
| 921 | } // endif
|
---|
| 922 | ij_index++;
|
---|
| 923 | } // endif
|
---|
| 924 | } // exit j loop
|
---|
| 925 | } // exit i loop
|
---|
| 926 | tim_exit("divide (ia|jb)'s");
|
---|
| 927 |
|
---|
| 928 | // We now have the fully transformed integrals (ia|jb)
|
---|
| 929 | // divided by the proper orbital energy denominators
|
---|
| 930 | // for one batch of i, all j<nocc, and all a<nvir and b<nvir,
|
---|
| 931 | // where i, j, a, and b are all active;
|
---|
| 932 | // compute contribution to the MP2 correlation energy
|
---|
| 933 | // from these integrals
|
---|
| 934 |
|
---|
| 935 | #if WRITE_DOUBLES
|
---|
| 936 | if (nproc > 1 || npass > 1) {
|
---|
| 937 | ExEnv::outn() << "csgrad.cc: WRITE_DOUBLES set but case not allowed" << endl;
|
---|
| 938 | abort();
|
---|
| 939 | }
|
---|
| 940 | ExEnv::outn() << "csgrad.cc: WRITING DOUBLES: CHECK ORDER" << endl;
|
---|
| 941 | char *doutname = SCFormIO::fileext_to_filename(".mp2");
|
---|
| 942 | FILE *dout = fopen(doutname,"w");
|
---|
| 943 | delete[] doutname;
|
---|
| 944 | fwrite(&nocc_act, sizeof(int), 1, dout);
|
---|
| 945 | fwrite(&nvir_act, sizeof(int), 1, dout);
|
---|
| 946 | for (j=nfzc; j<nocc; j++) {
|
---|
| 947 | for (b=0; b<nvir_act; b++) {
|
---|
| 948 | for (i=0; i<ni; i++) {
|
---|
| 949 | ij_index = nocc*i + j;
|
---|
| 950 | iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
|
---|
| 951 | fwrite(iajb_ptr, sizeof(double), nvir_act, dout);
|
---|
| 952 | for (a=0; a<nvir_act; a++) {
|
---|
| 953 | if (fabs(iajb_ptr[a])>1.0e-8) {
|
---|
| 954 | ExEnv::outn() << scprintf(" Djbia(%2d %2d %2d %2d) = %12.8f",
|
---|
| 955 | j+1-nfzc,b+1,i+1,a+1,iajb_ptr[a])
|
---|
| 956 | << endl;
|
---|
| 957 | }
|
---|
| 958 | }
|
---|
| 959 | }
|
---|
| 960 | }
|
---|
| 961 | }
|
---|
| 962 | fclose(dout);
|
---|
| 963 | #endif
|
---|
| 964 |
|
---|
| 965 | tim_enter("compute ecorr");
|
---|
| 966 |
|
---|
| 967 | index = 0;
|
---|
| 968 | ij_index = 0;
|
---|
| 969 | for (i=0; i<ni; i++) {
|
---|
| 970 | double ecorr_i = 0.0;
|
---|
| 971 | for (j=0; j<nocc; j++) {
|
---|
| 972 | double ecorr_ij = 0.0;
|
---|
| 973 | if (index++ % nproc == me) {
|
---|
| 974 |
|
---|
| 975 | if (j>=nfzc) {
|
---|
| 976 | for (b=0; b<nvir_act; b++) {
|
---|
| 977 | iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
|
---|
| 978 | ibja_ptr = &mo_int[b+nocc + nbasis*(nocc + nbasis*ij_index)];
|
---|
| 979 | for (a=0; a<nvir_act; a++) {
|
---|
| 980 | delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
|
---|
| 981 | // only include determinants with unique coefficients
|
---|
| 982 | if (a>=b && i_offset+i>=j) {
|
---|
| 983 | if (a>b && i_offset+i>j) {
|
---|
| 984 | // aaaa or bbbb
|
---|
| 985 | biggest_coefs.insert(*iajb_ptr - *ibja_ptr,
|
---|
| 986 | i_offset+i,j,a,b,1111);
|
---|
| 987 | // aabb or bbaa or abba or baab
|
---|
| 988 | biggest_coefs.insert(*ibja_ptr,i_offset+i,j,b,a,1212);
|
---|
| 989 | } // endif
|
---|
| 990 | // aabb or bbaa or abba or baab
|
---|
| 991 | biggest_coefs.insert(*iajb_ptr,i_offset+i,j,a,b,1212);
|
---|
| 992 | } // endif
|
---|
| 993 |
|
---|
| 994 | tmpval = *iajb_ptr*(2**iajb_ptr - *ibja_ptr)*delta_ijab;
|
---|
| 995 | ecorr_mp2 += tmpval;
|
---|
| 996 | if (debug_) ecorr_ij += tmpval;
|
---|
| 997 | iajb_ptr++;
|
---|
| 998 | ibja_ptr += nbasis;;
|
---|
| 999 | } // exit a loop
|
---|
| 1000 | } // exit b loop
|
---|
| 1001 | } // endif
|
---|
| 1002 | ij_index++;
|
---|
| 1003 | } // endif
|
---|
| 1004 | if (debug_) {
|
---|
| 1005 | msg_->sum(ecorr_ij);
|
---|
| 1006 | ecorr_i += ecorr_ij;
|
---|
| 1007 | ExEnv::out0() << indent
|
---|
| 1008 | << scprintf("correlation energy for pair %3d %3d = %16.12f",
|
---|
| 1009 | i+i_offset, j, ecorr_ij)
|
---|
| 1010 | << endl;
|
---|
| 1011 | }
|
---|
| 1012 | } // exit j loop
|
---|
| 1013 | if (debug_) {
|
---|
| 1014 | ExEnv::out0() << indent
|
---|
| 1015 | << scprintf("correlation energy for orbital %3d = %16.12f",
|
---|
| 1016 | i+i_offset, ecorr_i)
|
---|
| 1017 | << endl;
|
---|
| 1018 | }
|
---|
| 1019 | } // exit i loop
|
---|
| 1020 | tim_exit("compute ecorr");
|
---|
| 1021 |
|
---|
| 1022 | // debug print
|
---|
| 1023 | if (debug_ && me == 0) {
|
---|
| 1024 | ExEnv::out0() << indent << "End of ecorr" << endl;
|
---|
| 1025 | }
|
---|
| 1026 | // end of debug print
|
---|
| 1027 |
|
---|
| 1028 | if (npass > 1 && pass < npass - 1) {
|
---|
| 1029 | double passe = ecorr_mp2;
|
---|
| 1030 | msg_->sum(passe);
|
---|
| 1031 | ExEnv::out0() << indent
|
---|
| 1032 | << "Partial correlation energy for pass " << pass << ":" << endl;
|
---|
| 1033 | ExEnv::out0() << indent
|
---|
| 1034 | << scprintf(" restart_ecorr = %18.14f", passe)
|
---|
| 1035 | << endl;
|
---|
| 1036 | ExEnv::out0() << indent
|
---|
| 1037 | << scprintf(" restart_orbital_memgrp = %d", ((pass+1) * ni))
|
---|
| 1038 | << endl;
|
---|
| 1039 | }
|
---|
| 1040 |
|
---|
| 1041 | integral_iqjs = 0;
|
---|
| 1042 | mem->sync(); // Make sure MO integrals are complete on all nodes before continuing
|
---|
| 1043 |
|
---|
| 1044 | // don't go beyond this point if only the energy is needed
|
---|
| 1045 | if (!dograd && !do_d1_) continue;
|
---|
| 1046 |
|
---|
| 1047 | mo_int = (double*) mem->localdata();
|
---|
| 1048 |
|
---|
| 1049 | if (!dograd) goto compute_L;
|
---|
| 1050 |
|
---|
| 1051 | // Update the matrices Pkj and Wkj with
|
---|
| 1052 | // contributions from (occ vir|occ vir) integrals
|
---|
| 1053 | index = 0;
|
---|
| 1054 | ij_index = 0;
|
---|
| 1055 | tim_enter("Pkj and Wkj");
|
---|
| 1056 | for (i=0; i<ni; i++) {
|
---|
| 1057 | for (j=0; j<nocc; j++) {
|
---|
| 1058 | if (index++ % nproc == me) {
|
---|
| 1059 |
|
---|
| 1060 | if (j>=nfzc) {
|
---|
| 1061 | for (kloop=me; kloop<me+nocc; kloop++) {
|
---|
| 1062 | // stagger k's to minimize contention
|
---|
| 1063 | k = kloop%nocc;
|
---|
| 1064 | if (k<=j) pkj_ptr = &Pkj[j*(j+1)/2 + k];
|
---|
| 1065 | if (do_d2_) {
|
---|
| 1066 | if (k<=j && k>=nfzc) {
|
---|
| 1067 | d2occ_mat_ptr = &d2occ_mat[(j-nfzc)*(j-nfzc+1)/2 + k-nfzc];
|
---|
| 1068 | }
|
---|
| 1069 | }
|
---|
| 1070 | wjk_ptr = &Wkj[j*nocc + k];
|
---|
| 1071 | // Send for iakb, if necessary
|
---|
| 1072 | ik_index = (i*nocc + k)/nproc;
|
---|
| 1073 | ik_proc = (i*nocc + k)%nproc;
|
---|
| 1074 | ik_offset = nocc + nocc*nbasis + nbasis*nbasis*ik_index;
|
---|
| 1075 | mo_intbuf = (double*) membuf_remote.readonly_on_node(ik_offset,
|
---|
| 1076 | nbasis*nvir-nocc,
|
---|
| 1077 | ik_proc);
|
---|
| 1078 | for (a=0; a<nvir_act; a++) {
|
---|
| 1079 | ibja_ptr = &mo_int[nocc + nbasis*(a+nocc + nbasis*ij_index)];
|
---|
| 1080 | iajb_ptr = &mo_int[a+nocc + nbasis*(nocc + nbasis*ij_index)];
|
---|
| 1081 | iakb_ptr = &mo_intbuf[a];
|
---|
| 1082 | for (b=0; b<nvir_act; b++) {
|
---|
| 1083 | tmpval1 = *iakb_ptr * *iajb_ptr;
|
---|
| 1084 | tmpval = 2**iakb_ptr * *ibja_ptr++ - 4*tmpval1;
|
---|
| 1085 | // tmpval = 2**iakb_ptr * (*ibja_ptr++ - 2 * *iajb_ptr);
|
---|
| 1086 | iakb_ptr += nbasis;
|
---|
| 1087 | iajb_ptr += nbasis;
|
---|
| 1088 | if (k<nfzc && k<=j) *pkj_ptr += tmpval/(evals[k]-evals[j]);
|
---|
| 1089 | else if (k<=j) {
|
---|
| 1090 | *pkj_ptr += tmpval;
|
---|
| 1091 | if (do_d2_) *d2occ_mat_ptr += tmpval1;
|
---|
| 1092 | }
|
---|
| 1093 | if (k>=nfzc) {
|
---|
| 1094 | delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
|
---|
| 1095 | *wjk_ptr += tmpval*delta_ijab;
|
---|
| 1096 | }
|
---|
| 1097 | } // exit b loop
|
---|
| 1098 | } // exit a loop
|
---|
| 1099 | mo_intbuf = 0;
|
---|
| 1100 | membuf_remote.release();
|
---|
| 1101 | } // end kloop loop
|
---|
| 1102 | } // endif
|
---|
| 1103 |
|
---|
| 1104 | ij_index++;
|
---|
| 1105 | } // endif
|
---|
| 1106 | } // exit j loop
|
---|
| 1107 | } // exit i loop
|
---|
| 1108 | tim_exit("Pkj and Wkj");
|
---|
| 1109 |
|
---|
| 1110 | // debug print
|
---|
| 1111 | if (debug_ && me == 0) {
|
---|
| 1112 | ExEnv::out0() << indent << "End of Pkj and Wkj" << endl;
|
---|
| 1113 | }
|
---|
| 1114 | // end of debug print
|
---|
| 1115 |
|
---|
| 1116 | // Update the matrices Pab and Wab with
|
---|
| 1117 | // contributions from (occ vir|occ vir) integrals
|
---|
| 1118 | tim_enter("Pab and Wab");
|
---|
| 1119 | index = 0;
|
---|
| 1120 | ij_index = 0;
|
---|
| 1121 | for (i=0; i<ni; i++) {
|
---|
| 1122 | for (j=0; j<nocc; j++) {
|
---|
| 1123 | if (index++ % nproc == me) {
|
---|
| 1124 | if (j>=nfzc) {
|
---|
| 1125 |
|
---|
| 1126 | offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
|
---|
| 1127 | for (a=0; a<nvir_act; a++) {
|
---|
| 1128 | pab_ptr = &Pab[a*(a+1)/2];
|
---|
| 1129 | if (do_d2_) d2vir_mat_ptr = &d2vir_mat[a*(a+1)/2];
|
---|
| 1130 | for (b=0; b<=a; b++) { // active-active part of Pab and Wab
|
---|
| 1131 | wab_ptr = &Wab[a*nvir + b];
|
---|
| 1132 | wba_ptr = &Wab[b*nvir + a];
|
---|
| 1133 | ibjc_ptr = &mo_int[offset + b];
|
---|
| 1134 | icjb_ptr = &mo_int[offset + b*nbasis];
|
---|
| 1135 | iajc_ptr = &mo_int[offset + a];
|
---|
| 1136 | icja_ptr = &mo_int[offset + a*nbasis];
|
---|
| 1137 | for (c=0; c<nvir_act; c++) {
|
---|
| 1138 | tmpval = *iajc_ptr**ibjc_ptr;
|
---|
| 1139 | if (do_d2_) *d2vir_mat_ptr += tmpval;
|
---|
| 1140 | *pab_ptr += 4*tmpval - 2**iajc_ptr * *icjb_ptr;
|
---|
| 1141 | // *pab_ptr += 2**iajc_ptr * (2 * *ibjc_ptr - *icjb_ptr);
|
---|
| 1142 |
|
---|
| 1143 | if (a == b) {
|
---|
| 1144 | delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
|
---|
| 1145 | *wab_ptr += 2**ibjc_ptr*(*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
|
---|
| 1146 | }
|
---|
| 1147 | else {
|
---|
| 1148 | delta_ijbc = evals[i_offset+i]+evals[j]-evals[nocc+b]-evals[nocc+c];
|
---|
| 1149 | delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
|
---|
| 1150 | *wab_ptr += 2**ibjc_ptr * (*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
|
---|
| 1151 | *wba_ptr += 2**iajc_ptr * (*icjb_ptr - 2 * *ibjc_ptr)*delta_ijbc;
|
---|
| 1152 | }
|
---|
| 1153 | iajc_ptr += nbasis;
|
---|
| 1154 | ibjc_ptr += nbasis;
|
---|
| 1155 | icja_ptr++;
|
---|
| 1156 | icjb_ptr++;
|
---|
| 1157 | } // exit c loop
|
---|
| 1158 | pab_ptr++;
|
---|
| 1159 | d2vir_mat_ptr++;
|
---|
| 1160 | } // exit b loop
|
---|
| 1161 | } // exit a loop
|
---|
| 1162 |
|
---|
| 1163 | for (a=0; a<nfzv; a++) { // active-frozen part of Pab
|
---|
| 1164 | pab_ptr = &Pab[(a+nvir_act)*(a+nvir_act+1)/2];
|
---|
| 1165 | for (b=0; b<nvir_act; b++) {
|
---|
| 1166 | tmpval = evals[nocc+b] - evals[nocc+nvir_act+a];
|
---|
| 1167 | ibjc_ptr = &mo_int[offset+b];
|
---|
| 1168 | iajc_ptr = &mo_int[offset+a+nvir_act];
|
---|
| 1169 | icja_ptr = &mo_int[offset+(a+nvir_act)*nbasis];
|
---|
| 1170 | for (c=0; c<nvir_act; c++) {
|
---|
| 1171 | *pab_ptr += 2**ibjc_ptr*(2**iajc_ptr - *icja_ptr++)/tmpval;
|
---|
| 1172 | ibjc_ptr += nbasis;
|
---|
| 1173 | iajc_ptr += nbasis;
|
---|
| 1174 | } // exit c loop
|
---|
| 1175 | pab_ptr++;
|
---|
| 1176 | } // exit b loop
|
---|
| 1177 | } // exit a loop
|
---|
| 1178 |
|
---|
| 1179 | } // endif
|
---|
| 1180 | ij_index++;
|
---|
| 1181 | } // endif
|
---|
| 1182 | } // exit j loop
|
---|
| 1183 | } // exit i loop
|
---|
| 1184 | tim_exit("Pab and Wab");
|
---|
| 1185 |
|
---|
| 1186 | // debug print
|
---|
| 1187 | if (debug_ && me == 0) {
|
---|
| 1188 | ExEnv::out0() << indent << "End of Pab and Wab" << endl;
|
---|
| 1189 | }
|
---|
| 1190 | // end of debug print
|
---|
| 1191 |
|
---|
| 1192 | compute_L:
|
---|
| 1193 |
|
---|
| 1194 | ///////////////////////////////////////
|
---|
| 1195 | // Update Waj and Laj with contrib.
|
---|
| 1196 | // from (oo|ov) and (ov|oo) integrals;
|
---|
| 1197 | // here a is active and j is active or
|
---|
| 1198 | // frozen
|
---|
| 1199 | ///////////////////////////////////////
|
---|
| 1200 | tim_enter("Waj and Laj");
|
---|
| 1201 |
|
---|
| 1202 | // (oo|ov) contribution
|
---|
| 1203 | index = 0;
|
---|
| 1204 | ik_index = 0;
|
---|
| 1205 | for (i=0; i<ni; i++) {
|
---|
| 1206 | for (k=0; k<nocc; k++) {
|
---|
| 1207 | if (index++ % nproc == me) {
|
---|
| 1208 | if (k>=nfzc) {
|
---|
| 1209 | offset = nbasis*nocc + nbasis*nbasis*ik_index;
|
---|
| 1210 | for (j=0; j<nocc; j++) {
|
---|
| 1211 | for (b=0; b<nvir_act; b++) {
|
---|
| 1212 | ibka_ptr = &mo_int[b+nocc + offset];
|
---|
| 1213 | ijkb_ptr = &mo_int[j + nbasis*b + offset];
|
---|
| 1214 | if (dograd) waj_ptr = &Waj[j*nvir]; // order as j*nvir+a to make loops more efficient
|
---|
| 1215 | laj_ptr = &Laj[j*nvir];
|
---|
| 1216 | for (a=0; a<nvir_act; a++) {
|
---|
| 1217 | tmpval = 2**ibka_ptr * *ijkb_ptr;
|
---|
| 1218 | ibka_ptr += nbasis;
|
---|
| 1219 | if (dograd) *waj_ptr++ += tmpval;
|
---|
| 1220 | *laj_ptr++ -= tmpval; // This term had the wrong sign in Frisch's paper
|
---|
| 1221 | } // exit a loop
|
---|
| 1222 | } // exit b loop
|
---|
| 1223 | } // exit j loop
|
---|
| 1224 | } // endif
|
---|
| 1225 | ik_index++;
|
---|
| 1226 | } // endif
|
---|
| 1227 | } // exit k loop
|
---|
| 1228 | } // exit i loop
|
---|
| 1229 |
|
---|
| 1230 | // (ov|oo) contribution
|
---|
| 1231 | index = 0;
|
---|
| 1232 | ik_index = 0;
|
---|
| 1233 | for (i=0; i<ni; i++) {
|
---|
| 1234 | for (k=0; k<nocc; k++) {
|
---|
| 1235 | if (index++ % nproc == me) {
|
---|
| 1236 | if (k>=nfzc) {
|
---|
| 1237 | offset = nocc + nbasis*nbasis*ik_index;
|
---|
| 1238 | for (b=0; b<nvir_act; b++) {
|
---|
| 1239 | for (j=0; j<nocc; j++) {
|
---|
| 1240 | ibkj_ptr = &mo_int[offset + b + j*nbasis];
|
---|
| 1241 | ibka_ptr = &mo_int[offset + b + nocc*nbasis];
|
---|
| 1242 | if (dograd) waj_ptr = &Waj[j*nvir];
|
---|
| 1243 | laj_ptr = &Laj[j*nvir];
|
---|
| 1244 | for (a=0; a<nvir_act; a++) {
|
---|
| 1245 | tmpval = 4 * *ibka_ptr * *ibkj_ptr;
|
---|
| 1246 | ibka_ptr += nbasis;
|
---|
| 1247 | if (dograd) *waj_ptr++ -= tmpval;
|
---|
| 1248 | *laj_ptr++ += tmpval; // This term had the wrong sign in Frisch's paper
|
---|
| 1249 | } // exit a loop
|
---|
| 1250 | } // exit j loop
|
---|
| 1251 | } // exit b loop
|
---|
| 1252 | } // endif
|
---|
| 1253 | ik_index++;
|
---|
| 1254 | } // endif
|
---|
| 1255 | } // exit k loop
|
---|
| 1256 | } // exit i loop
|
---|
| 1257 |
|
---|
| 1258 | tim_exit("Waj and Laj");
|
---|
| 1259 | /////////////////////////////
|
---|
| 1260 | // End of Waj and Laj update
|
---|
| 1261 | /////////////////////////////
|
---|
| 1262 |
|
---|
| 1263 | // debug print
|
---|
| 1264 | if (debug_ && me == 0) {
|
---|
| 1265 | ExEnv::out0() << indent << "End of Paj and Waj" << endl;
|
---|
| 1266 | }
|
---|
| 1267 | // end of debug print
|
---|
| 1268 |
|
---|
| 1269 | mo_int = 0;
|
---|
| 1270 |
|
---|
| 1271 | mem->sync(); // Need to synchronize before deleting mo_intbuf
|
---|
| 1272 |
|
---|
| 1273 | mo_int = (double*) mem->localdata();
|
---|
| 1274 |
|
---|
| 1275 | gamma_iajs_tmp = new double[nbasis*nvir_act];
|
---|
| 1276 | if (!gamma_iajs_tmp) {
|
---|
| 1277 | ExEnv::outn() << indent << "Could not allocate gamma_iajs_tmp" << endl;
|
---|
| 1278 | abort();
|
---|
| 1279 | }
|
---|
| 1280 |
|
---|
| 1281 | // debug print
|
---|
| 1282 | if (debug_ && me == 0) {
|
---|
| 1283 | ExEnv::out0() << indent << "Begin first and second q.b.t." << endl;
|
---|
| 1284 | }
|
---|
| 1285 | // end of debug print
|
---|
| 1286 |
|
---|
| 1287 | ///////////////////////////////////////////////////////////
|
---|
| 1288 | // Perform first and second quarter back-transformation.
|
---|
| 1289 | // Each node produces gamma_iajs, and gamma_iqjs
|
---|
| 1290 | // for a subset of i and j, all a and all s;
|
---|
| 1291 | // the back-transf. is done only for active i, j, a, and b
|
---|
| 1292 | ///////////////////////////////////////////////////////////
|
---|
| 1293 |
|
---|
| 1294 | // Begin first quarter back-transformation
|
---|
| 1295 | tim_enter("1. q.b.t.");
|
---|
| 1296 | index = 0;
|
---|
| 1297 | ij_index = 0;
|
---|
| 1298 | for (i=0; i<ni; i++) {
|
---|
| 1299 | for (j=0; j<nocc; j++) {
|
---|
| 1300 | if (index++ % nproc == me) {
|
---|
| 1301 | if (j>=nfzc) {
|
---|
| 1302 | bzerofast(gamma_iajs_tmp,nbasis*nvir_act);
|
---|
| 1303 | offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
|
---|
| 1304 |
|
---|
| 1305 | for (a=0; a<nvir_act; a++) {
|
---|
| 1306 | for (s=0; s<nbasis; s++) {
|
---|
| 1307 | c_sb = &scf_vector[s][nocc];
|
---|
| 1308 | gamma_iajs_ptr = &gamma_iajs_tmp[s*nvir_act + a];
|
---|
| 1309 | ibja_ptr = &mo_int[a*nbasis + offset];
|
---|
| 1310 | iajb_ptr = &mo_int[a + offset];
|
---|
| 1311 |
|
---|
| 1312 | tmpval = 0.0;
|
---|
| 1313 | for (b=0; b<nvir_act; b++) {
|
---|
| 1314 | tmpval += 2**c_sb++ * (2**iajb_ptr - *ibja_ptr++);
|
---|
| 1315 | iajb_ptr += nbasis;
|
---|
| 1316 | } // exit b loop
|
---|
| 1317 | *gamma_iajs_ptr += tmpval;
|
---|
| 1318 | } // exit s loop
|
---|
| 1319 | } // exit a loop
|
---|
| 1320 | // Put gamma_iajs_tmp into mo_int for one i,j
|
---|
| 1321 | // while overwriting mo_int
|
---|
| 1322 | gamma_iajs_ptr = gamma_iajs_tmp;
|
---|
| 1323 | for (y=0; y<nbasis; y++) {
|
---|
| 1324 | iajy_ptr = &mo_int[nocc + nbasis*(y + nbasis*ij_index)];
|
---|
| 1325 | for (a=0; a<nvir_act; a++) {
|
---|
| 1326 | *iajy_ptr++ = *gamma_iajs_ptr++;
|
---|
| 1327 | }
|
---|
| 1328 | }
|
---|
| 1329 |
|
---|
| 1330 | } // endif
|
---|
| 1331 | ij_index++;
|
---|
| 1332 | } // endif
|
---|
| 1333 | } // exit j loop
|
---|
| 1334 | } // exit i loop
|
---|
| 1335 | // end of first quarter back-transformation
|
---|
| 1336 | tim_exit("1. q.b.t.");
|
---|
| 1337 |
|
---|
| 1338 | // debug print
|
---|
| 1339 | if (debug_ && me == 0) {
|
---|
| 1340 | ExEnv::out0() << indent << "End of first q.b.t." << endl;
|
---|
| 1341 | }
|
---|
| 1342 | // end of debug print
|
---|
| 1343 |
|
---|
| 1344 | mo_int = 0;
|
---|
| 1345 |
|
---|
| 1346 | mem->sync(); // Make sure all nodes are done with gamma_iajs_tmp before renaming
|
---|
| 1347 |
|
---|
| 1348 | delete[] gamma_iajs_tmp;
|
---|
| 1349 |
|
---|
| 1350 | // The array mo_int has now been overwritten by the quarter
|
---|
| 1351 | // back-transformed non-sep 2PDM gamma_iajs, so rename
|
---|
| 1352 | gamma_iajs = (double*) mem->localdata();
|
---|
| 1353 |
|
---|
| 1354 | gamma_iqjs_tmp = new double[nbasis];
|
---|
| 1355 | if (!gamma_iqjs_tmp) {
|
---|
| 1356 | ExEnv::errn() << "Could not allocate gamma_iqjs_tmp" << endl;
|
---|
| 1357 | abort();
|
---|
| 1358 | }
|
---|
| 1359 |
|
---|
| 1360 | if (debug_ && me == 0) {
|
---|
| 1361 | ExEnv::out0() << indent << "Begin second q.b.t." << endl;
|
---|
| 1362 | }
|
---|
| 1363 |
|
---|
| 1364 | // Begin second quarter back-transformation
|
---|
| 1365 | // (gamma_iqjs elements ordered as i,j,s,q,
|
---|
| 1366 | // i.e., q varies fastest)
|
---|
| 1367 | tim_enter("2. q.b.t.");
|
---|
| 1368 | index = 0;
|
---|
| 1369 | ij_index = 0;
|
---|
| 1370 | for (i=0; i<ni; i++) {
|
---|
| 1371 | for (j=0; j<nocc; j++) {
|
---|
| 1372 | if (index++ % nproc == me) {
|
---|
| 1373 | if (j>=nfzc) {
|
---|
| 1374 | offset = nbasis*nbasis*ij_index;
|
---|
| 1375 |
|
---|
| 1376 | for (s=0; s<nbasis; s++) {
|
---|
| 1377 | bzerofast(gamma_iqjs_tmp,nbasis);
|
---|
| 1378 | for (q=0; q<nbasis; q++) {
|
---|
| 1379 | gamma_iqjs_ptr = &gamma_iqjs_tmp[q];
|
---|
| 1380 | gamma_iajs_ptr = &gamma_iajs[nocc + s*nbasis + offset];
|
---|
| 1381 | c_qa = &scf_vector[q][nocc];
|
---|
| 1382 |
|
---|
| 1383 | tmpval = 0.0;
|
---|
| 1384 | for (a=0; a<nvir_act; a++) {
|
---|
| 1385 | tmpval += *c_qa++ * *gamma_iajs_ptr++;
|
---|
| 1386 | } // exit a loop
|
---|
| 1387 | *gamma_iqjs_ptr += tmpval;
|
---|
| 1388 | } // exit q loop
|
---|
| 1389 | // Put gamma_iqjs_tmp into gamma_iajs for one i,j,s
|
---|
| 1390 | // while overwriting gamma_iajs
|
---|
| 1391 | gamma_iajs_ptr = &gamma_iajs[s*nbasis + offset];
|
---|
| 1392 | gamma_iqjs_ptr = gamma_iqjs_tmp;
|
---|
| 1393 | for (q=0; q<nbasis; q++) {
|
---|
| 1394 | *gamma_iajs_ptr++ = *gamma_iqjs_ptr++;
|
---|
| 1395 | }
|
---|
| 1396 | } // exit s loop
|
---|
| 1397 |
|
---|
| 1398 | } // endif
|
---|
| 1399 | ij_index++;
|
---|
| 1400 | } // endif
|
---|
| 1401 | } // exit j loop
|
---|
| 1402 | } // exit i loop
|
---|
| 1403 | tim_exit("2. q.b.t.");
|
---|
| 1404 | // end of second quarter back-transformation
|
---|
| 1405 |
|
---|
| 1406 | if (debug_ && me == 0) {
|
---|
| 1407 | ExEnv::out0() << indent << "End of second q.b.t." << endl;
|
---|
| 1408 | }
|
---|
| 1409 |
|
---|
| 1410 | gamma_iajs = 0;
|
---|
| 1411 |
|
---|
| 1412 | mem->sync(); // Keep this here to make sure all nodes have gamma_iqjs
|
---|
| 1413 | // before it is needed below, and that gamma_iajs is not
|
---|
| 1414 | // deleted prematurely
|
---|
| 1415 |
|
---|
| 1416 | // The quarter back-transformed elements gamma_iajs have now been
|
---|
| 1417 | // overwritten by the half back-transformed elements gamma_iqjs
|
---|
| 1418 |
|
---|
| 1419 | delete[] gamma_iqjs_tmp;
|
---|
| 1420 |
|
---|
| 1421 | /////////////////////////////////////////////////
|
---|
| 1422 | // End of 1. and 2. quarter back-transformation
|
---|
| 1423 | /////////////////////////////////////////////////
|
---|
| 1424 |
|
---|
| 1425 | Lpi = new double[nbasis*ni];
|
---|
| 1426 | bzerofast(Lpi,nbasis*ni);
|
---|
| 1427 |
|
---|
| 1428 | if (me == 0) {
|
---|
| 1429 | ExEnv::out0() << indent << "Begin third and fourth q.b.t." << endl;
|
---|
| 1430 | }
|
---|
| 1431 |
|
---|
| 1432 | //////////////////////////////////////////////////////////
|
---|
| 1433 | // Perform third and fourth quarter back-transformation
|
---|
| 1434 | // and compute contribution to gradient from non-sep 2PDM
|
---|
| 1435 | //////////////////////////////////////////////////////////
|
---|
| 1436 |
|
---|
| 1437 | tim_enter("3.qbt+4.qbt+non-sep contrib.");
|
---|
| 1438 | sp_g_data.init();
|
---|
| 1439 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 1440 | qbt34thread[i]->set_i_offset(i_offset);
|
---|
| 1441 | qbt34thread[i]->set_ni(ni);
|
---|
| 1442 | thr_->add_thread(i,qbt34thread[i]);
|
---|
| 1443 | # if SINGLE_THREAD_QBT34
|
---|
| 1444 | qbt34thread[i]->run();
|
---|
| 1445 | # endif
|
---|
| 1446 | }
|
---|
| 1447 | # if !SINGLE_THREAD_QBT34
|
---|
| 1448 | thr_->start_threads();
|
---|
| 1449 | thr_->wait_threads();
|
---|
| 1450 | # endif
|
---|
| 1451 | tim_exit("3.qbt+4.qbt+non-sep contrib.");
|
---|
| 1452 | // Add thread contributions to Lpi and ginter
|
---|
| 1453 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 1454 | double *Lpi_thread = qbt34thread[i]->get_Lpi();
|
---|
| 1455 | double **ginter_thread = qbt34thread[i]->get_ginter();
|
---|
| 1456 | for (j=0; j<nbasis*ni; j++) Lpi[j] += Lpi_thread[j];
|
---|
| 1457 | for (j=0; j<natom; j++) {
|
---|
| 1458 | for (k=0; k<3; k++) {
|
---|
| 1459 | ginter[j][k] += ginter_thread[j][k];
|
---|
| 1460 | }
|
---|
| 1461 | }
|
---|
| 1462 | aointder_computed += qbt34thread[i]->get_aointder_computed();
|
---|
| 1463 | }
|
---|
| 1464 |
|
---|
| 1465 | if (me == 0) {
|
---|
| 1466 | ExEnv::out0() << indent << "End of third and fourth q.b.t." << endl;
|
---|
| 1467 | }
|
---|
| 1468 |
|
---|
| 1469 | mem->sync(); // Make sure all nodes are done before deleting arrays
|
---|
| 1470 |
|
---|
| 1471 | if (debug_ > 1) {
|
---|
| 1472 | RefSCDimension ni_dim(new SCDimension(ni,1));
|
---|
| 1473 | ni_dim->blocks()->set_subdim(0, new SCDimension(ni));
|
---|
| 1474 | RefSCDimension nbasis_dim(new SCDimension(nbasis,1));
|
---|
| 1475 | nbasis_dim->blocks()->set_subdim(0, new SCDimension(nbasis));
|
---|
| 1476 | RefSCMatrix Lpi_mat(nbasis_dim, ni_dim, kit);
|
---|
| 1477 | Lpi_mat->assign(Lpi);
|
---|
| 1478 | Lpi_mat.print("Lpi");
|
---|
| 1479 | }
|
---|
| 1480 |
|
---|
| 1481 | if (debug_ && me == 0) {
|
---|
| 1482 | ExEnv::out0() << indent << "Back-transform Lpi" << endl;
|
---|
| 1483 | }
|
---|
| 1484 |
|
---|
| 1485 | // Back-transform Lpi to MO basis
|
---|
| 1486 | lpi_ptr = Lpi;
|
---|
| 1487 | for (p=0; p<nbasis; p++) {
|
---|
| 1488 | for (i=0; i<ni; i++) {
|
---|
| 1489 | c_pa = &scf_vector[p][nocc];
|
---|
| 1490 | laj_ptr = &Laj[nvir*(i_offset + i)];
|
---|
| 1491 | for (a=0; a<nvir; a++) {
|
---|
| 1492 | *laj_ptr++ += *c_pa++ * *lpi_ptr;
|
---|
| 1493 | } // exit a loop
|
---|
| 1494 | lpi_ptr++;
|
---|
| 1495 | } // exit i loop
|
---|
| 1496 | } // exit p loop
|
---|
| 1497 |
|
---|
| 1498 | // malloc_chain_check(1);
|
---|
| 1499 |
|
---|
| 1500 | delete[] Lpi;
|
---|
| 1501 |
|
---|
| 1502 | if (me == 0) {
|
---|
| 1503 | ExEnv::out0() << indent << "Done with pass " << pass+1 << endl;
|
---|
| 1504 | }
|
---|
| 1505 | } // exit loop over i-batches (pass)
|
---|
| 1506 | tim_exit("mp2 passes");
|
---|
| 1507 |
|
---|
| 1508 | if (dograd || do_d1_) {
|
---|
| 1509 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 1510 | delete qbt34thread[i];
|
---|
| 1511 | }
|
---|
| 1512 | delete[] qbt34thread;
|
---|
| 1513 | }
|
---|
| 1514 |
|
---|
| 1515 | mem->set_localsize(0);
|
---|
| 1516 |
|
---|
| 1517 | // debug print
|
---|
| 1518 | if (debug_ && me == 0) {
|
---|
| 1519 | ExEnv::out0() << indent << "Exited loop over i-batches" << endl;
|
---|
| 1520 | }
|
---|
| 1521 | // end of debug print
|
---|
| 1522 |
|
---|
| 1523 | ///////////////////////////////////////////////////////////////
|
---|
| 1524 | // The computation of the MP2 energy is now complete on each
|
---|
| 1525 | // node; add the nodes' contributions and print out the energy
|
---|
| 1526 | ///////////////////////////////////////////////////////////////
|
---|
| 1527 | msg_->sum(ecorr_mp2);
|
---|
| 1528 | msg_->sum(aoint_computed);
|
---|
| 1529 | msg_->sum(aointder_computed);
|
---|
| 1530 |
|
---|
| 1531 | biggest_coefs.combine(msg_);
|
---|
| 1532 | #if PRINT_BIGGEST_INTS
|
---|
| 1533 | biggest_ints_1.combine(msg_);
|
---|
| 1534 | biggest_ints_2.combine(msg_);
|
---|
| 1535 | biggest_ints_2s.combine(msg_);
|
---|
| 1536 | biggest_ints_3a.combine(msg_);
|
---|
| 1537 | biggest_ints_3.combine(msg_);
|
---|
| 1538 | #endif
|
---|
| 1539 |
|
---|
| 1540 | if (me == 0) {
|
---|
| 1541 | emp2 = escf + ecorr_mp2;
|
---|
| 1542 |
|
---|
| 1543 | #if PRINT_BIGGEST_INTS
|
---|
| 1544 | ExEnv::out0() << "biggest 1/4 transformed ints" << endl;
|
---|
| 1545 | for (i=0; i<biggest_ints_1.ncontrib(); i++) {
|
---|
| 1546 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
| 1547 | biggest_ints_1.indices(i)[0],
|
---|
| 1548 | biggest_ints_1.indices(i)[1],
|
---|
| 1549 | biggest_ints_1.indices(i)[2],
|
---|
| 1550 | biggest_ints_1.indices(i)[3],
|
---|
| 1551 | biggest_ints_1.val(i)
|
---|
| 1552 | )
|
---|
| 1553 | << endl;
|
---|
| 1554 | }
|
---|
| 1555 | ExEnv::outn() << "biggest 2/4 transformed ints" << endl;
|
---|
| 1556 | for (i=0; i<biggest_ints_2.ncontrib(); i++) {
|
---|
| 1557 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
| 1558 | biggest_ints_2.indices(i)[0],
|
---|
| 1559 | biggest_ints_2.indices(i)[1],
|
---|
| 1560 | biggest_ints_2.indices(i)[2],
|
---|
| 1561 | biggest_ints_2.indices(i)[3],
|
---|
| 1562 | biggest_ints_2.val(i)
|
---|
| 1563 | )
|
---|
| 1564 | << endl;
|
---|
| 1565 | }
|
---|
| 1566 | ExEnv::outn() << "restricted 2/4 transformed ints" << endl;
|
---|
| 1567 | for (i=0; i<biggest_ints_2s.ncontrib(); i++) {
|
---|
| 1568 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
| 1569 | biggest_ints_2s.indices(i)[0],
|
---|
| 1570 | biggest_ints_2s.indices(i)[1],
|
---|
| 1571 | biggest_ints_2s.indices(i)[2],
|
---|
| 1572 | biggest_ints_2s.indices(i)[3],
|
---|
| 1573 | biggest_ints_2s.val(i)
|
---|
| 1574 | )
|
---|
| 1575 | << endl;
|
---|
| 1576 | }
|
---|
| 1577 | ExEnv::outn() << "biggest 3/4 transformed ints (in 3.)" << endl;
|
---|
| 1578 | for (i=0; i<biggest_ints_3a.ncontrib(); i++) {
|
---|
| 1579 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
| 1580 | biggest_ints_3a.indices(i)[0],
|
---|
| 1581 | biggest_ints_3a.indices(i)[1],
|
---|
| 1582 | biggest_ints_3a.indices(i)[2],
|
---|
| 1583 | biggest_ints_3a.indices(i)[3],
|
---|
| 1584 | biggest_ints_3a.val(i)
|
---|
| 1585 | )
|
---|
| 1586 | << endl;
|
---|
| 1587 | }
|
---|
| 1588 | ExEnv::outn() << "biggest 3/4 transformed ints (in 4.)" << endl;
|
---|
| 1589 | for (i=0; i<biggest_ints_3.ncontrib(); i++) {
|
---|
| 1590 | ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
|
---|
| 1591 | biggest_ints_3.indices(i)[0],
|
---|
| 1592 | biggest_ints_3.indices(i)[1],
|
---|
| 1593 | biggest_ints_3.indices(i)[2],
|
---|
| 1594 | biggest_ints_3.indices(i)[3],
|
---|
| 1595 | biggest_ints_3.val(i)
|
---|
| 1596 | )
|
---|
| 1597 | << endl;
|
---|
| 1598 | }
|
---|
| 1599 | #endif
|
---|
| 1600 |
|
---|
| 1601 | if (restart_orbital_memgrp_ == 0) {
|
---|
| 1602 | if (biggest_coefs.ncontrib()) {
|
---|
| 1603 | ExEnv::out0() << endl << indent
|
---|
| 1604 | << "Largest first order coefficients (unique):"
|
---|
| 1605 | << endl;
|
---|
| 1606 | }
|
---|
| 1607 | for (i=0; i<biggest_coefs.ncontrib(); i++) {
|
---|
| 1608 | int i0 = biggest_coefs.indices(i)[0];
|
---|
| 1609 | int i1 = biggest_coefs.indices(i)[1];
|
---|
| 1610 | int i2 = biggest_coefs.indices(i)[2] + nocc;
|
---|
| 1611 | int i3 = biggest_coefs.indices(i)[3] + nocc;
|
---|
| 1612 | int spincase = biggest_coefs.indices(i)[4];
|
---|
| 1613 | ExEnv::out0() << indent
|
---|
| 1614 | << scprintf(" %2d %12.8f %2d %3s %2d %3s -> %2d %3s %2d %3s (%s)",
|
---|
| 1615 | i+1, biggest_coefs.val(i),
|
---|
| 1616 | symorb_num_[i0]+1,
|
---|
| 1617 | ct.gamma(symorb_irrep_[i0]).symbol(),
|
---|
| 1618 | symorb_num_[i1]+1,
|
---|
| 1619 | ct.gamma(symorb_irrep_[i1]).symbol(),
|
---|
| 1620 | symorb_num_[i2]+1,
|
---|
| 1621 | ct.gamma(symorb_irrep_[i2]).symbol(),
|
---|
| 1622 | symorb_num_[i3]+1,
|
---|
| 1623 | ct.gamma(symorb_irrep_[i3]).symbol(),
|
---|
| 1624 | (spincase==1111?"++++":"+-+-")
|
---|
| 1625 | )
|
---|
| 1626 | << endl;
|
---|
| 1627 | }
|
---|
| 1628 | }
|
---|
| 1629 |
|
---|
| 1630 | // Print out various energies etc.
|
---|
| 1631 |
|
---|
| 1632 | if (debug_) {
|
---|
| 1633 | ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
|
---|
| 1634 | << indent << "(or integral derivatives) would have been computed\n"
|
---|
| 1635 | << indent << "without bounds checking: "
|
---|
| 1636 | << npass*nshell*nshell*(nshell+1)*(nshell+1)/2
|
---|
| 1637 | << endl;
|
---|
| 1638 |
|
---|
| 1639 | ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
|
---|
| 1640 | << indent << "were computed: " << aoint_computed
|
---|
| 1641 | << endl;
|
---|
| 1642 |
|
---|
| 1643 | if (dograd) {
|
---|
| 1644 | ExEnv::out0() << indent
|
---|
| 1645 | << "Number of shell quartets for which AO integral derivatives\n"
|
---|
| 1646 | << indent << "were computed: " << aointder_computed
|
---|
| 1647 | << endl;
|
---|
| 1648 | }
|
---|
| 1649 | }
|
---|
| 1650 |
|
---|
| 1651 | ExEnv::out0()<<endl<<indent
|
---|
| 1652 | <<scprintf("RHF energy [au]: %17.12lf\n", escf);
|
---|
| 1653 | ExEnv::out0()<<indent
|
---|
| 1654 | <<scprintf("MP2 correlation energy [au]: %17.12lf\n", ecorr_mp2);
|
---|
| 1655 | ExEnv::out0()<<indent
|
---|
| 1656 | <<scprintf("MP2 energy [au]: %17.12lf\n", emp2);
|
---|
| 1657 | ExEnv::out0().flush();
|
---|
| 1658 | }
|
---|
| 1659 | if (method_ && strcmp(method_,"mp")) {
|
---|
| 1660 | ExEnv::out0() << indent
|
---|
| 1661 | << "MBPT2: bad method for closed shell case: " << method_
|
---|
| 1662 | << ", using mp" << endl;
|
---|
| 1663 | }
|
---|
| 1664 | set_energy(emp2);
|
---|
| 1665 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
| 1666 | *ref_to_mp2_acc);
|
---|
| 1667 |
|
---|
| 1668 | RefSCDimension nocc_act_dim(new SCDimension(nocc_act,1));
|
---|
| 1669 | nocc_act_dim->blocks()->set_subdim(0, new SCDimension(nocc_act));
|
---|
| 1670 | RefSCDimension nvir_act_dim(new SCDimension(nvir_act,1));
|
---|
| 1671 | nvir_act_dim->blocks()->set_subdim(0, new SCDimension(nvir_act));
|
---|
| 1672 | RefSCDimension nocc_dim(new SCDimension(nocc,1));
|
---|
| 1673 | nocc_dim->blocks()->set_subdim(0, new SCDimension(nocc));
|
---|
| 1674 | RefSCDimension nvir_dim(new SCDimension(nvir,1));
|
---|
| 1675 | nvir_dim->blocks()->set_subdim(0, new SCDimension(nvir));
|
---|
| 1676 | RefSCDimension nbasis_dim = ao_dimension()->blocks()->subdim(0);
|
---|
| 1677 | RefSCDimension noso_dim(new SCDimension(noso,1));
|
---|
| 1678 |
|
---|
| 1679 | if (dograd || do_d1_) {
|
---|
| 1680 | msg_->sum(Laj,nvir*nocc);
|
---|
| 1681 |
|
---|
| 1682 | RefSCMatrix T1_mat(nocc_act_dim, nvir_act_dim, kit);
|
---|
| 1683 | // the elements of T1_mat are the single-substitution amplitudes
|
---|
| 1684 | BiggestContribs biggest_t1(2,10);
|
---|
| 1685 | // compute the S2 norm of Lee et al. (s2_diag)
|
---|
| 1686 | double s2_diag = 0.0;
|
---|
| 1687 | for (j=nfzc; j<nocc; j++) {
|
---|
| 1688 | laj_ptr = &Laj[j*nvir];
|
---|
| 1689 | for (a=0; a<nvir_act; a++) {
|
---|
| 1690 | tmpval = 0.5**laj_ptr++/(evals[a+nocc]-evals[j]);
|
---|
| 1691 | T1_mat.set_element(j-nfzc,a,tmpval);
|
---|
| 1692 | biggest_t1.insert(tmpval,j,a);
|
---|
| 1693 | s2_diag += tmpval*tmpval;
|
---|
| 1694 | }
|
---|
| 1695 | }
|
---|
| 1696 | s2_diag = sqrt(s2_diag/(2*nocc_act));
|
---|
| 1697 | // compute the T1 matrix 1-norm
|
---|
| 1698 | double t1onenorm = 0.0;
|
---|
| 1699 | Ref<SCElementSumAbs> sumabs = new SCElementSumAbs;
|
---|
| 1700 | Ref<SCElementOp> genop = sumabs.pointer();
|
---|
| 1701 | for (a=0; a < nvir_act; a++) {
|
---|
| 1702 | sumabs->init();
|
---|
| 1703 | T1_mat.get_column(a).element_op(genop);
|
---|
| 1704 | if (t1onenorm < sumabs->result()) t1onenorm = sumabs->result();
|
---|
| 1705 | }
|
---|
| 1706 | // compute the T1 matrix inf-norm
|
---|
| 1707 | double t1infnorm = 0.0;
|
---|
| 1708 | for (j=0; j < nocc_act; j++) {
|
---|
| 1709 | sumabs->init();
|
---|
| 1710 | T1_mat.get_row(j).element_op(genop);
|
---|
| 1711 | if (t1infnorm < sumabs->result()) t1infnorm = sumabs->result();
|
---|
| 1712 | }
|
---|
| 1713 | // compute the T1 matrix 2-norm ( = D1(MP2) )
|
---|
| 1714 | RefSymmSCMatrix D1_mat(nocc_act_dim,kit);
|
---|
| 1715 | D1_mat.assign(0.0);
|
---|
| 1716 | D1_mat.accumulate_symmetric_product(T1_mat);
|
---|
| 1717 | T1_mat = 0;
|
---|
| 1718 | double d1_diag = sqrt(D1_mat.eigvals().get_element(nocc_act-1));
|
---|
| 1719 | D1_mat = 0;
|
---|
| 1720 | // print the norms
|
---|
| 1721 | ExEnv::out0()<<endl;
|
---|
| 1722 | ExEnv::out0()
|
---|
| 1723 | <<indent<<scprintf("D1(MP2) = %12.8f", d1_diag)
|
---|
| 1724 | <<endl
|
---|
| 1725 | <<indent<<scprintf("S2 matrix 1-norm = %12.8f", t1onenorm)
|
---|
| 1726 | <<endl
|
---|
| 1727 | <<indent<<scprintf("S2 matrix inf-norm = %12.8f", t1infnorm)
|
---|
| 1728 | <<endl
|
---|
| 1729 | <<indent<<scprintf("S2 diagnostic = %12.8f", s2_diag)
|
---|
| 1730 | <<endl;
|
---|
| 1731 | if (biggest_t1.ncontrib()) {
|
---|
| 1732 | ExEnv::out0() << endl
|
---|
| 1733 | << indent << "Largest S2 values (unique determinants):" << endl;
|
---|
| 1734 | }
|
---|
| 1735 | for (i=0; i<biggest_t1.ncontrib(); i++) {
|
---|
| 1736 | int i0 = biggest_t1.indices(i)[0];
|
---|
| 1737 | int i1 = biggest_t1.indices(i)[1] + nocc;
|
---|
| 1738 | ExEnv::out0()
|
---|
| 1739 | << indent << scprintf(" %2d %12.8f %3d %3s -> %4d %3s",
|
---|
| 1740 | i+1, biggest_t1.val(i),
|
---|
| 1741 | symorb_num_[i0]+1,
|
---|
| 1742 | ct.gamma(symorb_irrep_[i0]).symbol(),
|
---|
| 1743 | symorb_num_[i1]+1,
|
---|
| 1744 | ct.gamma(symorb_irrep_[i1]).symbol())
|
---|
| 1745 | << endl;
|
---|
| 1746 | }
|
---|
| 1747 |
|
---|
| 1748 | } // if (dograd || do_d1_)
|
---|
| 1749 |
|
---|
| 1750 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 1751 | delete e12thread[i];
|
---|
| 1752 | }
|
---|
| 1753 | delete[] e12thread;
|
---|
| 1754 |
|
---|
| 1755 | // quit here if only the energy is needed
|
---|
| 1756 | if (!dograd) {
|
---|
| 1757 | delete[] tbints_; tbints_ = 0;
|
---|
| 1758 | if (do_d1_) delete[] Laj;
|
---|
| 1759 | delete[] scf_vector;
|
---|
| 1760 | delete[] scf_vector_dat;
|
---|
| 1761 | delete[] evals;
|
---|
| 1762 | tim_exit("mp2-mem");
|
---|
| 1763 | return;
|
---|
| 1764 | }
|
---|
| 1765 |
|
---|
| 1766 | // Accumulate intermediate gradients on node 0
|
---|
| 1767 | sum_gradients(msg_, ginter, natom, 3);
|
---|
| 1768 |
|
---|
| 1769 | // Add intermediate gradients to the gradient on node 0
|
---|
| 1770 | if (me==0)
|
---|
| 1771 | accum_gradients(gradient, ginter, natom, 3);
|
---|
| 1772 |
|
---|
| 1773 | // Print out contribution to the gradient from non-sep. 2PDM
|
---|
| 1774 | if (debug_) {
|
---|
| 1775 | print_natom_3(ginter,
|
---|
| 1776 | "Contribution to MP2 gradient from non-separable 2PDM [au]:");
|
---|
| 1777 | }
|
---|
| 1778 |
|
---|
| 1779 | ////////////////////////////////////////////////////////
|
---|
| 1780 | // Add contributions from all nodes to various matrices
|
---|
| 1781 | ////////////////////////////////////////////////////////
|
---|
| 1782 | tmpint = (nvir > nocc ? nvir:nocc);
|
---|
| 1783 | double *tmpmat = new double[tmpint*tmpint];
|
---|
| 1784 | msg_->sum(Pkj,nocc*(nocc+1)/2,tmpmat); // Pkj is now complete
|
---|
| 1785 | msg_->sum(Pab,nvir*(nvir+1)/2,tmpmat); // Pab is now complete
|
---|
| 1786 | msg_->sum(Wab,nvir*nvir,tmpmat);
|
---|
| 1787 | msg_->sum(Wkj,nocc*nocc,tmpmat);
|
---|
| 1788 | msg_->sum(Waj,nvir*nocc,tmpmat);
|
---|
| 1789 | if (do_d2_) {
|
---|
| 1790 | msg_->sum(d2occ_mat,nocc_act*(nocc_act+1)/2,tmpmat);
|
---|
| 1791 | msg_->sum(d2vir_mat,nvir_act*(nvir_act+1)/2,tmpmat);
|
---|
| 1792 | }
|
---|
| 1793 | delete[] tmpmat;
|
---|
| 1794 |
|
---|
| 1795 | // Compute D2 diagnostic (d2_diag) from matrices d2_occ_mat and d2_vir_mat
|
---|
| 1796 | if (do_d2_) {
|
---|
| 1797 | RefSymmSCMatrix D2occ_mat(nocc_act_dim, kit);
|
---|
| 1798 | RefSymmSCMatrix D2vir_mat(nvir_act_dim,kit);
|
---|
| 1799 | D2occ_mat->assign(d2occ_mat);
|
---|
| 1800 | D2vir_mat->assign(d2vir_mat);
|
---|
| 1801 | d2o = sqrt(D2occ_mat.eigvals().get_element(nocc_act-1));
|
---|
| 1802 | d2v = sqrt(D2vir_mat.eigvals().get_element(nvir_act-1));
|
---|
| 1803 | d2_diag = (d2o > d2v ? d2o:d2v);
|
---|
| 1804 | ExEnv::out0() << endl
|
---|
| 1805 | << indent << scprintf("D2(MP1) = %12.8f", d2_diag) << endl << endl;
|
---|
| 1806 | delete[] d2occ_mat;
|
---|
| 1807 | delete[] d2vir_mat;
|
---|
| 1808 | }
|
---|
| 1809 |
|
---|
| 1810 | // Finish computation of Wab
|
---|
| 1811 | tim_enter("Pab and Wab");
|
---|
| 1812 | pab_ptr = Pab;
|
---|
| 1813 | for (a=0; a<nvir_act; a++) { // active-active part of Wab
|
---|
| 1814 | wba_ptr = &Wab[a];
|
---|
| 1815 | wab_ptr = &Wab[a*nvir];
|
---|
| 1816 | for (b=0; b<=a; b++) {
|
---|
| 1817 | if (a==b) {
|
---|
| 1818 | *wab_ptr -= evals[nocc+a]**pab_ptr;
|
---|
| 1819 | }
|
---|
| 1820 | else {
|
---|
| 1821 | *wab_ptr -= evals[nocc+a]**pab_ptr;
|
---|
| 1822 | *wba_ptr -= evals[nocc+b]**pab_ptr;
|
---|
| 1823 | }
|
---|
| 1824 | pab_ptr++;
|
---|
| 1825 | wab_ptr++;
|
---|
| 1826 | wba_ptr += nvir;
|
---|
| 1827 | } // exit b loop
|
---|
| 1828 | } // exit a loop
|
---|
| 1829 | for (a=0; a<nfzv; a++) { // active-frozen part of Wab
|
---|
| 1830 | wba_ptr = &Wab[nvir_act+a];
|
---|
| 1831 | wab_ptr = &Wab[(nvir_act+a)*nvir];
|
---|
| 1832 | pab_ptr = &Pab[(nvir_act+a)*(nvir_act+a+1)/2];
|
---|
| 1833 | for (b=0; b<nvir_act; b++) {
|
---|
| 1834 | *wab_ptr -= evals[nocc+b]**pab_ptr;
|
---|
| 1835 | *wba_ptr -= evals[nocc+b]**pab_ptr;
|
---|
| 1836 | pab_ptr++;
|
---|
| 1837 | wab_ptr++;
|
---|
| 1838 | wba_ptr += nvir;
|
---|
| 1839 | } // exit b loop
|
---|
| 1840 | } // exit a loop
|
---|
| 1841 | // Wab is now complete
|
---|
| 1842 | tim_exit("Pab and Wab");
|
---|
| 1843 | RefSCMatrix Wab_matrix(nvir_dim, nvir_dim, kit);
|
---|
| 1844 | Wab_matrix->assign(Wab); // Put elements of Wab into Wab_matrix
|
---|
| 1845 | free(Wab);
|
---|
| 1846 |
|
---|
| 1847 | // Update Wkj with contribution from Pkj
|
---|
| 1848 | tim_enter("Pkj and Wkj");
|
---|
| 1849 | pkj_ptr = Pkj;
|
---|
| 1850 | for (k=0; k<nocc; k++) {
|
---|
| 1851 | wjk_ptr = &Wkj[k];
|
---|
| 1852 | wkj_ptr = &Wkj[k*nocc];
|
---|
| 1853 | for (j=0; j<=k; j++) {
|
---|
| 1854 | if (k<nfzc && j<nfzc) { // don't want both j and k frozen
|
---|
| 1855 | wkj_ptr++;
|
---|
| 1856 | wjk_ptr += nocc;
|
---|
| 1857 | pkj_ptr++;
|
---|
| 1858 | continue;
|
---|
| 1859 | }
|
---|
| 1860 | if (j==k) {
|
---|
| 1861 | *wkj_ptr++ -= evals[k]**pkj_ptr++;
|
---|
| 1862 | }
|
---|
| 1863 | else if (j<nfzc) {
|
---|
| 1864 | *wkj_ptr++ -= evals[k]**pkj_ptr;
|
---|
| 1865 | *wjk_ptr -= evals[k]**pkj_ptr;
|
---|
| 1866 | pkj_ptr++;
|
---|
| 1867 | }
|
---|
| 1868 | else {
|
---|
| 1869 | *wkj_ptr++ -= evals[k]**pkj_ptr;
|
---|
| 1870 | *wjk_ptr -= evals[j]**pkj_ptr;
|
---|
| 1871 | pkj_ptr++;
|
---|
| 1872 | }
|
---|
| 1873 | wjk_ptr += nocc;
|
---|
| 1874 | } // exit j loop
|
---|
| 1875 | } // exit k loop
|
---|
| 1876 | tim_exit("Pkj and Wkj");
|
---|
| 1877 |
|
---|
| 1878 | /////////////////////////////////
|
---|
| 1879 | // Finish the computation of Laj
|
---|
| 1880 | /////////////////////////////////
|
---|
| 1881 |
|
---|
| 1882 | tim_enter("Laj");
|
---|
| 1883 |
|
---|
| 1884 | RefSCMatrix Cv(nbasis_dim, nvir_dim, kit); // virtual block of scf_vector
|
---|
| 1885 | RefSCMatrix Co(nbasis_dim, nocc_dim, kit); // occupied block of scf_vector
|
---|
| 1886 | for (p=0; p<nbasis; p++) {
|
---|
| 1887 | c_pq = scf_vector[p];
|
---|
| 1888 | for (q=0; q<noso; q++) {
|
---|
| 1889 | if (q<nocc) Co->set_element(p, q, *c_pq++);
|
---|
| 1890 | else Cv->set_element(p, q-nocc, *c_pq++);
|
---|
| 1891 | }
|
---|
| 1892 | }
|
---|
| 1893 |
|
---|
| 1894 | // Compute the density-like Dmat_matrix
|
---|
| 1895 | // (Cv*Pab_matrix*Cv.t() + Co*Pkj_matrix*Co.t())
|
---|
| 1896 | RefSymmSCMatrix Pab_matrix(nvir_dim,kit);
|
---|
| 1897 | RefSymmSCMatrix Pkj_matrix(nocc_dim,kit);
|
---|
| 1898 | RefSymmSCMatrix Dmat_matrix(nbasis_dim,kit);
|
---|
| 1899 | Pab_matrix->assign(Pab); // fill in elements of Pab_matrix from Pab
|
---|
| 1900 | free(Pab);
|
---|
| 1901 | Pkj_matrix->assign(Pkj); // fill in elements of Pkj_matrix from Pkj
|
---|
| 1902 | free(Pkj);
|
---|
| 1903 | Dmat_matrix.assign(0.0);
|
---|
| 1904 | Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
|
---|
| 1905 | Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
|
---|
| 1906 | // We now have the density-like matrix Dmat_matrix
|
---|
| 1907 |
|
---|
| 1908 | // Compute the G matrix
|
---|
| 1909 |
|
---|
| 1910 | RefSymmSCMatrix Gmat(nbasis_dim,kit);
|
---|
| 1911 | init_cs_gmat();
|
---|
| 1912 | tim_enter("make_gmat for Laj");
|
---|
| 1913 | make_cs_gmat_new(Gmat, Dmat_matrix);
|
---|
| 1914 | if (debug_ > 1) {
|
---|
| 1915 | Dmat_matrix.print("Dmat");
|
---|
| 1916 | Gmat.print("Gmat");
|
---|
| 1917 | }
|
---|
| 1918 | tim_exit("make_gmat for Laj");
|
---|
| 1919 |
|
---|
| 1920 | // Finish computation of Laj
|
---|
| 1921 | RefSCMatrix Laj_matrix(nocc_dim,nvir_dim,kit); // elements are ordered as j*nvir+a
|
---|
| 1922 | Laj_matrix->assign(Laj);
|
---|
| 1923 | if (debug_ > 1) Laj_matrix->print("Laj (first bit)");
|
---|
| 1924 | Laj_matrix = Laj_matrix - 2*Co.t()*Gmat*Cv;
|
---|
| 1925 | if (debug_ > 1) Laj_matrix->print("Laj (all of it)");
|
---|
| 1926 | Laj_matrix->convert(Laj); // Put new Laj_matrix elements into Laj
|
---|
| 1927 |
|
---|
| 1928 | tim_exit("Laj");
|
---|
| 1929 |
|
---|
| 1930 | //////////////////////////////////////
|
---|
| 1931 | // Computation of Laj is now complete
|
---|
| 1932 | //////////////////////////////////////
|
---|
| 1933 |
|
---|
| 1934 | ////////////////////////////
|
---|
| 1935 | // Solve the CPHF equations
|
---|
| 1936 | ////////////////////////////
|
---|
| 1937 | RefSCMatrix Paj_matrix(nvir_dim, nocc_dim, kit);
|
---|
| 1938 | tim_enter("cphf");
|
---|
| 1939 | cs_cphf(scf_vector, Laj, evals, Paj_matrix);
|
---|
| 1940 | tim_exit("cphf");
|
---|
| 1941 |
|
---|
| 1942 | free(Laj);
|
---|
| 1943 |
|
---|
| 1944 | // Finish computation of Waj
|
---|
| 1945 | for (a=0; a<nvir; a++) {
|
---|
| 1946 | waj_ptr = &Waj[a];
|
---|
| 1947 | for (j=0; j<nocc; j++) {
|
---|
| 1948 | *waj_ptr -= evals[j]*Paj_matrix->get_element(a,j);
|
---|
| 1949 | waj_ptr += nvir;
|
---|
| 1950 | }
|
---|
| 1951 | }
|
---|
| 1952 | // Waj is now complete
|
---|
| 1953 | RefSCMatrix Waj_matrix(nocc_dim, nvir_dim, kit);
|
---|
| 1954 | Waj_matrix->assign(Waj); // Put elements of Waj into Waj_matrix
|
---|
| 1955 | // NB. Waj_matrix elements are ordered as j*nvir+a
|
---|
| 1956 | free(Waj);
|
---|
| 1957 |
|
---|
| 1958 |
|
---|
| 1959 | // Finish computation of Wkj
|
---|
| 1960 | tim_enter("Pkj and Wkj");
|
---|
| 1961 | // Compute Dmat_matrix =
|
---|
| 1962 | // Co*Pkj_matrix*Co.t() + Co*Paj_matrix.t()*Cv.t()
|
---|
| 1963 | // + Cv*Paj_matrix*Co.t() + Cv*Pab_matrix*Cv.t();
|
---|
| 1964 | Dmat_matrix.assign(0.0);
|
---|
| 1965 | Dmat_matrix.accumulate_symmetric_sum(Cv*Paj_matrix*Co.t());
|
---|
| 1966 | Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
|
---|
| 1967 | Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
|
---|
| 1968 | tim_enter("make_gmat for Wkj");
|
---|
| 1969 | make_cs_gmat_new(Gmat, Dmat_matrix);
|
---|
| 1970 | tim_exit("make_gmat for Wkj");
|
---|
| 1971 | done_cs_gmat();
|
---|
| 1972 | for (i=0; i<thr_->nthread(); i++) tbints_[i] = 0;
|
---|
| 1973 | delete[] tbints_; tbints_ = 0;
|
---|
| 1974 | RefSCMatrix Wkj_matrix(nocc_dim, nocc_dim, kit);
|
---|
| 1975 | Wkj_matrix->assign(Wkj);
|
---|
| 1976 | Wkj_matrix = Wkj_matrix - 2*Co.t()*Gmat*Co;
|
---|
| 1977 | free(Wkj);
|
---|
| 1978 | // Wkj is now complete - not as Wkj but as Wkj_matrix
|
---|
| 1979 | tim_exit("Pkj and Wkj");
|
---|
| 1980 |
|
---|
| 1981 | ////////////////////////////////////////////////////////////////
|
---|
| 1982 | // We now have the matrices Pkj_matrix, Paj_matrix, Pab_matrix,
|
---|
| 1983 | // Wkj_matrix, Waj_matrix, Wab_matrix and can compute the
|
---|
| 1984 | // remaining contributions to the gradient
|
---|
| 1985 | ///////////////////////////////////////////////////////////////
|
---|
| 1986 |
|
---|
| 1987 | // Compute the second order correction to
|
---|
| 1988 | // the density matrix and energy weighted
|
---|
| 1989 | // density matrix in the AO basis
|
---|
| 1990 | RefSCMatrix P2AO_matrix(nbasis_dim, nbasis_dim, kit);
|
---|
| 1991 | RefSCMatrix P2MO_matrix(noso_dim, noso_dim, kit);
|
---|
| 1992 | RefSCMatrix W2AO_matrix(nbasis_dim, nbasis_dim, kit);
|
---|
| 1993 | RefSCMatrix W2MO_matrix(noso_dim, noso_dim, kit);
|
---|
| 1994 | RefSCMatrix SCF_matrix(nbasis_dim, noso_dim, kit);
|
---|
| 1995 | for (i=0; i<nocc; i++) {
|
---|
| 1996 | for (j=0; j<nocc; j++) {
|
---|
| 1997 | P2MO_matrix->set_element(i,j,Pkj_matrix->get_element(i,j));
|
---|
| 1998 | W2MO_matrix->set_element(i,j,Wkj_matrix->get_element(i,j));
|
---|
| 1999 | }
|
---|
| 2000 | for (j=nocc; j<noso; j++) {
|
---|
| 2001 | P2MO_matrix->set_element(i,j,Paj_matrix->get_element(j-nocc,i));
|
---|
| 2002 | W2MO_matrix->set_element(i,j,Waj_matrix->get_element(i,j-nocc));
|
---|
| 2003 | }
|
---|
| 2004 | }
|
---|
| 2005 | for (i=nocc; i<noso; i++) {
|
---|
| 2006 | for (j=0; j<nocc; j++) {
|
---|
| 2007 | P2MO_matrix->set_element(i,j,Paj_matrix->get_element(i-nocc,j));
|
---|
| 2008 | W2MO_matrix->set_element(i,j,Waj_matrix->get_element(j,i-nocc));
|
---|
| 2009 | }
|
---|
| 2010 | for (j=nocc; j<noso; j++) {
|
---|
| 2011 | P2MO_matrix->set_element(i,j,Pab_matrix->get_element(i-nocc,j-nocc));
|
---|
| 2012 | W2MO_matrix->set_element(i,j,Wab_matrix->get_element(i-nocc,j-nocc));
|
---|
| 2013 | }
|
---|
| 2014 | }
|
---|
| 2015 | for (i=0; i<nbasis; i++) {
|
---|
| 2016 | for (j=0; j<nocc; j++) {
|
---|
| 2017 | SCF_matrix->set_element(i,j,Co->get_element(i,j));
|
---|
| 2018 | }
|
---|
| 2019 | for (j=nocc; j<noso; j++) {
|
---|
| 2020 | SCF_matrix->set_element(i,j,Cv->get_element(i,j-nocc));
|
---|
| 2021 | }
|
---|
| 2022 | }
|
---|
| 2023 | P2AO_matrix = SCF_matrix * P2MO_matrix * SCF_matrix.t();
|
---|
| 2024 | W2AO_matrix = SCF_matrix * W2MO_matrix * SCF_matrix.t();
|
---|
| 2025 | // P2AO_matrix = Co*(Pkj_matrix*Co.t() + Paj_matrix.t()*Cv.t()) +
|
---|
| 2026 | // Cv*(Paj_matrix*Co.t() + Pab_matrix*Cv.t());
|
---|
| 2027 | // W2AO_matrix = Co*(Wkj_matrix*Co.t() + Waj_matrix*Cv.t()) +
|
---|
| 2028 | // Cv*(Waj_matrix.t()*Co.t() + Wab_matrix*Cv.t());
|
---|
| 2029 |
|
---|
| 2030 | if (debug_ > 1) {
|
---|
| 2031 | SCF_matrix.print("SCF_matrix");
|
---|
| 2032 | P2MO_matrix.print("P2MO_matrix");
|
---|
| 2033 | W2MO_matrix.print("W2MO_matrix");
|
---|
| 2034 | P2AO_matrix.print("P2AO_matrix");
|
---|
| 2035 | W2AO_matrix.print("W2AO_matrix");
|
---|
| 2036 | }
|
---|
| 2037 |
|
---|
| 2038 | // Convert P2AO_matrix and W2AO_matrix to double*
|
---|
| 2039 | P2AO = new double[nbasis*nbasis];
|
---|
| 2040 | W2AO = new double[nbasis*nbasis];
|
---|
| 2041 | P2AO_matrix->convert(P2AO);
|
---|
| 2042 | W2AO_matrix->convert(W2AO);
|
---|
| 2043 |
|
---|
| 2044 | // Compute the HF density matrix and
|
---|
| 2045 | // energy weighted density matrix
|
---|
| 2046 | PHF = new double[nbasis*nbasis];
|
---|
| 2047 | WHF = new double[nbasis*nbasis];
|
---|
| 2048 | phf_ptr = PHF;
|
---|
| 2049 | whf_ptr = WHF;
|
---|
| 2050 | for (p=0; p<nbasis; p++) {
|
---|
| 2051 | for (q=0; q<nbasis; q++) {
|
---|
| 2052 | *phf_ptr++ = 0.0;
|
---|
| 2053 | *whf_ptr++ = 0.0;
|
---|
| 2054 | }
|
---|
| 2055 | }
|
---|
| 2056 | phf_ptr = PHF;
|
---|
| 2057 | whf_ptr = WHF;
|
---|
| 2058 | for (p=0; p<nbasis; p++) {
|
---|
| 2059 | for (q=0; q<nbasis; q++) {
|
---|
| 2060 | c_pi = scf_vector[p];
|
---|
| 2061 | c_qi = scf_vector[q];
|
---|
| 2062 | for (i=0; i<nocc; i++) {
|
---|
| 2063 | tmpval = 2* *c_pi++ * *c_qi++;
|
---|
| 2064 | *phf_ptr += tmpval;
|
---|
| 2065 | *whf_ptr -= evals[i] * tmpval;
|
---|
| 2066 | } // exit i loop
|
---|
| 2067 | phf_ptr++;
|
---|
| 2068 | whf_ptr++;
|
---|
| 2069 | } // exit q loop
|
---|
| 2070 | } // exit p loop
|
---|
| 2071 |
|
---|
| 2072 | // Compute the MP2 density and energy weighted density
|
---|
| 2073 |
|
---|
| 2074 | // PMP2 = PHF + P2AO; WMP2 = WHF + W2AO
|
---|
| 2075 | PMP2 = new double[nbasis*nbasis];
|
---|
| 2076 | WMP2 = new double[nbasis*nbasis];
|
---|
| 2077 | // Initialize PMP2 and WMP2
|
---|
| 2078 | pmp2_ptr = PMP2;
|
---|
| 2079 | wmp2_ptr = WMP2;
|
---|
| 2080 | for (p=0; p<nbasis; p++) {
|
---|
| 2081 | for (q=0; q<nbasis; q++) {
|
---|
| 2082 | *pmp2_ptr++ = 0.0;
|
---|
| 2083 | *wmp2_ptr++ = 0.0;
|
---|
| 2084 | }
|
---|
| 2085 | }
|
---|
| 2086 | pmp2_ptr = PMP2;
|
---|
| 2087 | wmp2_ptr = WMP2;
|
---|
| 2088 | p2ao_ptr = P2AO;
|
---|
| 2089 | w2ao_ptr = W2AO;
|
---|
| 2090 | phf_ptr = PHF;
|
---|
| 2091 | whf_ptr = WHF;
|
---|
| 2092 | for (p=0; p<nbasis; p++) {
|
---|
| 2093 | for (q=0; q<nbasis; q++) {
|
---|
| 2094 | *pmp2_ptr++ = *phf_ptr++ + *p2ao_ptr++;
|
---|
| 2095 | *wmp2_ptr++ = *whf_ptr++ + *w2ao_ptr++;
|
---|
| 2096 | }
|
---|
| 2097 | }
|
---|
| 2098 | delete[] W2AO;
|
---|
| 2099 |
|
---|
| 2100 | if (debug_ > 1) {
|
---|
| 2101 | RefSCMatrix tmpmat(ao_dimension(), ao_dimension(), kit);
|
---|
| 2102 | tmpmat->assign(PMP2);
|
---|
| 2103 | tmpmat.print("PMP2");
|
---|
| 2104 | tmpmat->assign(P2AO);
|
---|
| 2105 | tmpmat.print("P2AO");
|
---|
| 2106 | tmpmat->assign(PHF);
|
---|
| 2107 | tmpmat.print("PHF");
|
---|
| 2108 | tmpmat->assign(WMP2);
|
---|
| 2109 | tmpmat.print("WMP2");
|
---|
| 2110 | }
|
---|
| 2111 |
|
---|
| 2112 | ////////////////////////////////////////////////
|
---|
| 2113 | // Compute the contribution to the MP2 gradient
|
---|
| 2114 | // from the separable part of the 2PDM
|
---|
| 2115 | ////////////////////////////////////////////////
|
---|
| 2116 |
|
---|
| 2117 | zero_gradients(ginter, natom, 3);
|
---|
| 2118 | zero_gradients(hf_ginter, natom, 3);
|
---|
| 2119 | tim_enter("sep 2PDM contrib.");
|
---|
| 2120 |
|
---|
| 2121 | CSGradS2PDM** s2pdmthread = new CSGradS2PDM*[thr_->nthread()];
|
---|
| 2122 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 2123 | s2pdmthread[i] = new CSGradS2PDM(i, thr_->nthread(), me, nproc,
|
---|
| 2124 | lock, basis(), tbintder_[i],
|
---|
| 2125 | PHF, P2AO, tol, debug_, dynamic_);
|
---|
| 2126 | thr_->add_thread(i,s2pdmthread[i]);
|
---|
| 2127 | # if SINGLE_THREAD_S2PDM
|
---|
| 2128 | s2pdmthread[i]->run();
|
---|
| 2129 | # endif
|
---|
| 2130 | }
|
---|
| 2131 | # if !SINGLE_THREAD_S2PDM
|
---|
| 2132 | thr_->start_threads();
|
---|
| 2133 | thr_->wait_threads();
|
---|
| 2134 | # endif
|
---|
| 2135 | for (i=0; i<thr_->nthread(); i++) {
|
---|
| 2136 | s2pdmthread[i]->accum_mp2_contrib(ginter);
|
---|
| 2137 | s2pdmthread[i]->accum_hf_contrib(hf_ginter);
|
---|
| 2138 | delete s2pdmthread[i];
|
---|
| 2139 | }
|
---|
| 2140 | sum_gradients(msg_, ginter, molecule()->natom(), 3);
|
---|
| 2141 | sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
|
---|
| 2142 | delete[] s2pdmthread;
|
---|
| 2143 |
|
---|
| 2144 | tim_exit("sep 2PDM contrib.");
|
---|
| 2145 | delete[] P2AO;
|
---|
| 2146 |
|
---|
| 2147 | // The separable 2PDM contribution to the gradient has now been
|
---|
| 2148 | // accumulated in ginter on node 0; add it to the total gradients
|
---|
| 2149 | if (me == 0) {
|
---|
| 2150 | accum_gradients(gradient, ginter, natom, 3);
|
---|
| 2151 | accum_gradients(hf_gradient, hf_ginter, natom, 3);
|
---|
| 2152 | }
|
---|
| 2153 | // Print out the contribution to the gradient from sep. 2PDM
|
---|
| 2154 | if (debug_) {
|
---|
| 2155 | print_natom_3(hf_ginter,
|
---|
| 2156 | "Contribution from separable 2PDM to HF gradient [au]:");
|
---|
| 2157 | print_natom_3(ginter,
|
---|
| 2158 | "Contribution from separable 2PDM to MP2 gradient [au]:");
|
---|
| 2159 | }
|
---|
| 2160 |
|
---|
| 2161 | // Done with two-electron integrals
|
---|
| 2162 | tbint_ = 0;
|
---|
| 2163 | if (dograd || do_d1_) {
|
---|
| 2164 | delete[] tbintder_;
|
---|
| 2165 | tbintder_ = 0;
|
---|
| 2166 | }
|
---|
| 2167 |
|
---|
| 2168 | /////////////////////////////////////////////////////////////
|
---|
| 2169 | // Compute the one-electron contribution to the MP2 gradient
|
---|
| 2170 | /////////////////////////////////////////////////////////////
|
---|
| 2171 |
|
---|
| 2172 | zero_gradients(ginter, natom, 3);
|
---|
| 2173 | zero_gradients(hf_ginter, natom, 3);
|
---|
| 2174 | tim_enter("hcore contrib.");
|
---|
| 2175 | hcore_cs_grad(PHF, PMP2, hf_ginter, ginter);
|
---|
| 2176 | tim_exit("hcore contrib.");
|
---|
| 2177 | delete[] PHF;
|
---|
| 2178 | delete[] PMP2;
|
---|
| 2179 | // The hcore contribution to the gradient has now been accumulated
|
---|
| 2180 | // in ginter on node 0; add it to the total gradients
|
---|
| 2181 | if (me == 0) {
|
---|
| 2182 | accum_gradients(gradient, ginter, natom, 3);
|
---|
| 2183 | accum_gradients(hf_gradient, hf_ginter, natom, 3);
|
---|
| 2184 | }
|
---|
| 2185 | // Print out the contribution to the gradient from hcore
|
---|
| 2186 | if (debug_) {
|
---|
| 2187 | print_natom_3(hf_ginter, "Contribution to HF gradient from hcore [au]:");
|
---|
| 2188 | print_natom_3(ginter, "Contribution to MP2 gradient from hcore [au]:");
|
---|
| 2189 | }
|
---|
| 2190 |
|
---|
| 2191 | zero_gradients(ginter, natom, 3);
|
---|
| 2192 | zero_gradients(hf_ginter, natom, 3);
|
---|
| 2193 | tim_enter("overlap contrib.");
|
---|
| 2194 | overlap_cs_grad(WHF, WMP2, hf_ginter, ginter);
|
---|
| 2195 | delete[] WHF;
|
---|
| 2196 | tim_exit("overlap contrib.");
|
---|
| 2197 | delete[] WMP2;
|
---|
| 2198 | // The overlap contribution to the gradient has now been accumulated
|
---|
| 2199 | // in ginter on node 0; add it to the total gradients
|
---|
| 2200 | if (me == 0) {
|
---|
| 2201 | accum_gradients(gradient, ginter, natom, 3);
|
---|
| 2202 | accum_gradients(hf_gradient, hf_ginter, natom, 3);
|
---|
| 2203 | }
|
---|
| 2204 |
|
---|
| 2205 | // Print out the overlap contribution to the gradient
|
---|
| 2206 | if (debug_) {
|
---|
| 2207 | print_natom_3(hf_ginter, "Overlap contribution to HF gradient [au]:");
|
---|
| 2208 | print_natom_3(ginter,"Overlap contribution to MP2 gradient [au]:");
|
---|
| 2209 | }
|
---|
| 2210 |
|
---|
| 2211 | ////////////////////////////////////////////////////////
|
---|
| 2212 | // Compute the nuclear contribution to the MP2 gradient
|
---|
| 2213 | ////////////////////////////////////////////////////////
|
---|
| 2214 |
|
---|
| 2215 | if (me == 0) {
|
---|
| 2216 | nuclear_repulsion_energy_gradient(ginter);
|
---|
| 2217 | accum_gradients(gradient, ginter, natom, 3);
|
---|
| 2218 | accum_gradients(hf_gradient, ginter, natom, 3);
|
---|
| 2219 |
|
---|
| 2220 | }
|
---|
| 2221 |
|
---|
| 2222 | // Print out the nuclear contribution to the gradient
|
---|
| 2223 | if (debug_)
|
---|
| 2224 | print_natom_3(ginter,"Nuclear contribution to MP2 gradient [au]:");
|
---|
| 2225 |
|
---|
| 2226 |
|
---|
| 2227 | ////////////////////////////////////////////////////////
|
---|
| 2228 | // The computation of the MP2 gradient is now complete;
|
---|
| 2229 | // print out the gradient
|
---|
| 2230 | ////////////////////////////////////////////////////////
|
---|
| 2231 | if (debug_) {
|
---|
| 2232 | ExEnv::out0() << "Obtaining HF gradient" << endl;
|
---|
| 2233 | print_natom_3(ref()->gradient(),"HF gradient from HF");
|
---|
| 2234 | print_natom_3(hf_gradient,"Total HF gradient from MP2 [au]:");
|
---|
| 2235 | }
|
---|
| 2236 | print_natom_3(gradient,"Total MP2 gradient [au]:");
|
---|
| 2237 |
|
---|
| 2238 | msg_->bcast(gradient_dat, natom*3);
|
---|
| 2239 | RefSCVector gradientvec = matrixkit()->vector(moldim());
|
---|
| 2240 | gradientvec->assign(gradient_dat);
|
---|
| 2241 | set_gradient(gradientvec);
|
---|
| 2242 |
|
---|
| 2243 | msg_->bcast(hf_gradient_dat, natom*3);
|
---|
| 2244 | hf_gradient_ = matrixkit()->vector(moldim());
|
---|
| 2245 | hf_gradient_->assign(hf_gradient_dat);
|
---|
| 2246 |
|
---|
| 2247 | delete[] gradient;
|
---|
| 2248 | delete[] gradient_dat;
|
---|
| 2249 |
|
---|
| 2250 | delete[] hf_gradient;
|
---|
| 2251 | delete[] hf_gradient_dat;
|
---|
| 2252 |
|
---|
| 2253 | for (i=0; i<natom; i++) {
|
---|
| 2254 | delete[] ginter[i];
|
---|
| 2255 | delete[] hf_ginter[i];
|
---|
| 2256 | }
|
---|
| 2257 | delete[] ginter;
|
---|
| 2258 | delete[] hf_ginter;
|
---|
| 2259 |
|
---|
| 2260 | delete[] scf_vector;
|
---|
| 2261 | delete[] scf_vector_dat;
|
---|
| 2262 | delete[] evals;
|
---|
| 2263 |
|
---|
| 2264 | tim_exit("mp2-mem");
|
---|
| 2265 | }
|
---|
| 2266 |
|
---|
| 2267 | ///////////////////////////////////////////////////////////
|
---|
| 2268 | // Compute the contribution to the MP2 gradient from hcore
|
---|
| 2269 | ///////////////////////////////////////////////////////////
|
---|
| 2270 | void
|
---|
| 2271 | MBPT2::hcore_cs_grad(double *PHF, double *PMP2,
|
---|
| 2272 | double **hf_ginter, double **ginter)
|
---|
| 2273 | {
|
---|
| 2274 |
|
---|
| 2275 | int i, j, k, l, m;
|
---|
| 2276 | int jj, kk;
|
---|
| 2277 | int jsize, ksize;
|
---|
| 2278 | int j_offset, k_offset;
|
---|
| 2279 | int jk_index;
|
---|
| 2280 | int index;
|
---|
| 2281 | int nshell;
|
---|
| 2282 | int nbasis;
|
---|
| 2283 | int nproc = msg_->n();
|
---|
| 2284 | int me = msg_->me();
|
---|
| 2285 |
|
---|
| 2286 | const double *oneebuf; // 1-electron buffer
|
---|
| 2287 | double tmpval1, tmpval2;
|
---|
| 2288 | double gxyz[3];
|
---|
| 2289 | double hf_gxyz[3];
|
---|
| 2290 |
|
---|
| 2291 | // Initialize 1e object
|
---|
| 2292 | Ref<OneBodyDerivInt> obintder_ = integral()->hcore_deriv();
|
---|
| 2293 | oneebuf = obintder_->buffer();
|
---|
| 2294 |
|
---|
| 2295 | nshell = basis()->nshell();
|
---|
| 2296 | nbasis = basis()->nbasis();
|
---|
| 2297 |
|
---|
| 2298 | ///////////////////////////////////////////////////////////////////////////////
|
---|
| 2299 | // Compute the kinetic and nuclear-electron energy contribution to the gradient
|
---|
| 2300 | ///////////////////////////////////////////////////////////////////////////////
|
---|
| 2301 |
|
---|
| 2302 | jk_index = 0;
|
---|
| 2303 |
|
---|
| 2304 | for (i=0; i<molecule()->natom(); i++) {
|
---|
| 2305 | for (j=0; j<nshell; j++) {
|
---|
| 2306 | jsize = basis()->shell(j).nfunction();
|
---|
| 2307 | j_offset = basis()->shell_to_function(j);
|
---|
| 2308 |
|
---|
| 2309 | for (k=0; k<=j; k++) {
|
---|
| 2310 | ksize = basis()->shell(k).nfunction();
|
---|
| 2311 | k_offset = basis()->shell_to_function(k);
|
---|
| 2312 |
|
---|
| 2313 | if (jk_index++%nproc == me) {
|
---|
| 2314 | obintder_->compute_shell(j,k,i);
|
---|
| 2315 |
|
---|
| 2316 | for (l=0; l<3; l++) { gxyz[l] = 0.0; hf_gxyz[l] = 0.0; }
|
---|
| 2317 |
|
---|
| 2318 | index = 0;
|
---|
| 2319 |
|
---|
| 2320 | for (jj=0; jj<jsize; jj++) {
|
---|
| 2321 | for (kk=0; kk<ksize; kk++) {
|
---|
| 2322 | tmpval1 = PMP2[(j_offset + jj)*nbasis + k_offset + kk];
|
---|
| 2323 | tmpval2 = PHF[(j_offset + jj)*nbasis + k_offset + kk];
|
---|
| 2324 | for (m=0; m<3; m++) {
|
---|
| 2325 | gxyz[m] += oneebuf[index] * tmpval1;
|
---|
| 2326 | hf_gxyz[m] += oneebuf[index] * tmpval2;
|
---|
| 2327 | index++;
|
---|
| 2328 | } // exit m loop
|
---|
| 2329 | } // exit kk loop
|
---|
| 2330 | } // exit jj loop
|
---|
| 2331 |
|
---|
| 2332 | if (j != k) {
|
---|
| 2333 | // off-diagonal blocks
|
---|
| 2334 | for (l=0; l<3; l++) {
|
---|
| 2335 | gxyz[l] *= 2.0;
|
---|
| 2336 | hf_gxyz[l] *= 2.0;
|
---|
| 2337 | }
|
---|
| 2338 | }
|
---|
| 2339 |
|
---|
| 2340 | for (l=0; l<3; l++) {
|
---|
| 2341 | ginter[i][l] += gxyz[l];
|
---|
| 2342 | hf_ginter[i][l] += hf_gxyz[l];
|
---|
| 2343 | }
|
---|
| 2344 |
|
---|
| 2345 | } // exit "if"
|
---|
| 2346 | } // exit k loop
|
---|
| 2347 | } // exit j loop
|
---|
| 2348 | } // exit i loop
|
---|
| 2349 |
|
---|
| 2350 | /* Accumulate the nodes' intermediate gradients on node 0 */
|
---|
| 2351 | sum_gradients(msg_, ginter, molecule()->natom(), 3);
|
---|
| 2352 | sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
|
---|
| 2353 | }
|
---|
| 2354 |
|
---|
| 2355 |
|
---|
| 2356 | ////////////////////////////////////////////////////
|
---|
| 2357 | // Compute the overlap contribution to the gradient
|
---|
| 2358 | ////////////////////////////////////////////////////
|
---|
| 2359 | void
|
---|
| 2360 | MBPT2::overlap_cs_grad(double *WHF, double *WMP2,
|
---|
| 2361 | double **hf_ginter, double **ginter)
|
---|
| 2362 | {
|
---|
| 2363 | int i, j, k, l, m;
|
---|
| 2364 | int jj, kk;
|
---|
| 2365 | int jj_index, kk_index;
|
---|
| 2366 | int jsize, ksize;
|
---|
| 2367 | int j_offset, k_offset;
|
---|
| 2368 | int jk_index;
|
---|
| 2369 | int index;
|
---|
| 2370 | int nshell;
|
---|
| 2371 | int nbasis;
|
---|
| 2372 |
|
---|
| 2373 | const double *oneebuf; // 1-electron buffer
|
---|
| 2374 | double tmpval1, tmpval2;
|
---|
| 2375 | double hf_tmpval1, hf_tmpval2;
|
---|
| 2376 | double gxyz[3];
|
---|
| 2377 | double hf_gxyz[3];
|
---|
| 2378 |
|
---|
| 2379 | // Initialize 1e object
|
---|
| 2380 | Ref<OneBodyDerivInt> obintder_ = integral()->overlap_deriv();
|
---|
| 2381 | oneebuf = obintder_->buffer();
|
---|
| 2382 |
|
---|
| 2383 | nshell = basis()->nshell();
|
---|
| 2384 | nbasis = basis()->nbasis();
|
---|
| 2385 | int nproc = msg_->n();
|
---|
| 2386 | int me = msg_->me();
|
---|
| 2387 |
|
---|
| 2388 | for (i=0; i<molecule()->natom(); i++) {
|
---|
| 2389 | jk_index = 0;
|
---|
| 2390 |
|
---|
| 2391 | for (j=0; j<nshell; j++) {
|
---|
| 2392 | j_offset = basis()->shell_to_function(j);
|
---|
| 2393 | jsize = basis()->shell(j).nfunction();
|
---|
| 2394 |
|
---|
| 2395 | for (k=0; k<=j; k++) {
|
---|
| 2396 | k_offset = basis()->shell_to_function(k);
|
---|
| 2397 | ksize = basis()->shell(k).nfunction();
|
---|
| 2398 |
|
---|
| 2399 | if (jk_index++%nproc == me) {
|
---|
| 2400 | obintder_->compute_shell(j,k,i);
|
---|
| 2401 |
|
---|
| 2402 | for (l=0; l<3; l++) {
|
---|
| 2403 | hf_gxyz[l] = 0.0;
|
---|
| 2404 | gxyz[l] = 0.0;
|
---|
| 2405 | }
|
---|
| 2406 | index = 0;
|
---|
| 2407 |
|
---|
| 2408 | for (jj=0; jj<jsize; jj++) {
|
---|
| 2409 | jj_index = j_offset + jj;
|
---|
| 2410 | for (kk=0; kk<ksize; kk++) {
|
---|
| 2411 | kk_index = k_offset + kk;
|
---|
| 2412 | if (kk_index > jj_index) {
|
---|
| 2413 | index += 3; // increment index since m-loop will be skipped
|
---|
| 2414 | break; // skip to next jj value
|
---|
| 2415 | }
|
---|
| 2416 | // NB. WMP2 is not a symmetrix matrix
|
---|
| 2417 | tmpval1 = WMP2[jj_index*nbasis + kk_index];
|
---|
| 2418 | tmpval2 = WMP2[kk_index*nbasis + jj_index];
|
---|
| 2419 | hf_tmpval1 = WHF[jj_index*nbasis + kk_index];
|
---|
| 2420 | hf_tmpval2 = WHF[kk_index*nbasis + jj_index];
|
---|
| 2421 |
|
---|
| 2422 | for (m=0; m<3; m++) {
|
---|
| 2423 | if (jj_index != kk_index) {
|
---|
| 2424 | gxyz[m] += oneebuf[index] * (tmpval1 + tmpval2);
|
---|
| 2425 | hf_gxyz[m] += oneebuf[index] * (hf_tmpval1 + hf_tmpval2);
|
---|
| 2426 | }
|
---|
| 2427 | else {
|
---|
| 2428 | gxyz[m] += oneebuf[index] * tmpval1;
|
---|
| 2429 | hf_gxyz[m] += oneebuf[index] * hf_tmpval1;
|
---|
| 2430 | }
|
---|
| 2431 | index++;
|
---|
| 2432 | } // exit m loop
|
---|
| 2433 | } // exit kk loop
|
---|
| 2434 | } // exit jj loop
|
---|
| 2435 |
|
---|
| 2436 | for (l=0; l<3; l++) {
|
---|
| 2437 | ginter[i][l] += gxyz[l];
|
---|
| 2438 | hf_ginter[i][l] += hf_gxyz[l];
|
---|
| 2439 | }
|
---|
| 2440 | } // exit "if"
|
---|
| 2441 | } // exit k loop
|
---|
| 2442 | } // exit j loop
|
---|
| 2443 | } // exit i loop
|
---|
| 2444 |
|
---|
| 2445 | /* Accumulate the nodes' intermediate gradients on node 0 */
|
---|
| 2446 | sum_gradients(msg_, ginter, molecule()->natom(), 3);
|
---|
| 2447 | sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
|
---|
| 2448 | }
|
---|
| 2449 |
|
---|
| 2450 | static void
|
---|
| 2451 | sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2)
|
---|
| 2452 | {
|
---|
| 2453 | int i;
|
---|
| 2454 |
|
---|
| 2455 | if (msg->n() == 1) return;
|
---|
| 2456 |
|
---|
| 2457 | for (i=0; i<n1; i++) {
|
---|
| 2458 | msg->sum(f[i],n2);
|
---|
| 2459 | }
|
---|
| 2460 | }
|
---|
| 2461 |
|
---|
| 2462 | static void
|
---|
| 2463 | zero_gradients(double **f, int n1, int n2)
|
---|
| 2464 | {
|
---|
| 2465 | for (int i=0; i<n1; i++) {
|
---|
| 2466 | for (int j=0; j<3; j++) f[i][j] = 0.0;
|
---|
| 2467 | }
|
---|
| 2468 | }
|
---|
| 2469 |
|
---|
| 2470 | static void
|
---|
| 2471 | accum_gradients(double **g, double **f, int n1, int n2)
|
---|
| 2472 | {
|
---|
| 2473 | for (int i=0; i<n1; i++) {
|
---|
| 2474 | for (int j=0; j<3; j++) g[i][j] += f[i][j];
|
---|
| 2475 | }
|
---|
| 2476 | }
|
---|
| 2477 |
|
---|
| 2478 | /////////////////////////////////////
|
---|
| 2479 | // Compute the batchsize
|
---|
| 2480 | //
|
---|
| 2481 | // Only arrays allocated before exiting the loop over
|
---|
| 2482 | // i-batches are included here - only these arrays
|
---|
| 2483 | // affect the batch size.
|
---|
| 2484 | /////////////////////////////////////
|
---|
| 2485 | int
|
---|
| 2486 | MBPT2::compute_cs_batchsize(size_t mem_static, int nocc_act)
|
---|
| 2487 | {
|
---|
| 2488 | size_t mem_dyn; // dynamic memory available
|
---|
| 2489 | distsize_t maxdyn;
|
---|
| 2490 | int ni;
|
---|
| 2491 |
|
---|
| 2492 | ///////////////////////////////////////
|
---|
| 2493 | // the largest memory requirement will
|
---|
| 2494 | // either occur just before the end of
|
---|
| 2495 | // the 1. q.b.t. (mem1) or just before
|
---|
| 2496 | // the end of the i-batch loop (mem2)
|
---|
| 2497 | ///////////////////////////////////////
|
---|
| 2498 |
|
---|
| 2499 | if (mem_alloc > mem_static) {
|
---|
| 2500 | mem_dyn = mem_alloc - mem_static;
|
---|
| 2501 | }
|
---|
| 2502 | else {
|
---|
| 2503 | mem_dyn = 0;
|
---|
| 2504 | }
|
---|
| 2505 |
|
---|
| 2506 | // First determine if calculation is possible at all (i.e., if ni=1 possible)
|
---|
| 2507 |
|
---|
| 2508 | ni = 1;
|
---|
| 2509 | maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
|
---|
| 2510 |
|
---|
| 2511 | if (maxdyn > mem_dyn) {
|
---|
| 2512 | return 0;
|
---|
| 2513 | }
|
---|
| 2514 |
|
---|
| 2515 | ni = 2;
|
---|
| 2516 | while (ni<=nocc_act) {
|
---|
| 2517 | maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
|
---|
| 2518 | if (maxdyn >= mem_dyn) {
|
---|
| 2519 | ni--;
|
---|
| 2520 | break;
|
---|
| 2521 | }
|
---|
| 2522 | ni++;
|
---|
| 2523 | }
|
---|
| 2524 | if (ni > nocc_act) ni = nocc_act;
|
---|
| 2525 |
|
---|
| 2526 | return ni;
|
---|
| 2527 | }
|
---|
| 2528 |
|
---|
| 2529 | /////////////////////////////////////
|
---|
| 2530 | // Compute required (dynamic) memory
|
---|
| 2531 | // for a given batch size
|
---|
| 2532 | //
|
---|
| 2533 | // Only arrays allocated before exiting the loop over
|
---|
| 2534 | // i-batches are included here - only these arrays
|
---|
| 2535 | // affect the batch size.
|
---|
| 2536 | /////////////////////////////////////
|
---|
| 2537 | distsize_t
|
---|
| 2538 | MBPT2::compute_cs_dynamic_memory(int ni, int nocc_act)
|
---|
| 2539 | {
|
---|
| 2540 | int index;
|
---|
| 2541 | distsize_t mem1, mem2, mem3;
|
---|
| 2542 | distsize_t maxdyn;
|
---|
| 2543 | distsize_t tmp;
|
---|
| 2544 | int i, j;
|
---|
| 2545 | int nij;
|
---|
| 2546 | int nproc = msg_->n();
|
---|
| 2547 |
|
---|
| 2548 | ///////////////////////////////////////
|
---|
| 2549 | // the largest memory requirement will
|
---|
| 2550 | // either occur just before the end of
|
---|
| 2551 | // the 1. q.b.t. (mem1) or just before
|
---|
| 2552 | // the end of the i-batch loop (mem2)
|
---|
| 2553 | ///////////////////////////////////////
|
---|
| 2554 |
|
---|
| 2555 | // compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes
|
---|
| 2556 | index = 0;
|
---|
| 2557 | nij = 0;
|
---|
| 2558 | for (i=0; i<ni; i++) {
|
---|
| 2559 | for (j=0; j<nocc; j++) {
|
---|
| 2560 | if (index++ % nproc == 0) nij++;
|
---|
| 2561 | }
|
---|
| 2562 | }
|
---|
| 2563 | mem1 = sizeof(double)*((distsize_t)nij*nbasis*nbasis
|
---|
| 2564 | + nbasis*nvir);
|
---|
| 2565 | mem2 = sizeof(double)*((distsize_t)thr_->nthread()*ni*nbasis*nfuncmax*nfuncmax
|
---|
| 2566 | + (distsize_t)nij*nbasis*nbasis
|
---|
| 2567 | + ni*nbasis + nbasis*nfuncmax
|
---|
| 2568 | + 2*nfuncmax*nfuncmax*nfuncmax*nfuncmax);
|
---|
| 2569 | mem3 = sizeof(double)*((distsize_t)ni*nbasis*nfuncmax*nfuncmax
|
---|
| 2570 | + (distsize_t)nij*nbasis*nbasis
|
---|
| 2571 | + 2*(2 + nbasis*nfuncmax));
|
---|
| 2572 | tmp = (mem2>mem3 ? mem2:mem3);
|
---|
| 2573 | maxdyn = (mem1>tmp ? mem1:tmp);
|
---|
| 2574 |
|
---|
| 2575 | return maxdyn;
|
---|
| 2576 | }
|
---|
| 2577 |
|
---|
| 2578 | ////////////////////////////////////////////////////////////////////////////
|
---|
| 2579 |
|
---|
| 2580 | // Local Variables:
|
---|
| 2581 | // mode: c++
|
---|
| 2582 | // c-file-style: "CLJ-CONDENSED"
|
---|
| 2583 | // End:
|
---|