| [0b990d] | 1 | //
 | 
|---|
 | 2 | // csgrad.cc
 | 
|---|
 | 3 | //
 | 
|---|
 | 4 | // Copyright (C) 1996 Limit Point Systems, Inc.
 | 
|---|
 | 5 | //
 | 
|---|
 | 6 | // Author: Ida Nielsen <ida@kemi.aau.dk>
 | 
|---|
 | 7 | // Maintainer: LPS
 | 
|---|
 | 8 | //
 | 
|---|
 | 9 | // This file is part of the SC Toolkit.
 | 
|---|
 | 10 | //
 | 
|---|
 | 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
 | 
|---|
 | 12 | // it under the terms of the GNU Library General Public License as published by
 | 
|---|
 | 13 | // the Free Software Foundation; either version 2, or (at your option)
 | 
|---|
 | 14 | // any later version.
 | 
|---|
 | 15 | //
 | 
|---|
 | 16 | // The SC Toolkit is distributed in the hope that it will be useful,
 | 
|---|
 | 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
 | 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
 | 19 | // GNU Library General Public License for more details.
 | 
|---|
 | 20 | //
 | 
|---|
 | 21 | // You should have received a copy of the GNU Library General Public License
 | 
|---|
 | 22 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
 | 
|---|
 | 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 | 
|---|
 | 24 | //
 | 
|---|
 | 25 | // The U.S. Government is granted a limited license as per AL 91-7.
 | 
|---|
 | 26 | //
 | 
|---|
 | 27 | 
 | 
|---|
 | 28 | #include <stdlib.h>
 | 
|---|
 | 29 | #include <math.h>
 | 
|---|
 | 30 | #include <limits.h>
 | 
|---|
 | 31 | 
 | 
|---|
 | 32 | #include <util/misc/formio.h>
 | 
|---|
 | 33 | #include <util/misc/timer.h>
 | 
|---|
 | 34 | #include <util/group/memory.h>
 | 
|---|
 | 35 | #include <util/group/message.h>
 | 
|---|
 | 36 | #include <util/class/class.h>
 | 
|---|
 | 37 | #include <util/state/state.h>
 | 
|---|
 | 38 | #include <math/scmat/matrix.h>
 | 
|---|
 | 39 | #include <math/scmat/blocked.h>
 | 
|---|
 | 40 | #include <chemistry/molecule/molecule.h>
 | 
|---|
 | 41 | #include <chemistry/qc/basis/integral.h>
 | 
|---|
 | 42 | #include <chemistry/qc/mbpt/bzerofast.h>
 | 
|---|
 | 43 | #include <chemistry/qc/mbpt/mbpt.h>
 | 
|---|
 | 44 | #include <chemistry/qc/mbpt/util.h>
 | 
|---|
 | 45 | #include <chemistry/qc/mbpt/csgrade12.h>
 | 
|---|
 | 46 | #include <chemistry/qc/mbpt/csgrad34qb.h>
 | 
|---|
 | 47 | #include <chemistry/qc/mbpt/csgrads2pdm.h>
 | 
|---|
 | 48 | 
 | 
|---|
 | 49 | using namespace std;
 | 
|---|
 | 50 | using namespace sc;
 | 
|---|
 | 51 | 
 | 
|---|
 | 52 | #define SINGLE_THREAD_E12   0
 | 
|---|
 | 53 | #define SINGLE_THREAD_QBT34 0
 | 
|---|
 | 54 | #define SINGLE_THREAD_S2PDM 0
 | 
|---|
 | 55 | 
 | 
|---|
 | 56 | #define PRINT2Q 0
 | 
|---|
 | 57 | #define PRINT3Q 0
 | 
|---|
 | 58 | #define PRINT4Q 0
 | 
|---|
 | 59 | #if PRINT_BIGGEST_INTS
 | 
|---|
 | 60 | BiggestContribs biggest_ints_1(4,40);
 | 
|---|
 | 61 | #endif
 | 
|---|
 | 62 | 
 | 
|---|
 | 63 | #define WRITE_DOUBLES 0
 | 
|---|
 | 64 | 
 | 
|---|
 | 65 | static void sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2);
 | 
|---|
 | 66 | static void zero_gradients(double **f, int n1, int n2);
 | 
|---|
 | 67 | static void accum_gradients(double **g, double **f, int n1, int n2);
 | 
|---|
 | 68 | 
 | 
|---|
 | 69 | #define PRINT1Q 0
 | 
|---|
 | 70 | 
 | 
|---|
 | 71 | #if PRINT_CONTRIB
 | 
|---|
 | 72 | static void
 | 
|---|
 | 73 | sw(int&i,int&j)
 | 
|---|
 | 74 | {
 | 
|---|
 | 75 |   int tmp = i;
 | 
|---|
 | 76 |   i = j;
 | 
|---|
 | 77 |   j = tmp;
 | 
|---|
 | 78 | }
 | 
|---|
 | 79 | 
 | 
|---|
 | 80 | static void
 | 
|---|
 | 81 | print_contrib(double tmpval, int num, int onum,
 | 
|---|
 | 82 |               int P,int Q,int R,int S, int p,int q,int r,int s)
 | 
|---|
 | 83 | {
 | 
|---|
 | 84 | 
 | 
|---|
 | 85 |   printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
 | 
|---|
 | 86 |          num, P, Q, R, S, p, q, r, s, tmpval);
 | 
|---|
 | 87 |   printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
 | 
|---|
 | 88 |          onum, P, Q, R, S, p, q, r, s, -tmpval);
 | 
|---|
 | 89 | 
 | 
|---|
 | 90 |   if (p < q) {
 | 
|---|
 | 91 |       sw(p,q); sw(P,Q);
 | 
|---|
 | 92 |     }
 | 
|---|
 | 93 |   if (r < s) {
 | 
|---|
 | 94 |       sw(r,s); sw(R,S);
 | 
|---|
 | 95 |     }
 | 
|---|
 | 96 |   if (p < r || (p == r && q < s)) {
 | 
|---|
 | 97 |       sw(P,R); sw(p,r);
 | 
|---|
 | 98 |       sw(Q,S); sw(q,s);
 | 
|---|
 | 99 |     }
 | 
|---|
 | 100 | 
 | 
|---|
 | 101 |   printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
 | 
|---|
 | 102 |          num, P, Q, R, S, p, q, r, s, tmpval);
 | 
|---|
 | 103 |   printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
 | 
|---|
 | 104 |          onum, P, Q, R, S, p, q, r, s, -tmpval);
 | 
|---|
 | 105 | }
 | 
|---|
 | 106 | #endif
 | 
|---|
 | 107 | 
 | 
|---|
 | 108 | void
 | 
|---|
 | 109 | MBPT2::compute_cs_grad()
 | 
|---|
 | 110 | {
 | 
|---|
 | 111 | 
 | 
|---|
 | 112 |   // New version of MP2 gradient program which uses the full
 | 
|---|
 | 113 |   // permutational symmetry of the two-electron integral derivatives
 | 
|---|
 | 114 | 
 | 
|---|
 | 115 |   Ref<SCMatrixKit> kit = basis()->matrixkit();
 | 
|---|
 | 116 | 
 | 
|---|
 | 117 |   int do_d2_ = 1;  // if true, compute d2 diagnostic
 | 
|---|
 | 118 | 
 | 
|---|
 | 119 |   int nij;        // number of i,j pairs on a node (for e.g., mo_int)
 | 
|---|
 | 120 |   double *mo_int; // MO integrals of type (ov|ov)
 | 
|---|
 | 121 |                   // (and these integrals divided by
 | 
|---|
 | 122 |                   // orbital energy denominators)
 | 
|---|
 | 123 |   double *integral_iqjs; // half-transformed integrals
 | 
|---|
 | 124 | 
 | 
|---|
 | 125 |   int nocc_act, nvir_act;
 | 
|---|
 | 126 |   int i, j, k;
 | 
|---|
 | 127 |   int ii, bb;
 | 
|---|
 | 128 |   int x, y;
 | 
|---|
 | 129 |   int a, b, c;
 | 
|---|
 | 130 |   int nshell;
 | 
|---|
 | 131 |   int offset;
 | 
|---|
 | 132 |   int ik_offset;
 | 
|---|
 | 133 |   int i_offset; 
 | 
|---|
 | 134 |   int npass, pass;
 | 
|---|
 | 135 |   int tmpint;
 | 
|---|
 | 136 |   int np, nq, nr, ns; 
 | 
|---|
 | 137 |   int P, Q, R, S;
 | 
|---|
 | 138 |   int p, q, r, s;
 | 
|---|
 | 139 |   int bf1, bf2, bf3, bf4;
 | 
|---|
 | 140 |   int index;
 | 
|---|
 | 141 |   int me;
 | 
|---|
 | 142 |   int nproc;
 | 
|---|
 | 143 |   int rest;
 | 
|---|
 | 144 |   int p_offset, q_offset, r_offset, s_offset;
 | 
|---|
 | 145 | 
 | 
|---|
 | 146 |   int aoint_computed = 0; 
 | 
|---|
 | 147 |   int aointder_computed = 0; 
 | 
|---|
 | 148 |   int xyz;
 | 
|---|
 | 149 |   int natom = molecule()->natom();     // the number of atoms
 | 
|---|
 | 150 |   int int_index;
 | 
|---|
 | 151 |   size_t mem_static;    // static memory in bytes
 | 
|---|
 | 152 |   int ij_proc;          // the processor which has ij pair
 | 
|---|
 | 153 |   int ij_index;         // of the ij pairs on a proc, this ij pair is number ij_index
 | 
|---|
 | 154 |                         // (i.e., ij_index < nij)
 | 
|---|
 | 155 |   int ik_proc;          // the processor which has ik pair
 | 
|---|
 | 156 |   int ik_index;
 | 
|---|
 | 157 |   int jloop, kloop;
 | 
|---|
 | 158 | 
 | 
|---|
 | 159 |   int ni;
 | 
|---|
 | 160 | 
 | 
|---|
 | 161 |   double *evals;              // scf eigenvalues
 | 
|---|
 | 162 |   double *iajb_ptr, *ibja_ptr, *iakb_ptr, *ibka_ptr;
 | 
|---|
 | 163 |   double *iajc_ptr, *ibjc_ptr, *icjb_ptr, *icja_ptr;
 | 
|---|
 | 164 |   double *ijkb_ptr, *ibkj_ptr;
 | 
|---|
 | 165 |   double pqrs;
 | 
|---|
 | 166 |   double *c_sa, c_rj;
 | 
|---|
 | 167 |   double *c_pi, *c_qi, *c_sj;
 | 
|---|
 | 168 |   double *c_qx, *c_qa, *c_sb, *c_pa, *c_pq, *c_sy;
 | 
|---|
 | 169 |   double delta_ijab, delta_ijbc, delta_ijac;
 | 
|---|
 | 170 |   double ecorr_mp2 = 0.0;
 | 
|---|
 | 171 |   double escf;
 | 
|---|
 | 172 |   double emp2=0.0;
 | 
|---|
 | 173 |   int tol;                    // log2 of the erep tolerance
 | 
|---|
 | 174 |                               // (erep < 2^tol => discard)
 | 
|---|
 | 175 |   double *Wkj=0,*Wab=0,*Waj=0;// occ-occ, vir-vir and vir-occ parts of 
 | 
|---|
 | 176 |                               // second order correction to MP2
 | 
|---|
 | 177 |                               // energy weighted density matrix
 | 
|---|
 | 178 |   double *Pkj=0,*Pab=0;       // occ-occ and vir-vir parts of second order
 | 
|---|
 | 179 |                               // correction to MP2 density matrix
 | 
|---|
 | 180 |   double *d2occ_mat, *d2vir_mat; // matrices for computation of D2 diagnostic
 | 
|---|
 | 181 |   double *Laj=0;              // MP2 Lagrangian
 | 
|---|
 | 182 |   double *Lpi;                // contrib to MP2 Lagrangian partially in AO basis
 | 
|---|
 | 183 |   double *pkj_ptr=0, *pab_ptr;
 | 
|---|
 | 184 |   double *d2occ_mat_ptr;
 | 
|---|
 | 185 |   double *d2vir_mat_ptr;
 | 
|---|
 | 186 |   double *wkj_ptr, *wjk_ptr, *wab_ptr, *wba_ptr, *waj_ptr=0;
 | 
|---|
 | 187 |   double *laj_ptr, *lpi_ptr, *lqi_ptr;
 | 
|---|
 | 188 |   double *gamma_iajs, *gamma_iajs_tmp; 
 | 
|---|
 | 189 |                               // partially back-transformed non-sep 2PDM's
 | 
|---|
 | 190 |   double *gamma_iqjs_tmp;
 | 
|---|
 | 191 |   double *gamma_iajs_ptr;
 | 
|---|
 | 192 |   double *gamma_iqjs_ptr;
 | 
|---|
 | 193 |   double *gammabuf;           // buffer used for sending elements of gamma_iqjs
 | 
|---|
 | 194 |   double *mo_intbuf;          // buffer used for sending mo integrals
 | 
|---|
 | 195 |   double tmpval, tmpval1;
 | 
|---|
 | 196 |   double *P2AO, *W2AO;
 | 
|---|
 | 197 |   double *p2ao_ptr, *w2ao_ptr;
 | 
|---|
 | 198 |   double *PHF, *WHF;
 | 
|---|
 | 199 |   double *phf_ptr, *whf_ptr;
 | 
|---|
 | 200 |   double *PMP2, *WMP2;
 | 
|---|
 | 201 |   double *pmp2_ptr, *wmp2_ptr;
 | 
|---|
 | 202 | 
 | 
|---|
 | 203 |   double *ixjs_tmp;      // three-quarter transformed two-el integrals
 | 
|---|
 | 204 |   double *integral_ixjs;  // all three-quarter transformed two-el integrals
 | 
|---|
 | 205 |   double *integral_iajy; // mo integrals (y = any MO)
 | 
|---|
 | 206 |   double *integral_ikja; // mo integrals
 | 
|---|
 | 207 |   double *integral_iqjs_ptr;
 | 
|---|
 | 208 |   double *iajy_ptr;
 | 
|---|
 | 209 |   double *ixjs_ptr;
 | 
|---|
 | 210 |   double *ikja_ptr;
 | 
|---|
 | 211 |   double *iajs_ptr, *ikjs_ptr;
 | 
|---|
 | 212 | 
 | 
|---|
 | 213 |   double **gradient=0, *gradient_dat=0;  // The MP2 gradient
 | 
|---|
 | 214 |   double **hf_gradient=0, *hf_gradient_dat=0;  // The HF gradient
 | 
|---|
 | 215 |   double **ginter=0;    // Intermediates for the MP2 gradient
 | 
|---|
 | 216 |   double **hf_ginter=0;    // Intermediates for the HF gradient
 | 
|---|
 | 217 |   double d2o, d2v, d2_diag;
 | 
|---|
 | 218 | 
 | 
|---|
 | 219 |   BiggestContribs biggest_coefs(5,10);
 | 
|---|
 | 220 |   CharacterTable ct = molecule()->point_group()->char_table();
 | 
|---|
 | 221 | 
 | 
|---|
 | 222 | #if PRINT_BIGGEST_INTS
 | 
|---|
 | 223 |   BiggestContribs biggest_ints_2(4,40);
 | 
|---|
 | 224 |   BiggestContribs biggest_ints_2s(4,40);
 | 
|---|
 | 225 |   BiggestContribs biggest_ints_3a(4,40);
 | 
|---|
 | 226 |   BiggestContribs biggest_ints_3(4,40);
 | 
|---|
 | 227 | #endif
 | 
|---|
 | 228 | 
 | 
|---|
 | 229 |   int dograd = gradient_needed();
 | 
|---|
 | 230 | 
 | 
|---|
 | 231 |   tim_enter("mp2-mem");
 | 
|---|
 | 232 | 
 | 
|---|
 | 233 |   nfuncmax = basis()->max_nfunction_in_shell();
 | 
|---|
 | 234 | 
 | 
|---|
 | 235 |   nshell = basis()->nshell();
 | 
|---|
 | 236 | 
 | 
|---|
 | 237 |   me = msg_->me();
 | 
|---|
 | 238 | 
 | 
|---|
 | 239 |   if (me == 0) {
 | 
|---|
 | 240 |     ExEnv::out0() << endl << indent
 | 
|---|
 | 241 |          << "Entered memgrp based MP2 routine" << endl;
 | 
|---|
 | 242 |     }
 | 
|---|
 | 243 |   
 | 
|---|
 | 244 |   nproc = msg_->n();
 | 
|---|
 | 245 |   if (me == 0)
 | 
|---|
 | 246 |     ExEnv::out0() << indent << scprintf("nproc = %i", nproc) << endl;
 | 
|---|
 | 247 | 
 | 
|---|
 | 248 |   tol = (int) (-10.0/log10(2.0));  // discard ereps smaller than 10^-10
 | 
|---|
 | 249 | 
 | 
|---|
 | 250 |   nocc = 0;
 | 
|---|
 | 251 |   for (i=0; i<oso_dimension()->n(); i++) {
 | 
|---|
 | 252 |     if (reference_->occupation(i) == 2.0) nocc++;
 | 
|---|
 | 253 |     }
 | 
|---|
 | 254 | 
 | 
|---|
 | 255 |   nocc_act = nocc - nfzc;
 | 
|---|
 | 256 |   nvir  = noso - nocc;
 | 
|---|
 | 257 |   nvir_act = nvir - nfzv;
 | 
|---|
 | 258 | 
 | 
|---|
 | 259 |   // Do a few preliminary tests to make sure the desired calculation
 | 
|---|
 | 260 |   // can be done (and appears to be meaningful!)
 | 
|---|
 | 261 | 
 | 
|---|
 | 262 |   if (nocc_act <= 0) {
 | 
|---|
 | 263 |     if (me == 0) {
 | 
|---|
 | 264 |       ExEnv::err0() << "There are no active occupied orbitals; program exiting" << endl;
 | 
|---|
 | 265 |       }
 | 
|---|
 | 266 |     abort();
 | 
|---|
 | 267 |     }
 | 
|---|
 | 268 | 
 | 
|---|
 | 269 |   if (nvir_act <= 0) {
 | 
|---|
 | 270 |     if (me == 0) {
 | 
|---|
 | 271 |       ExEnv::err0() << "There are no active virtual orbitals; program exiting" << endl;
 | 
|---|
 | 272 |       }
 | 
|---|
 | 273 |     abort();
 | 
|---|
 | 274 |     }
 | 
|---|
 | 275 |     
 | 
|---|
 | 276 |   if (restart_orbital_memgrp_) {
 | 
|---|
 | 277 |     if (!dograd && !do_d1_ && !do_d2_) {
 | 
|---|
 | 278 |       ExEnv::out0() << indent
 | 
|---|
 | 279 |            << scprintf("Restarting at orbital %d with partial energy %18.14f",
 | 
|---|
 | 280 |                        restart_orbital_memgrp_, restart_ecorr_)
 | 
|---|
 | 281 |            << endl;
 | 
|---|
 | 282 |       ecorr_mp2 = restart_ecorr_;
 | 
|---|
 | 283 |       }
 | 
|---|
 | 284 |     else {
 | 
|---|
 | 285 |       ExEnv::out0() << indent
 | 
|---|
 | 286 |            << "Restart requested but not possible with gradients, D1, or D2"
 | 
|---|
 | 287 |            << endl;
 | 
|---|
 | 288 |       restart_ecorr_ = 0.0;
 | 
|---|
 | 289 |       restart_orbital_memgrp_ = 0;
 | 
|---|
 | 290 |       }
 | 
|---|
 | 291 |     }
 | 
|---|
 | 292 |   else {
 | 
|---|
 | 293 |       restart_ecorr_ = 0.0;
 | 
|---|
 | 294 |     }
 | 
|---|
 | 295 | 
 | 
|---|
 | 296 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 297 |   // Compute batch size ni for mp2 loops;
 | 
|---|
 | 298 |   //
 | 
|---|
 | 299 |   // The following arrays are kept throughout (all of type double):
 | 
|---|
 | 300 |   //   scf_vector, gradient, ginter, Pkj, Pab, Wkj, Wab, Waj, Laj
 | 
|---|
 | 301 |   // and memory allocated for these arrays  and integral evaluators
 | 
|---|
 | 302 |   // is called mem_static
 | 
|---|
 | 303 |   //
 | 
|---|
 | 304 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 305 |   if (me == 0) {
 | 
|---|
 | 306 |     mem_static = nbasis*noso; // scf vector
 | 
|---|
 | 307 |     mem_static += 2*nbasis*nfuncmax; // iqjs & iqjr
 | 
|---|
 | 308 |     if (dograd) {
 | 
|---|
 | 309 |       mem_static += 9*natom; // gradient & ginter & hf_ginter
 | 
|---|
 | 310 |       mem_static += (nocc*(nocc+1))/2; // Pkj
 | 
|---|
 | 311 |       mem_static += (nvir*(nvir+1))/2; // Pab
 | 
|---|
 | 312 |       mem_static += nocc*nocc; // Wkj
 | 
|---|
 | 313 |       mem_static += nvir*nvir; // Wab
 | 
|---|
 | 314 |       mem_static += 2*nocc*nvir; // Waj & Laj
 | 
|---|
 | 315 |       if (do_d2_) {
 | 
|---|
 | 316 |         mem_static += (nocc_act*(nocc_act+1))/2; // d2occ_mat
 | 
|---|
 | 317 |         mem_static += (nvir_act*(nvir_act+1))/2; // d2vir_mat
 | 
|---|
 | 318 |         }
 | 
|---|
 | 319 |       }
 | 
|---|
 | 320 |     else if (do_d1_) {
 | 
|---|
 | 321 |       mem_static += nocc*nvir; // partial Laj
 | 
|---|
 | 322 |       }
 | 
|---|
 | 323 |     mem_static *= sizeof(double);
 | 
|---|
 | 324 |     int nthreads = thr_->nthread();
 | 
|---|
 | 325 |     mem_static += nthreads * integral()->storage_required_eri(basis()); // integral evaluators
 | 
|---|
 | 326 |     ni = compute_cs_batchsize(mem_static, nocc_act-restart_orbital_memgrp_); 
 | 
|---|
 | 327 |     }
 | 
|---|
 | 328 | 
 | 
|---|
 | 329 |   if (max_norb_ > 0 && ni > max_norb_) {
 | 
|---|
 | 330 |       ExEnv::out0() << indent
 | 
|---|
 | 331 |            << "\"max_norb\" set: could have done "
 | 
|---|
 | 332 |            << ni << " orbitals per pass otherwise."
 | 
|---|
 | 333 |            << endl;
 | 
|---|
 | 334 |       ni = max_norb_;
 | 
|---|
 | 335 |     }
 | 
|---|
 | 336 | 
 | 
|---|
 | 337 |   // Send value of ni and mem_static to other nodes
 | 
|---|
 | 338 |   msg_->bcast(ni);
 | 
|---|
 | 339 |   double dmem_static = mem_static;
 | 
|---|
 | 340 |   msg_->bcast(dmem_static);
 | 
|---|
 | 341 |   mem_static = size_t(dmem_static);
 | 
|---|
 | 342 | 
 | 
|---|
 | 343 |   // Compute the storage to be used by the integral routines (required plus optional)
 | 
|---|
 | 344 |   size_t dyn_mem = distsize_to_size(compute_cs_dynamic_memory(ni,nocc_act));
 | 
|---|
 | 345 |   int mem_remaining;
 | 
|---|
 | 346 |   if (mem_alloc <= (dyn_mem + mem_static)) mem_remaining = 0;
 | 
|---|
 | 347 |   else mem_remaining = mem_alloc - dyn_mem - mem_static;
 | 
|---|
 | 348 |   mem_remaining += thr_->nthread() * integral()->storage_required_eri(basis());
 | 
|---|
 | 349 | 
 | 
|---|
 | 350 |   ExEnv::out0() << indent
 | 
|---|
 | 351 |        << "Memory available per node:      " << mem_alloc << " Bytes"
 | 
|---|
 | 352 |        << endl;
 | 
|---|
 | 353 |   ExEnv::out0() << indent
 | 
|---|
 | 354 |        << "Static memory used per node:    " << mem_static << " Bytes"
 | 
|---|
 | 355 |        << endl;
 | 
|---|
 | 356 |   ExEnv::out0() << indent
 | 
|---|
 | 357 |        << "Total memory used per node:     " << dyn_mem+mem_static << " Bytes"
 | 
|---|
 | 358 |        << endl;
 | 
|---|
 | 359 |   ExEnv::out0() << indent
 | 
|---|
 | 360 |        << "Memory required for one pass:   "
 | 
|---|
 | 361 |        << compute_cs_dynamic_memory(nocc_act,nocc_act)+mem_static
 | 
|---|
 | 362 |        << " Bytes"
 | 
|---|
 | 363 |        << endl;
 | 
|---|
 | 364 |   ExEnv::out0() << indent
 | 
|---|
 | 365 |        << "Minimum memory required:        "
 | 
|---|
 | 366 |        << compute_cs_dynamic_memory(1,nocc_act)+mem_static
 | 
|---|
 | 367 |        << " Bytes"
 | 
|---|
 | 368 |        << endl;
 | 
|---|
 | 369 |   ExEnv::out0() << indent
 | 
|---|
 | 370 |        << "Batch size:                     " << ni
 | 
|---|
 | 371 |        << endl;
 | 
|---|
 | 372 | 
 | 
|---|
 | 373 |   if (ni == 0) {
 | 
|---|
 | 374 |     ExEnv::err0() << "Batch size is 0: more memory or processors are needed"
 | 
|---|
 | 375 |          << endl;
 | 
|---|
 | 376 |     abort();
 | 
|---|
 | 377 |     }
 | 
|---|
 | 378 | 
 | 
|---|
 | 379 |   if (dynamic_) {
 | 
|---|
 | 380 |     ExEnv::out0() << indent << "Using dynamic load balancing." << endl;
 | 
|---|
 | 381 |     }
 | 
|---|
 | 382 | 
 | 
|---|
 | 383 |   if (ni == nocc_act-restart_orbital_memgrp_) {
 | 
|---|
 | 384 |     npass = 1;
 | 
|---|
 | 385 |     rest = 0;
 | 
|---|
 | 386 |     }
 | 
|---|
 | 387 |   else {
 | 
|---|
 | 388 |     rest = (nocc_act-restart_orbital_memgrp_)%ni;
 | 
|---|
 | 389 |     npass = (nocc_act-restart_orbital_memgrp_ - rest)/ni + 1;
 | 
|---|
 | 390 |     if (rest == 0) npass--;
 | 
|---|
 | 391 |     }
 | 
|---|
 | 392 | 
 | 
|---|
 | 393 |   if (me == 0) {
 | 
|---|
 | 394 |     ExEnv::out0() << indent
 | 
|---|
 | 395 |          << scprintf(" npass  rest  nbasis  nshell  nfuncmax") << endl;
 | 
|---|
 | 396 |     ExEnv::out0() << indent
 | 
|---|
 | 397 |          << scprintf("  %-4i   %-3i   %-5i    %-4i     %-3i",
 | 
|---|
 | 398 |                      npass,rest,nbasis,nshell,nfuncmax)
 | 
|---|
 | 399 |          << endl;
 | 
|---|
 | 400 |     ExEnv::out0() << indent
 | 
|---|
 | 401 |          << scprintf(" nocc   nvir   nfzc   nfzv") << endl;
 | 
|---|
 | 402 |     ExEnv::out0() << indent
 | 
|---|
 | 403 |          << scprintf("  %-4i   %-4i   %-4i   %-4i",
 | 
|---|
 | 404 |                      nocc,nvir,nfzc,nfzv)
 | 
|---|
 | 405 |          << endl;
 | 
|---|
 | 406 |     }
 | 
|---|
 | 407 | 
 | 
|---|
 | 408 |   int nijmax = 0;
 | 
|---|
 | 409 |   index = 0;
 | 
|---|
 | 410 |   for (i=0; i<ni; i++) {
 | 
|---|
 | 411 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 412 |           if (index++ % nproc == me) nijmax++;
 | 
|---|
 | 413 |         }
 | 
|---|
 | 414 |     }
 | 
|---|
 | 415 | 
 | 
|---|
 | 416 |   ////////////////////////////////////////////////
 | 
|---|
 | 417 |   // The scf vector is distributed between nodes;
 | 
|---|
 | 418 |   // put a copy of the scf vector on each node;
 | 
|---|
 | 419 |   ////////////////////////////////////////////////
 | 
|---|
 | 420 | 
 | 
|---|
 | 421 |   escf = reference_->energy();
 | 
|---|
 | 422 |   hf_energy_ = escf;
 | 
|---|
 | 423 | 
 | 
|---|
 | 424 |   RefDiagSCMatrix occ;
 | 
|---|
 | 425 |   RefSCMatrix Scf_Vec;
 | 
|---|
 | 426 |   RefDiagSCMatrix evalmat;
 | 
|---|
 | 427 |   eigen(evalmat, Scf_Vec, occ);
 | 
|---|
 | 428 | 
 | 
|---|
 | 429 |   if (debug_ > 1) {
 | 
|---|
 | 430 |     evalmat.print("eigenvalues");
 | 
|---|
 | 431 |     Scf_Vec.print("eigenvectors");
 | 
|---|
 | 432 |     }
 | 
|---|
 | 433 | 
 | 
|---|
 | 434 |   double *scf_vector_dat = new double[nbasis*noso];
 | 
|---|
 | 435 |   Scf_Vec.t()->convert(scf_vector_dat);
 | 
|---|
 | 436 | 
 | 
|---|
 | 437 |   evals = new double[noso];
 | 
|---|
 | 438 |   double** scf_vector = new double*[nbasis];
 | 
|---|
 | 439 |   for (i=0; i<nbasis; i++) {
 | 
|---|
 | 440 |     scf_vector[i] = &scf_vector_dat[i*noso];
 | 
|---|
 | 441 |     }
 | 
|---|
 | 442 |   for (i=0; i<noso; i++) {
 | 
|---|
 | 443 |       evals[i] = evalmat(i);
 | 
|---|
 | 444 |     }
 | 
|---|
 | 445 | 
 | 
|---|
 | 446 |   Scf_Vec = 0;
 | 
|---|
 | 447 |   evalmat = 0;
 | 
|---|
 | 448 | 
 | 
|---|
 | 449 |   //////////////////////////////////////////////////////////////
 | 
|---|
 | 450 |   // Allocate storage for various arrays needed for gradients
 | 
|---|
 | 451 |   // (Pkj and Pab are symmetric, so store only lower triangle)
 | 
|---|
 | 452 |   //////////////////////////////////////////////////////////////
 | 
|---|
 | 453 | 
 | 
|---|
 | 454 |   if (dograd) {
 | 
|---|
 | 455 |     Pkj            = (double*) malloc((nocc*(nocc+1)/2)*sizeof(double));
 | 
|---|
 | 456 |     Pab            = (double*) malloc((nvir*(nvir+1)/2)*sizeof(double));
 | 
|---|
 | 457 |     Wkj            = (double*) malloc(nocc*nocc*sizeof(double));
 | 
|---|
 | 458 |     Wab            = (double*) malloc(nvir*nvir*sizeof(double));
 | 
|---|
 | 459 |     Waj            = (double*) malloc(nvir*nocc*sizeof(double));
 | 
|---|
 | 460 |     if (do_d2_) {
 | 
|---|
 | 461 |       d2occ_mat =  new double[nocc_act*(nocc_act+1)/2];
 | 
|---|
 | 462 |       d2vir_mat =  new double[nvir_act*(nvir_act+1)/2];
 | 
|---|
 | 463 |       }
 | 
|---|
 | 464 | 
 | 
|---|
 | 465 |     gradient_dat = new double[natom*3];
 | 
|---|
 | 466 |     gradient = new double*[natom];
 | 
|---|
 | 467 |     for (i=0; i<natom; i++) {
 | 
|---|
 | 468 |       gradient[i] = &gradient_dat[i*3];
 | 
|---|
 | 469 |       }
 | 
|---|
 | 470 | 
 | 
|---|
 | 471 |     hf_gradient_dat = new double[natom*3];
 | 
|---|
 | 472 |     hf_gradient = new double*[natom];
 | 
|---|
 | 473 |     for (i=0; i<natom; i++) {
 | 
|---|
 | 474 |       hf_gradient[i] = &hf_gradient_dat[i*3];
 | 
|---|
 | 475 |       }
 | 
|---|
 | 476 | 
 | 
|---|
 | 477 |     ginter = new double*[natom];
 | 
|---|
 | 478 |     for (i=0; i<natom; i++) {
 | 
|---|
 | 479 |       ginter[i] = new double[3];
 | 
|---|
 | 480 |       for (xyz=0; xyz<3; xyz++) ginter[i][xyz] = 0;
 | 
|---|
 | 481 |       }
 | 
|---|
 | 482 | 
 | 
|---|
 | 483 |     hf_ginter = new double*[natom];
 | 
|---|
 | 484 |     for (i=0; i<natom; i++) {
 | 
|---|
 | 485 |       hf_ginter[i] = new double[3];
 | 
|---|
 | 486 |       for (xyz=0; xyz<3; xyz++) hf_ginter[i][xyz] = 0;
 | 
|---|
 | 487 |       }
 | 
|---|
 | 488 | 
 | 
|---|
 | 489 |     //////////////////////////////
 | 
|---|
 | 490 |     // Initialize various arrays
 | 
|---|
 | 491 |     //////////////////////////////
 | 
|---|
 | 492 | 
 | 
|---|
 | 493 |     bzerofast(Pkj,nocc*(nocc+1)/2);
 | 
|---|
 | 494 |     bzerofast(Wkj,nocc*nocc);
 | 
|---|
 | 495 |     bzerofast(Pab,nvir*(nvir+1)/2);
 | 
|---|
 | 496 |     bzerofast(Wab,nvir*nvir);
 | 
|---|
 | 497 |     bzerofast(Waj,nvir*nocc);
 | 
|---|
 | 498 |     if (do_d2_) {
 | 
|---|
 | 499 |       bzerofast(d2occ_mat,nocc_act*(nocc_act+1)/2);
 | 
|---|
 | 500 |       bzerofast(d2vir_mat,nvir_act*(nvir_act+1)/2);
 | 
|---|
 | 501 |       }
 | 
|---|
 | 502 | 
 | 
|---|
 | 503 |     if (me == 0) zero_gradients(gradient, natom, 3);
 | 
|---|
 | 504 |     if (me == 0) zero_gradients(hf_gradient, natom, 3);
 | 
|---|
 | 505 |     }
 | 
|---|
 | 506 | 
 | 
|---|
 | 507 |   if (dograd || do_d1_) {
 | 
|---|
 | 508 |     Laj = (double*) malloc(nvir*nocc*sizeof(double));
 | 
|---|
 | 509 |     bzerofast(Laj,nvir*nocc);
 | 
|---|
 | 510 |     }
 | 
|---|
 | 511 | 
 | 
|---|
 | 512 |   if (debug_ > 2 && me == 0) {
 | 
|---|
 | 513 |     for (j=0; j<noso; j++) {
 | 
|---|
 | 514 |       ExEnv::out0() << indent
 | 
|---|
 | 515 |            << scprintf("eigenvalue[%3d] = %15.10lf", j, evals[j]);
 | 
|---|
 | 516 |       if (j < nfzc) ExEnv::out0() << " (frozen docc)";
 | 
|---|
 | 517 |       else if (j < nocc_act + nfzc) ExEnv::out0() << " (active docc)";
 | 
|---|
 | 518 |       else if (j < nvir_act + nocc_act + nfzc) ExEnv::out0() << " (active uocc)";
 | 
|---|
 | 519 |       else ExEnv::out0() << " (frozen uocc)";
 | 
|---|
 | 520 |       ExEnv::out0() << endl;
 | 
|---|
 | 521 |       }
 | 
|---|
 | 522 |     }
 | 
|---|
 | 523 | 
 | 
|---|
 | 524 |   /////////////////////////////////////
 | 
|---|
 | 525 |   //  Begin MP2 loops
 | 
|---|
 | 526 |   /////////////////////////////////////
 | 
|---|
 | 527 | 
 | 
|---|
 | 528 |   // debug print
 | 
|---|
 | 529 |   if (debug_ && me == 0) {
 | 
|---|
 | 530 |     ExEnv::out0() << indent
 | 
|---|
 | 531 |          << scprintf("node %i, begin loop over i-batches",me) << endl;
 | 
|---|
 | 532 |     }
 | 
|---|
 | 533 |   // end of debug print
 | 
|---|
 | 534 | 
 | 
|---|
 | 535 |   // Initialize the integrals
 | 
|---|
 | 536 |   integral()->set_storage(mem_remaining);
 | 
|---|
 | 537 |   tbints_ = new Ref<TwoBodyInt>[thr_->nthread()];
 | 
|---|
 | 538 |   for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 539 |       tbints_[i] = integral()->electron_repulsion();
 | 
|---|
 | 540 |     }
 | 
|---|
 | 541 |   if (dograd || do_d1_) {
 | 
|---|
 | 542 |     tbintder_ = new Ref<TwoBodyDerivInt>[thr_->nthread()];
 | 
|---|
 | 543 |     for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 544 |       tbintder_[i] = integral()->electron_repulsion_deriv();
 | 
|---|
 | 545 |       }
 | 
|---|
 | 546 |     }
 | 
|---|
 | 547 | 
 | 
|---|
 | 548 |   int mem_integral_intermediates = integral()->storage_used();
 | 
|---|
 | 549 |   int mem_integral_storage = (mem_remaining - mem_integral_intermediates) / thr_->nthread();
 | 
|---|
 | 550 |   if (mem_integral_storage<0) mem_integral_storage = 0;
 | 
|---|
 | 551 |   for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 552 |       tbints_[i]->set_integral_storage(mem_integral_storage);
 | 
|---|
 | 553 |     }
 | 
|---|
 | 554 | 
 | 
|---|
 | 555 |   ExEnv::out0() << endl << indent
 | 
|---|
 | 556 |        << scprintf("Memory used for integral intermediates: %i Bytes",
 | 
|---|
 | 557 |                    mem_integral_intermediates)
 | 
|---|
 | 558 |        << endl;
 | 
|---|
 | 559 |   ExEnv::out0() << indent
 | 
|---|
 | 560 |        << scprintf("Memory used for integral storage:       %i Bytes",
 | 
|---|
 | 561 |                    mem_integral_storage)
 | 
|---|
 | 562 |        << endl;
 | 
|---|
 | 563 | 
 | 
|---|
 | 564 |   if (mem.null()) {
 | 
|---|
 | 565 |       ExEnv::errn() << "MBPT2: memory group not initialized" << endl;
 | 
|---|
 | 566 |       abort();
 | 
|---|
 | 567 |     }
 | 
|---|
 | 568 | 
 | 
|---|
 | 569 |   mem->set_localsize(size_t(nijmax)*nbasis*nbasis*sizeof(double));
 | 
|---|
 | 570 |   ExEnv::out0() << indent
 | 
|---|
 | 571 |        << "Size of global distributed array:       "
 | 
|---|
 | 572 |        << mem->totalsize()
 | 
|---|
 | 573 |        << " Bytes"
 | 
|---|
 | 574 |        << endl;
 | 
|---|
 | 575 | 
 | 
|---|
 | 576 |   MemoryGrpBuf<double> membuf_remote(mem);
 | 
|---|
 | 577 | 
 | 
|---|
 | 578 |   int usep4 = !dograd;
 | 
|---|
 | 579 | 
 | 
|---|
 | 580 |   Ref<ThreadLock> lock = thr_->new_lock();
 | 
|---|
 | 581 |   CSGradErep12Qtr** e12thread = new CSGradErep12Qtr*[thr_->nthread()];
 | 
|---|
 | 582 |   DistShellPair::SharedData sp_e_data, sp_g_data;
 | 
|---|
 | 583 |   for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 584 |     e12thread[i] = new CSGradErep12Qtr(i, thr_->nthread(), me, nproc,
 | 
|---|
 | 585 |                                        mem, msg_, lock, basis(), tbints_[i],
 | 
|---|
 | 586 |                                        nocc, scf_vector, tol, debug_,
 | 
|---|
 | 587 |                                        dynamic_, print_percent_,
 | 
|---|
 | 588 |                                        &sp_e_data, usep4);
 | 
|---|
 | 589 |     }
 | 
|---|
 | 590 | 
 | 
|---|
 | 591 |     CSGrad34Qbtr** qbt34thread;
 | 
|---|
 | 592 |     if (dograd || do_d1_) {
 | 
|---|
 | 593 |       qbt34thread = new CSGrad34Qbtr*[thr_->nthread()];
 | 
|---|
 | 594 |       for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 595 |         qbt34thread[i] = new CSGrad34Qbtr(i, thr_->nthread(), me, nproc,
 | 
|---|
 | 596 |                                           mem, msg_, lock, basis(), tbints_[i],
 | 
|---|
 | 597 |                                           tbintder_[i], nocc, nfzc, scf_vector,
 | 
|---|
 | 598 |                                           tol, debug_, dynamic_, print_percent_,
 | 
|---|
 | 599 |                                           &sp_g_data, dograd, natom);
 | 
|---|
 | 600 |         }
 | 
|---|
 | 601 |       }
 | 
|---|
 | 602 | 
 | 
|---|
 | 603 |   tim_enter("mp2 passes");
 | 
|---|
 | 604 |   for (pass=0; pass<npass; pass++) {
 | 
|---|
 | 605 | 
 | 
|---|
 | 606 |     if (me == 0) {
 | 
|---|
 | 607 |       ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
 | 
|---|
 | 608 |       }
 | 
|---|
 | 609 | 
 | 
|---|
 | 610 |     i_offset = restart_orbital_memgrp_ + pass*ni + nfzc;
 | 
|---|
 | 611 |     if ((pass == npass - 1) && (rest != 0)) ni = rest;
 | 
|---|
 | 612 | 
 | 
|---|
 | 613 |     // Compute number of of i,j pairs on each node for
 | 
|---|
 | 614 |     // two-el integrals and non-sep 2PDM elements 
 | 
|---|
 | 615 |     index = 0;
 | 
|---|
 | 616 |     nij = 0;
 | 
|---|
 | 617 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 618 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 619 |         if (index++ % nproc == me) nij++;
 | 
|---|
 | 620 |         }
 | 
|---|
 | 621 |       }
 | 
|---|
 | 622 | 
 | 
|---|
 | 623 |     // debug print
 | 
|---|
 | 624 |     if (debug_)
 | 
|---|
 | 625 |       ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
 | 
|---|
 | 626 |     // end of debug print
 | 
|---|
 | 627 | 
 | 
|---|
 | 628 |     mem->sync(); // This must be here or gamma non-sep will be wrong when running
 | 
|---|
 | 629 |                  // on multiple processors with more than one pass
 | 
|---|
 | 630 | 
 | 
|---|
 | 631 |     r_offset = 0;
 | 
|---|
 | 632 | 
 | 
|---|
 | 633 |     // Allocate and initialize some arrays
 | 
|---|
 | 634 |     // (done here to avoid having these arrays
 | 
|---|
 | 635 |     // overlap with arrays allocated later)
 | 
|---|
 | 636 | 
 | 
|---|
 | 637 |     // Allocate (and initialize) some arrays
 | 
|---|
 | 638 | 
 | 
|---|
 | 639 |     integral_iqjs = (double*) mem->localdata();
 | 
|---|
 | 640 | 
 | 
|---|
 | 641 |     bzerofast(integral_iqjs, nij*nbasis*nbasis);
 | 
|---|
 | 642 | 
 | 
|---|
 | 643 |     integral_iqjs = 0;
 | 
|---|
 | 644 | 
 | 
|---|
 | 645 |     mem->sync();
 | 
|---|
 | 646 | 
 | 
|---|
 | 647 |     index = 0;
 | 
|---|
 | 648 | 
 | 
|---|
 | 649 |     if (me == 0) {
 | 
|---|
 | 650 |       ExEnv::out0() << indent
 | 
|---|
 | 651 |            << scprintf("Begin loop over shells (erep, 1.+2. q.t.)") << endl;
 | 
|---|
 | 652 |       }
 | 
|---|
 | 653 | 
 | 
|---|
 | 654 |     // Do the two eletron integrals and the first two quarter transformations
 | 
|---|
 | 655 |     tim_enter("erep+1.qt+2.qt");
 | 
|---|
 | 656 |     sp_e_data.init();
 | 
|---|
 | 657 |     for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 658 |       e12thread[i]->set_i_offset(i_offset);
 | 
|---|
 | 659 |       e12thread[i]->set_ni(ni);
 | 
|---|
 | 660 |       thr_->add_thread(i,e12thread[i]);
 | 
|---|
 | 661 | #     if SINGLE_THREAD_E12
 | 
|---|
 | 662 |       e12thread[i]->run();
 | 
|---|
 | 663 | #     endif
 | 
|---|
 | 664 |       }
 | 
|---|
 | 665 | #   if !SINGLE_THREAD_E12
 | 
|---|
 | 666 |     thr_->start_threads();
 | 
|---|
 | 667 |     thr_->wait_threads();
 | 
|---|
 | 668 | #   endif
 | 
|---|
 | 669 |     tim_exit("erep+1.qt+2.qt");
 | 
|---|
 | 670 | 
 | 
|---|
 | 671 |     if (me == 0) {
 | 
|---|
 | 672 |       ExEnv::out0() << indent << "End of loop over shells" << endl;
 | 
|---|
 | 673 |       }
 | 
|---|
 | 674 | 
 | 
|---|
 | 675 |     mem->sync();  // Make sure iqjs is complete on each node before continuing
 | 
|---|
 | 676 | 
 | 
|---|
 | 677 |     integral_iqjs = (double*) mem->localdata();
 | 
|---|
 | 678 | 
 | 
|---|
 | 679 | #if PRINT2Q
 | 
|---|
 | 680 |     if (me == 0) {
 | 
|---|
 | 681 |       int index = 0;
 | 
|---|
 | 682 |       int ij_index = 0;
 | 
|---|
 | 683 |       for (int i = 0; i<ni; i++) {
 | 
|---|
 | 684 |         for (int j = 0; j<nocc; j++) {
 | 
|---|
 | 685 |           if (index++ % nproc == me) {
 | 
|---|
 | 686 |             if (j >= nfzc) {
 | 
|---|
 | 687 |               double *integral_ij_offset = integral_iqjs + nbasis*nbasis*ij_index;
 | 
|---|
 | 688 |               for (int s = 0; s<nbasis; s++) {
 | 
|---|
 | 689 |                 double *integral_ijsq_ptr = integral_ij_offset + s*nbasis;
 | 
|---|
 | 690 |                 for (int q = 0; q<nbasis; q++) {
 | 
|---|
 | 691 |                   printf("2Q: (%d %d|%d %d) = %12.8f\n",
 | 
|---|
 | 692 |                          i,q,j,s,*integral_ijsq_ptr);
 | 
|---|
 | 693 |                   *integral_ijsq_ptr++;
 | 
|---|
 | 694 |                 }
 | 
|---|
 | 695 |               }
 | 
|---|
 | 696 |             }
 | 
|---|
 | 697 |             ij_index++;
 | 
|---|
 | 698 |           }
 | 
|---|
 | 699 |         }
 | 
|---|
 | 700 |       }
 | 
|---|
 | 701 |     }
 | 
|---|
 | 702 | #endif
 | 
|---|
 | 703 | 
 | 
|---|
 | 704 |     // Allocate and initialize some arrays
 | 
|---|
 | 705 |     ixjs_tmp = new double[nbasis];
 | 
|---|
 | 706 | 
 | 
|---|
 | 707 |     if (me == 0) {
 | 
|---|
 | 708 |       ExEnv::out0() << indent << "Begin third q.t." << endl;
 | 
|---|
 | 709 |       }
 | 
|---|
 | 710 | 
 | 
|---|
 | 711 |     tim_enter("3. q.t.");
 | 
|---|
 | 712 |     // Begin third quarter transformation;
 | 
|---|
 | 713 |     // generate (ix|js) for i act, j act or frz, and x any MO (act or frz)
 | 
|---|
 | 714 |     index = 0;
 | 
|---|
 | 715 |     ij_index = 0;
 | 
|---|
 | 716 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 717 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 718 |         if (index++ % nproc == me) {
 | 
|---|
 | 719 | 
 | 
|---|
 | 720 |           for (s=0; s<nbasis; s++) {
 | 
|---|
 | 721 | 
 | 
|---|
 | 722 |             bzerofast(ixjs_tmp, nbasis);
 | 
|---|
 | 723 |             for (q=0; q<nbasis; q++) {
 | 
|---|
 | 724 |               integral_iqjs_ptr = &integral_iqjs[q + nbasis*(s + nbasis*ij_index)];
 | 
|---|
 | 725 |               ixjs_ptr = ixjs_tmp;
 | 
|---|
 | 726 |               c_qx = scf_vector[q];
 | 
|---|
 | 727 |               tmpval = *integral_iqjs_ptr;
 | 
|---|
 | 728 | #if PRINT_BIGGEST_INTS
 | 
|---|
 | 729 |               biggest_ints_2.insert(tmpval,i+i_offset,j,s,q);
 | 
|---|
 | 730 |               if ((i+i_offset==104 && j == 1)
 | 
|---|
 | 731 |                   ||(i+i_offset==104 && j == 2)) {
 | 
|---|
 | 732 |                 biggest_ints_2s.insert(tmpval,i+i_offset,j,s,q);
 | 
|---|
 | 733 |                 }
 | 
|---|
 | 734 | #endif
 | 
|---|
 | 735 |               for (x=0; x<noso; x++) {
 | 
|---|
 | 736 |                 *ixjs_ptr++ += *c_qx++ * tmpval;
 | 
|---|
 | 737 |                 }
 | 
|---|
 | 738 |               }   // exit q loop
 | 
|---|
 | 739 | 
 | 
|---|
 | 740 |             // Put ixjs into integral_iqjs, while overwriting what was there;
 | 
|---|
 | 741 |             // i.e., integral_iqjs will now contain three-quarter transformed
 | 
|---|
 | 742 |             // integrals ixjs
 | 
|---|
 | 743 |             integral_iqjs_ptr = &integral_iqjs[nbasis*(s + nbasis*ij_index)];
 | 
|---|
 | 744 |             ixjs_ptr = ixjs_tmp;
 | 
|---|
 | 745 |             for (x=0; x<noso; x++) {
 | 
|---|
 | 746 | #if PRINT_BIGGEST_INTS
 | 
|---|
 | 747 |               if (x>=nocc) {
 | 
|---|
 | 748 |                 biggest_ints_3a.insert(*ixjs_ptr,i+i_offset,j,s,x-nocc);
 | 
|---|
 | 749 |                 }
 | 
|---|
 | 750 | #endif
 | 
|---|
 | 751 |               *integral_iqjs_ptr++ = *ixjs_ptr++;
 | 
|---|
 | 752 |               }
 | 
|---|
 | 753 |             }   // exit s loop
 | 
|---|
 | 754 |           ij_index++;
 | 
|---|
 | 755 |           }     // endif
 | 
|---|
 | 756 |         }       // exit j loop
 | 
|---|
 | 757 |       }         // exit i loop
 | 
|---|
 | 758 |     // end of third quarter transformation
 | 
|---|
 | 759 |     tim_exit("3. q.t.");
 | 
|---|
 | 760 | 
 | 
|---|
 | 761 |     if (me == 0) {
 | 
|---|
 | 762 |       ExEnv::out0() << indent << "End of third q.t." << endl;
 | 
|---|
 | 763 |       }
 | 
|---|
 | 764 | 
 | 
|---|
 | 765 |     delete[] ixjs_tmp;
 | 
|---|
 | 766 | 
 | 
|---|
 | 767 |     // The array of half-transformed integrals integral_iqjs has now
 | 
|---|
 | 768 |     // been overwritten by three-quarter transformed integrals ixjs;
 | 
|---|
 | 769 |     // rename the array integral_ixjs, where x = any MO
 | 
|---|
 | 770 |     integral_ixjs = integral_iqjs;
 | 
|---|
 | 771 | 
 | 
|---|
 | 772 | #if PRINT3Q
 | 
|---|
 | 773 |     if (me == 0) {
 | 
|---|
 | 774 |       int index = 0;
 | 
|---|
 | 775 |       int ij_index = 0;
 | 
|---|
 | 776 |       for (int i = 0; i<ni; i++) {
 | 
|---|
 | 777 |         for (int j = 0; j<nocc; j++) {
 | 
|---|
 | 778 |           if (index++ % nproc == me) {
 | 
|---|
 | 779 |             if (j >= nfzc) {
 | 
|---|
 | 780 |               double *integral_ij_offset = integral_ixjs + nbasis*nbasis*ij_index;
 | 
|---|
 | 781 |               for (int s = 0; s<nbasis; s++) {
 | 
|---|
 | 782 |                 double *integral_ijsx_ptr = integral_ij_offset + s*nbasis;
 | 
|---|
 | 783 |                 for (int x = 0; x<noso; x++) {
 | 
|---|
 | 784 |                   printf("3Q: (%d %d|%d %d) = %12.8f\n",
 | 
|---|
 | 785 |                          i,x,j,s,*integral_ijsx_ptr);
 | 
|---|
 | 786 |                   *integral_ijsx_ptr++;
 | 
|---|
 | 787 |                 }
 | 
|---|
 | 788 |               }
 | 
|---|
 | 789 |             }
 | 
|---|
 | 790 |             ij_index++;
 | 
|---|
 | 791 |           }
 | 
|---|
 | 792 |         }
 | 
|---|
 | 793 |       }
 | 
|---|
 | 794 |     }
 | 
|---|
 | 795 | #endif
 | 
|---|
 | 796 | 
 | 
|---|
 | 797 |     integral_iajy = new double[noso];
 | 
|---|
 | 798 |     // in iajy: i act; a,j act or frz; y act or frz occ or virt.
 | 
|---|
 | 799 |     integral_ikja = new double[nvir_act];
 | 
|---|
 | 800 |     // in ikja: i,j act; k act or frz; a act.
 | 
|---|
 | 801 | 
 | 
|---|
 | 802 |     if (me == 0) {
 | 
|---|
 | 803 |       ExEnv::out0() << indent << "Begin fourth q.t." << endl;
 | 
|---|
 | 804 |       }
 | 
|---|
 | 805 | 
 | 
|---|
 | 806 |     // Begin fourth quarter transformation
 | 
|---|
 | 807 |     // generating MO integrals (ov|ov), (ov|oo) and (oo|ov)
 | 
|---|
 | 808 |     tim_enter("4. q.t.");
 | 
|---|
 | 809 |     index = 0;
 | 
|---|
 | 810 |     ij_index = 0;
 | 
|---|
 | 811 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 812 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 813 |         if (index++ % nproc == me) {
 | 
|---|
 | 814 | 
 | 
|---|
 | 815 |           for (a=0; a<nvir; a++) {
 | 
|---|
 | 816 |             bzerofast(integral_iajy, noso);
 | 
|---|
 | 817 |             iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
 | 
|---|
 | 818 |             for (s=0; s<nbasis; s++) {
 | 
|---|
 | 819 |               c_sy = scf_vector[s];
 | 
|---|
 | 820 |               iajy_ptr = integral_iajy;
 | 
|---|
 | 821 |               tmpval = *iajs_ptr;
 | 
|---|
 | 822 | #if PRINT_BIGGEST_INTS
 | 
|---|
 | 823 |               biggest_ints_3.insert(tmpval,i+i_offset,j,s,a);
 | 
|---|
 | 824 |               if ((i+i_offset==105 && j == 2 && s == 170 && a == 3)
 | 
|---|
 | 825 |                   ||(i+i_offset==102 && j == 2 && s == 170 && a == 2)) {
 | 
|---|
 | 826 |                 ExEnv::outn() << scprintf("3/4: %3d %3d %3d %3d: %16.10f",
 | 
|---|
 | 827 |                                  i+i_offset, j, s, x-nocc)
 | 
|---|
 | 828 |                      << endl;
 | 
|---|
 | 829 |                 }
 | 
|---|
 | 830 | #endif
 | 
|---|
 | 831 |               for (y=0; y<noso; y++) {
 | 
|---|
 | 832 |                 *iajy_ptr++ += *c_sy++ * tmpval;
 | 
|---|
 | 833 |                 } // exit y loop
 | 
|---|
 | 834 |               iajs_ptr += nbasis;
 | 
|---|
 | 835 |               }   // exit s loop
 | 
|---|
 | 836 |             // Put integral_iajy into ixjs for one i,a,j while
 | 
|---|
 | 837 |             // overwriting elements of ixjs
 | 
|---|
 | 838 |             iajs_ptr = &integral_ixjs[a+nocc + nbasis*nbasis*ij_index];
 | 
|---|
 | 839 |             iajy_ptr = integral_iajy;
 | 
|---|
 | 840 |             for (y=0; y<noso; y++) {
 | 
|---|
 | 841 |               *iajs_ptr = *iajy_ptr++;
 | 
|---|
 | 842 |               iajs_ptr += nbasis;
 | 
|---|
 | 843 |               } // exit y loop
 | 
|---|
 | 844 |             }   // exit a loop
 | 
|---|
 | 845 | 
 | 
|---|
 | 846 |           // this is only needed for gradients
 | 
|---|
 | 847 |           if ((dograd || do_d1_) && j >= nfzc) {
 | 
|---|
 | 848 |             for (k=0; k<nocc; k++) {
 | 
|---|
 | 849 |               bzerofast(integral_ikja, nvir_act);
 | 
|---|
 | 850 |               ikjs_ptr = &integral_ixjs[k + nbasis*nbasis*ij_index];
 | 
|---|
 | 851 |               for (s=0; s<nbasis; s++) {
 | 
|---|
 | 852 |                 c_sa = &scf_vector[s][nocc];
 | 
|---|
 | 853 |                 ikja_ptr = integral_ikja;
 | 
|---|
 | 854 |                 tmpval = *ikjs_ptr;
 | 
|---|
 | 855 |                 for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 856 |                   *ikja_ptr++ += *c_sa++ * tmpval;
 | 
|---|
 | 857 |                   } // exit a loop 
 | 
|---|
 | 858 |                 ikjs_ptr += nbasis;
 | 
|---|
 | 859 |                 }   // exit s loop 
 | 
|---|
 | 860 |               // Put integral_ikja into ixjs for one i,k,j while
 | 
|---|
 | 861 |               // overwriting elements of ixjs
 | 
|---|
 | 862 |               ikjs_ptr = &integral_ixjs[k + nbasis*(nocc + nbasis*ij_index)];
 | 
|---|
 | 863 |               ikja_ptr = integral_ikja;
 | 
|---|
 | 864 |               for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 865 |                 *ikjs_ptr = *ikja_ptr++;
 | 
|---|
 | 866 |                 ikjs_ptr += nbasis;
 | 
|---|
 | 867 |                 } // exit a loop 
 | 
|---|
 | 868 |               }   // exit k loop 
 | 
|---|
 | 869 |             }     //endif
 | 
|---|
 | 870 | 
 | 
|---|
 | 871 |           ij_index++;
 | 
|---|
 | 872 |           }   // endif
 | 
|---|
 | 873 |         }     // exit j loop
 | 
|---|
 | 874 |       }       // exit i loop
 | 
|---|
 | 875 |     // end of fourth quarter transformation
 | 
|---|
 | 876 |     tim_exit("4. q.t.");
 | 
|---|
 | 877 | 
 | 
|---|
 | 878 |     if (me == 0) {
 | 
|---|
 | 879 |       ExEnv::out0() << indent << "End of fourth q.t." << endl;
 | 
|---|
 | 880 |       }
 | 
|---|
 | 881 | 
 | 
|---|
 | 882 |     // The array integral_ixjs has now been overwritten by MO integrals
 | 
|---|
 | 883 |     // iajy and ikja, so rename the array mo_int
 | 
|---|
 | 884 |     mo_int = integral_ixjs;
 | 
|---|
 | 885 | 
 | 
|---|
 | 886 |     delete[] integral_iajy;
 | 
|---|
 | 887 |     delete[] integral_ikja;
 | 
|---|
 | 888 | 
 | 
|---|
 | 889 |     // Divide the (ia|jb) MO integrals by the term 
 | 
|---|
 | 890 |     // evals[i]+evals[j]-evals[a]-evals[b]
 | 
|---|
 | 891 |     // and keep these integrals in mo_int;
 | 
|---|
 | 892 |     // do this only for active i, j, a, and b
 | 
|---|
 | 893 |     tim_enter("divide (ia|jb)'s");
 | 
|---|
 | 894 | 
 | 
|---|
 | 895 |     index = 0;
 | 
|---|
 | 896 |     ij_index = 0;
 | 
|---|
 | 897 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 898 |       ii = i+i_offset;
 | 
|---|
 | 899 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 900 |         if (index++ % nproc == me) {
 | 
|---|
 | 901 |           if (j>=nfzc) {
 | 
|---|
 | 902 |             for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 903 |               iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
 | 
|---|
 | 904 |               bb = b+nocc;
 | 
|---|
 | 905 |               for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 906 | #if PRINT4Q
 | 
|---|
 | 907 |               printf("4Q: (%d %d|%d %d) = %12.8f\n",
 | 
|---|
 | 908 |                      i,a+nocc,j,b+nocc,*iajb_ptr);
 | 
|---|
 | 909 | #endif
 | 
|---|
 | 910 |                 // Zero out nonsymmetric integral, else divide by denominators
 | 
|---|
 | 911 |                 if (usep4 && ( symorb_irrep_[ii] ^
 | 
|---|
 | 912 |                       symorb_irrep_[j] ^
 | 
|---|
 | 913 |                       symorb_irrep_[a+nocc] ^
 | 
|---|
 | 914 |                       symorb_irrep_[bb]) ) {
 | 
|---|
 | 915 |                   *iajb_ptr++ = 0.0;
 | 
|---|
 | 916 |                 }
 | 
|---|
 | 917 |                 else
 | 
|---|
 | 918 |                   *iajb_ptr++ /= evals[ii]+evals[j]-evals[a+nocc]-evals[bb];
 | 
|---|
 | 919 |                 } // exit a loop
 | 
|---|
 | 920 |               }   // exit b loop
 | 
|---|
 | 921 |             }     // endif
 | 
|---|
 | 922 |           ij_index++;
 | 
|---|
 | 923 |           }       // endif
 | 
|---|
 | 924 |         }         // exit j loop
 | 
|---|
 | 925 |       }           // exit i loop
 | 
|---|
 | 926 |     tim_exit("divide (ia|jb)'s");
 | 
|---|
 | 927 | 
 | 
|---|
 | 928 |     // We now have the fully transformed integrals (ia|jb)
 | 
|---|
 | 929 |     // divided by the proper orbital energy denominators
 | 
|---|
 | 930 |     // for one batch of i, all j<nocc, and all a<nvir and b<nvir,
 | 
|---|
 | 931 |     // where i, j, a, and b are all active;
 | 
|---|
 | 932 |     // compute contribution to the MP2 correlation energy
 | 
|---|
 | 933 |     // from these integrals 
 | 
|---|
 | 934 | 
 | 
|---|
 | 935 | #if WRITE_DOUBLES
 | 
|---|
 | 936 |     if (nproc > 1 || npass > 1) {
 | 
|---|
 | 937 |       ExEnv::outn() << "csgrad.cc: WRITE_DOUBLES set but case not allowed" << endl;
 | 
|---|
 | 938 |       abort();
 | 
|---|
 | 939 |       }
 | 
|---|
 | 940 |     ExEnv::outn() << "csgrad.cc: WRITING DOUBLES: CHECK ORDER" << endl;
 | 
|---|
 | 941 |     char *doutname = SCFormIO::fileext_to_filename(".mp2");
 | 
|---|
 | 942 |     FILE *dout = fopen(doutname,"w");
 | 
|---|
 | 943 |     delete[] doutname;
 | 
|---|
 | 944 |     fwrite(&nocc_act, sizeof(int), 1, dout);
 | 
|---|
 | 945 |     fwrite(&nvir_act, sizeof(int), 1, dout);
 | 
|---|
 | 946 |     for (j=nfzc; j<nocc; j++) {
 | 
|---|
 | 947 |       for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 948 |         for (i=0; i<ni; i++) {
 | 
|---|
 | 949 |           ij_index = nocc*i + j;
 | 
|---|
 | 950 |           iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
 | 
|---|
 | 951 |           fwrite(iajb_ptr, sizeof(double), nvir_act, dout);
 | 
|---|
 | 952 |           for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 953 |             if (fabs(iajb_ptr[a])>1.0e-8) {
 | 
|---|
 | 954 |               ExEnv::outn() << scprintf(" Djbia(%2d %2d %2d %2d) = %12.8f",
 | 
|---|
 | 955 |                                j+1-nfzc,b+1,i+1,a+1,iajb_ptr[a])
 | 
|---|
 | 956 |                    << endl;
 | 
|---|
 | 957 |               }
 | 
|---|
 | 958 |             }
 | 
|---|
 | 959 |           }
 | 
|---|
 | 960 |         }
 | 
|---|
 | 961 |       }
 | 
|---|
 | 962 |     fclose(dout);
 | 
|---|
 | 963 | #endif
 | 
|---|
 | 964 | 
 | 
|---|
 | 965 |     tim_enter("compute ecorr");
 | 
|---|
 | 966 | 
 | 
|---|
 | 967 |     index = 0;
 | 
|---|
 | 968 |     ij_index = 0;
 | 
|---|
 | 969 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 970 |       double ecorr_i = 0.0;
 | 
|---|
 | 971 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 972 |         double ecorr_ij = 0.0;
 | 
|---|
 | 973 |         if (index++ % nproc == me) {
 | 
|---|
 | 974 | 
 | 
|---|
 | 975 |           if (j>=nfzc) {
 | 
|---|
 | 976 |             for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 977 |               iajb_ptr = &mo_int[nocc + nbasis*(b+nocc + nbasis*ij_index)];
 | 
|---|
 | 978 |               ibja_ptr = &mo_int[b+nocc + nbasis*(nocc + nbasis*ij_index)];
 | 
|---|
 | 979 |               for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 980 |                 delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
 | 
|---|
 | 981 |                 // only include determinants with unique coefficients
 | 
|---|
 | 982 |                 if (a>=b && i_offset+i>=j) {
 | 
|---|
 | 983 |                   if (a>b && i_offset+i>j) {
 | 
|---|
 | 984 |                     // aaaa or bbbb
 | 
|---|
 | 985 |                     biggest_coefs.insert(*iajb_ptr - *ibja_ptr,
 | 
|---|
 | 986 |                                          i_offset+i,j,a,b,1111);
 | 
|---|
 | 987 |                     // aabb or bbaa or abba or baab
 | 
|---|
 | 988 |                     biggest_coefs.insert(*ibja_ptr,i_offset+i,j,b,a,1212);
 | 
|---|
 | 989 |                     } // endif
 | 
|---|
 | 990 |                   // aabb or bbaa or abba or baab
 | 
|---|
 | 991 |                   biggest_coefs.insert(*iajb_ptr,i_offset+i,j,a,b,1212);
 | 
|---|
 | 992 |                   } // endif
 | 
|---|
 | 993 | 
 | 
|---|
 | 994 |                 tmpval = *iajb_ptr*(2**iajb_ptr - *ibja_ptr)*delta_ijab;
 | 
|---|
 | 995 |                 ecorr_mp2 += tmpval;
 | 
|---|
 | 996 |                 if (debug_) ecorr_ij += tmpval;
 | 
|---|
 | 997 |                 iajb_ptr++;
 | 
|---|
 | 998 |                 ibja_ptr += nbasis;;
 | 
|---|
 | 999 |                 } // exit a loop
 | 
|---|
 | 1000 |               }   // exit b loop
 | 
|---|
 | 1001 |             }     // endif
 | 
|---|
 | 1002 |           ij_index++;
 | 
|---|
 | 1003 |           }       // endif
 | 
|---|
 | 1004 |         if (debug_) {
 | 
|---|
 | 1005 |           msg_->sum(ecorr_ij);
 | 
|---|
 | 1006 |           ecorr_i += ecorr_ij;
 | 
|---|
 | 1007 |           ExEnv::out0() << indent
 | 
|---|
 | 1008 |                << scprintf("correlation energy for pair %3d %3d = %16.12f",
 | 
|---|
 | 1009 |                            i+i_offset, j, ecorr_ij)
 | 
|---|
 | 1010 |                << endl;
 | 
|---|
 | 1011 |           }
 | 
|---|
 | 1012 |         }         // exit j loop
 | 
|---|
 | 1013 |       if (debug_) {
 | 
|---|
 | 1014 |         ExEnv::out0() << indent
 | 
|---|
 | 1015 |              << scprintf("correlation energy for orbital %3d = %16.12f",
 | 
|---|
 | 1016 |                          i+i_offset, ecorr_i)
 | 
|---|
 | 1017 |              << endl;
 | 
|---|
 | 1018 |         }
 | 
|---|
 | 1019 |       }           // exit i loop
 | 
|---|
 | 1020 |     tim_exit("compute ecorr");
 | 
|---|
 | 1021 | 
 | 
|---|
 | 1022 |     // debug print
 | 
|---|
 | 1023 |     if (debug_ && me == 0) {
 | 
|---|
 | 1024 |       ExEnv::out0() << indent << "End of ecorr" << endl;
 | 
|---|
 | 1025 |       }
 | 
|---|
 | 1026 |     // end of debug print
 | 
|---|
 | 1027 | 
 | 
|---|
 | 1028 |     if (npass > 1 && pass < npass - 1) {
 | 
|---|
 | 1029 |       double passe = ecorr_mp2;
 | 
|---|
 | 1030 |       msg_->sum(passe);
 | 
|---|
 | 1031 |       ExEnv::out0() << indent
 | 
|---|
 | 1032 |            << "Partial correlation energy for pass " << pass << ":" << endl;
 | 
|---|
 | 1033 |       ExEnv::out0() << indent
 | 
|---|
 | 1034 |            << scprintf("  restart_ecorr          = %18.14f", passe)
 | 
|---|
 | 1035 |            << endl;
 | 
|---|
 | 1036 |       ExEnv::out0() << indent
 | 
|---|
 | 1037 |            << scprintf("  restart_orbital_memgrp = %d", ((pass+1) * ni))
 | 
|---|
 | 1038 |            << endl;
 | 
|---|
 | 1039 |       }
 | 
|---|
 | 1040 | 
 | 
|---|
 | 1041 |     integral_iqjs = 0;
 | 
|---|
 | 1042 |     mem->sync(); // Make sure MO integrals are complete on all nodes before continuing
 | 
|---|
 | 1043 | 
 | 
|---|
 | 1044 |     // don't go beyond this point if only the energy is needed
 | 
|---|
 | 1045 |     if (!dograd && !do_d1_) continue;
 | 
|---|
 | 1046 | 
 | 
|---|
 | 1047 |     mo_int = (double*) mem->localdata();
 | 
|---|
 | 1048 | 
 | 
|---|
 | 1049 |     if (!dograd) goto compute_L;
 | 
|---|
 | 1050 | 
 | 
|---|
 | 1051 |     // Update the matrices Pkj and Wkj with
 | 
|---|
 | 1052 |     // contributions from (occ vir|occ vir) integrals
 | 
|---|
 | 1053 |     index = 0;
 | 
|---|
 | 1054 |     ij_index = 0;
 | 
|---|
 | 1055 |     tim_enter("Pkj and Wkj");
 | 
|---|
 | 1056 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 1057 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 1058 |         if (index++ % nproc == me) {
 | 
|---|
 | 1059 | 
 | 
|---|
 | 1060 |           if (j>=nfzc) {
 | 
|---|
 | 1061 |             for (kloop=me; kloop<me+nocc; kloop++) {
 | 
|---|
 | 1062 |               // stagger k's to minimize contention
 | 
|---|
 | 1063 |               k = kloop%nocc;
 | 
|---|
 | 1064 |               if (k<=j) pkj_ptr = &Pkj[j*(j+1)/2 + k];
 | 
|---|
 | 1065 |               if (do_d2_) {
 | 
|---|
 | 1066 |                 if (k<=j && k>=nfzc) {
 | 
|---|
 | 1067 |                   d2occ_mat_ptr = &d2occ_mat[(j-nfzc)*(j-nfzc+1)/2 + k-nfzc];
 | 
|---|
 | 1068 |                   }
 | 
|---|
 | 1069 |                 }
 | 
|---|
 | 1070 |               wjk_ptr = &Wkj[j*nocc + k];
 | 
|---|
 | 1071 |               // Send for iakb, if necessary
 | 
|---|
 | 1072 |               ik_index = (i*nocc + k)/nproc;
 | 
|---|
 | 1073 |               ik_proc = (i*nocc + k)%nproc;
 | 
|---|
 | 1074 |               ik_offset = nocc + nocc*nbasis + nbasis*nbasis*ik_index;
 | 
|---|
 | 1075 |               mo_intbuf = (double*) membuf_remote.readonly_on_node(ik_offset,
 | 
|---|
 | 1076 |                                                                    nbasis*nvir-nocc,
 | 
|---|
 | 1077 |                                                                    ik_proc);
 | 
|---|
 | 1078 |               for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1079 |                 ibja_ptr = &mo_int[nocc + nbasis*(a+nocc + nbasis*ij_index)];
 | 
|---|
 | 1080 |                 iajb_ptr = &mo_int[a+nocc + nbasis*(nocc + nbasis*ij_index)];
 | 
|---|
 | 1081 |                 iakb_ptr = &mo_intbuf[a];
 | 
|---|
 | 1082 |                 for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 1083 |                   tmpval1 = *iakb_ptr * *iajb_ptr;
 | 
|---|
 | 1084 |                   tmpval = 2**iakb_ptr * *ibja_ptr++ - 4*tmpval1;
 | 
|---|
 | 1085 |                   // tmpval = 2**iakb_ptr * (*ibja_ptr++ - 2 * *iajb_ptr);
 | 
|---|
 | 1086 |                   iakb_ptr += nbasis;
 | 
|---|
 | 1087 |                   iajb_ptr += nbasis;
 | 
|---|
 | 1088 |                   if (k<nfzc && k<=j) *pkj_ptr += tmpval/(evals[k]-evals[j]);
 | 
|---|
 | 1089 |                   else if (k<=j) {
 | 
|---|
 | 1090 |                     *pkj_ptr += tmpval;
 | 
|---|
 | 1091 |                     if (do_d2_) *d2occ_mat_ptr += tmpval1;
 | 
|---|
 | 1092 |                     }
 | 
|---|
 | 1093 |                   if (k>=nfzc) {
 | 
|---|
 | 1094 |                     delta_ijab = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+b];
 | 
|---|
 | 1095 |                     *wjk_ptr += tmpval*delta_ijab;
 | 
|---|
 | 1096 |                     } 
 | 
|---|
 | 1097 |                   } // exit b loop
 | 
|---|
 | 1098 |                 }   // exit a loop
 | 
|---|
 | 1099 |               mo_intbuf = 0;
 | 
|---|
 | 1100 |               membuf_remote.release();
 | 
|---|
 | 1101 |               }     // end kloop loop
 | 
|---|
 | 1102 |             }       // endif
 | 
|---|
 | 1103 | 
 | 
|---|
 | 1104 |           ij_index++;
 | 
|---|
 | 1105 |           }         // endif
 | 
|---|
 | 1106 |         }           // exit j loop
 | 
|---|
 | 1107 |       }             // exit i loop
 | 
|---|
 | 1108 |     tim_exit("Pkj and Wkj");
 | 
|---|
 | 1109 | 
 | 
|---|
 | 1110 |     // debug print
 | 
|---|
 | 1111 |     if (debug_ && me == 0) {
 | 
|---|
 | 1112 |       ExEnv::out0() << indent << "End of Pkj and Wkj" << endl;
 | 
|---|
 | 1113 |       }
 | 
|---|
 | 1114 |     // end of debug print
 | 
|---|
 | 1115 | 
 | 
|---|
 | 1116 |     // Update the matrices Pab and Wab with
 | 
|---|
 | 1117 |     // contributions from (occ vir|occ vir) integrals
 | 
|---|
 | 1118 |     tim_enter("Pab and Wab");
 | 
|---|
 | 1119 |     index = 0;
 | 
|---|
 | 1120 |     ij_index = 0;
 | 
|---|
 | 1121 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 1122 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 1123 |         if (index++ % nproc == me) {
 | 
|---|
 | 1124 |           if (j>=nfzc) {
 | 
|---|
 | 1125 | 
 | 
|---|
 | 1126 |             offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
 | 
|---|
 | 1127 |             for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1128 |               pab_ptr = &Pab[a*(a+1)/2];
 | 
|---|
 | 1129 |               if (do_d2_) d2vir_mat_ptr = &d2vir_mat[a*(a+1)/2];
 | 
|---|
 | 1130 |               for (b=0; b<=a; b++) {  // active-active part of Pab and Wab
 | 
|---|
 | 1131 |                 wab_ptr = &Wab[a*nvir + b];
 | 
|---|
 | 1132 |                 wba_ptr = &Wab[b*nvir + a];
 | 
|---|
 | 1133 |                 ibjc_ptr = &mo_int[offset + b];
 | 
|---|
 | 1134 |                 icjb_ptr = &mo_int[offset + b*nbasis];
 | 
|---|
 | 1135 |                 iajc_ptr = &mo_int[offset + a];
 | 
|---|
 | 1136 |                 icja_ptr = &mo_int[offset + a*nbasis];
 | 
|---|
 | 1137 |                 for (c=0; c<nvir_act; c++) {
 | 
|---|
 | 1138 |                   tmpval = *iajc_ptr**ibjc_ptr;
 | 
|---|
 | 1139 |                   if (do_d2_) *d2vir_mat_ptr += tmpval;
 | 
|---|
 | 1140 |                   *pab_ptr += 4*tmpval - 2**iajc_ptr * *icjb_ptr;
 | 
|---|
 | 1141 |                   // *pab_ptr += 2**iajc_ptr * (2 * *ibjc_ptr - *icjb_ptr);
 | 
|---|
 | 1142 | 
 | 
|---|
 | 1143 |                   if (a == b) {
 | 
|---|
 | 1144 |                     delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
 | 
|---|
 | 1145 |                     *wab_ptr += 2**ibjc_ptr*(*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
 | 
|---|
 | 1146 |                     }
 | 
|---|
 | 1147 |                   else {
 | 
|---|
 | 1148 |                     delta_ijbc = evals[i_offset+i]+evals[j]-evals[nocc+b]-evals[nocc+c];
 | 
|---|
 | 1149 |                     delta_ijac = evals[i_offset+i]+evals[j]-evals[nocc+a]-evals[nocc+c];
 | 
|---|
 | 1150 |                     *wab_ptr += 2**ibjc_ptr * (*icja_ptr - 2 * *iajc_ptr)*delta_ijac;
 | 
|---|
 | 1151 |                     *wba_ptr += 2**iajc_ptr * (*icjb_ptr - 2 * *ibjc_ptr)*delta_ijbc;
 | 
|---|
 | 1152 |                     }
 | 
|---|
 | 1153 |                   iajc_ptr += nbasis;
 | 
|---|
 | 1154 |                   ibjc_ptr += nbasis;
 | 
|---|
 | 1155 |                   icja_ptr++;
 | 
|---|
 | 1156 |                   icjb_ptr++;
 | 
|---|
 | 1157 |                   } // exit c loop
 | 
|---|
 | 1158 |                 pab_ptr++;
 | 
|---|
 | 1159 |                 d2vir_mat_ptr++;
 | 
|---|
 | 1160 |                 }   // exit b loop
 | 
|---|
 | 1161 |               }     // exit a loop
 | 
|---|
 | 1162 | 
 | 
|---|
 | 1163 |             for (a=0; a<nfzv; a++) {        // active-frozen part of Pab
 | 
|---|
 | 1164 |               pab_ptr = &Pab[(a+nvir_act)*(a+nvir_act+1)/2];
 | 
|---|
 | 1165 |               for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 1166 |                 tmpval = evals[nocc+b] - evals[nocc+nvir_act+a];
 | 
|---|
 | 1167 |                 ibjc_ptr = &mo_int[offset+b];
 | 
|---|
 | 1168 |                 iajc_ptr = &mo_int[offset+a+nvir_act];
 | 
|---|
 | 1169 |                 icja_ptr = &mo_int[offset+(a+nvir_act)*nbasis];
 | 
|---|
 | 1170 |                 for (c=0; c<nvir_act; c++) {
 | 
|---|
 | 1171 |                   *pab_ptr += 2**ibjc_ptr*(2**iajc_ptr - *icja_ptr++)/tmpval;
 | 
|---|
 | 1172 |                   ibjc_ptr += nbasis;
 | 
|---|
 | 1173 |                   iajc_ptr += nbasis;
 | 
|---|
 | 1174 |                   }  // exit c loop
 | 
|---|
 | 1175 |                 pab_ptr++;
 | 
|---|
 | 1176 |                 }    // exit b loop
 | 
|---|
 | 1177 |               }      // exit a loop
 | 
|---|
 | 1178 | 
 | 
|---|
 | 1179 |             }        // endif
 | 
|---|
 | 1180 |           ij_index++;
 | 
|---|
 | 1181 |           }     // endif
 | 
|---|
 | 1182 |         }       // exit j loop
 | 
|---|
 | 1183 |       }         // exit i loop
 | 
|---|
 | 1184 |     tim_exit("Pab and Wab");
 | 
|---|
 | 1185 | 
 | 
|---|
 | 1186 |     // debug print
 | 
|---|
 | 1187 |     if (debug_ && me == 0) {
 | 
|---|
 | 1188 |       ExEnv::out0() << indent << "End of Pab and Wab" << endl;
 | 
|---|
 | 1189 |       }
 | 
|---|
 | 1190 |     // end of debug print
 | 
|---|
 | 1191 | 
 | 
|---|
 | 1192 |     compute_L:
 | 
|---|
 | 1193 | 
 | 
|---|
 | 1194 |     ///////////////////////////////////////
 | 
|---|
 | 1195 |     // Update Waj and Laj with contrib. 
 | 
|---|
 | 1196 |     // from (oo|ov) and (ov|oo) integrals;
 | 
|---|
 | 1197 |     // here a is active and j is active or
 | 
|---|
 | 1198 |     // frozen
 | 
|---|
 | 1199 |     ///////////////////////////////////////
 | 
|---|
 | 1200 |     tim_enter("Waj and Laj");
 | 
|---|
 | 1201 | 
 | 
|---|
 | 1202 |     // (oo|ov) contribution
 | 
|---|
 | 1203 |     index = 0;
 | 
|---|
 | 1204 |     ik_index = 0;
 | 
|---|
 | 1205 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 1206 |       for (k=0; k<nocc; k++) {
 | 
|---|
 | 1207 |         if (index++ % nproc == me) {
 | 
|---|
 | 1208 |           if (k>=nfzc) {
 | 
|---|
 | 1209 |             offset = nbasis*nocc + nbasis*nbasis*ik_index;
 | 
|---|
 | 1210 |             for (j=0; j<nocc; j++) {
 | 
|---|
 | 1211 |               for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 1212 |                 ibka_ptr = &mo_int[b+nocc + offset];
 | 
|---|
 | 1213 |                 ijkb_ptr = &mo_int[j + nbasis*b + offset];
 | 
|---|
 | 1214 |                 if (dograd) waj_ptr = &Waj[j*nvir]; // order as j*nvir+a to make loops more efficient
 | 
|---|
 | 1215 |                 laj_ptr = &Laj[j*nvir];
 | 
|---|
 | 1216 |                 for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1217 |                   tmpval = 2**ibka_ptr * *ijkb_ptr;
 | 
|---|
 | 1218 |                   ibka_ptr += nbasis;
 | 
|---|
 | 1219 |                   if (dograd) *waj_ptr++ += tmpval;
 | 
|---|
 | 1220 |                   *laj_ptr++ -= tmpval; // This term had the wrong sign in Frisch's paper
 | 
|---|
 | 1221 |                   } // exit a loop
 | 
|---|
 | 1222 |                 }   // exit b loop
 | 
|---|
 | 1223 |               }     // exit j loop
 | 
|---|
 | 1224 |             }       // endif
 | 
|---|
 | 1225 |           ik_index++;
 | 
|---|
 | 1226 |           }         // endif
 | 
|---|
 | 1227 |         }           // exit k loop
 | 
|---|
 | 1228 |       }             // exit i loop
 | 
|---|
 | 1229 | 
 | 
|---|
 | 1230 |     // (ov|oo) contribution
 | 
|---|
 | 1231 |     index = 0;
 | 
|---|
 | 1232 |     ik_index = 0;
 | 
|---|
 | 1233 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 1234 |       for (k=0; k<nocc; k++) {
 | 
|---|
 | 1235 |         if (index++ % nproc == me) {
 | 
|---|
 | 1236 |           if (k>=nfzc) {
 | 
|---|
 | 1237 |             offset = nocc + nbasis*nbasis*ik_index;
 | 
|---|
 | 1238 |             for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 1239 |               for (j=0; j<nocc; j++) {
 | 
|---|
 | 1240 |                 ibkj_ptr = &mo_int[offset + b + j*nbasis];
 | 
|---|
 | 1241 |                 ibka_ptr = &mo_int[offset + b + nocc*nbasis];
 | 
|---|
 | 1242 |                 if (dograd) waj_ptr = &Waj[j*nvir];
 | 
|---|
 | 1243 |                 laj_ptr = &Laj[j*nvir];
 | 
|---|
 | 1244 |                 for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1245 |                   tmpval = 4 * *ibka_ptr * *ibkj_ptr;
 | 
|---|
 | 1246 |                   ibka_ptr += nbasis;
 | 
|---|
 | 1247 |                   if (dograd) *waj_ptr++ -= tmpval;
 | 
|---|
 | 1248 |                   *laj_ptr++ += tmpval; // This term had the wrong sign in Frisch's paper
 | 
|---|
 | 1249 |                   } // exit a loop
 | 
|---|
 | 1250 |                 }   // exit j loop
 | 
|---|
 | 1251 |               }     // exit b loop
 | 
|---|
 | 1252 |             }       // endif
 | 
|---|
 | 1253 |           ik_index++;
 | 
|---|
 | 1254 |           }       // endif
 | 
|---|
 | 1255 |         }         // exit k loop
 | 
|---|
 | 1256 |       }           // exit i loop
 | 
|---|
 | 1257 | 
 | 
|---|
 | 1258 |     tim_exit("Waj and Laj");
 | 
|---|
 | 1259 |     /////////////////////////////
 | 
|---|
 | 1260 |     // End of Waj and Laj update
 | 
|---|
 | 1261 |     /////////////////////////////
 | 
|---|
 | 1262 | 
 | 
|---|
 | 1263 |     // debug print
 | 
|---|
 | 1264 |     if (debug_ && me == 0) {
 | 
|---|
 | 1265 |       ExEnv::out0() << indent << "End of Paj and Waj" << endl;
 | 
|---|
 | 1266 |       }
 | 
|---|
 | 1267 |     // end of debug print
 | 
|---|
 | 1268 | 
 | 
|---|
 | 1269 |     mo_int = 0;
 | 
|---|
 | 1270 | 
 | 
|---|
 | 1271 |     mem->sync(); // Need to synchronize before deleting mo_intbuf
 | 
|---|
 | 1272 | 
 | 
|---|
 | 1273 |     mo_int = (double*) mem->localdata();
 | 
|---|
 | 1274 | 
 | 
|---|
 | 1275 |     gamma_iajs_tmp = new double[nbasis*nvir_act];
 | 
|---|
 | 1276 |     if (!gamma_iajs_tmp) {
 | 
|---|
 | 1277 |       ExEnv::outn() << indent << "Could not allocate gamma_iajs_tmp" << endl;
 | 
|---|
 | 1278 |       abort();
 | 
|---|
 | 1279 |       }
 | 
|---|
 | 1280 | 
 | 
|---|
 | 1281 |     // debug print
 | 
|---|
 | 1282 |     if (debug_ && me == 0) {
 | 
|---|
 | 1283 |       ExEnv::out0() << indent << "Begin first and second q.b.t." << endl;
 | 
|---|
 | 1284 |       }
 | 
|---|
 | 1285 |     // end of debug print
 | 
|---|
 | 1286 | 
 | 
|---|
 | 1287 |     ///////////////////////////////////////////////////////////
 | 
|---|
 | 1288 |     // Perform first and second quarter back-transformation.
 | 
|---|
 | 1289 |     // Each node produces gamma_iajs, and gamma_iqjs 
 | 
|---|
 | 1290 |     // for a subset of i and j, all a and all s;
 | 
|---|
 | 1291 |     // the back-transf. is done only for active i, j, a, and b
 | 
|---|
 | 1292 |     ///////////////////////////////////////////////////////////
 | 
|---|
 | 1293 | 
 | 
|---|
 | 1294 |     // Begin first quarter back-transformation
 | 
|---|
 | 1295 |     tim_enter("1. q.b.t.");
 | 
|---|
 | 1296 |     index = 0;
 | 
|---|
 | 1297 |     ij_index = 0;
 | 
|---|
 | 1298 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 1299 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 1300 |         if (index++ % nproc == me) {
 | 
|---|
 | 1301 |           if (j>=nfzc) {
 | 
|---|
 | 1302 |             bzerofast(gamma_iajs_tmp,nbasis*nvir_act);
 | 
|---|
 | 1303 |             offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index;
 | 
|---|
 | 1304 | 
 | 
|---|
 | 1305 |             for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1306 |               for (s=0; s<nbasis; s++) {
 | 
|---|
 | 1307 |                 c_sb = &scf_vector[s][nocc];
 | 
|---|
 | 1308 |                 gamma_iajs_ptr = &gamma_iajs_tmp[s*nvir_act + a];
 | 
|---|
 | 1309 |                 ibja_ptr = &mo_int[a*nbasis + offset];
 | 
|---|
 | 1310 |                 iajb_ptr = &mo_int[a + offset];
 | 
|---|
 | 1311 | 
 | 
|---|
 | 1312 |                 tmpval = 0.0;
 | 
|---|
 | 1313 |                 for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 1314 |                   tmpval += 2**c_sb++ * (2**iajb_ptr - *ibja_ptr++);
 | 
|---|
 | 1315 |                   iajb_ptr += nbasis;
 | 
|---|
 | 1316 |                   } // exit b loop
 | 
|---|
 | 1317 |                 *gamma_iajs_ptr += tmpval;
 | 
|---|
 | 1318 |                 }   // exit s loop
 | 
|---|
 | 1319 |               }     // exit a loop
 | 
|---|
 | 1320 |             // Put gamma_iajs_tmp into mo_int for one i,j
 | 
|---|
 | 1321 |             // while overwriting mo_int
 | 
|---|
 | 1322 |             gamma_iajs_ptr = gamma_iajs_tmp;
 | 
|---|
 | 1323 |             for (y=0; y<nbasis; y++) {
 | 
|---|
 | 1324 |               iajy_ptr = &mo_int[nocc + nbasis*(y + nbasis*ij_index)];
 | 
|---|
 | 1325 |               for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1326 |                 *iajy_ptr++ = *gamma_iajs_ptr++;
 | 
|---|
 | 1327 |                 }
 | 
|---|
 | 1328 |               }
 | 
|---|
 | 1329 | 
 | 
|---|
 | 1330 |             }     // endif
 | 
|---|
 | 1331 |           ij_index++;
 | 
|---|
 | 1332 |           }       // endif
 | 
|---|
 | 1333 |         }         // exit j loop
 | 
|---|
 | 1334 |       }           // exit i loop
 | 
|---|
 | 1335 |     // end of first quarter back-transformation
 | 
|---|
 | 1336 |     tim_exit("1. q.b.t.");
 | 
|---|
 | 1337 | 
 | 
|---|
 | 1338 |     // debug print
 | 
|---|
 | 1339 |     if (debug_ && me == 0) {
 | 
|---|
 | 1340 |       ExEnv::out0() << indent << "End of first q.b.t." << endl;
 | 
|---|
 | 1341 |       }
 | 
|---|
 | 1342 |     // end of debug print
 | 
|---|
 | 1343 | 
 | 
|---|
 | 1344 |     mo_int = 0;
 | 
|---|
 | 1345 | 
 | 
|---|
 | 1346 |     mem->sync(); // Make sure all nodes are done with gamma_iajs_tmp before renaming
 | 
|---|
 | 1347 | 
 | 
|---|
 | 1348 |     delete[] gamma_iajs_tmp;
 | 
|---|
 | 1349 | 
 | 
|---|
 | 1350 |     // The array mo_int has now been overwritten by the quarter 
 | 
|---|
 | 1351 |     // back-transformed non-sep 2PDM gamma_iajs, so rename
 | 
|---|
 | 1352 |     gamma_iajs = (double*) mem->localdata();
 | 
|---|
 | 1353 | 
 | 
|---|
 | 1354 |     gamma_iqjs_tmp = new double[nbasis];
 | 
|---|
 | 1355 |     if (!gamma_iqjs_tmp) {
 | 
|---|
 | 1356 |       ExEnv::errn() << "Could not allocate gamma_iqjs_tmp" << endl;
 | 
|---|
 | 1357 |       abort();
 | 
|---|
 | 1358 |       }
 | 
|---|
 | 1359 | 
 | 
|---|
 | 1360 |     if (debug_ && me == 0) {
 | 
|---|
 | 1361 |       ExEnv::out0() << indent << "Begin second q.b.t." << endl;
 | 
|---|
 | 1362 |       }
 | 
|---|
 | 1363 | 
 | 
|---|
 | 1364 |     // Begin second quarter back-transformation
 | 
|---|
 | 1365 |     // (gamma_iqjs elements ordered as i,j,s,q,
 | 
|---|
 | 1366 |     // i.e., q varies fastest)
 | 
|---|
 | 1367 |     tim_enter("2. q.b.t.");
 | 
|---|
 | 1368 |     index = 0;
 | 
|---|
 | 1369 |     ij_index = 0;
 | 
|---|
 | 1370 |     for (i=0; i<ni; i++) {
 | 
|---|
 | 1371 |       for (j=0; j<nocc; j++) {
 | 
|---|
 | 1372 |         if (index++ % nproc == me) {
 | 
|---|
 | 1373 |           if (j>=nfzc) {
 | 
|---|
 | 1374 |             offset = nbasis*nbasis*ij_index;
 | 
|---|
 | 1375 | 
 | 
|---|
 | 1376 |             for (s=0; s<nbasis; s++) {
 | 
|---|
 | 1377 |               bzerofast(gamma_iqjs_tmp,nbasis);
 | 
|---|
 | 1378 |               for (q=0; q<nbasis; q++) {
 | 
|---|
 | 1379 |                 gamma_iqjs_ptr = &gamma_iqjs_tmp[q];
 | 
|---|
 | 1380 |                 gamma_iajs_ptr = &gamma_iajs[nocc + s*nbasis + offset];
 | 
|---|
 | 1381 |                 c_qa = &scf_vector[q][nocc];
 | 
|---|
 | 1382 | 
 | 
|---|
 | 1383 |                 tmpval = 0.0;
 | 
|---|
 | 1384 |                 for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1385 |                   tmpval += *c_qa++ * *gamma_iajs_ptr++;
 | 
|---|
 | 1386 |                   } // exit a loop
 | 
|---|
 | 1387 |                 *gamma_iqjs_ptr += tmpval;
 | 
|---|
 | 1388 |                 }   // exit q loop
 | 
|---|
 | 1389 |               // Put gamma_iqjs_tmp into gamma_iajs for one i,j,s
 | 
|---|
 | 1390 |               // while overwriting gamma_iajs
 | 
|---|
 | 1391 |               gamma_iajs_ptr = &gamma_iajs[s*nbasis + offset];
 | 
|---|
 | 1392 |               gamma_iqjs_ptr = gamma_iqjs_tmp;
 | 
|---|
 | 1393 |               for (q=0; q<nbasis; q++) {
 | 
|---|
 | 1394 |                 *gamma_iajs_ptr++ = *gamma_iqjs_ptr++;
 | 
|---|
 | 1395 |                 }
 | 
|---|
 | 1396 |               }   // exit s loop
 | 
|---|
 | 1397 | 
 | 
|---|
 | 1398 |             }     // endif
 | 
|---|
 | 1399 |           ij_index++;
 | 
|---|
 | 1400 |           }       // endif
 | 
|---|
 | 1401 |         }         // exit j loop
 | 
|---|
 | 1402 |       }           // exit i loop
 | 
|---|
 | 1403 |     tim_exit("2. q.b.t.");
 | 
|---|
 | 1404 |     // end of second quarter back-transformation
 | 
|---|
 | 1405 | 
 | 
|---|
 | 1406 |     if (debug_ && me == 0) {
 | 
|---|
 | 1407 |       ExEnv::out0() << indent << "End of second q.b.t." << endl;
 | 
|---|
 | 1408 |       }
 | 
|---|
 | 1409 | 
 | 
|---|
 | 1410 |     gamma_iajs = 0;
 | 
|---|
 | 1411 |     
 | 
|---|
 | 1412 |     mem->sync(); // Keep this here to make sure all nodes have gamma_iqjs
 | 
|---|
 | 1413 |                  // before it is needed below, and that gamma_iajs is not
 | 
|---|
 | 1414 |                  // deleted prematurely
 | 
|---|
 | 1415 | 
 | 
|---|
 | 1416 |     // The quarter back-transformed elements gamma_iajs have now been
 | 
|---|
 | 1417 |     // overwritten by the half back-transformed elements gamma_iqjs
 | 
|---|
 | 1418 | 
 | 
|---|
 | 1419 |     delete[] gamma_iqjs_tmp;
 | 
|---|
 | 1420 | 
 | 
|---|
 | 1421 |     /////////////////////////////////////////////////
 | 
|---|
 | 1422 |     // End of 1. and 2. quarter back-transformation
 | 
|---|
 | 1423 |     /////////////////////////////////////////////////
 | 
|---|
 | 1424 | 
 | 
|---|
 | 1425 |     Lpi = new double[nbasis*ni];
 | 
|---|
 | 1426 |     bzerofast(Lpi,nbasis*ni);
 | 
|---|
 | 1427 | 
 | 
|---|
 | 1428 |     if (me == 0) {
 | 
|---|
 | 1429 |       ExEnv::out0() << indent << "Begin third and fourth q.b.t." << endl;
 | 
|---|
 | 1430 |       }
 | 
|---|
 | 1431 | 
 | 
|---|
 | 1432 |     //////////////////////////////////////////////////////////
 | 
|---|
 | 1433 |     // Perform third and fourth quarter back-transformation
 | 
|---|
 | 1434 |     // and compute contribution to gradient from non-sep 2PDM
 | 
|---|
 | 1435 |     //////////////////////////////////////////////////////////
 | 
|---|
 | 1436 | 
 | 
|---|
 | 1437 |     tim_enter("3.qbt+4.qbt+non-sep contrib.");
 | 
|---|
 | 1438 |     sp_g_data.init();
 | 
|---|
 | 1439 |     for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 1440 |       qbt34thread[i]->set_i_offset(i_offset);
 | 
|---|
 | 1441 |       qbt34thread[i]->set_ni(ni);
 | 
|---|
 | 1442 |       thr_->add_thread(i,qbt34thread[i]);
 | 
|---|
 | 1443 | #     if SINGLE_THREAD_QBT34
 | 
|---|
 | 1444 |       qbt34thread[i]->run();
 | 
|---|
 | 1445 | #     endif
 | 
|---|
 | 1446 |       }
 | 
|---|
 | 1447 | #   if !SINGLE_THREAD_QBT34
 | 
|---|
 | 1448 |     thr_->start_threads();
 | 
|---|
 | 1449 |     thr_->wait_threads();
 | 
|---|
 | 1450 | #   endif
 | 
|---|
 | 1451 |     tim_exit("3.qbt+4.qbt+non-sep contrib.");
 | 
|---|
 | 1452 |     // Add thread contributions to Lpi and ginter
 | 
|---|
 | 1453 |     for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 1454 |       double *Lpi_thread = qbt34thread[i]->get_Lpi();
 | 
|---|
 | 1455 |       double **ginter_thread = qbt34thread[i]->get_ginter();
 | 
|---|
 | 1456 |       for (j=0; j<nbasis*ni; j++) Lpi[j] += Lpi_thread[j];
 | 
|---|
 | 1457 |       for (j=0; j<natom; j++) {
 | 
|---|
 | 1458 |         for (k=0; k<3; k++) {
 | 
|---|
 | 1459 |           ginter[j][k] += ginter_thread[j][k];
 | 
|---|
 | 1460 |           }
 | 
|---|
 | 1461 |         }
 | 
|---|
 | 1462 |       aointder_computed += qbt34thread[i]->get_aointder_computed();
 | 
|---|
 | 1463 |       }
 | 
|---|
 | 1464 | 
 | 
|---|
 | 1465 |     if (me == 0) {
 | 
|---|
 | 1466 |       ExEnv::out0() << indent << "End of third and fourth q.b.t." << endl;
 | 
|---|
 | 1467 |       }
 | 
|---|
 | 1468 | 
 | 
|---|
 | 1469 |     mem->sync(); // Make sure all nodes are done before deleting arrays
 | 
|---|
 | 1470 | 
 | 
|---|
 | 1471 |     if (debug_ > 1) {
 | 
|---|
 | 1472 |       RefSCDimension ni_dim(new SCDimension(ni,1));
 | 
|---|
 | 1473 |       ni_dim->blocks()->set_subdim(0, new SCDimension(ni));
 | 
|---|
 | 1474 |       RefSCDimension nbasis_dim(new SCDimension(nbasis,1));
 | 
|---|
 | 1475 |       nbasis_dim->blocks()->set_subdim(0, new SCDimension(nbasis));
 | 
|---|
 | 1476 |       RefSCMatrix Lpi_mat(nbasis_dim, ni_dim, kit);
 | 
|---|
 | 1477 |       Lpi_mat->assign(Lpi);
 | 
|---|
 | 1478 |       Lpi_mat.print("Lpi");
 | 
|---|
 | 1479 |       }
 | 
|---|
 | 1480 | 
 | 
|---|
 | 1481 |     if (debug_ && me == 0) {
 | 
|---|
 | 1482 |       ExEnv::out0() << indent << "Back-transform Lpi" << endl;
 | 
|---|
 | 1483 |       }
 | 
|---|
 | 1484 | 
 | 
|---|
 | 1485 |     // Back-transform Lpi to MO basis
 | 
|---|
 | 1486 |     lpi_ptr = Lpi;
 | 
|---|
 | 1487 |     for (p=0; p<nbasis; p++) {
 | 
|---|
 | 1488 |       for (i=0; i<ni; i++) {
 | 
|---|
 | 1489 |         c_pa = &scf_vector[p][nocc];
 | 
|---|
 | 1490 |         laj_ptr = &Laj[nvir*(i_offset + i)];
 | 
|---|
 | 1491 |         for (a=0; a<nvir; a++) {
 | 
|---|
 | 1492 |           *laj_ptr++ += *c_pa++ * *lpi_ptr;
 | 
|---|
 | 1493 |           } // exit a loop
 | 
|---|
 | 1494 |         lpi_ptr++;
 | 
|---|
 | 1495 |         }   // exit i loop
 | 
|---|
 | 1496 |       }     // exit p loop
 | 
|---|
 | 1497 | 
 | 
|---|
 | 1498 | //  malloc_chain_check(1);
 | 
|---|
 | 1499 | 
 | 
|---|
 | 1500 |     delete[] Lpi;
 | 
|---|
 | 1501 | 
 | 
|---|
 | 1502 |     if (me == 0) {
 | 
|---|
 | 1503 |       ExEnv::out0() << indent << "Done with pass " << pass+1 << endl;
 | 
|---|
 | 1504 |       }
 | 
|---|
 | 1505 |     }           // exit loop over i-batches (pass)
 | 
|---|
 | 1506 |   tim_exit("mp2 passes");
 | 
|---|
 | 1507 | 
 | 
|---|
 | 1508 |   if (dograd || do_d1_) {
 | 
|---|
 | 1509 |     for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 1510 |       delete qbt34thread[i];
 | 
|---|
 | 1511 |     }
 | 
|---|
 | 1512 |     delete[] qbt34thread;
 | 
|---|
 | 1513 |   }
 | 
|---|
 | 1514 | 
 | 
|---|
 | 1515 |   mem->set_localsize(0);
 | 
|---|
 | 1516 | 
 | 
|---|
 | 1517 |   // debug print
 | 
|---|
 | 1518 |   if (debug_ && me == 0) {
 | 
|---|
 | 1519 |     ExEnv::out0() << indent << "Exited loop over i-batches" << endl;
 | 
|---|
 | 1520 |     }
 | 
|---|
 | 1521 |   // end of debug print
 | 
|---|
 | 1522 | 
 | 
|---|
 | 1523 |   ///////////////////////////////////////////////////////////////
 | 
|---|
 | 1524 |   // The computation of the MP2 energy is now complete on each
 | 
|---|
 | 1525 |   // node; add the nodes' contributions and print out the energy
 | 
|---|
 | 1526 |   ///////////////////////////////////////////////////////////////
 | 
|---|
 | 1527 |   msg_->sum(ecorr_mp2);
 | 
|---|
 | 1528 |   msg_->sum(aoint_computed);
 | 
|---|
 | 1529 |   msg_->sum(aointder_computed);
 | 
|---|
 | 1530 | 
 | 
|---|
 | 1531 |   biggest_coefs.combine(msg_);
 | 
|---|
 | 1532 | #if PRINT_BIGGEST_INTS
 | 
|---|
 | 1533 |   biggest_ints_1.combine(msg_);
 | 
|---|
 | 1534 |   biggest_ints_2.combine(msg_);
 | 
|---|
 | 1535 |   biggest_ints_2s.combine(msg_);
 | 
|---|
 | 1536 |   biggest_ints_3a.combine(msg_);
 | 
|---|
 | 1537 |   biggest_ints_3.combine(msg_);
 | 
|---|
 | 1538 | #endif
 | 
|---|
 | 1539 | 
 | 
|---|
 | 1540 |   if (me == 0) {
 | 
|---|
 | 1541 |     emp2 = escf + ecorr_mp2;
 | 
|---|
 | 1542 | 
 | 
|---|
 | 1543 | #if PRINT_BIGGEST_INTS
 | 
|---|
 | 1544 |     ExEnv::out0() << "biggest 1/4 transformed ints" << endl;
 | 
|---|
 | 1545 |     for (i=0; i<biggest_ints_1.ncontrib(); i++) {
 | 
|---|
 | 1546 |       ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
 | 
|---|
 | 1547 |                        biggest_ints_1.indices(i)[0],
 | 
|---|
 | 1548 |                        biggest_ints_1.indices(i)[1],
 | 
|---|
 | 1549 |                        biggest_ints_1.indices(i)[2],
 | 
|---|
 | 1550 |                        biggest_ints_1.indices(i)[3],
 | 
|---|
 | 1551 |                        biggest_ints_1.val(i)
 | 
|---|
 | 1552 |                        )
 | 
|---|
 | 1553 |            << endl;
 | 
|---|
 | 1554 |       }
 | 
|---|
 | 1555 |     ExEnv::outn() << "biggest 2/4 transformed ints" << endl;
 | 
|---|
 | 1556 |     for (i=0; i<biggest_ints_2.ncontrib(); i++) {
 | 
|---|
 | 1557 |       ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
 | 
|---|
 | 1558 |                        biggest_ints_2.indices(i)[0],
 | 
|---|
 | 1559 |                        biggest_ints_2.indices(i)[1],
 | 
|---|
 | 1560 |                        biggest_ints_2.indices(i)[2],
 | 
|---|
 | 1561 |                        biggest_ints_2.indices(i)[3],
 | 
|---|
 | 1562 |                        biggest_ints_2.val(i)
 | 
|---|
 | 1563 |                        )
 | 
|---|
 | 1564 |            << endl;
 | 
|---|
 | 1565 |       }
 | 
|---|
 | 1566 |     ExEnv::outn() << "restricted 2/4 transformed ints" << endl;
 | 
|---|
 | 1567 |     for (i=0; i<biggest_ints_2s.ncontrib(); i++) {
 | 
|---|
 | 1568 |       ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
 | 
|---|
 | 1569 |                        biggest_ints_2s.indices(i)[0],
 | 
|---|
 | 1570 |                        biggest_ints_2s.indices(i)[1],
 | 
|---|
 | 1571 |                        biggest_ints_2s.indices(i)[2],
 | 
|---|
 | 1572 |                        biggest_ints_2s.indices(i)[3],
 | 
|---|
 | 1573 |                        biggest_ints_2s.val(i)
 | 
|---|
 | 1574 |                        )
 | 
|---|
 | 1575 |            << endl;
 | 
|---|
 | 1576 |       }
 | 
|---|
 | 1577 |     ExEnv::outn() << "biggest 3/4 transformed ints (in 3.)" << endl;
 | 
|---|
 | 1578 |     for (i=0; i<biggest_ints_3a.ncontrib(); i++) {
 | 
|---|
 | 1579 |       ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
 | 
|---|
 | 1580 |                        biggest_ints_3a.indices(i)[0],
 | 
|---|
 | 1581 |                        biggest_ints_3a.indices(i)[1],
 | 
|---|
 | 1582 |                        biggest_ints_3a.indices(i)[2],
 | 
|---|
 | 1583 |                        biggest_ints_3a.indices(i)[3],
 | 
|---|
 | 1584 |                        biggest_ints_3a.val(i)
 | 
|---|
 | 1585 |                        )
 | 
|---|
 | 1586 |            << endl;
 | 
|---|
 | 1587 |       }
 | 
|---|
 | 1588 |     ExEnv::outn() << "biggest 3/4 transformed ints (in 4.)" << endl;
 | 
|---|
 | 1589 |     for (i=0; i<biggest_ints_3.ncontrib(); i++) {
 | 
|---|
 | 1590 |       ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f",
 | 
|---|
 | 1591 |                        biggest_ints_3.indices(i)[0],
 | 
|---|
 | 1592 |                        biggest_ints_3.indices(i)[1],
 | 
|---|
 | 1593 |                        biggest_ints_3.indices(i)[2],
 | 
|---|
 | 1594 |                        biggest_ints_3.indices(i)[3],
 | 
|---|
 | 1595 |                        biggest_ints_3.val(i)
 | 
|---|
 | 1596 |                        )
 | 
|---|
 | 1597 |            << endl;
 | 
|---|
 | 1598 |       }
 | 
|---|
 | 1599 | #endif
 | 
|---|
 | 1600 | 
 | 
|---|
 | 1601 |     if (restart_orbital_memgrp_ == 0) {
 | 
|---|
 | 1602 |       if (biggest_coefs.ncontrib()) {
 | 
|---|
 | 1603 |         ExEnv::out0() << endl << indent
 | 
|---|
 | 1604 |              << "Largest first order coefficients (unique):"
 | 
|---|
 | 1605 |              << endl;
 | 
|---|
 | 1606 |         }
 | 
|---|
 | 1607 |       for (i=0; i<biggest_coefs.ncontrib(); i++) {
 | 
|---|
 | 1608 |         int i0 = biggest_coefs.indices(i)[0];
 | 
|---|
 | 1609 |         int i1 = biggest_coefs.indices(i)[1];
 | 
|---|
 | 1610 |         int i2 = biggest_coefs.indices(i)[2] + nocc;
 | 
|---|
 | 1611 |         int i3 = biggest_coefs.indices(i)[3] + nocc;
 | 
|---|
 | 1612 |         int spincase = biggest_coefs.indices(i)[4];
 | 
|---|
 | 1613 |         ExEnv::out0() << indent
 | 
|---|
 | 1614 |              << scprintf("  %2d %12.8f %2d %3s %2d %3s -> %2d %3s %2d %3s (%s)",
 | 
|---|
 | 1615 |                          i+1, biggest_coefs.val(i),
 | 
|---|
 | 1616 |                          symorb_num_[i0]+1,
 | 
|---|
 | 1617 |                          ct.gamma(symorb_irrep_[i0]).symbol(),
 | 
|---|
 | 1618 |                          symorb_num_[i1]+1,
 | 
|---|
 | 1619 |                          ct.gamma(symorb_irrep_[i1]).symbol(),
 | 
|---|
 | 1620 |                          symorb_num_[i2]+1,
 | 
|---|
 | 1621 |                          ct.gamma(symorb_irrep_[i2]).symbol(),
 | 
|---|
 | 1622 |                          symorb_num_[i3]+1,
 | 
|---|
 | 1623 |                          ct.gamma(symorb_irrep_[i3]).symbol(),
 | 
|---|
 | 1624 |                          (spincase==1111?"++++":"+-+-")
 | 
|---|
 | 1625 |                )
 | 
|---|
 | 1626 |              << endl;
 | 
|---|
 | 1627 |         }
 | 
|---|
 | 1628 |       }
 | 
|---|
 | 1629 | 
 | 
|---|
 | 1630 |     // Print out various energies etc.
 | 
|---|
 | 1631 | 
 | 
|---|
 | 1632 |     if (debug_) {
 | 
|---|
 | 1633 |       ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
 | 
|---|
 | 1634 |            << indent << "(or integral derivatives) would have been computed\n"
 | 
|---|
 | 1635 |            << indent << "without bounds checking: "
 | 
|---|
 | 1636 |            << npass*nshell*nshell*(nshell+1)*(nshell+1)/2
 | 
|---|
 | 1637 |            << endl;
 | 
|---|
 | 1638 | 
 | 
|---|
 | 1639 |       ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
 | 
|---|
 | 1640 |            << indent << "were computed: " << aoint_computed
 | 
|---|
 | 1641 |            << endl;
 | 
|---|
 | 1642 | 
 | 
|---|
 | 1643 |       if (dograd) {
 | 
|---|
 | 1644 |         ExEnv::out0() << indent
 | 
|---|
 | 1645 |              << "Number of shell quartets for which AO integral derivatives\n"
 | 
|---|
 | 1646 |              << indent << "were computed: " << aointder_computed
 | 
|---|
 | 1647 |              << endl;
 | 
|---|
 | 1648 |         }
 | 
|---|
 | 1649 |       }
 | 
|---|
 | 1650 | 
 | 
|---|
 | 1651 |     ExEnv::out0()<<endl<<indent
 | 
|---|
 | 1652 |         <<scprintf("RHF energy [au]:                   %17.12lf\n", escf);
 | 
|---|
 | 1653 |     ExEnv::out0()<<indent
 | 
|---|
 | 1654 |         <<scprintf("MP2 correlation energy [au]:       %17.12lf\n", ecorr_mp2);
 | 
|---|
 | 1655 |     ExEnv::out0()<<indent
 | 
|---|
 | 1656 |         <<scprintf("MP2 energy [au]:                   %17.12lf\n", emp2);
 | 
|---|
 | 1657 |     ExEnv::out0().flush();
 | 
|---|
 | 1658 |     }
 | 
|---|
 | 1659 |   if (method_ && strcmp(method_,"mp")) {
 | 
|---|
 | 1660 |     ExEnv::out0() << indent
 | 
|---|
 | 1661 |          << "MBPT2: bad method for closed shell case: " << method_
 | 
|---|
 | 1662 |          << ", using mp" << endl;
 | 
|---|
 | 1663 |     }
 | 
|---|
 | 1664 |   set_energy(emp2);
 | 
|---|
 | 1665 |   set_actual_value_accuracy(reference_->actual_value_accuracy()
 | 
|---|
 | 1666 |                             *ref_to_mp2_acc);
 | 
|---|
 | 1667 | 
 | 
|---|
 | 1668 |   RefSCDimension nocc_act_dim(new SCDimension(nocc_act,1));
 | 
|---|
 | 1669 |   nocc_act_dim->blocks()->set_subdim(0, new SCDimension(nocc_act));
 | 
|---|
 | 1670 |   RefSCDimension nvir_act_dim(new SCDimension(nvir_act,1));
 | 
|---|
 | 1671 |   nvir_act_dim->blocks()->set_subdim(0, new SCDimension(nvir_act));
 | 
|---|
 | 1672 |   RefSCDimension nocc_dim(new SCDimension(nocc,1));
 | 
|---|
 | 1673 |   nocc_dim->blocks()->set_subdim(0, new SCDimension(nocc));
 | 
|---|
 | 1674 |   RefSCDimension nvir_dim(new SCDimension(nvir,1));
 | 
|---|
 | 1675 |   nvir_dim->blocks()->set_subdim(0, new SCDimension(nvir));
 | 
|---|
 | 1676 |   RefSCDimension nbasis_dim = ao_dimension()->blocks()->subdim(0);
 | 
|---|
 | 1677 |   RefSCDimension noso_dim(new SCDimension(noso,1));
 | 
|---|
 | 1678 | 
 | 
|---|
 | 1679 |   if (dograd || do_d1_) {
 | 
|---|
 | 1680 |     msg_->sum(Laj,nvir*nocc);
 | 
|---|
 | 1681 | 
 | 
|---|
 | 1682 |     RefSCMatrix T1_mat(nocc_act_dim, nvir_act_dim, kit);
 | 
|---|
 | 1683 |     // the elements of T1_mat are the single-substitution amplitudes
 | 
|---|
 | 1684 |     BiggestContribs biggest_t1(2,10);
 | 
|---|
 | 1685 |     // compute the S2 norm of Lee et al. (s2_diag)
 | 
|---|
 | 1686 |     double s2_diag = 0.0;
 | 
|---|
 | 1687 |     for (j=nfzc; j<nocc; j++) {
 | 
|---|
 | 1688 |       laj_ptr = &Laj[j*nvir];
 | 
|---|
 | 1689 |       for (a=0; a<nvir_act; a++) {
 | 
|---|
 | 1690 |         tmpval = 0.5**laj_ptr++/(evals[a+nocc]-evals[j]);
 | 
|---|
 | 1691 |         T1_mat.set_element(j-nfzc,a,tmpval);
 | 
|---|
 | 1692 |         biggest_t1.insert(tmpval,j,a);
 | 
|---|
 | 1693 |         s2_diag += tmpval*tmpval;
 | 
|---|
 | 1694 |         }
 | 
|---|
 | 1695 |       }
 | 
|---|
 | 1696 |     s2_diag = sqrt(s2_diag/(2*nocc_act));
 | 
|---|
 | 1697 |     // compute the T1 matrix 1-norm
 | 
|---|
 | 1698 |     double t1onenorm = 0.0;
 | 
|---|
 | 1699 |     Ref<SCElementSumAbs> sumabs = new SCElementSumAbs;
 | 
|---|
 | 1700 |     Ref<SCElementOp> genop = sumabs.pointer();
 | 
|---|
 | 1701 |     for (a=0; a < nvir_act; a++) {
 | 
|---|
 | 1702 |       sumabs->init();
 | 
|---|
 | 1703 |       T1_mat.get_column(a).element_op(genop);
 | 
|---|
 | 1704 |       if (t1onenorm < sumabs->result()) t1onenorm = sumabs->result();
 | 
|---|
 | 1705 |       }
 | 
|---|
 | 1706 |     // compute the T1 matrix inf-norm
 | 
|---|
 | 1707 |     double t1infnorm = 0.0;
 | 
|---|
 | 1708 |     for (j=0; j < nocc_act; j++) {
 | 
|---|
 | 1709 |       sumabs->init();
 | 
|---|
 | 1710 |       T1_mat.get_row(j).element_op(genop);
 | 
|---|
 | 1711 |       if (t1infnorm < sumabs->result()) t1infnorm = sumabs->result();
 | 
|---|
 | 1712 |       }
 | 
|---|
 | 1713 |     // compute the T1 matrix 2-norm ( = D1(MP2) )
 | 
|---|
 | 1714 |     RefSymmSCMatrix D1_mat(nocc_act_dim,kit);
 | 
|---|
 | 1715 |     D1_mat.assign(0.0);
 | 
|---|
 | 1716 |     D1_mat.accumulate_symmetric_product(T1_mat);
 | 
|---|
 | 1717 |     T1_mat = 0;
 | 
|---|
 | 1718 |     double d1_diag = sqrt(D1_mat.eigvals().get_element(nocc_act-1));
 | 
|---|
 | 1719 |     D1_mat = 0;
 | 
|---|
 | 1720 |     // print the norms
 | 
|---|
 | 1721 |     ExEnv::out0()<<endl;
 | 
|---|
 | 1722 |     ExEnv::out0()
 | 
|---|
 | 1723 |          <<indent<<scprintf("D1(MP2)                = %12.8f", d1_diag)
 | 
|---|
 | 1724 |          <<endl
 | 
|---|
 | 1725 |          <<indent<<scprintf("S2 matrix 1-norm       = %12.8f", t1onenorm)
 | 
|---|
 | 1726 |          <<endl
 | 
|---|
 | 1727 |          <<indent<<scprintf("S2 matrix inf-norm     = %12.8f", t1infnorm)
 | 
|---|
 | 1728 |          <<endl
 | 
|---|
 | 1729 |          <<indent<<scprintf("S2 diagnostic          = %12.8f", s2_diag)
 | 
|---|
 | 1730 |          <<endl;
 | 
|---|
 | 1731 |     if (biggest_t1.ncontrib()) {
 | 
|---|
 | 1732 |       ExEnv::out0() << endl
 | 
|---|
 | 1733 |            << indent << "Largest S2 values (unique determinants):" << endl;
 | 
|---|
 | 1734 |       }
 | 
|---|
 | 1735 |     for (i=0; i<biggest_t1.ncontrib(); i++) {
 | 
|---|
 | 1736 |       int i0 = biggest_t1.indices(i)[0];
 | 
|---|
 | 1737 |       int i1 = biggest_t1.indices(i)[1] + nocc;
 | 
|---|
 | 1738 |       ExEnv::out0()
 | 
|---|
 | 1739 |            << indent << scprintf("  %2d %12.8f  %3d %3s -> %4d %3s",
 | 
|---|
 | 1740 |                                  i+1, biggest_t1.val(i),
 | 
|---|
 | 1741 |                                  symorb_num_[i0]+1,
 | 
|---|
 | 1742 |                                  ct.gamma(symorb_irrep_[i0]).symbol(),
 | 
|---|
 | 1743 |                                  symorb_num_[i1]+1,
 | 
|---|
 | 1744 |                                  ct.gamma(symorb_irrep_[i1]).symbol())
 | 
|---|
 | 1745 |            << endl;
 | 
|---|
 | 1746 |       }
 | 
|---|
 | 1747 | 
 | 
|---|
 | 1748 |     } // if (dograd || do_d1_)
 | 
|---|
 | 1749 | 
 | 
|---|
 | 1750 |   for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 1751 |       delete e12thread[i];
 | 
|---|
 | 1752 |     }
 | 
|---|
 | 1753 |   delete[] e12thread;
 | 
|---|
 | 1754 | 
 | 
|---|
 | 1755 |   // quit here if only the energy is needed
 | 
|---|
 | 1756 |   if (!dograd) {
 | 
|---|
 | 1757 |     delete[] tbints_; tbints_ = 0;
 | 
|---|
 | 1758 |     if (do_d1_) delete[] Laj;
 | 
|---|
 | 1759 |     delete[] scf_vector;
 | 
|---|
 | 1760 |     delete[] scf_vector_dat;
 | 
|---|
 | 1761 |     delete[] evals;
 | 
|---|
 | 1762 |     tim_exit("mp2-mem");
 | 
|---|
 | 1763 |     return;
 | 
|---|
 | 1764 |     }
 | 
|---|
 | 1765 | 
 | 
|---|
 | 1766 |   // Accumulate intermediate gradients on node 0
 | 
|---|
 | 1767 |   sum_gradients(msg_, ginter, natom, 3);
 | 
|---|
 | 1768 | 
 | 
|---|
 | 1769 |   // Add intermediate gradients to the gradient on node 0
 | 
|---|
 | 1770 |   if (me==0)
 | 
|---|
 | 1771 |     accum_gradients(gradient, ginter, natom, 3);
 | 
|---|
 | 1772 | 
 | 
|---|
 | 1773 |   // Print out contribution to the gradient from non-sep. 2PDM
 | 
|---|
 | 1774 |   if (debug_) {
 | 
|---|
 | 1775 |     print_natom_3(ginter,
 | 
|---|
 | 1776 |               "Contribution to MP2 gradient from non-separable 2PDM [au]:");
 | 
|---|
 | 1777 |     }
 | 
|---|
 | 1778 | 
 | 
|---|
 | 1779 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 1780 |   // Add contributions from all nodes to various matrices
 | 
|---|
 | 1781 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 1782 |   tmpint = (nvir > nocc ? nvir:nocc);
 | 
|---|
 | 1783 |   double *tmpmat = new double[tmpint*tmpint];
 | 
|---|
 | 1784 |   msg_->sum(Pkj,nocc*(nocc+1)/2,tmpmat); // Pkj is now complete
 | 
|---|
 | 1785 |   msg_->sum(Pab,nvir*(nvir+1)/2,tmpmat); // Pab is now complete
 | 
|---|
 | 1786 |   msg_->sum(Wab,nvir*nvir,tmpmat);
 | 
|---|
 | 1787 |   msg_->sum(Wkj,nocc*nocc,tmpmat);
 | 
|---|
 | 1788 |   msg_->sum(Waj,nvir*nocc,tmpmat);
 | 
|---|
 | 1789 |   if (do_d2_) {
 | 
|---|
 | 1790 |     msg_->sum(d2occ_mat,nocc_act*(nocc_act+1)/2,tmpmat); 
 | 
|---|
 | 1791 |     msg_->sum(d2vir_mat,nvir_act*(nvir_act+1)/2,tmpmat); 
 | 
|---|
 | 1792 |     }
 | 
|---|
 | 1793 |   delete[] tmpmat;
 | 
|---|
 | 1794 | 
 | 
|---|
 | 1795 |   // Compute D2 diagnostic (d2_diag) from matrices d2_occ_mat and d2_vir_mat
 | 
|---|
 | 1796 |   if (do_d2_) {
 | 
|---|
 | 1797 |     RefSymmSCMatrix D2occ_mat(nocc_act_dim, kit);
 | 
|---|
 | 1798 |     RefSymmSCMatrix D2vir_mat(nvir_act_dim,kit);
 | 
|---|
 | 1799 |     D2occ_mat->assign(d2occ_mat);
 | 
|---|
 | 1800 |     D2vir_mat->assign(d2vir_mat);
 | 
|---|
 | 1801 |     d2o = sqrt(D2occ_mat.eigvals().get_element(nocc_act-1));
 | 
|---|
 | 1802 |     d2v = sqrt(D2vir_mat.eigvals().get_element(nvir_act-1));
 | 
|---|
 | 1803 |     d2_diag = (d2o > d2v ? d2o:d2v);
 | 
|---|
 | 1804 |     ExEnv::out0() << endl
 | 
|---|
 | 1805 |          << indent <<  scprintf("D2(MP1) = %12.8f", d2_diag) << endl << endl;
 | 
|---|
 | 1806 |     delete[] d2occ_mat;
 | 
|---|
 | 1807 |     delete[] d2vir_mat;
 | 
|---|
 | 1808 |   }
 | 
|---|
 | 1809 | 
 | 
|---|
 | 1810 |   // Finish computation of Wab
 | 
|---|
 | 1811 |   tim_enter("Pab and Wab");
 | 
|---|
 | 1812 |   pab_ptr = Pab;
 | 
|---|
 | 1813 |   for (a=0; a<nvir_act; a++) {  // active-active part of Wab
 | 
|---|
 | 1814 |     wba_ptr = &Wab[a];
 | 
|---|
 | 1815 |     wab_ptr = &Wab[a*nvir];
 | 
|---|
 | 1816 |     for (b=0; b<=a; b++) {
 | 
|---|
 | 1817 |       if (a==b) {
 | 
|---|
 | 1818 |         *wab_ptr -= evals[nocc+a]**pab_ptr;
 | 
|---|
 | 1819 |         }
 | 
|---|
 | 1820 |       else {
 | 
|---|
 | 1821 |         *wab_ptr -= evals[nocc+a]**pab_ptr;
 | 
|---|
 | 1822 |         *wba_ptr -= evals[nocc+b]**pab_ptr;
 | 
|---|
 | 1823 |         } 
 | 
|---|
 | 1824 |       pab_ptr++;
 | 
|---|
 | 1825 |       wab_ptr++;
 | 
|---|
 | 1826 |       wba_ptr += nvir;
 | 
|---|
 | 1827 |       } // exit b loop
 | 
|---|
 | 1828 |     }   // exit a loop
 | 
|---|
 | 1829 |   for (a=0; a<nfzv; a++) {  // active-frozen part of Wab
 | 
|---|
 | 1830 |     wba_ptr = &Wab[nvir_act+a];
 | 
|---|
 | 1831 |     wab_ptr = &Wab[(nvir_act+a)*nvir];
 | 
|---|
 | 1832 |     pab_ptr = &Pab[(nvir_act+a)*(nvir_act+a+1)/2];
 | 
|---|
 | 1833 |     for (b=0; b<nvir_act; b++) {
 | 
|---|
 | 1834 |       *wab_ptr -= evals[nocc+b]**pab_ptr;
 | 
|---|
 | 1835 |       *wba_ptr -= evals[nocc+b]**pab_ptr;
 | 
|---|
 | 1836 |       pab_ptr++;
 | 
|---|
 | 1837 |       wab_ptr++;
 | 
|---|
 | 1838 |       wba_ptr += nvir;
 | 
|---|
 | 1839 |       } // exit b loop
 | 
|---|
 | 1840 |     }   // exit a loop
 | 
|---|
 | 1841 |   // Wab is now complete
 | 
|---|
 | 1842 |   tim_exit("Pab and Wab");
 | 
|---|
 | 1843 |   RefSCMatrix Wab_matrix(nvir_dim, nvir_dim, kit);
 | 
|---|
 | 1844 |   Wab_matrix->assign(Wab); // Put elements of Wab into Wab_matrix
 | 
|---|
 | 1845 |   free(Wab);
 | 
|---|
 | 1846 | 
 | 
|---|
 | 1847 |   // Update Wkj with contribution from Pkj
 | 
|---|
 | 1848 |   tim_enter("Pkj and Wkj");
 | 
|---|
 | 1849 |   pkj_ptr = Pkj;
 | 
|---|
 | 1850 |   for (k=0; k<nocc; k++) {
 | 
|---|
 | 1851 |     wjk_ptr = &Wkj[k];
 | 
|---|
 | 1852 |     wkj_ptr = &Wkj[k*nocc];
 | 
|---|
 | 1853 |     for (j=0; j<=k; j++) {
 | 
|---|
 | 1854 |       if (k<nfzc && j<nfzc) {   // don't want both j and k frozen
 | 
|---|
 | 1855 |         wkj_ptr++;
 | 
|---|
 | 1856 |         wjk_ptr += nocc;
 | 
|---|
 | 1857 |         pkj_ptr++;
 | 
|---|
 | 1858 |         continue;
 | 
|---|
 | 1859 |         }
 | 
|---|
 | 1860 |       if (j==k) {
 | 
|---|
 | 1861 |         *wkj_ptr++ -= evals[k]**pkj_ptr++;
 | 
|---|
 | 1862 |         }
 | 
|---|
 | 1863 |       else if (j<nfzc) {
 | 
|---|
 | 1864 |         *wkj_ptr++ -= evals[k]**pkj_ptr;
 | 
|---|
 | 1865 |         *wjk_ptr   -= evals[k]**pkj_ptr;
 | 
|---|
 | 1866 |         pkj_ptr++;
 | 
|---|
 | 1867 |         }
 | 
|---|
 | 1868 |       else {
 | 
|---|
 | 1869 |         *wkj_ptr++ -= evals[k]**pkj_ptr;
 | 
|---|
 | 1870 |         *wjk_ptr   -= evals[j]**pkj_ptr;
 | 
|---|
 | 1871 |         pkj_ptr++;
 | 
|---|
 | 1872 |         }
 | 
|---|
 | 1873 |       wjk_ptr += nocc;
 | 
|---|
 | 1874 |       }  // exit j loop
 | 
|---|
 | 1875 |     }    // exit k loop
 | 
|---|
 | 1876 |   tim_exit("Pkj and Wkj");
 | 
|---|
 | 1877 | 
 | 
|---|
 | 1878 |   /////////////////////////////////
 | 
|---|
 | 1879 |   // Finish the computation of Laj
 | 
|---|
 | 1880 |   /////////////////////////////////
 | 
|---|
 | 1881 | 
 | 
|---|
 | 1882 |   tim_enter("Laj");
 | 
|---|
 | 1883 | 
 | 
|---|
 | 1884 |   RefSCMatrix Cv(nbasis_dim, nvir_dim, kit); // virtual block of scf_vector
 | 
|---|
 | 1885 |   RefSCMatrix Co(nbasis_dim, nocc_dim, kit); // occupied block of scf_vector
 | 
|---|
 | 1886 |   for (p=0; p<nbasis; p++) {
 | 
|---|
 | 1887 |     c_pq = scf_vector[p];
 | 
|---|
 | 1888 |     for (q=0; q<noso; q++) {
 | 
|---|
 | 1889 |       if (q<nocc) Co->set_element(p, q, *c_pq++);
 | 
|---|
 | 1890 |       else Cv->set_element(p, q-nocc, *c_pq++);
 | 
|---|
 | 1891 |       }
 | 
|---|
 | 1892 |     }
 | 
|---|
 | 1893 | 
 | 
|---|
 | 1894 |   // Compute the density-like Dmat_matrix
 | 
|---|
 | 1895 |   // (Cv*Pab_matrix*Cv.t() + Co*Pkj_matrix*Co.t())
 | 
|---|
 | 1896 |   RefSymmSCMatrix Pab_matrix(nvir_dim,kit);
 | 
|---|
 | 1897 |   RefSymmSCMatrix Pkj_matrix(nocc_dim,kit);
 | 
|---|
 | 1898 |   RefSymmSCMatrix Dmat_matrix(nbasis_dim,kit);
 | 
|---|
 | 1899 |   Pab_matrix->assign(Pab); // fill in elements of Pab_matrix from Pab
 | 
|---|
 | 1900 |   free(Pab);
 | 
|---|
 | 1901 |   Pkj_matrix->assign(Pkj); // fill in elements of Pkj_matrix from Pkj
 | 
|---|
 | 1902 |   free(Pkj);
 | 
|---|
 | 1903 |   Dmat_matrix.assign(0.0);
 | 
|---|
 | 1904 |   Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
 | 
|---|
 | 1905 |   Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
 | 
|---|
 | 1906 |   // We now have the density-like matrix Dmat_matrix
 | 
|---|
 | 1907 | 
 | 
|---|
 | 1908 |   // Compute the G matrix
 | 
|---|
 | 1909 | 
 | 
|---|
 | 1910 |   RefSymmSCMatrix Gmat(nbasis_dim,kit);
 | 
|---|
 | 1911 |   init_cs_gmat();
 | 
|---|
 | 1912 |   tim_enter("make_gmat for Laj");
 | 
|---|
 | 1913 |   make_cs_gmat_new(Gmat, Dmat_matrix); 
 | 
|---|
 | 1914 |   if (debug_ > 1) {
 | 
|---|
 | 1915 |     Dmat_matrix.print("Dmat");
 | 
|---|
 | 1916 |     Gmat.print("Gmat");
 | 
|---|
 | 1917 |     }
 | 
|---|
 | 1918 |   tim_exit("make_gmat for Laj");
 | 
|---|
 | 1919 | 
 | 
|---|
 | 1920 |   // Finish computation of Laj
 | 
|---|
 | 1921 |   RefSCMatrix Laj_matrix(nocc_dim,nvir_dim,kit); // elements are ordered as j*nvir+a
 | 
|---|
 | 1922 |   Laj_matrix->assign(Laj);
 | 
|---|
 | 1923 |   if (debug_ > 1) Laj_matrix->print("Laj (first bit)");
 | 
|---|
 | 1924 |   Laj_matrix = Laj_matrix - 2*Co.t()*Gmat*Cv;
 | 
|---|
 | 1925 |   if (debug_ > 1) Laj_matrix->print("Laj (all of it)");
 | 
|---|
 | 1926 |   Laj_matrix->convert(Laj);  // Put new Laj_matrix elements into Laj
 | 
|---|
 | 1927 | 
 | 
|---|
 | 1928 |   tim_exit("Laj");
 | 
|---|
 | 1929 | 
 | 
|---|
 | 1930 |   //////////////////////////////////////
 | 
|---|
 | 1931 |   // Computation of Laj is now complete
 | 
|---|
 | 1932 |   //////////////////////////////////////
 | 
|---|
 | 1933 | 
 | 
|---|
 | 1934 |   ////////////////////////////
 | 
|---|
 | 1935 |   // Solve the CPHF equations
 | 
|---|
 | 1936 |   ////////////////////////////
 | 
|---|
 | 1937 |   RefSCMatrix Paj_matrix(nvir_dim, nocc_dim, kit);
 | 
|---|
 | 1938 |   tim_enter("cphf");
 | 
|---|
 | 1939 |   cs_cphf(scf_vector, Laj, evals, Paj_matrix);
 | 
|---|
 | 1940 |   tim_exit("cphf");
 | 
|---|
 | 1941 | 
 | 
|---|
 | 1942 |   free(Laj);
 | 
|---|
 | 1943 | 
 | 
|---|
 | 1944 |   // Finish computation of Waj
 | 
|---|
 | 1945 |   for (a=0; a<nvir; a++) {
 | 
|---|
 | 1946 |     waj_ptr = &Waj[a];
 | 
|---|
 | 1947 |     for (j=0; j<nocc; j++) {
 | 
|---|
 | 1948 |       *waj_ptr -= evals[j]*Paj_matrix->get_element(a,j);
 | 
|---|
 | 1949 |       waj_ptr += nvir;
 | 
|---|
 | 1950 |       }
 | 
|---|
 | 1951 |     }
 | 
|---|
 | 1952 |   // Waj is now complete
 | 
|---|
 | 1953 |   RefSCMatrix Waj_matrix(nocc_dim, nvir_dim, kit);
 | 
|---|
 | 1954 |   Waj_matrix->assign(Waj); // Put elements of Waj into Waj_matrix
 | 
|---|
 | 1955 |   // NB. Waj_matrix elements are ordered as j*nvir+a
 | 
|---|
 | 1956 |   free(Waj);
 | 
|---|
 | 1957 | 
 | 
|---|
 | 1958 | 
 | 
|---|
 | 1959 |   // Finish computation of Wkj
 | 
|---|
 | 1960 |   tim_enter("Pkj and Wkj");
 | 
|---|
 | 1961 |   // Compute Dmat_matrix =
 | 
|---|
 | 1962 |   // Co*Pkj_matrix*Co.t() + Co*Paj_matrix.t()*Cv.t()
 | 
|---|
 | 1963 |   // + Cv*Paj_matrix*Co.t() + Cv*Pab_matrix*Cv.t();
 | 
|---|
 | 1964 |   Dmat_matrix.assign(0.0);
 | 
|---|
 | 1965 |   Dmat_matrix.accumulate_symmetric_sum(Cv*Paj_matrix*Co.t());
 | 
|---|
 | 1966 |   Dmat_matrix.accumulate_transform(Co,Pkj_matrix);
 | 
|---|
 | 1967 |   Dmat_matrix.accumulate_transform(Cv,Pab_matrix);
 | 
|---|
 | 1968 |   tim_enter("make_gmat for Wkj");
 | 
|---|
 | 1969 |   make_cs_gmat_new(Gmat, Dmat_matrix);
 | 
|---|
 | 1970 |   tim_exit("make_gmat for Wkj");
 | 
|---|
 | 1971 |   done_cs_gmat();
 | 
|---|
 | 1972 |   for (i=0; i<thr_->nthread(); i++) tbints_[i] = 0;
 | 
|---|
 | 1973 |   delete[] tbints_; tbints_ = 0;
 | 
|---|
 | 1974 |   RefSCMatrix Wkj_matrix(nocc_dim, nocc_dim, kit);
 | 
|---|
 | 1975 |   Wkj_matrix->assign(Wkj);
 | 
|---|
 | 1976 |   Wkj_matrix = Wkj_matrix - 2*Co.t()*Gmat*Co;
 | 
|---|
 | 1977 |   free(Wkj);
 | 
|---|
 | 1978 |   // Wkj is now complete - not as Wkj but as Wkj_matrix
 | 
|---|
 | 1979 |   tim_exit("Pkj and Wkj");
 | 
|---|
 | 1980 | 
 | 
|---|
 | 1981 |   ////////////////////////////////////////////////////////////////
 | 
|---|
 | 1982 |   // We now have the matrices Pkj_matrix, Paj_matrix, Pab_matrix,
 | 
|---|
 | 1983 |   // Wkj_matrix, Waj_matrix, Wab_matrix and can compute the 
 | 
|---|
 | 1984 |   // remaining contributions to the gradient
 | 
|---|
 | 1985 |   ///////////////////////////////////////////////////////////////
 | 
|---|
 | 1986 | 
 | 
|---|
 | 1987 |   // Compute the second order correction to 
 | 
|---|
 | 1988 |   // the density matrix and energy weighted
 | 
|---|
 | 1989 |   // density matrix in the AO basis
 | 
|---|
 | 1990 |   RefSCMatrix P2AO_matrix(nbasis_dim, nbasis_dim, kit);
 | 
|---|
 | 1991 |   RefSCMatrix P2MO_matrix(noso_dim, noso_dim, kit);
 | 
|---|
 | 1992 |   RefSCMatrix W2AO_matrix(nbasis_dim, nbasis_dim, kit);
 | 
|---|
 | 1993 |   RefSCMatrix W2MO_matrix(noso_dim, noso_dim, kit);
 | 
|---|
 | 1994 |   RefSCMatrix SCF_matrix(nbasis_dim, noso_dim, kit);
 | 
|---|
 | 1995 |   for (i=0; i<nocc; i++) {
 | 
|---|
 | 1996 |     for (j=0; j<nocc; j++) {
 | 
|---|
 | 1997 |       P2MO_matrix->set_element(i,j,Pkj_matrix->get_element(i,j));
 | 
|---|
 | 1998 |       W2MO_matrix->set_element(i,j,Wkj_matrix->get_element(i,j));
 | 
|---|
 | 1999 |       }
 | 
|---|
 | 2000 |     for (j=nocc; j<noso; j++) {
 | 
|---|
 | 2001 |       P2MO_matrix->set_element(i,j,Paj_matrix->get_element(j-nocc,i));
 | 
|---|
 | 2002 |       W2MO_matrix->set_element(i,j,Waj_matrix->get_element(i,j-nocc));
 | 
|---|
 | 2003 |       }
 | 
|---|
 | 2004 |     }
 | 
|---|
 | 2005 |   for (i=nocc; i<noso; i++) {
 | 
|---|
 | 2006 |     for (j=0; j<nocc; j++) {
 | 
|---|
 | 2007 |       P2MO_matrix->set_element(i,j,Paj_matrix->get_element(i-nocc,j));
 | 
|---|
 | 2008 |       W2MO_matrix->set_element(i,j,Waj_matrix->get_element(j,i-nocc));
 | 
|---|
 | 2009 |       }
 | 
|---|
 | 2010 |     for (j=nocc; j<noso; j++) {
 | 
|---|
 | 2011 |       P2MO_matrix->set_element(i,j,Pab_matrix->get_element(i-nocc,j-nocc));
 | 
|---|
 | 2012 |       W2MO_matrix->set_element(i,j,Wab_matrix->get_element(i-nocc,j-nocc));
 | 
|---|
 | 2013 |       }
 | 
|---|
 | 2014 |     }
 | 
|---|
 | 2015 |   for (i=0; i<nbasis; i++) {
 | 
|---|
 | 2016 |     for (j=0; j<nocc; j++) {
 | 
|---|
 | 2017 |       SCF_matrix->set_element(i,j,Co->get_element(i,j));
 | 
|---|
 | 2018 |       }
 | 
|---|
 | 2019 |     for (j=nocc; j<noso; j++) {
 | 
|---|
 | 2020 |       SCF_matrix->set_element(i,j,Cv->get_element(i,j-nocc));
 | 
|---|
 | 2021 |       }
 | 
|---|
 | 2022 |     }
 | 
|---|
 | 2023 |   P2AO_matrix = SCF_matrix * P2MO_matrix * SCF_matrix.t();
 | 
|---|
 | 2024 |   W2AO_matrix = SCF_matrix * W2MO_matrix * SCF_matrix.t();
 | 
|---|
 | 2025 | //  P2AO_matrix = Co*(Pkj_matrix*Co.t() + Paj_matrix.t()*Cv.t()) +
 | 
|---|
 | 2026 | //                Cv*(Paj_matrix*Co.t() + Pab_matrix*Cv.t());
 | 
|---|
 | 2027 | //  W2AO_matrix = Co*(Wkj_matrix*Co.t() + Waj_matrix*Cv.t()) +
 | 
|---|
 | 2028 | //                Cv*(Waj_matrix.t()*Co.t() + Wab_matrix*Cv.t());
 | 
|---|
 | 2029 | 
 | 
|---|
 | 2030 |   if (debug_ > 1) {
 | 
|---|
 | 2031 |     SCF_matrix.print("SCF_matrix");
 | 
|---|
 | 2032 |     P2MO_matrix.print("P2MO_matrix");
 | 
|---|
 | 2033 |     W2MO_matrix.print("W2MO_matrix");
 | 
|---|
 | 2034 |     P2AO_matrix.print("P2AO_matrix");
 | 
|---|
 | 2035 |     W2AO_matrix.print("W2AO_matrix");
 | 
|---|
 | 2036 |     }
 | 
|---|
 | 2037 | 
 | 
|---|
 | 2038 |   // Convert P2AO_matrix and W2AO_matrix to double*
 | 
|---|
 | 2039 |   P2AO = new double[nbasis*nbasis];
 | 
|---|
 | 2040 |   W2AO = new double[nbasis*nbasis];
 | 
|---|
 | 2041 |   P2AO_matrix->convert(P2AO);
 | 
|---|
 | 2042 |   W2AO_matrix->convert(W2AO);
 | 
|---|
 | 2043 | 
 | 
|---|
 | 2044 |   // Compute the HF density matrix and
 | 
|---|
 | 2045 |   // energy weighted density matrix
 | 
|---|
 | 2046 |   PHF = new double[nbasis*nbasis];
 | 
|---|
 | 2047 |   WHF = new double[nbasis*nbasis];
 | 
|---|
 | 2048 |   phf_ptr = PHF;
 | 
|---|
 | 2049 |   whf_ptr = WHF;
 | 
|---|
 | 2050 |   for (p=0; p<nbasis; p++) {
 | 
|---|
 | 2051 |     for (q=0; q<nbasis; q++) {
 | 
|---|
 | 2052 |       *phf_ptr++ = 0.0;
 | 
|---|
 | 2053 |       *whf_ptr++ = 0.0;
 | 
|---|
 | 2054 |       }
 | 
|---|
 | 2055 |     }
 | 
|---|
 | 2056 |   phf_ptr = PHF;
 | 
|---|
 | 2057 |   whf_ptr = WHF;
 | 
|---|
 | 2058 |   for (p=0; p<nbasis; p++) {
 | 
|---|
 | 2059 |     for (q=0; q<nbasis; q++) {
 | 
|---|
 | 2060 |       c_pi = scf_vector[p];
 | 
|---|
 | 2061 |       c_qi = scf_vector[q];
 | 
|---|
 | 2062 |       for (i=0; i<nocc; i++) {
 | 
|---|
 | 2063 |         tmpval = 2* *c_pi++ * *c_qi++;
 | 
|---|
 | 2064 |         *phf_ptr += tmpval;
 | 
|---|
 | 2065 |         *whf_ptr -= evals[i] * tmpval;
 | 
|---|
 | 2066 |         } // exit i loop
 | 
|---|
 | 2067 |       phf_ptr++;
 | 
|---|
 | 2068 |       whf_ptr++;
 | 
|---|
 | 2069 |       }   // exit q loop
 | 
|---|
 | 2070 |     }     // exit p loop
 | 
|---|
 | 2071 | 
 | 
|---|
 | 2072 |   // Compute the MP2 density and energy weighted density
 | 
|---|
 | 2073 | 
 | 
|---|
 | 2074 |   // PMP2 = PHF + P2AO; WMP2 = WHF + W2AO
 | 
|---|
 | 2075 |   PMP2 = new double[nbasis*nbasis];
 | 
|---|
 | 2076 |   WMP2 = new double[nbasis*nbasis];
 | 
|---|
 | 2077 |   // Initialize PMP2 and WMP2
 | 
|---|
 | 2078 |   pmp2_ptr = PMP2;
 | 
|---|
 | 2079 |   wmp2_ptr = WMP2;
 | 
|---|
 | 2080 |   for (p=0; p<nbasis; p++) {
 | 
|---|
 | 2081 |     for (q=0; q<nbasis; q++) {
 | 
|---|
 | 2082 |       *pmp2_ptr++ = 0.0;
 | 
|---|
 | 2083 |       *wmp2_ptr++ = 0.0;
 | 
|---|
 | 2084 |       }
 | 
|---|
 | 2085 |     }
 | 
|---|
 | 2086 |   pmp2_ptr = PMP2;
 | 
|---|
 | 2087 |   wmp2_ptr = WMP2;
 | 
|---|
 | 2088 |   p2ao_ptr = P2AO;
 | 
|---|
 | 2089 |   w2ao_ptr = W2AO;
 | 
|---|
 | 2090 |   phf_ptr = PHF;
 | 
|---|
 | 2091 |   whf_ptr = WHF;
 | 
|---|
 | 2092 |   for (p=0; p<nbasis; p++) {
 | 
|---|
 | 2093 |     for (q=0; q<nbasis; q++) {
 | 
|---|
 | 2094 |       *pmp2_ptr++ = *phf_ptr++ + *p2ao_ptr++;
 | 
|---|
 | 2095 |       *wmp2_ptr++ = *whf_ptr++ + *w2ao_ptr++;
 | 
|---|
 | 2096 |       }
 | 
|---|
 | 2097 |     }
 | 
|---|
 | 2098 |   delete[] W2AO;
 | 
|---|
 | 2099 | 
 | 
|---|
 | 2100 |   if (debug_ > 1) {
 | 
|---|
 | 2101 |     RefSCMatrix tmpmat(ao_dimension(), ao_dimension(), kit);
 | 
|---|
 | 2102 |     tmpmat->assign(PMP2);
 | 
|---|
 | 2103 |     tmpmat.print("PMP2");
 | 
|---|
 | 2104 |     tmpmat->assign(P2AO);
 | 
|---|
 | 2105 |     tmpmat.print("P2AO");
 | 
|---|
 | 2106 |     tmpmat->assign(PHF);
 | 
|---|
 | 2107 |     tmpmat.print("PHF");
 | 
|---|
 | 2108 |     tmpmat->assign(WMP2);
 | 
|---|
 | 2109 |     tmpmat.print("WMP2");
 | 
|---|
 | 2110 |     }
 | 
|---|
 | 2111 | 
 | 
|---|
 | 2112 |   ////////////////////////////////////////////////
 | 
|---|
 | 2113 |   // Compute the contribution to the MP2 gradient 
 | 
|---|
 | 2114 |   // from the separable part of the 2PDM
 | 
|---|
 | 2115 |   ////////////////////////////////////////////////
 | 
|---|
 | 2116 | 
 | 
|---|
 | 2117 |   zero_gradients(ginter, natom, 3);
 | 
|---|
 | 2118 |   zero_gradients(hf_ginter, natom, 3);
 | 
|---|
 | 2119 |   tim_enter("sep 2PDM contrib.");
 | 
|---|
 | 2120 | 
 | 
|---|
 | 2121 |   CSGradS2PDM** s2pdmthread = new CSGradS2PDM*[thr_->nthread()];
 | 
|---|
 | 2122 |   for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 2123 |       s2pdmthread[i] = new CSGradS2PDM(i, thr_->nthread(), me, nproc,
 | 
|---|
 | 2124 |                                        lock, basis(), tbintder_[i],
 | 
|---|
 | 2125 |                                        PHF, P2AO, tol, debug_, dynamic_);
 | 
|---|
 | 2126 |       thr_->add_thread(i,s2pdmthread[i]);
 | 
|---|
 | 2127 | #     if SINGLE_THREAD_S2PDM
 | 
|---|
 | 2128 |       s2pdmthread[i]->run();
 | 
|---|
 | 2129 | #     endif
 | 
|---|
 | 2130 |     }
 | 
|---|
 | 2131 | # if !SINGLE_THREAD_S2PDM
 | 
|---|
 | 2132 |   thr_->start_threads();
 | 
|---|
 | 2133 |   thr_->wait_threads();
 | 
|---|
 | 2134 | # endif
 | 
|---|
 | 2135 |   for (i=0; i<thr_->nthread(); i++) {
 | 
|---|
 | 2136 |       s2pdmthread[i]->accum_mp2_contrib(ginter);
 | 
|---|
 | 2137 |       s2pdmthread[i]->accum_hf_contrib(hf_ginter);
 | 
|---|
 | 2138 |       delete s2pdmthread[i];
 | 
|---|
 | 2139 |     }
 | 
|---|
 | 2140 |   sum_gradients(msg_, ginter, molecule()->natom(), 3);
 | 
|---|
 | 2141 |   sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
 | 
|---|
 | 2142 |   delete[] s2pdmthread;
 | 
|---|
 | 2143 | 
 | 
|---|
 | 2144 |   tim_exit("sep 2PDM contrib.");
 | 
|---|
 | 2145 |   delete[] P2AO;
 | 
|---|
 | 2146 | 
 | 
|---|
 | 2147 |   // The separable 2PDM contribution to the gradient has now been
 | 
|---|
 | 2148 |   // accumulated in ginter on node 0; add it to the total gradients
 | 
|---|
 | 2149 |   if (me == 0) {
 | 
|---|
 | 2150 |     accum_gradients(gradient, ginter, natom, 3);
 | 
|---|
 | 2151 |     accum_gradients(hf_gradient, hf_ginter, natom, 3);
 | 
|---|
 | 2152 |     }
 | 
|---|
 | 2153 |   // Print out the contribution to the gradient from sep. 2PDM
 | 
|---|
 | 2154 |   if (debug_) {
 | 
|---|
 | 2155 |     print_natom_3(hf_ginter,
 | 
|---|
 | 2156 |                   "Contribution from separable 2PDM to HF gradient [au]:");
 | 
|---|
 | 2157 |     print_natom_3(ginter,
 | 
|---|
 | 2158 |                   "Contribution from separable 2PDM to MP2 gradient [au]:");
 | 
|---|
 | 2159 |     }
 | 
|---|
 | 2160 | 
 | 
|---|
 | 2161 |   // Done with two-electron integrals
 | 
|---|
 | 2162 |   tbint_ = 0;
 | 
|---|
 | 2163 |   if (dograd || do_d1_) {
 | 
|---|
 | 2164 |     delete[] tbintder_;
 | 
|---|
 | 2165 |     tbintder_ = 0;
 | 
|---|
 | 2166 |     }
 | 
|---|
 | 2167 | 
 | 
|---|
 | 2168 |   /////////////////////////////////////////////////////////////
 | 
|---|
 | 2169 |   // Compute the one-electron contribution to the MP2 gradient
 | 
|---|
 | 2170 |   /////////////////////////////////////////////////////////////
 | 
|---|
 | 2171 | 
 | 
|---|
 | 2172 |   zero_gradients(ginter, natom, 3);
 | 
|---|
 | 2173 |   zero_gradients(hf_ginter, natom, 3);
 | 
|---|
 | 2174 |   tim_enter("hcore contrib.");
 | 
|---|
 | 2175 |   hcore_cs_grad(PHF, PMP2, hf_ginter, ginter);
 | 
|---|
 | 2176 |   tim_exit("hcore contrib.");
 | 
|---|
 | 2177 |   delete[] PHF;
 | 
|---|
 | 2178 |   delete[] PMP2;
 | 
|---|
 | 2179 |   // The hcore contribution to the gradient has now been accumulated
 | 
|---|
 | 2180 |   // in ginter on node 0; add it to the total gradients
 | 
|---|
 | 2181 |   if (me == 0) {
 | 
|---|
 | 2182 |     accum_gradients(gradient, ginter, natom, 3);
 | 
|---|
 | 2183 |     accum_gradients(hf_gradient, hf_ginter, natom, 3);
 | 
|---|
 | 2184 |     }
 | 
|---|
 | 2185 |   // Print out the contribution to the gradient from hcore
 | 
|---|
 | 2186 |   if (debug_) {
 | 
|---|
 | 2187 |     print_natom_3(hf_ginter, "Contribution to HF gradient from hcore [au]:");
 | 
|---|
 | 2188 |     print_natom_3(ginter, "Contribution to MP2 gradient from hcore [au]:");
 | 
|---|
 | 2189 |     }
 | 
|---|
 | 2190 | 
 | 
|---|
 | 2191 |   zero_gradients(ginter, natom, 3);
 | 
|---|
 | 2192 |   zero_gradients(hf_ginter, natom, 3);
 | 
|---|
 | 2193 |   tim_enter("overlap contrib.");
 | 
|---|
 | 2194 |   overlap_cs_grad(WHF, WMP2, hf_ginter, ginter);
 | 
|---|
 | 2195 |   delete[] WHF;
 | 
|---|
 | 2196 |   tim_exit("overlap contrib.");
 | 
|---|
 | 2197 |   delete[] WMP2;
 | 
|---|
 | 2198 |   // The overlap contribution to the gradient has now been accumulated
 | 
|---|
 | 2199 |   // in ginter on node 0; add it to the total gradients
 | 
|---|
 | 2200 |   if (me == 0) {
 | 
|---|
 | 2201 |       accum_gradients(gradient, ginter, natom, 3);
 | 
|---|
 | 2202 |       accum_gradients(hf_gradient, hf_ginter, natom, 3);
 | 
|---|
 | 2203 |     }
 | 
|---|
 | 2204 | 
 | 
|---|
 | 2205 |   // Print out the overlap contribution to the gradient
 | 
|---|
 | 2206 |   if (debug_) {
 | 
|---|
 | 2207 |     print_natom_3(hf_ginter, "Overlap contribution to HF gradient [au]:");
 | 
|---|
 | 2208 |     print_natom_3(ginter,"Overlap contribution to MP2 gradient [au]:");
 | 
|---|
 | 2209 |     }
 | 
|---|
 | 2210 | 
 | 
|---|
 | 2211 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 2212 |   // Compute the nuclear contribution to the MP2 gradient
 | 
|---|
 | 2213 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 2214 | 
 | 
|---|
 | 2215 |   if (me == 0) {
 | 
|---|
 | 2216 |     nuclear_repulsion_energy_gradient(ginter);
 | 
|---|
 | 2217 |     accum_gradients(gradient, ginter, natom, 3);
 | 
|---|
 | 2218 |     accum_gradients(hf_gradient, ginter, natom, 3);
 | 
|---|
 | 2219 | 
 | 
|---|
 | 2220 |     }
 | 
|---|
 | 2221 | 
 | 
|---|
 | 2222 |   // Print out the nuclear contribution to the gradient
 | 
|---|
 | 2223 |   if (debug_)
 | 
|---|
 | 2224 |     print_natom_3(ginter,"Nuclear contribution to MP2 gradient [au]:");
 | 
|---|
 | 2225 | 
 | 
|---|
 | 2226 | 
 | 
|---|
 | 2227 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 2228 |   // The computation of the MP2 gradient is now complete;
 | 
|---|
 | 2229 |   // print out the gradient
 | 
|---|
 | 2230 |   ////////////////////////////////////////////////////////
 | 
|---|
 | 2231 |   if (debug_) {
 | 
|---|
 | 2232 |     ExEnv::out0() << "Obtaining HF gradient" << endl;
 | 
|---|
 | 2233 |     print_natom_3(ref()->gradient(),"HF gradient from HF");
 | 
|---|
 | 2234 |     print_natom_3(hf_gradient,"Total HF gradient from MP2 [au]:");
 | 
|---|
 | 2235 |     }
 | 
|---|
 | 2236 |   print_natom_3(gradient,"Total MP2 gradient [au]:");
 | 
|---|
 | 2237 | 
 | 
|---|
 | 2238 |   msg_->bcast(gradient_dat, natom*3);
 | 
|---|
 | 2239 |   RefSCVector gradientvec = matrixkit()->vector(moldim());
 | 
|---|
 | 2240 |   gradientvec->assign(gradient_dat);
 | 
|---|
 | 2241 |   set_gradient(gradientvec);
 | 
|---|
 | 2242 | 
 | 
|---|
 | 2243 |   msg_->bcast(hf_gradient_dat, natom*3);
 | 
|---|
 | 2244 |   hf_gradient_ = matrixkit()->vector(moldim());
 | 
|---|
 | 2245 |   hf_gradient_->assign(hf_gradient_dat);
 | 
|---|
 | 2246 | 
 | 
|---|
 | 2247 |   delete[] gradient;
 | 
|---|
 | 2248 |   delete[] gradient_dat;
 | 
|---|
 | 2249 | 
 | 
|---|
 | 2250 |   delete[] hf_gradient;
 | 
|---|
 | 2251 |   delete[] hf_gradient_dat;
 | 
|---|
 | 2252 | 
 | 
|---|
 | 2253 |   for (i=0; i<natom; i++) {
 | 
|---|
 | 2254 |     delete[] ginter[i];
 | 
|---|
 | 2255 |     delete[] hf_ginter[i];
 | 
|---|
 | 2256 |     }
 | 
|---|
 | 2257 |   delete[] ginter;
 | 
|---|
 | 2258 |   delete[] hf_ginter;
 | 
|---|
 | 2259 | 
 | 
|---|
 | 2260 |   delete[] scf_vector;
 | 
|---|
 | 2261 |   delete[] scf_vector_dat;
 | 
|---|
 | 2262 |   delete[] evals;
 | 
|---|
 | 2263 | 
 | 
|---|
 | 2264 |   tim_exit("mp2-mem");
 | 
|---|
 | 2265 |   }
 | 
|---|
 | 2266 | 
 | 
|---|
 | 2267 | ///////////////////////////////////////////////////////////
 | 
|---|
 | 2268 | // Compute the contribution to the MP2 gradient from hcore
 | 
|---|
 | 2269 | ///////////////////////////////////////////////////////////
 | 
|---|
 | 2270 | void 
 | 
|---|
 | 2271 | MBPT2::hcore_cs_grad(double *PHF, double *PMP2,
 | 
|---|
 | 2272 |                      double **hf_ginter, double **ginter)
 | 
|---|
 | 2273 | {
 | 
|---|
 | 2274 | 
 | 
|---|
 | 2275 |   int i, j, k, l, m;
 | 
|---|
 | 2276 |   int jj, kk;
 | 
|---|
 | 2277 |   int jsize, ksize;
 | 
|---|
 | 2278 |   int j_offset, k_offset;
 | 
|---|
 | 2279 |   int jk_index;
 | 
|---|
 | 2280 |   int index;
 | 
|---|
 | 2281 |   int nshell;
 | 
|---|
 | 2282 |   int nbasis;
 | 
|---|
 | 2283 |   int nproc = msg_->n();
 | 
|---|
 | 2284 |   int me = msg_->me();
 | 
|---|
 | 2285 | 
 | 
|---|
 | 2286 |   const double *oneebuf; // 1-electron buffer
 | 
|---|
 | 2287 |   double tmpval1, tmpval2;
 | 
|---|
 | 2288 |   double gxyz[3];
 | 
|---|
 | 2289 |   double hf_gxyz[3];
 | 
|---|
 | 2290 | 
 | 
|---|
 | 2291 |   // Initialize 1e object
 | 
|---|
 | 2292 |   Ref<OneBodyDerivInt> obintder_ = integral()->hcore_deriv();
 | 
|---|
 | 2293 |   oneebuf = obintder_->buffer();
 | 
|---|
 | 2294 | 
 | 
|---|
 | 2295 |   nshell = basis()->nshell();
 | 
|---|
 | 2296 |   nbasis = basis()->nbasis();
 | 
|---|
 | 2297 | 
 | 
|---|
 | 2298 |   ///////////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 2299 |   // Compute the kinetic and nuclear-electron energy contribution to the gradient
 | 
|---|
 | 2300 |   ///////////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 2301 | 
 | 
|---|
 | 2302 |   jk_index = 0;
 | 
|---|
 | 2303 | 
 | 
|---|
 | 2304 |   for (i=0; i<molecule()->natom(); i++) {
 | 
|---|
 | 2305 |     for (j=0; j<nshell; j++) {
 | 
|---|
 | 2306 |       jsize = basis()->shell(j).nfunction();
 | 
|---|
 | 2307 |       j_offset = basis()->shell_to_function(j);
 | 
|---|
 | 2308 | 
 | 
|---|
 | 2309 |       for (k=0; k<=j; k++) {
 | 
|---|
 | 2310 |         ksize = basis()->shell(k).nfunction();
 | 
|---|
 | 2311 |         k_offset = basis()->shell_to_function(k);
 | 
|---|
 | 2312 | 
 | 
|---|
 | 2313 |         if (jk_index++%nproc == me) {
 | 
|---|
 | 2314 |           obintder_->compute_shell(j,k,i);
 | 
|---|
 | 2315 | 
 | 
|---|
 | 2316 |           for (l=0; l<3; l++) { gxyz[l] = 0.0; hf_gxyz[l] = 0.0; }
 | 
|---|
 | 2317 | 
 | 
|---|
 | 2318 |           index = 0;
 | 
|---|
 | 2319 | 
 | 
|---|
 | 2320 |           for (jj=0; jj<jsize; jj++) {
 | 
|---|
 | 2321 |             for (kk=0; kk<ksize; kk++) {
 | 
|---|
 | 2322 |               tmpval1 = PMP2[(j_offset + jj)*nbasis + k_offset + kk];
 | 
|---|
 | 2323 |               tmpval2 = PHF[(j_offset + jj)*nbasis + k_offset + kk];
 | 
|---|
 | 2324 |               for (m=0; m<3; m++) {
 | 
|---|
 | 2325 |                 gxyz[m] += oneebuf[index] * tmpval1;
 | 
|---|
 | 2326 |                 hf_gxyz[m] += oneebuf[index] * tmpval2;
 | 
|---|
 | 2327 |                 index++;
 | 
|---|
 | 2328 |                 } // exit m loop
 | 
|---|
 | 2329 |               }   // exit kk loop
 | 
|---|
 | 2330 |             }     // exit jj loop
 | 
|---|
 | 2331 | 
 | 
|---|
 | 2332 |           if (j != k) {
 | 
|---|
 | 2333 |             // off-diagonal blocks
 | 
|---|
 | 2334 |             for (l=0; l<3; l++) {
 | 
|---|
 | 2335 |               gxyz[l] *= 2.0;
 | 
|---|
 | 2336 |               hf_gxyz[l] *= 2.0;
 | 
|---|
 | 2337 |               }
 | 
|---|
 | 2338 |             }
 | 
|---|
 | 2339 | 
 | 
|---|
 | 2340 |           for (l=0; l<3; l++) {
 | 
|---|
 | 2341 |             ginter[i][l] += gxyz[l];
 | 
|---|
 | 2342 |             hf_ginter[i][l] += hf_gxyz[l];
 | 
|---|
 | 2343 |             }
 | 
|---|
 | 2344 | 
 | 
|---|
 | 2345 |           } // exit "if"
 | 
|---|
 | 2346 |         }   // exit k loop
 | 
|---|
 | 2347 |       }     // exit j loop
 | 
|---|
 | 2348 |     }       // exit i loop
 | 
|---|
 | 2349 | 
 | 
|---|
 | 2350 |   /* Accumulate the nodes' intermediate gradients on node 0 */
 | 
|---|
 | 2351 |   sum_gradients(msg_, ginter, molecule()->natom(), 3);
 | 
|---|
 | 2352 |   sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
 | 
|---|
 | 2353 | }
 | 
|---|
 | 2354 | 
 | 
|---|
 | 2355 | 
 | 
|---|
 | 2356 | ////////////////////////////////////////////////////
 | 
|---|
 | 2357 | // Compute the overlap contribution to the gradient
 | 
|---|
 | 2358 | ////////////////////////////////////////////////////
 | 
|---|
 | 2359 | void 
 | 
|---|
 | 2360 | MBPT2::overlap_cs_grad(double *WHF, double *WMP2,
 | 
|---|
 | 2361 |                        double **hf_ginter, double **ginter)
 | 
|---|
 | 2362 | {
 | 
|---|
 | 2363 |   int i, j, k, l, m;
 | 
|---|
 | 2364 |   int jj, kk;
 | 
|---|
 | 2365 |   int jj_index, kk_index;
 | 
|---|
 | 2366 |   int jsize, ksize;
 | 
|---|
 | 2367 |   int j_offset, k_offset;
 | 
|---|
 | 2368 |   int jk_index;
 | 
|---|
 | 2369 |   int index;
 | 
|---|
 | 2370 |   int nshell;
 | 
|---|
 | 2371 |   int nbasis;
 | 
|---|
 | 2372 | 
 | 
|---|
 | 2373 |   const double *oneebuf; // 1-electron buffer
 | 
|---|
 | 2374 |   double tmpval1, tmpval2;
 | 
|---|
 | 2375 |   double hf_tmpval1, hf_tmpval2;
 | 
|---|
 | 2376 |   double gxyz[3];
 | 
|---|
 | 2377 |   double hf_gxyz[3];
 | 
|---|
 | 2378 | 
 | 
|---|
 | 2379 |   // Initialize 1e object
 | 
|---|
 | 2380 |   Ref<OneBodyDerivInt> obintder_ = integral()->overlap_deriv();
 | 
|---|
 | 2381 |   oneebuf = obintder_->buffer();
 | 
|---|
 | 2382 | 
 | 
|---|
 | 2383 |   nshell = basis()->nshell();
 | 
|---|
 | 2384 |   nbasis = basis()->nbasis();
 | 
|---|
 | 2385 |   int nproc = msg_->n();
 | 
|---|
 | 2386 |   int me = msg_->me();
 | 
|---|
 | 2387 | 
 | 
|---|
 | 2388 |   for (i=0; i<molecule()->natom(); i++) {
 | 
|---|
 | 2389 |     jk_index = 0;
 | 
|---|
 | 2390 | 
 | 
|---|
 | 2391 |     for (j=0; j<nshell; j++) {
 | 
|---|
 | 2392 |       j_offset = basis()->shell_to_function(j);
 | 
|---|
 | 2393 |       jsize = basis()->shell(j).nfunction();
 | 
|---|
 | 2394 | 
 | 
|---|
 | 2395 |       for (k=0; k<=j; k++) {
 | 
|---|
 | 2396 |         k_offset = basis()->shell_to_function(k);
 | 
|---|
 | 2397 |         ksize = basis()->shell(k).nfunction();
 | 
|---|
 | 2398 | 
 | 
|---|
 | 2399 |         if (jk_index++%nproc == me) {
 | 
|---|
 | 2400 |           obintder_->compute_shell(j,k,i);
 | 
|---|
 | 2401 | 
 | 
|---|
 | 2402 |           for (l=0; l<3; l++) {
 | 
|---|
 | 2403 |             hf_gxyz[l] = 0.0;
 | 
|---|
 | 2404 |             gxyz[l] = 0.0;
 | 
|---|
 | 2405 |             }
 | 
|---|
 | 2406 |           index = 0;
 | 
|---|
 | 2407 | 
 | 
|---|
 | 2408 |           for (jj=0; jj<jsize; jj++) {
 | 
|---|
 | 2409 |             jj_index = j_offset + jj;
 | 
|---|
 | 2410 |             for (kk=0; kk<ksize; kk++) {
 | 
|---|
 | 2411 |               kk_index = k_offset + kk;
 | 
|---|
 | 2412 |               if (kk_index > jj_index) {
 | 
|---|
 | 2413 |                 index += 3;  // increment index since m-loop will be skipped
 | 
|---|
 | 2414 |                 break;       // skip to next jj value
 | 
|---|
 | 2415 |                 }
 | 
|---|
 | 2416 |               // NB. WMP2 is not a symmetrix matrix
 | 
|---|
 | 2417 |               tmpval1 = WMP2[jj_index*nbasis + kk_index];
 | 
|---|
 | 2418 |               tmpval2 = WMP2[kk_index*nbasis + jj_index];
 | 
|---|
 | 2419 |               hf_tmpval1 = WHF[jj_index*nbasis + kk_index];
 | 
|---|
 | 2420 |               hf_tmpval2 = WHF[kk_index*nbasis + jj_index];
 | 
|---|
 | 2421 | 
 | 
|---|
 | 2422 |               for (m=0; m<3; m++) {
 | 
|---|
 | 2423 |                 if (jj_index != kk_index) {
 | 
|---|
 | 2424 |                   gxyz[m] += oneebuf[index] * (tmpval1 + tmpval2);
 | 
|---|
 | 2425 |                   hf_gxyz[m] += oneebuf[index] * (hf_tmpval1 + hf_tmpval2);
 | 
|---|
 | 2426 |                   }
 | 
|---|
 | 2427 |                 else {
 | 
|---|
 | 2428 |                   gxyz[m] += oneebuf[index] * tmpval1;
 | 
|---|
 | 2429 |                   hf_gxyz[m] += oneebuf[index] * hf_tmpval1;
 | 
|---|
 | 2430 |                   }
 | 
|---|
 | 2431 |                 index++;
 | 
|---|
 | 2432 |                 } // exit m loop
 | 
|---|
 | 2433 |               }   // exit kk loop
 | 
|---|
 | 2434 |             }     // exit jj loop
 | 
|---|
 | 2435 | 
 | 
|---|
 | 2436 |           for (l=0; l<3; l++) {
 | 
|---|
 | 2437 |             ginter[i][l] += gxyz[l];
 | 
|---|
 | 2438 |             hf_ginter[i][l] += hf_gxyz[l];
 | 
|---|
 | 2439 |             }
 | 
|---|
 | 2440 |           } // exit "if"
 | 
|---|
 | 2441 |         }   // exit k loop
 | 
|---|
 | 2442 |       }     // exit j loop
 | 
|---|
 | 2443 |     }       // exit i loop
 | 
|---|
 | 2444 |   
 | 
|---|
 | 2445 |   /* Accumulate the nodes' intermediate gradients on node 0 */
 | 
|---|
 | 2446 |   sum_gradients(msg_, ginter, molecule()->natom(), 3);
 | 
|---|
 | 2447 |   sum_gradients(msg_, hf_ginter, molecule()->natom(), 3);
 | 
|---|
 | 2448 | }
 | 
|---|
 | 2449 | 
 | 
|---|
 | 2450 | static void
 | 
|---|
 | 2451 | sum_gradients(const Ref<MessageGrp>& msg, double **f, int n1, int n2)
 | 
|---|
 | 2452 | {
 | 
|---|
 | 2453 |   int i;
 | 
|---|
 | 2454 | 
 | 
|---|
 | 2455 |   if (msg->n() == 1) return;
 | 
|---|
 | 2456 | 
 | 
|---|
 | 2457 |   for (i=0; i<n1; i++) {
 | 
|---|
 | 2458 |     msg->sum(f[i],n2);
 | 
|---|
 | 2459 |     }
 | 
|---|
 | 2460 | }
 | 
|---|
 | 2461 | 
 | 
|---|
 | 2462 | static void
 | 
|---|
 | 2463 | zero_gradients(double **f, int n1, int n2)
 | 
|---|
 | 2464 | {
 | 
|---|
 | 2465 |   for (int i=0; i<n1; i++) {
 | 
|---|
 | 2466 |     for (int j=0; j<3; j++) f[i][j] = 0.0;
 | 
|---|
 | 2467 |     }
 | 
|---|
 | 2468 | }
 | 
|---|
 | 2469 | 
 | 
|---|
 | 2470 | static void
 | 
|---|
 | 2471 | accum_gradients(double **g, double **f, int n1, int n2)
 | 
|---|
 | 2472 | {
 | 
|---|
 | 2473 |   for (int i=0; i<n1; i++) {
 | 
|---|
 | 2474 |     for (int j=0; j<3; j++) g[i][j] += f[i][j];
 | 
|---|
 | 2475 |     }
 | 
|---|
 | 2476 | }
 | 
|---|
 | 2477 | 
 | 
|---|
 | 2478 | /////////////////////////////////////
 | 
|---|
 | 2479 | // Compute the batchsize
 | 
|---|
 | 2480 | //
 | 
|---|
 | 2481 | // Only arrays allocated before exiting the loop over
 | 
|---|
 | 2482 | // i-batches are included here  - only these arrays
 | 
|---|
 | 2483 | // affect the batch size.
 | 
|---|
 | 2484 | /////////////////////////////////////
 | 
|---|
 | 2485 | int
 | 
|---|
 | 2486 | MBPT2::compute_cs_batchsize(size_t mem_static, int nocc_act)
 | 
|---|
 | 2487 | {
 | 
|---|
 | 2488 |   size_t mem_dyn;   // dynamic memory available
 | 
|---|
 | 2489 |   distsize_t maxdyn;
 | 
|---|
 | 2490 |   int ni;
 | 
|---|
 | 2491 | 
 | 
|---|
 | 2492 |   ///////////////////////////////////////
 | 
|---|
 | 2493 |   // the largest memory requirement will
 | 
|---|
 | 2494 |   // either occur just before the end of
 | 
|---|
 | 2495 |   // the 1. q.b.t. (mem1) or just before
 | 
|---|
 | 2496 |   // the end of the i-batch loop (mem2)
 | 
|---|
 | 2497 |   ///////////////////////////////////////
 | 
|---|
 | 2498 | 
 | 
|---|
 | 2499 |   if (mem_alloc > mem_static) {
 | 
|---|
 | 2500 |     mem_dyn = mem_alloc - mem_static;
 | 
|---|
 | 2501 |     }
 | 
|---|
 | 2502 |   else {
 | 
|---|
 | 2503 |     mem_dyn = 0;
 | 
|---|
 | 2504 |     }
 | 
|---|
 | 2505 | 
 | 
|---|
 | 2506 |   // First determine if calculation is possible at all (i.e., if ni=1 possible)
 | 
|---|
 | 2507 | 
 | 
|---|
 | 2508 |   ni = 1;
 | 
|---|
 | 2509 |   maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
 | 
|---|
 | 2510 | 
 | 
|---|
 | 2511 |   if (maxdyn > mem_dyn) {
 | 
|---|
 | 2512 |     return 0;
 | 
|---|
 | 2513 |     }
 | 
|---|
 | 2514 | 
 | 
|---|
 | 2515 |   ni = 2;
 | 
|---|
 | 2516 |   while (ni<=nocc_act) {
 | 
|---|
 | 2517 |     maxdyn = compute_cs_dynamic_memory(ni, nocc_act);
 | 
|---|
 | 2518 |     if (maxdyn >= mem_dyn) {
 | 
|---|
 | 2519 |       ni--;
 | 
|---|
 | 2520 |       break;
 | 
|---|
 | 2521 |       }
 | 
|---|
 | 2522 |     ni++;
 | 
|---|
 | 2523 |     }
 | 
|---|
 | 2524 |   if (ni > nocc_act) ni = nocc_act;
 | 
|---|
 | 2525 | 
 | 
|---|
 | 2526 |   return ni;
 | 
|---|
 | 2527 | }
 | 
|---|
 | 2528 | 
 | 
|---|
 | 2529 | /////////////////////////////////////
 | 
|---|
 | 2530 | // Compute required (dynamic) memory
 | 
|---|
 | 2531 | // for a given batch size
 | 
|---|
 | 2532 | //
 | 
|---|
 | 2533 | // Only arrays allocated before exiting the loop over
 | 
|---|
 | 2534 | // i-batches are included here  - only these arrays
 | 
|---|
 | 2535 | // affect the batch size.
 | 
|---|
 | 2536 | /////////////////////////////////////
 | 
|---|
 | 2537 | distsize_t
 | 
|---|
 | 2538 | MBPT2::compute_cs_dynamic_memory(int ni, int nocc_act)
 | 
|---|
 | 2539 | {
 | 
|---|
 | 2540 |   int index;
 | 
|---|
 | 2541 |   distsize_t mem1, mem2, mem3;
 | 
|---|
 | 2542 |   distsize_t maxdyn;
 | 
|---|
 | 2543 |   distsize_t tmp;
 | 
|---|
 | 2544 |   int i, j;
 | 
|---|
 | 2545 |   int nij;
 | 
|---|
 | 2546 |   int nproc = msg_->n();
 | 
|---|
 | 2547 | 
 | 
|---|
 | 2548 |   ///////////////////////////////////////
 | 
|---|
 | 2549 |   // the largest memory requirement will
 | 
|---|
 | 2550 |   // either occur just before the end of
 | 
|---|
 | 2551 |   // the 1. q.b.t. (mem1) or just before
 | 
|---|
 | 2552 |   // the end of the i-batch loop (mem2)
 | 
|---|
 | 2553 |   ///////////////////////////////////////
 | 
|---|
 | 2554 | 
 | 
|---|
 | 2555 |   // compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes
 | 
|---|
 | 2556 |   index = 0;
 | 
|---|
 | 2557 |   nij = 0;
 | 
|---|
 | 2558 |   for (i=0; i<ni; i++) {
 | 
|---|
 | 2559 |     for (j=0; j<nocc; j++) {
 | 
|---|
 | 2560 |       if (index++ % nproc == 0) nij++;
 | 
|---|
 | 2561 |       }
 | 
|---|
 | 2562 |     }
 | 
|---|
 | 2563 |   mem1 = sizeof(double)*((distsize_t)nij*nbasis*nbasis
 | 
|---|
 | 2564 |                          + nbasis*nvir);
 | 
|---|
 | 2565 |   mem2 = sizeof(double)*((distsize_t)thr_->nthread()*ni*nbasis*nfuncmax*nfuncmax
 | 
|---|
 | 2566 |                          + (distsize_t)nij*nbasis*nbasis
 | 
|---|
 | 2567 |                          + ni*nbasis + nbasis*nfuncmax
 | 
|---|
 | 2568 |                          + 2*nfuncmax*nfuncmax*nfuncmax*nfuncmax);
 | 
|---|
 | 2569 |   mem3 = sizeof(double)*((distsize_t)ni*nbasis*nfuncmax*nfuncmax
 | 
|---|
 | 2570 |                          + (distsize_t)nij*nbasis*nbasis
 | 
|---|
 | 2571 |                          + 2*(2 + nbasis*nfuncmax));
 | 
|---|
 | 2572 |   tmp = (mem2>mem3 ? mem2:mem3);
 | 
|---|
 | 2573 |   maxdyn = (mem1>tmp ? mem1:tmp);
 | 
|---|
 | 2574 | 
 | 
|---|
 | 2575 |   return maxdyn;
 | 
|---|
 | 2576 | }
 | 
|---|
 | 2577 | 
 | 
|---|
 | 2578 | ////////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 2579 | 
 | 
|---|
 | 2580 | // Local Variables:
 | 
|---|
 | 2581 | // mode: c++
 | 
|---|
 | 2582 | // c-file-style: "CLJ-CONDENSED"
 | 
|---|
 | 2583 | // End:
 | 
|---|