source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbptr12/compute_ijxy.cc@ b7e5b0

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since b7e5b0 was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 15.7 KB
Line 
1//
2// compute_ijxy.cc
3//
4// Copyright (C) 2004 Edward Valeev
5//
6// Author: Edward Valeev <edward.valeev@chemistry.gatech.edu>
7// Maintainer: EV
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28#include <stdexcept>
29#include <stdlib.h>
30#include <string.h>
31#include <math.h>
32#include <limits.h>
33
34#include <scconfig.h>
35#include <util/misc/formio.h>
36#include <util/misc/timer.h>
37#include <util/class/class.h>
38#include <util/state/state.h>
39#include <util/state/state_text.h>
40#include <util/state/state_bin.h>
41#include <math/scmat/matrix.h>
42#include <chemistry/qc/mbpt/bzerofast.h>
43#include <chemistry/qc/mbptr12/transform_ijxy.h>
44#include <chemistry/qc/mbptr12/blas.h>
45#include <chemistry/qc/mbptr12/transform_12inds.h>
46
47using namespace std;
48using namespace sc;
49
50#define SINGLE_THREAD_E12 0
51#define PRINT2Q 0
52#define PRINT3Q 0
53#define PRINT4Q 0
54#define PRINT_NUM_TE_TYPES 1
55#define CHECK_INTS_SYMM 1
56
57/*-------------------------------------
58 Based on MBPT2::compute_mp2_energy()
59 -------------------------------------*/
60void
61TwoBodyMOIntsTransform_ijxy::compute()
62{
63 init_acc();
64 if (ints_acc_->is_committed())
65 return;
66
67 Ref<Integral> integral = factory_->integral();
68 Ref<GaussianBasisSet> bs1 = space1_->basis();
69 Ref<GaussianBasisSet> bs2 = space2_->basis();
70 Ref<GaussianBasisSet> bs3 = space3_->basis();
71 Ref<GaussianBasisSet> bs4 = space4_->basis();
72 const bool bs3_eq_bs4 = (bs3 == bs4);
73 int rank1 = space1_->rank();
74 int rank2 = space2_->rank();
75 int rank3 = space3_->rank();
76 int rank4 = space4_->rank();
77 int nbasis1 = bs1->nbasis();
78 int nbasis2 = bs2->nbasis();
79 int nbasis3 = bs3->nbasis();
80 int nbasis4 = bs4->nbasis();
81 int nshell1 = bs1->nshell();
82 int nshell2 = bs2->nshell();
83 int nshell3 = bs3->nshell();
84 int nshell4 = bs4->nshell();
85 int nfuncmax1 = bs1->max_nfunction_in_shell();
86 int nfuncmax2 = bs2->max_nfunction_in_shell();
87 int nfuncmax3 = bs3->max_nfunction_in_shell();
88 int nfuncmax4 = bs4->max_nfunction_in_shell();
89 enum te_types {eri=0, r12=1, r12t1=2};
90 const size_t memgrp_blocksize = memgrp_blksize();
91
92 // log2 of the erep tolerance
93 // (erep < 2^tol => discard)
94 const int tol = (int) (-10.0/log10(2.0)); // discard ints smaller than 10^-20
95
96 int aoint_computed = 0;
97
98 std::string tim_label("tbint_tform_");
99 tim_label += type(); tim_label += " "; tim_label += name();
100 tim_enter(tim_label.c_str());
101
102 print_header();
103
104 // Compute the storage remaining for the integral routines
105 size_t dyn_mem = distsize_to_size(compute_transform_dynamic_memory_(batchsize_));
106
107 int me = msg_->me();
108 int nproc = msg_->n();
109 const int restart_orb = restart_orbital();
110 int nijmax = compute_nij(batchsize_,rank2,nproc,me);
111
112 vector<int> mosym1 = space1_->mosym();
113 vector<int> mosym2 = space2_->mosym();
114 vector<int> mosym3 = space3_->mosym();
115 vector<int> mosym4 = space4_->mosym();
116 double** vector3 = new double*[nbasis3];
117 double** vector4 = new double*[nbasis4];
118 vector3[0] = new double[rank3*nbasis3];
119 vector4[0] = new double[rank4*nbasis4];
120 for(int i=1; i<nbasis3; i++) vector3[i] = vector3[i-1] + rank3;
121 for(int i=1; i<nbasis4; i++) vector4[i] = vector4[i-1] + rank4;
122 space3_->coefs().convert(vector3);
123 space4_->coefs().convert(vector4);
124
125 /////////////////////////////////////
126 // Begin transformation loops
127 /////////////////////////////////////
128
129 // debug print
130 if (debug_ >= 2) {
131 ExEnv::outn() << indent
132 << scprintf("node %i, begin loop over i-batches",me) << endl;
133 }
134 // end of debug print
135
136 // Initialize the integrals
137 integral->set_storage(memory_ - dyn_mem);
138 integral->set_basis(space1_->basis(),space2_->basis(),space3_->basis(),space4_->basis());
139 Ref<TwoBodyInt>* tbints = new Ref<TwoBodyInt>[thr_->nthread()];
140 for (int i=0; i<thr_->nthread(); i++) {
141 tbints[i] = integral->grt();
142 }
143 if (debug_ >= 1)
144 ExEnv::out0() << indent << scprintf("Memory used for integral storage: %i Bytes",
145 integral->storage_used()) << endl;
146
147 Ref<ThreadLock> lock = thr_->new_lock();
148 TwoBodyMOIntsTransform_12Inds** e12thread = new TwoBodyMOIntsTransform_12Inds*[thr_->nthread()];
149 for (int i=0; i<thr_->nthread(); i++) {
150 e12thread[i] = new TwoBodyMOIntsTransform_12Inds(this,i,thr_->nthread(),lock,tbints[i],-100.0,debug_);
151 }
152
153 /*-----------------------------------
154
155 Start the integrals transformation
156
157 -----------------------------------*/
158 tim_enter("mp2-r12/a passes");
159 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
160 StateOutBin stateout(top_mole_->checkpoint_file());
161 SavableState::save_state(top_mole_,stateout);
162 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
163 }
164
165 for (int pass=0; pass<npass_; pass++) {
166
167 ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
168
169 int ni = batchsize_;
170 int i_offset = restart_orb + pass*ni;
171 if (pass == npass_ - 1)
172 ni = rank1 - batchsize_*(npass_-1);
173
174 // Compute number of of i,j pairs on each node during current pass for
175 // two-el integrals
176 int nij = compute_nij(ni,rank2,nproc,me);
177
178 // debug print
179 if (debug_)
180 ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
181 // end of debug print
182
183 // Allocate and initialize some arrays
184 // (done here to avoid having these arrays
185 // overlap with arrays allocated later)
186
187 // Allocate (and initialize) some arrays
188
189 double* integral_ijrs = (double*) mem_->localdata();
190 //bzerofast(integral_ijsx, (num_te_types_*nij*memgrp_blocksize/sizeof(double)));
191 memset(integral_ijrs, 0, num_te_types_*nij*memgrp_blocksize);
192 integral_ijrs = 0;
193 mem_->sync();
194 ExEnv::out0() << indent
195 << scprintf("Begin loop over shells (ints, 1+2 q.t.)") << endl;
196
197 // Do the two electron integrals and the first two quarter transformations
198 tim_enter("ints+1qt+2qt");
199 shell_pair_data()->init();
200 for (int i=0; i<thr_->nthread(); i++) {
201 e12thread[i]->set_i_offset(i_offset);
202 e12thread[i]->set_ni(ni);
203 thr_->add_thread(i,e12thread[i]);
204# if SINGLE_THREAD_E12
205 e12thread[i]->run();
206# endif
207 }
208# if !SINGLE_THREAD_E12
209 thr_->start_threads();
210 thr_->wait_threads();
211# endif
212 tim_exit("ints+1qt+2qt");
213 ExEnv::out0() << indent << "End of loop over shells" << endl;
214
215 mem_->sync(); // Make sure ijsq is complete on each node before continuing
216 integral_ijrs = (double*) mem_->localdata();
217
218 // If bs3_eq_bs4 -- only s>r integrals are produced
219 // Produce ijsr integrals too
220 if (bs3_eq_bs4) {
221 for(int te_type=0; te_type<num_te_types_; te_type++) {
222 for (int i = 0; i<ni; i++) {
223 for (int j = 0; j<rank2; j++) {
224 int ij = i*rank2+j;
225 int ij_local = ij/nproc;
226 if (ij%nproc == me) {
227 double* ijrs_ints = (double*) ((size_t)integral_ijrs + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
228
229 for (int r = 0; r<nbasis3; r++) {
230
231 const int smin = r+1;
232 const double* rs_ptr = ijrs_ints + r*nbasis4 + smin;
233 double* sr_ptr = ijrs_ints + smin*nbasis3 + r;
234
235 for (int s = smin; s<nbasis4; s++) {
236 const double ijrs = *rs_ptr++;
237 *sr_ptr = ijrs;
238 sr_ptr += nbasis3;
239 }
240 }
241 }
242 }
243 }
244 }
245 }
246
247
248#if PRINT2Q
249 {
250 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
251 for (int i = 0; i<ni; i++) {
252 for (int j = 0; j<rank2; j++) {
253 int ij = i*rank2+j;
254 int ij_local = ij/nproc;
255 if (ij%nproc == me) {
256 const double* ijrs_ints = (const double*) ((size_t)integral_ijrs + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
257 for (int r = 0; r<nbasis3; r++) {
258 for (int s = 0; s<nbasis4; s++) {
259 double value = ijrs_ints[r*nbasis4+s];
260 printf("2Q: type = %d (%d %d|%d %d) = %12.8f\n",
261 te_type,i+i_offset,j,r,s,value);
262 }
263 }
264 }
265 }
266 }
267 }
268 }
269#endif
270
271 // Third quarter transform
272 ExEnv::out0() << indent << "Begin third q.t." << endl;
273 tim_enter("3. q.t.");
274 // Begin third quarter transformation;
275 // from (ij|rs) stored as ijrs
276 // generate (ij|xs) stored as ijxs
277
278 const int xs_size = rank3 * nbasis4 * sizeof(double);
279 double* xs_ints = new double[rank3*nbasis4];
280 for (int i = 0; i<ni; i++) {
281 for (int j = 0; j<rank2; j++) {
282 int ij = i*rank2+j;
283 int ij_local = ij/nproc;
284 if (ij%nproc == me) {
285
286 for(int te_type=0; te_type<num_te_types_; te_type++) {
287
288 const double *rs_ptr = (const double*) ((size_t)integral_ijrs + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
289
290 // third quarter transform
291 // xs = xr * rs
292 const char notransp = 'n';
293 const char transp = 't';
294 const double one = 1.0;
295 const double zero = 0.0;
296 F77_DGEMM(&notransp,&transp,&nbasis4,&rank3,&nbasis3,&one,rs_ptr,&nbasis4,
297 vector3[0],&rank3,&zero,xs_ints,&nbasis4);
298
299 // copy the result back to integrals_ijsq
300 memcpy((void*)rs_ptr,(const void*)xs_ints,xs_size);
301 }
302 }
303 }
304 }
305 delete[] xs_ints;
306 tim_exit("3. q.t.");
307 ExEnv::out0() << indent << "End of third q.t." << endl;
308 integral_ijrs = 0;
309
310 double* integral_ijxs = (double*) mem_->localdata();
311#if PRINT3Q
312 {
313 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
314 for (int i = 0; i<ni; i++) {
315 for (int j = 0; j<rank2; j++) {
316 int ij = i*rank2+j;
317 int ij_local = ij/nproc;
318 if (ij%nproc == me) {
319 const double* ijxs_ints = (const double*) ((size_t)integral_ijxs + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
320 for (int x = 0; x<rank3; x++) {
321 for (int s = 0; s<nbasis4; s++) {
322 double value = ijxs_ints[x*nbasis4+s];
323 printf("3Q: type = %d (%d %d|%d %d) = %12.8f\n",
324 te_type,i+i_offset,j,x,s,value);
325 }
326 }
327 }
328 }
329 }
330 }
331 }
332#endif
333
334 // Fourth quarter transform
335 ExEnv::out0() << indent << "Begin fourth q.t." << endl;
336 tim_enter("4. q.t.");
337 // Begin fourth quarter transformation;
338 // generate (ix|jy) stored as ijxy
339
340 double* ijxy_ints = new double[rank3*rank4];
341 const size_t xy_size = rank3*rank4*sizeof(double);
342 for(int te_type=0; te_type<num_te_types_; te_type++) {
343
344 for (int i = 0; i<ni; i++) {
345 for (int j = 0; j<rank2; j++) {
346 int ij = i*rank2+j;
347 int ij_local = ij/nproc;
348 if (ij%nproc == me) {
349
350 const double *xs_ptr = (const double*) ((size_t)integral_ijxs + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
351
352 // fourth quarter transform
353 // xy = xs * sy
354 const char notransp = 'n';
355 const double one = 1.0;
356 const double zero = 0.0;
357 F77_DGEMM(&notransp,&notransp,&rank4,&rank3,&nbasis4,&one,vector4[0],&rank4,
358 xs_ptr,&nbasis4,&zero,ijxy_ints,&rank4);
359
360 // copy the result back to integrals_ijsx
361 memcpy((void*)xs_ptr,(const void*)ijxy_ints,xy_size);
362 }
363 }
364 }
365 }
366 delete[] ijxy_ints;
367 tim_exit("4. q.t.");
368 ExEnv::out0() << indent << "End of fourth q.t." << endl;
369
370 integral_ijxs = 0;
371 double* integral_ijxy = (double*) mem_->localdata();
372
373 // Zero out nonsymmetric integrals -- Pitzer theorem in action
374 {
375 for (int i = 0; i<ni; i++) {
376 for (int j = 0; j<rank2; j++) {
377 int ij = i*rank2+j;
378 int ij_local = ij/nproc;
379 if (ij%nproc == me) {
380 const int ij_sym = mosym1[i+i_offset] ^ mosym2[j];
381 for(int te_type=0; te_type<num_te_types_; te_type++) {
382 double* ijxy_ptr = (double*) ((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
383 for (int x = 0; x<rank3; x++) {
384 const int ijx_sym = ij_sym ^ mosym3[x];
385 for (int y = 0; y<rank4; y++, ijxy_ptr++) {
386 if (ijx_sym ^ mosym4[y]) {
387 *ijxy_ptr = 0.0;
388 }
389 }
390 }
391 }
392 }
393 }
394 }
395 }
396 // Sync up tasks before integrals are committed
397 mem_->sync();
398
399#if PRINT4Q
400 {
401 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
402 for (int i = 0; i<ni; i++) {
403 for (int j = 0; j<rank2; j++) {
404 int ij = i*rank2+j;
405 int ij_local = ij/nproc;
406 if (ij%nproc == me) {
407 const double* ijxy_ints = (const double*)((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
408 for (int x = 0; x<rank3; x++) {
409 for (int y = 0; y<rank4; y++) {
410 double value = ijxy_ints[x*rank4+y];
411 printf("4Q: type = %d (%d %d|%d %d) = %12.8f\n",
412 te_type,i+i_offset,j,x,y,value);
413 }
414 }
415 }
416 }
417 }
418 }
419 }
420#endif
421
422 // Push locally stored integrals to an accumulator
423 // This could involve storing the data to disk or simply remembering the pointer
424 tim_enter("MO ints store");
425 ints_acc_->store_memorygrp(mem_,ni,memgrp_blocksize);
426 tim_exit("MO ints store");
427 mem_->sync();
428
429 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
430 StateOutBin stateout(top_mole_->checkpoint_file());
431 SavableState::save_state(top_mole_,stateout);
432 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
433 }
434
435 } // end of loop over passes
436 tim_exit("mp2-r12/a passes");
437 if (debug_)
438 ExEnv::out0() << indent << "End of mp2-r12/a transformation" << endl;
439 // Done storing integrals - commit the content
440 // WARNING: it is not safe to use mem until deactivate has been called on the accumulator
441 // After that deactivate the size of mem will be 0 [mem->set_localsize(0)]
442 ints_acc_->commit();
443
444
445 for (int i=0; i<thr_->nthread(); i++) {
446 delete e12thread[i];
447 }
448 delete[] e12thread;
449 delete[] tbints; tbints = 0;
450 delete[] vector3[0]; delete[] vector3;
451 delete[] vector4[0]; delete[] vector4;
452
453 tim_exit(tim_label.c_str());
454
455 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint()) {
456 StateOutBin stateout(top_mole_->checkpoint_file());
457 SavableState::save_state(top_mole_,stateout);
458 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
459 }
460
461 print_footer();
462
463#if CHECK_INTS_SYMM
464 ExEnv::out0() << indent << "Detecting non-totally-symmetric integrals ... ";
465 check_int_symm();
466 ExEnv::out0() << "none" << endl;
467#endif
468
469}
470
471
472////////////////////////////////////////////////////////////////////////////
473
474// Local Variables:
475// mode: c++
476// c-file-style: "CLJ-CONDENSED"
477// End:
Note: See TracBrowser for help on using the repository browser.