source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbptr12/compute_ixjy.cc

Candidate_v1.6.1
Last change on this file was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 14.6 KB
Line 
1//
2// compute_ixjy.cc
3//
4// Copyright (C) 2004 Edward Valeev
5//
6// Author: Edward Valeev <edward.valeev@chemistry.gatech.edu>
7// Maintainer: EV
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28#include <stdexcept>
29#include <stdlib.h>
30#include <string.h>
31#include <math.h>
32#include <limits.h>
33
34#include <scconfig.h>
35#include <util/misc/formio.h>
36#include <util/misc/timer.h>
37#include <util/class/class.h>
38#include <util/state/state.h>
39#include <util/state/state_text.h>
40#include <util/state/state_bin.h>
41#include <math/scmat/matrix.h>
42#include <chemistry/qc/mbpt/bzerofast.h>
43#include <chemistry/qc/mbptr12/transform_ixjy.h>
44#include <chemistry/qc/mbptr12/blas.h>
45#include <chemistry/qc/mbptr12/transform_13inds.h>
46
47using namespace std;
48using namespace sc;
49
50#define SINGLE_THREAD_E13 0
51#define PRINT2Q 0
52#define PRINT3Q 0
53#define PRINT4Q 0
54#define PRINT_NUM_TE_TYPES 1
55#define CHECK_INTS_SYMM 1
56
57/*-------------------------------------
58 Based on MBPT2::compute_mp2_energy()
59 -------------------------------------*/
60void
61TwoBodyMOIntsTransform_ixjy::compute()
62{
63 init_acc();
64 if (ints_acc_->is_committed())
65 return;
66
67 Ref<Integral> integral = factory_->integral();
68 Ref<GaussianBasisSet> bs1 = space1_->basis();
69 Ref<GaussianBasisSet> bs2 = space2_->basis();
70 Ref<GaussianBasisSet> bs3 = space3_->basis();
71 Ref<GaussianBasisSet> bs4 = space4_->basis();
72 int rank1 = space1_->rank();
73 int rank2 = space2_->rank();
74 int rank3 = space3_->rank();
75 int rank4 = space4_->rank();
76 int nbasis1 = bs1->nbasis();
77 int nbasis2 = bs2->nbasis();
78 int nbasis3 = bs3->nbasis();
79 int nbasis4 = bs4->nbasis();
80 int nshell1 = bs1->nshell();
81 int nshell2 = bs2->nshell();
82 int nshell3 = bs3->nshell();
83 int nshell4 = bs4->nshell();
84 int nfuncmax1 = bs1->max_nfunction_in_shell();
85 int nfuncmax2 = bs2->max_nfunction_in_shell();
86 int nfuncmax3 = bs3->max_nfunction_in_shell();
87 int nfuncmax4 = bs4->max_nfunction_in_shell();
88 enum te_types {eri=0, r12=1, r12t1=2};
89 const size_t memgrp_blocksize = memgrp_blksize();
90
91 // log2 of the erep tolerance
92 // (erep < 2^tol => discard)
93 const int tol = (int) (-10.0/log10(2.0)); // discard ints smaller than 10^-20
94
95 int aoint_computed = 0;
96
97 std::string tim_label("tbint_tform_ikjy ");
98 tim_label += name_;
99 tim_enter(tim_label.c_str());
100
101 print_header();
102
103 // Compute the storage remaining for the integral routines
104 size_t dyn_mem = distsize_to_size(compute_transform_dynamic_memory_(batchsize_));
105
106 int me = msg_->me();
107 int nproc = msg_->n();
108 const int restart_orb = restart_orbital();
109 int nijmax = compute_nij(batchsize_,rank3,nproc,me);
110
111 vector<int> mosym1 = space1_->mosym();
112 vector<int> mosym2 = space2_->mosym();
113 vector<int> mosym3 = space3_->mosym();
114 vector<int> mosym4 = space4_->mosym();
115 double** vector2 = new double*[nbasis2];
116 double** vector4 = new double*[nbasis4];
117 vector2[0] = new double[rank2*nbasis2];
118 vector4[0] = new double[rank4*nbasis4];
119 for(int i=1; i<nbasis2; i++) vector2[i] = vector2[i-1] + rank2;
120 for(int i=1; i<nbasis4; i++) vector4[i] = vector4[i-1] + rank4;
121 space2_->coefs().convert(vector2);
122 space4_->coefs().convert(vector4);
123
124 /////////////////////////////////////
125 // Begin transformation loops
126 /////////////////////////////////////
127
128 // debug print
129 if (debug_ >= 2) {
130 ExEnv::outn() << indent
131 << scprintf("node %i, begin loop over i-batches",me) << endl;
132 }
133 // end of debug print
134
135 // Initialize the integrals
136 integral->set_storage(memory_ - dyn_mem);
137 integral->set_basis(space1_->basis(),space2_->basis(),space3_->basis(),space4_->basis());
138 Ref<TwoBodyInt>* tbints = new Ref<TwoBodyInt>[thr_->nthread()];
139 for (int i=0; i<thr_->nthread(); i++) {
140 tbints[i] = integral->grt();
141 }
142 if (debug_ >= 1)
143 ExEnv::out0() << indent << scprintf("Memory used for integral storage: %i Bytes",
144 integral->storage_used()) << endl;
145
146 Ref<ThreadLock> lock = thr_->new_lock();
147 TwoBodyMOIntsTransform_13Inds** e13thread = new TwoBodyMOIntsTransform_13Inds*[thr_->nthread()];
148 for (int i=0; i<thr_->nthread(); i++) {
149 e13thread[i] = new TwoBodyMOIntsTransform_13Inds(this,i,thr_->nthread(),lock,tbints[i],-100.0,debug_);
150 }
151
152 /*-----------------------------------
153
154 Start the integrals transformation
155
156 -----------------------------------*/
157 tim_enter("mp2-r12/a passes");
158 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
159 StateOutBin stateout(top_mole_->checkpoint_file());
160 SavableState::save_state(top_mole_,stateout);
161 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
162 }
163
164 for (int pass=0; pass<npass_; pass++) {
165
166 ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
167
168 int ni = batchsize_;
169 int i_offset = restart_orb + pass*ni;
170 if (pass == npass_ - 1)
171 ni = rank1 - batchsize_*(npass_-1);
172
173 // Compute number of of i,j pairs on each node during current pass for
174 // two-el integrals
175 int nij = compute_nij(ni,rank3,nproc,me);
176
177 // debug print
178 if (debug_)
179 ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
180 // end of debug print
181
182 // Allocate and initialize some arrays
183 // (done here to avoid having these arrays
184 // overlap with arrays allocated later)
185
186 // Allocate (and initialize) some arrays
187
188 double* integral_ijsq = (double*) mem_->localdata();
189 //bzerofast(integral_ijsx, (num_te_types_*nij*memgrp_blocksize/sizeof(double)));
190 memset(integral_ijsq, 0, num_te_types_*nij*memgrp_blocksize);
191 integral_ijsq = 0;
192 mem_->sync();
193 ExEnv::out0() << indent
194 << scprintf("Begin loop over shells (ints, 1+2 q.t.)") << endl;
195
196 // Do the two electron integrals and the first two quarter transformations
197 tim_enter("ints+1qt+2qt");
198 shell_pair_data()->init();
199 for (int i=0; i<thr_->nthread(); i++) {
200 e13thread[i]->set_i_offset(i_offset);
201 e13thread[i]->set_ni(ni);
202 thr_->add_thread(i,e13thread[i]);
203# if SINGLE_THREAD_E13
204 e13thread[i]->run();
205# endif
206 }
207# if !SINGLE_THREAD_E13
208 thr_->start_threads();
209 thr_->wait_threads();
210# endif
211 tim_exit("ints+1qt+2qt");
212 ExEnv::out0() << indent << "End of loop over shells" << endl;
213
214 mem_->sync(); // Make sure ijsq is complete on each node before continuing
215 integral_ijsq = (double*) mem_->localdata();
216
217#if PRINT2Q
218 {
219 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
220 for (int i = 0; i<ni; i++) {
221 for (int q = 0; q<nbasis2; q++) {
222 for (int j = 0; j<rank3; j++) {
223 int ij = i*rank3+j;
224 int ij_local = ij/nproc;
225 if (ij%nproc == me) {
226 const double* ijsq_ints = (const double*) ((size_t)integral_ijsq + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
227 for (int s = 0; s<nbasis4; s++) {
228 double value = ijsq_ints[s*rank2+q];
229 printf("2Q: type = %d (%d %d|%d %d) = %12.8f\n",
230 te_type,i+i_offset,q,j,s,value);
231 }
232 }
233 }
234 }
235 }
236 }
237 }
238#endif
239
240 // Third quarter transform
241 ExEnv::out0() << indent << "Begin third q.t." << endl;
242 tim_enter("3. q.t.");
243 // Begin third quarter transformation;
244 // from (iq|js) stored as ijsq
245 // generate (ix|js) stored as ijsx
246
247 const int sx_size = nbasis4 * rank2 * sizeof(double);
248 double* sx_ints = new double[nbasis4 * rank2];
249 for (int i = 0; i<ni; i++) {
250 for (int j = 0; j<rank3; j++) {
251 int ij = i*rank3+j;
252 int ij_local = ij/nproc;
253 if (ij%nproc == me) {
254
255 for(int te_type=0; te_type<num_te_types_; te_type++) {
256
257 const double *sq_ptr = (const double*) ((size_t)integral_ijsq + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
258
259 // fourth quarter transform
260 // sx = sq * qx
261 const char notransp = 'n';
262 const double one = 1.0;
263 const double zero = 0.0;
264 F77_DGEMM(&notransp,&notransp,&rank2,&nbasis4,&nbasis2,&one,vector2[0],&rank2,
265 sq_ptr,&nbasis2,&zero,sx_ints,&rank2);
266
267 // copy the result back to integrals_ijsq
268 memcpy((void*)sq_ptr,(const void*)sx_ints,sx_size);
269 }
270 }
271 }
272 }
273 delete[] sx_ints;
274 tim_exit("3. q.t.");
275 ExEnv::out0() << indent << "End of third q.t." << endl;
276 integral_ijsq = 0;
277
278 double* integral_ijsx = (double*) mem_->localdata();
279#if PRINT3Q
280 {
281 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
282 for (int i = 0; i<ni; i++) {
283 for (int x = 0; x<rank2; x++) {
284 for (int j = 0; j<rank3; j++) {
285 int ij = i*rank3+j;
286 int ij_local = ij/nproc;
287 if (ij%nproc == me) {
288 const double* ijsx_ints = (const double*) ((size_t)integral_ijsx + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
289 for (int s = 0; s<nbasis4; s++) {
290 double value = ijsx_ints[s*rank2+x];
291 printf("3Q: type = %d (%d %d|%d %d) = %12.8f\n",
292 te_type,i+i_offset,x,j,s,value);
293 }
294 }
295 }
296 }
297 }
298 }
299 }
300#endif
301
302 // Fourth quarter transform
303 ExEnv::out0() << indent << "Begin fourth q.t." << endl;
304 tim_enter("4. q.t.");
305 // Begin fourth quarter transformation;
306 // generate (ix|jy) stored as ijxy
307
308 double* ijxy_ints = new double[rank2*rank4];
309 const size_t xy_size = rank2*rank4*sizeof(double);
310 for(int te_type=0; te_type<num_te_types_; te_type++) {
311
312 for (int i = 0; i<ni; i++) {
313 for (int j = 0; j<rank3; j++) {
314 int ij = i*rank3+j;
315 int ij_local = ij/nproc;
316 if (ij%nproc == me) {
317
318 const double *sx_ptr = (const double*) ((size_t)integral_ijsx + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
319
320 // fourth quarter transform
321 // xy = sx^t * sy
322 const char notransp = 'n';
323 const char transp = 't';
324 const double one = 1.0;
325 const double zero = 0.0;
326 F77_DGEMM(&notransp,&transp,&rank4,&rank2,&nbasis4,&one,vector4[0],&rank4,
327 sx_ptr,&rank2,&zero,ijxy_ints,&rank4);
328
329 // copy the result back to integrals_ijsx
330 memcpy((void*)sx_ptr,(const void*)ijxy_ints,xy_size);
331 }
332 }
333 }
334 }
335 delete[] ijxy_ints;
336 tim_exit("4. q.t.");
337 ExEnv::out0() << indent << "End of fourth q.t." << endl;
338
339 integral_ijsx = 0;
340 double* integral_ijxy = (double*) mem_->localdata();
341
342 // Zero out nonsymmetric integrals -- Pitzer theorem in action
343 {
344 for (int i = 0; i<ni; i++) {
345 for (int j = 0; j<rank3; j++) {
346 int ij = i*rank3+j;
347 int ij_local = ij/nproc;
348 if (ij%nproc == me) {
349 const int ij_sym = mosym1[i+i_offset] ^ mosym3[j];
350 for(int te_type=0; te_type<num_te_types_; te_type++) {
351 double* ijxy_ptr = (double*) ((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
352 for (int x = 0; x<rank2; x++) {
353 const int ijx_sym = ij_sym ^ mosym2[x];
354 for (int y = 0; y<rank4; y++, ijxy_ptr++) {
355 if (ijx_sym ^ mosym4[y]) {
356 *ijxy_ptr = 0.0;
357 }
358 }
359 }
360 }
361 }
362 }
363 }
364 }
365 // Sync up tasks before integrals are committed
366 mem_->sync();
367
368#if PRINT4Q
369 {
370 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
371 for (int i = 0; i<ni; i++) {
372 for (int x = 0; x<rank2; x++) {
373 for (int j = 0; j<rank3; j++) {
374 int ij = i*rank3+j;
375 int ij_local = ij/nproc;
376 if (ij%nproc == me) {
377 const double* ijxy_ints = (const double*)((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
378 for (int y = 0; y<rank4; y++) {
379 double value = ijxy_ints[x*rank4+y];
380 printf("4Q: type = %d (%d %d|%d %d) = %12.8f\n",
381 te_type,i+i_offset,x,j,y,value);
382 }
383 }
384 }
385 }
386 }
387 }
388 }
389#endif
390
391 // Push locally stored integrals to an accumulator
392 // This could involve storing the data to disk or simply remembering the pointer
393 tim_enter("MO ints store");
394 ints_acc_->store_memorygrp(mem_,ni,memgrp_blocksize);
395 tim_exit("MO ints store");
396 mem_->sync();
397
398 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
399 StateOutBin stateout(top_mole_->checkpoint_file());
400 SavableState::save_state(top_mole_,stateout);
401 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
402 }
403
404 } // end of loop over passes
405 tim_exit("mp2-r12/a passes");
406 if (debug_)
407 ExEnv::out0() << indent << "End of mp2-r12/a transformation" << endl;
408 // Done storing integrals - commit the content
409 // WARNING: it is not safe to use mem until deactivate has been called on the accumulator
410 // After that deactivate the size of mem will be 0 [mem->set_localsize(0)]
411 ints_acc_->commit();
412
413
414 for (int i=0; i<thr_->nthread(); i++) {
415 delete e13thread[i];
416 }
417 delete[] e13thread;
418 delete[] tbints; tbints = 0;
419 delete[] vector2[0]; delete[] vector2;
420 delete[] vector4[0]; delete[] vector4;
421
422 tim_exit(tim_label.c_str());
423
424 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint()) {
425 StateOutBin stateout(top_mole_->checkpoint_file());
426 SavableState::save_state(top_mole_,stateout);
427 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
428 }
429
430 print_footer();
431
432#if CHECK_INTS_SYMM
433 ExEnv::out0() << indent << "Detecting non-totally-symmetric integrals ... ";
434 check_int_symm();
435 ExEnv::out0() << "none" << endl;
436#endif
437
438}
439
440
441////////////////////////////////////////////////////////////////////////////
442
443// Local Variables:
444// mode: c++
445// c-file-style: "CLJ-CONDENSED"
446// End:
Note: See TracBrowser for help on using the repository browser.