source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbptr12/transform_13inds.cc@ 860145

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since 860145 was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 15.9 KB
Line 
1//
2// transform_13inds.cc
3//
4// Copyright (C) 2001 Edward Valeev
5//
6// Author: Edward Valeev <edward.valeev@chemistry.gatech.edu>
7// Maintainer: EV
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28#ifdef __GNUC__
29#pragma implementation
30#endif
31
32#include <math.h>
33#include <stdexcept>
34
35#include <util/misc/formio.h>
36#include <util/misc/timer.h>
37#include <chemistry/qc/basis/gpetite.h>
38#include <chemistry/qc/mbpt/bzerofast.h>
39#include <chemistry/qc/mbpt/util.h>
40#include <chemistry/qc/basis/distshpair.h>
41#include <chemistry/qc/mbptr12/blas.h>
42#include <chemistry/qc/mbptr12/transform_13inds.h>
43
44using namespace std;
45using namespace sc;
46
47#define PRINT1Q 0
48#define PRINT2Q 0
49#define PRINT_NUM_TE_TYPES 1
50
51// The FAST_BUT_WRONG flags is useful for exercising the communications
52// layer. It causes the first and second quarter transformation to be
53// omitted, but all communication is still performed. This permits
54// problems in communications libraries to be more quickly identified.
55#define FAST_BUT_WRONG 0
56
57TwoBodyMOIntsTransform_13Inds::TwoBodyMOIntsTransform_13Inds(
58 const Ref<TwoBodyMOIntsTransform>& tform, int mythread, int nthread,
59 const Ref<ThreadLock>& lock, const Ref<TwoBodyInt> &tbint, double tol, int debug) :
60 tform_(tform), mythread_(mythread), nthread_(nthread), lock_(lock), tbint_(tbint),
61 tol_(tol), debug_(debug)
62{
63 timer_ = new RegionTimer();
64 aoint_computed_ = 0;
65 ni_ = tform_->batchsize();
66 i_offset_ = 0;
67}
68
69TwoBodyMOIntsTransform_13Inds::~TwoBodyMOIntsTransform_13Inds()
70{
71 timer_ = 0;
72}
73
74/*
75 Distribute work by SR
76
77 for all PQ
78 compute unique (PQ|RS)
79 transform to (IM|RS) stored as rsim where M are all AOs for basis set 2
80 end PQ
81
82 transform (IM|RS) to (IM|JS) stored as ijsm and accumulate to the tasks that holds respective ij-pairs.
83
84 end SR
85*/
86
87void
88TwoBodyMOIntsTransform_13Inds::run()
89{
90 Ref<MemoryGrp> mem = tform_->mem();
91 Ref<MessageGrp> msg = tform_->msg();
92 Ref<R12IntsAcc> ints_acc = tform_->ints_acc();
93 const int me = msg->me();
94 const int nproc = msg->n();
95 Ref<MOIndexSpace> space1 = tform_->space1();
96 Ref<MOIndexSpace> space2 = tform_->space2();
97 Ref<MOIndexSpace> space3 = tform_->space3();
98 Ref<MOIndexSpace> space4 = tform_->space4();
99
100 Ref<GaussianBasisSet> bs1 = space1->basis();
101 Ref<GaussianBasisSet> bs2 = space2->basis();
102 Ref<GaussianBasisSet> bs3 = space3->basis();
103 Ref<GaussianBasisSet> bs4 = space4->basis();
104 const bool bs1_eq_bs2 = (bs1 == bs2);
105 const bool bs3_eq_bs4 = (bs3 == bs4);
106
107 const bool dynamic = tform_->dynamic();
108 const double print_percent = tform_->print_percent();
109
110 const int ni = ni_;
111 const int rank1 = space1->rank();
112 const int rank3 = space3->rank();
113 const int nfuncmax1 = bs1->max_nfunction_in_shell();
114 const int nfuncmax2 = bs2->max_nfunction_in_shell();
115 const int nfuncmax3 = bs3->max_nfunction_in_shell();
116 const int nfuncmax4 = bs4->max_nfunction_in_shell();
117 const int nsh1 = bs1->nshell();
118 const int nsh2 = bs2->nshell();
119 const int nsh3 = bs3->nshell();
120 const int nsh4 = bs4->nshell();
121 const int nbasis1 = bs1->nbasis();
122 const int nbasis2 = bs2->nbasis();
123 const int nbasis3 = bs3->nbasis();
124 const int nbasis4 = bs4->nbasis();
125 double dtol = pow(2.0,tol_);
126 const size_t memgrp_blksize = tform_->memgrp_blksize()/sizeof(double);
127
128 double** vector1 = new double*[nbasis1];
129 double** vector3 = new double*[nbasis3];
130 vector1[0] = new double[rank1*nbasis1];
131 vector3[0] = new double[rank3*nbasis3];
132 for(int i=1; i<nbasis1; i++) vector1[i] = vector1[i-1] + rank1;
133 for(int i=1; i<nbasis3; i++) vector3[i] = vector3[i-1] + rank3;
134 space1->coefs().convert(vector1);
135 space3->coefs().convert(vector3);
136
137 /*-------------------------------------------------------------
138 Find integrals buffers to 1/r12, r12, and [r12,T1] integrals
139 -------------------------------------------------------------*/
140 const int num_te_types = tform_->num_te_types();
141 const double *intbuf[TwoBodyInt::num_tbint_types];
142 intbuf[TwoBodyInt::eri] = tbint_->buffer(TwoBodyInt::eri);
143 intbuf[TwoBodyInt::r12] = tbint_->buffer(TwoBodyInt::r12);
144 intbuf[TwoBodyInt::r12t1] = tbint_->buffer(TwoBodyInt::r12t1);
145 intbuf[TwoBodyInt::r12t2] = tbint_->buffer(TwoBodyInt::r12t2);
146
147 /*-----------------------------------------------------
148 Allocate buffers for partially transformed integrals
149 -----------------------------------------------------*/
150 double *ijsq_contrib; // local contributions to integral_ijsq
151 double *ijrq_contrib; // local contributions to integral_ijrq
152 double **rsiq_ints = new double*[num_te_types]; // quarter-transformed integrals for each RS pair
153 for(int te_type=0;te_type<num_te_types;te_type++) {
154 rsiq_ints[te_type] = new double[ni*nbasis2*nfuncmax3*nfuncmax4];
155 }
156 ijsq_contrib = mem->malloc_local_double(nbasis2*nfuncmax4);
157 if (bs3_eq_bs4)
158 ijrq_contrib = mem->malloc_local_double(nbasis2*nfuncmax4);
159 else
160 ijrq_contrib = NULL;
161
162 /*-----------------------------
163 Initialize work distribution
164 -----------------------------*/
165 sc::DistShellPair shellpairs(msg,nthread_,mythread_,lock_,bs4,bs3,dynamic,
166 tform_->shell_pair_data());
167 shellpairs.set_debug(debug_);
168 if (debug_) shellpairs.set_print_percent(print_percent/10.0);
169 else shellpairs.set_print_percent(print_percent);
170 int work_per_thread = bs3_eq_bs4 ?
171 ((nsh3*(nsh3+1))/2)/(nproc*nthread_) :
172 (nsh3*nsh4)/(nproc*nthread_) ;
173 int print_interval = work_per_thread/100;
174 int time_interval = work_per_thread/10;
175 int print_index = 0;
176 if (print_interval == 0) print_interval = 1;
177 if (time_interval == 0) time_interval = 1;
178 if (work_per_thread == 0) work_per_thread = 1;
179
180 if (debug_) {
181 lock_->lock();
182 ExEnv::outn() << scprintf("%d:%d: starting get_task loop",me,mythread_) << endl;
183 lock_->unlock();
184 }
185
186 Ref<GenPetite4> p4list
187 = construct_gpetite(bs1,bs2,bs3,bs4);
188
189#if FAST_BUT_WRONG
190 for(int te_type=0;te_type<num_te_types;te_type++) {
191 bzerofast(rsiq_ints[te_type], ni*nbasis2*nfuncmax3*nfuncmax4);
192 bzerofast(ijsq_contrib[te_type], nbasis2*nfuncmax4);
193 if (bs3_eq_bs4)
194 bzerofast(ijrq_contrib[te_type], nbasis2*nfuncmax4);
195 }
196#endif
197
198 int R = 0;
199 int S = 0;
200 while (shellpairs.get_task(S,R)) {
201 // if bs3_eq_bs4 then S >= R always (see sc::exp::DistShellPair)
202 int nr = bs3->shell(R).nfunction();
203 int r_offset = bs3->shell_to_function(R);
204
205 int ns = bs4->shell(S).nfunction();
206 int s_offset = bs4->shell_to_function(S);
207
208 int nrs = nr*ns;
209
210 if (debug_ > 1 && (print_index++)%print_interval == 0) {
211 lock_->lock();
212 ExEnv::outn() << scprintf("%d:%d: (PQ|%d %d) %d%%",
213 me,mythread_,R,S,(100*print_index)/work_per_thread)
214 << endl;
215 lock_->unlock();
216 }
217 if (debug_ > 1 && (print_index)%time_interval == 0) {
218 lock_->lock();
219 ExEnv::outn() << scprintf("timer for %d:%d:",me,mythread_) << endl;
220 timer_->print();
221 lock_->unlock();
222 }
223
224#if !FAST_BUT_WRONG
225 // Zero out 1 q.t. storage
226 for(int te_type=0;te_type<num_te_types;te_type++)
227 bzerofast(rsiq_ints[te_type], nrs*ni*nbasis2);
228
229 for (int P=0; P<nsh1; P++) {
230 int np = bs1->shell(P).nfunction();
231 int p_offset = bs1->shell_to_function(P);
232
233 int Qmax = (bs1_eq_bs2 ? P : nsh2-1);
234 for (int Q=0; Q<=Qmax; Q++) {
235 int nq = bs2->shell(Q).nfunction();
236 int q_offset = bs2->shell_to_function(Q);
237
238 // check if symmetry unique and compute degeneracy
239 int deg = p4list->in_p4(P,Q,R,S);
240 if (deg == 0)
241 continue;
242 double symfac = (double) deg;
243
244 if (tbint_->log2_shell_bound(P,Q,R,S) < tol_) {
245 continue; // skip shell quartets less than tol
246 }
247
248 aoint_computed_++;
249
250 timer_->enter("AO integrals");
251 tbint_->compute_shell(P,Q,R,S);
252 timer_->exit("AO integrals");
253
254 timer_->enter("1. q.t.");
255
256 // Begin first quarter transformation;
257 // generate (iq|rs) for i active
258 // if bs1_eq_bs2 then (ip|rs) are also generated
259 // store the integrals as rsiq
260 for(int te_type=0; te_type<num_te_types; te_type++) {
261 const double *pqrs_ptr = intbuf[te_type];
262
263 for (int bf1 = 0; bf1 < np; bf1++) {
264 int p = p_offset + bf1;
265 int qmax = (bs1_eq_bs2 && P == Q) ? bf1 : nq-1;
266
267 for (int bf2 = 0; bf2 <= qmax; bf2++) {
268 int q = q_offset + bf2;
269
270 for (int bf3 = 0; bf3 < nr; bf3++) {
271 int smin = (bs3_eq_bs4 && R == S) ? bf3 : 0;
272 pqrs_ptr += smin;
273
274 for (int bf4 = smin; bf4 <ns; bf4++) {
275
276 // Only transform integrals larger than the threshold
277 if (fabs(*pqrs_ptr) > dtol) {
278
279 double* rsiq_ptr = &rsiq_ints[te_type][q + nbasis2*(0 + ni*(bf4 + ns*bf3))];
280 const double* c_pi = vector1[p] + i_offset_;
281
282 double* rsip_ptr;
283 const double* c_qi;
284 if (bs1_eq_bs2) {
285 rsip_ptr = &rsiq_ints[te_type][p + nbasis2*(0 + ni*(bf4 + ns*bf3))];
286 c_qi = vector1[q] + i_offset_;
287 }
288
289 double rsiq_int_contrib = *pqrs_ptr;
290 // multiply each integral by its symmetry degeneracy factor
291 rsiq_int_contrib *= symfac;
292
293 if (bs1_eq_bs2) {
294
295 double rsip_int_contrib = rsiq_int_contrib;
296 if (te_type == TwoBodyInt::r12t1)
297 rsip_int_contrib = -1.0*rsiq_int_contrib;
298
299 if (p == q) {
300 for (int i=0; i<ni; i++) {
301 *rsiq_ptr += *c_pi++ * rsiq_int_contrib;
302 rsiq_ptr += nbasis2;
303 }
304 }
305 else {
306 // p != q
307 for (int i=0; i<ni; i++) {
308 *rsip_ptr += *c_qi++ * rsip_int_contrib;
309 rsip_ptr += nbasis2;
310 *rsiq_ptr += *c_pi++ * rsiq_int_contrib;
311 rsiq_ptr += nbasis2;
312 }
313 }
314
315 }
316 else {
317
318 for (int i=0; i<ni; i++) {
319 *rsiq_ptr += *c_pi++ * rsiq_int_contrib;
320 rsiq_ptr += nbasis2;
321 }
322
323 } // endif bs1_eq_bs2
324 } // endif dtol
325
326 pqrs_ptr++;
327 } // exit bf4 loop
328 } // exit bf3 loop
329 } // exit bf2 loop
330 pqrs_ptr += (nq - qmax - 1) * nrs;
331 } // exit bf1 loop
332 // end of first quarter transformation
333 }
334 timer_->exit("1. q.t.");
335
336 } // exit P loop
337 } // exit Q loop
338#endif // !FAST_BUT_WRONG
339
340#if PRINT1Q
341 {
342 lock_->lock();
343 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
344 for (int i = 0; i<ni; i++) {
345 for (int q = 0; q<nbasis2; q++) {
346 for (int r = 0; r<nr; r++) {
347 int rr = r+r_offset;
348 for (int s = 0; s<ns; s++) {
349 int ss = s+s_offset;
350 double value = rsiq_ints[te_type][q+nbasis2*(i+ni*(s+ns*r))];
351 printf("1Q: type = %d (%d %d|%d %d) = %12.8f\n",
352 te_type,i+i_offset_,q,rr,ss,value);
353 }
354 }
355 }
356 }
357 }
358 lock_->unlock();
359 }
360#endif
361
362 const int niq = ni*nbasis2;
363
364 timer_->enter("2. q.t.");
365 // Begin second quarter transformation;
366 // generate (iq|js) stored as ijsq (also generate (iq|jr), if needed)
367
368 for(int te_type=0; te_type<num_te_types; te_type++) {
369 for (int i=0; i<ni; i++) {
370 for (int j=0; j<rank3; j++) {
371
372#if !FAST_BUT_WRONG
373 bzerofast(ijsq_contrib, nbasis2*ns);
374 if (bs3_eq_bs4)
375 bzerofast(ijrq_contrib, nbasis2*nr);
376
377 int ij_proc = (i*rank3 + j)%nproc;
378 int ij_index = (i*rank3 + j)/nproc;
379 const size_t ijsq_start = (size_t)(num_te_types*ij_index + te_type) * memgrp_blksize;
380 const double *rsiq_ptr = rsiq_ints[te_type] + i*nbasis2;
381
382 if (bs3_eq_bs4) {
383
384 for (int bf3 = 0; bf3 < nr; bf3++) {
385 int r = r_offset + bf3;
386 int smin = (bs3_eq_bs4 && R == S) ? bf3 : 0;
387 rsiq_ptr += smin*niq;
388
389 for (int bf4 = smin; bf4 <ns; bf4++, rsiq_ptr+=niq) {
390 int s = s_offset + bf4;
391
392 // second quarter transform
393 // rs = js
394 // rs = jr
395
396 double* ijsq_ptr = ijsq_contrib + bf4*nbasis2;
397 double* ijrq_ptr = ijrq_contrib + bf3*nbasis2;
398 const double* i_ptr = rsiq_ptr;
399
400 const double c_rj = vector3[r][j];
401 const double c_sj = vector3[s][j];
402
403 if (r != s) {
404 for (int q=0; q<nbasis2; q++) {
405
406 double value = *i_ptr++;
407 *ijsq_ptr++ += c_rj * value;
408 *ijrq_ptr++ += c_sj * value;
409
410 }
411 }
412 else {
413 for (int q=0; q<nbasis2; q++) {
414
415 double value = *i_ptr++;
416 *ijsq_ptr++ += c_rj * value;
417
418 }
419 }
420 }
421 }
422 }
423 else {
424
425 for (int bf3 = 0; bf3 < nr; bf3++) {
426 int r= r_offset + bf3;
427 double* ijsq_ptr = ijsq_contrib;
428
429 for (int bf4 = 0; bf4 <ns; bf4++, rsiq_ptr+=niq) {
430
431 // second quarter transform
432 // rs = js
433 const double* i_ptr = rsiq_ptr;
434
435 const double c_rj = vector3[r][j];
436
437 for (int q=0; q<nbasis2; q++) {
438
439 double value = *i_ptr++;
440 *ijsq_ptr++ += c_rj * value;
441
442 }
443 }
444 }
445 }
446
447 // We now have contributions to ijsq (and ijrq) for one pair i,j,
448 // all q, and s in S (r in R); send ijsq (and ijrq) to the node
449 // (ij_proc) which is going to have this ij pair
450#endif // !FAST_BUT_WRONG
451
452 // Sum the ijsq_contrib to the appropriate place
453 size_t ij_offset = (size_t)nbasis2*s_offset + ijsq_start;
454 mem->sum_reduction_on_node(ijsq_contrib,
455 ij_offset, ns*nbasis2, ij_proc);
456
457 if (bs3_eq_bs4) {
458 size_t ij_offset = (size_t)nbasis2*r_offset + ijsq_start;
459 mem->sum_reduction_on_node(ijrq_contrib,
460 ij_offset, nr*nbasis2, ij_proc);
461 }
462
463 } // endif j
464 } // endif i
465 } // endif te_type
466 timer_->exit("2. q.t.");
467
468 } // exit while get_task
469
470 if (debug_) {
471 lock_->lock();
472 ExEnv::outn() << scprintf("%d:%d: done with get_task loop",me,mythread_) << endl;
473 lock_->unlock();
474 }
475
476 for(int te_type=0; te_type<num_te_types; te_type++) {
477 delete[] rsiq_ints[te_type];
478 }
479 delete[] rsiq_ints;
480 mem->free_local_double(ijsq_contrib);
481 if (bs3_eq_bs4)
482 mem->free_local_double(ijrq_contrib);
483 delete[] vector1[0]; delete[] vector1;
484 delete[] vector3[0]; delete[] vector3;
485}
486
487////////////////////////////////////////////////////////////////////////////
488
489// Local Variables:
490// mode: c++
491// c-file-style: "CLJ-CONDENSED"
492// End:
Note: See TracBrowser for help on using the repository browser.