source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbptr12/transform_12inds.cc@ 1513599

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since 1513599 was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 14.3 KB
Line 
1//
2// transform_12inds.cc
3//
4// Copyright (C) 2001 Edward Valeev
5//
6// Author: Edward Valeev <edward.valeev@chemistry.gatech.edu>
7// Maintainer: EV
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28#ifdef __GNUC__
29#pragma implementation
30#endif
31
32#include <math.h>
33#include <stdexcept>
34
35#include <util/misc/formio.h>
36#include <util/misc/timer.h>
37#include <chemistry/qc/basis/gpetite.h>
38#include <chemistry/qc/mbpt/bzerofast.h>
39#include <chemistry/qc/mbpt/util.h>
40#include <chemistry/qc/basis/distshpair.h>
41#include <chemistry/qc/mbptr12/blas.h>
42#include <chemistry/qc/mbptr12/transform_12inds.h>
43
44using namespace std;
45using namespace sc;
46
47#define PRINT1Q 0
48#define PRINT_NUM_TE_TYPES 1
49
50// The FAST_BUT_WRONG flags is useful for exercising the communications
51// layer. It causes the first and second quarter transformation to be
52// omitted, but all communication is still performed. This permits
53// problems in communications libraries to be more quickly identified.
54#define FAST_BUT_WRONG 0
55
56TwoBodyMOIntsTransform_12Inds::TwoBodyMOIntsTransform_12Inds(
57 const Ref<TwoBodyMOIntsTransform>& tform, int mythread, int nthread,
58 const Ref<ThreadLock>& lock, const Ref<TwoBodyInt> &tbint, double tol, int debug) :
59 tform_(tform), mythread_(mythread), nthread_(nthread), lock_(lock), tbint_(tbint),
60 tol_(tol), debug_(debug)
61{
62 timer_ = new RegionTimer();
63 aoint_computed_ = 0;
64 ni_ = tform_->batchsize();
65 i_offset_ = 0;
66}
67
68TwoBodyMOIntsTransform_12Inds::~TwoBodyMOIntsTransform_12Inds()
69{
70 timer_ = 0;
71}
72
73/*
74 Distribute work by SR
75
76 for all PQ
77 compute unique (PQ|RS)
78 transform to (RS|IM) where M are all AOs for basis set 2
79 end PQ
80
81 use BLAS to transform each rsIM to rsIX
82 transform RSIX to IJXS and accumulate to the tasks that holds respective ij-pairs.
83
84 end SR
85*/
86
87void
88TwoBodyMOIntsTransform_12Inds::run()
89{
90 Ref<MemoryGrp> mem = tform_->mem();
91 Ref<MessageGrp> msg = tform_->msg();
92 Ref<R12IntsAcc> ints_acc = tform_->ints_acc();
93 const int me = msg->me();
94 const int nproc = msg->n();
95 Ref<MOIndexSpace> space1 = tform_->space1();
96 Ref<MOIndexSpace> space2 = tform_->space2();
97 Ref<MOIndexSpace> space3 = tform_->space3();
98 Ref<MOIndexSpace> space4 = tform_->space4();
99
100 Ref<GaussianBasisSet> bs1 = space1->basis();
101 Ref<GaussianBasisSet> bs2 = space2->basis();
102 Ref<GaussianBasisSet> bs3 = space3->basis();
103 Ref<GaussianBasisSet> bs4 = space4->basis();
104 const bool bs1_eq_bs2 = (bs1 == bs2);
105 const bool bs3_eq_bs4 = (bs3 == bs4);
106
107 const bool dynamic = tform_->dynamic();
108 const double print_percent = tform_->print_percent();
109
110 const int ni = ni_;
111 const int rank1 = space1->rank();
112 const int rank2 = space2->rank();
113 const int nfuncmax1 = bs1->max_nfunction_in_shell();
114 const int nfuncmax2 = bs2->max_nfunction_in_shell();
115 const int nfuncmax3 = bs3->max_nfunction_in_shell();
116 const int nfuncmax4 = bs4->max_nfunction_in_shell();
117 const int nsh1 = bs1->nshell();
118 const int nsh2 = bs2->nshell();
119 const int nsh3 = bs3->nshell();
120 const int nsh4 = bs4->nshell();
121 const int nbasis1 = bs1->nbasis();
122 const int nbasis2 = bs2->nbasis();
123 const int nbasis3 = bs3->nbasis();
124 const int nbasis4 = bs4->nbasis();
125 double dtol = pow(2.0,tol_);
126 const size_t memgrp_blksize = tform_->memgrp_blksize()/sizeof(double);
127
128 double** vector1 = new double*[nbasis1];
129 double** vector2 = new double*[nbasis2];
130 vector1[0] = new double[rank1*nbasis1];
131 vector2[0] = new double[rank2*nbasis2];
132 for(int i=1; i<nbasis1; i++) vector1[i] = vector1[i-1] + rank1;
133 for(int i=1; i<nbasis2; i++) vector2[i] = vector2[i-1] + rank2;
134 space1->coefs().convert(vector1);
135 space2->coefs().convert(vector2);
136
137 /*-------------------------------------------------------------
138 Find integrals buffers to 1/r12, r12, and [r12,T1] integrals
139 -------------------------------------------------------------*/
140 const int num_te_types = tform_->num_te_types();
141 const double *intbuf[TwoBodyInt::num_tbint_types];
142 intbuf[TwoBodyInt::eri] = tbint_->buffer(TwoBodyInt::eri);
143 intbuf[TwoBodyInt::r12] = tbint_->buffer(TwoBodyInt::r12);
144 intbuf[TwoBodyInt::r12t1] = tbint_->buffer(TwoBodyInt::r12t1);
145 intbuf[TwoBodyInt::r12t2] = tbint_->buffer(TwoBodyInt::r12t2);
146
147 /*-----------------------------------------------------
148 Allocate buffers for partially transformed integrals
149 -----------------------------------------------------*/
150 double *ijrs_contrib; // local contributions to integral_ijrs
151 double **rsiq_ints = new double*[num_te_types]; // quarter-transformed integrals for each RS pair
152 for(int te_type=0;te_type<num_te_types;te_type++) {
153 rsiq_ints[te_type] = new double[ni*nbasis2*nfuncmax3*nfuncmax4];
154 }
155 ijrs_contrib = mem->malloc_local_double(ni*rank2*nfuncmax3*nfuncmax4);
156
157 /*-----------------------------
158 Initialize work distribution
159 -----------------------------*/
160 sc::DistShellPair shellpairs(msg,nthread_,mythread_,lock_,bs4,bs3,dynamic,
161 tform_->shell_pair_data());
162 shellpairs.set_debug(debug_);
163 if (debug_) shellpairs.set_print_percent(print_percent/10.0);
164 else shellpairs.set_print_percent(print_percent);
165 int work_per_thread = bs3_eq_bs4 ?
166 ((nsh3*(nsh3+1))/2)/(nproc*nthread_) :
167 (nsh3*nsh4)/(nproc*nthread_) ;
168 int print_interval = work_per_thread/100;
169 int time_interval = work_per_thread/10;
170 int print_index = 0;
171 if (print_interval == 0) print_interval = 1;
172 if (time_interval == 0) time_interval = 1;
173 if (work_per_thread == 0) work_per_thread = 1;
174
175 if (debug_) {
176 lock_->lock();
177 ExEnv::outn() << scprintf("%d:%d: starting get_task loop",me,mythread_) << endl;
178 lock_->unlock();
179 }
180
181 Ref<GenPetite4> p4list
182 = construct_gpetite(bs1,bs2,bs3,bs4);
183
184#if FAST_BUT_WRONG
185 for(int te_type=0;te_type<num_te_types;te_type++) {
186 bzerofast(rsiq_ints[te_type], ni*nbasis2*nfuncmax3*nfuncmax4);
187 }
188 bzerofast(ijrs_contrib, ni*nbasis2*nfuncmax3*nfuncmax4);
189#endif
190
191 int R = 0;
192 int S = 0;
193 while (shellpairs.get_task(S,R)) {
194 // if bs3_eq_bs4 then S >= R always (see sc::exp::DistShellPair)
195 int nr = bs3->shell(R).nfunction();
196 int r_offset = bs3->shell_to_function(R);
197
198 int ns = bs4->shell(S).nfunction();
199 int s_offset = bs4->shell_to_function(S);
200
201 const int nrs = nr*ns;
202
203 if (debug_ > 1 && (print_index++)%print_interval == 0) {
204 lock_->lock();
205 ExEnv::outn() << scprintf("%d:%d: (PQ|%d %d) %d%%",
206 me,mythread_,R,S,(100*print_index)/work_per_thread)
207 << endl;
208 lock_->unlock();
209 }
210 if (debug_ > 1 && (print_index)%time_interval == 0) {
211 lock_->lock();
212 ExEnv::outn() << scprintf("timer for %d:%d:",me,mythread_) << endl;
213 timer_->print();
214 lock_->unlock();
215 }
216
217#if !FAST_BUT_WRONG
218 // Zero out 1 q.t. storage
219 for(int te_type=0;te_type<num_te_types;te_type++)
220 bzerofast(rsiq_ints[te_type], nrs*ni*nbasis2);
221
222 for (int P=0; P<nsh1; P++) {
223 int np = bs1->shell(P).nfunction();
224 int p_offset = bs1->shell_to_function(P);
225
226 int Qmax = (bs1_eq_bs2 ? P : nsh2-1);
227 for (int Q=0; Q<=Qmax; Q++) {
228 int nq = bs2->shell(Q).nfunction();
229 int q_offset = bs2->shell_to_function(Q);
230
231 // check if symmetry unique and compute degeneracy
232 int deg = p4list->in_p4(P,Q,R,S);
233 if (deg == 0)
234 continue;
235 double symfac = (double) deg;
236
237 if (tbint_->log2_shell_bound(P,Q,R,S) < tol_) {
238 continue; // skip shell quartets less than tol
239 }
240
241 aoint_computed_++;
242
243 timer_->enter("AO integrals");
244 tbint_->compute_shell(P,Q,R,S);
245 timer_->exit("AO integrals");
246
247 timer_->enter("1. q.t.");
248
249 // Begin first quarter transformation;
250 // generate (iq|rs) for i active
251 // if bs1_eq_bs2 then (ip|rs) are also generated
252 // store the integrals as rsiq
253 for(int te_type=0; te_type<num_te_types; te_type++) {
254 const double *pqrs_ptr = intbuf[te_type];
255
256 for (int bf1 = 0; bf1 < np; bf1++) {
257 int p = p_offset + bf1;
258 int qmax = (bs1_eq_bs2 && P == Q) ? bf1 : nq-1;
259
260 for (int bf2 = 0; bf2 <= qmax; bf2++) {
261 int q = q_offset + bf2;
262
263 for (int bf3 = 0; bf3 < nr; bf3++) {
264 int smin = (bs3_eq_bs4 && R == S) ? bf3 : 0;
265 pqrs_ptr += smin;
266
267 for (int bf4 = smin; bf4 <ns; bf4++) {
268
269 // Only transform integrals larger than the threshold
270 if (fabs(*pqrs_ptr) > dtol) {
271
272 double* rsiq_ptr = &rsiq_ints[te_type][q + nbasis2*(0 + ni*(bf4 + ns*bf3))];
273 const double* c_pi = vector1[p] + i_offset_;
274
275 double* rsip_ptr;
276 const double* c_qi;
277 if (bs1_eq_bs2) {
278 rsip_ptr = &rsiq_ints[te_type][p + nbasis2*(0 + ni*(bf4 + ns*bf3))];
279 c_qi = vector1[q] + i_offset_;
280 }
281
282 double rsiq_int_contrib = *pqrs_ptr;
283 // multiply each integral by its symmetry degeneracy factor
284 rsiq_int_contrib *= symfac;
285
286 if (bs1_eq_bs2) {
287
288 double rsip_int_contrib = rsiq_int_contrib;
289 if (te_type == TwoBodyInt::r12t1)
290 rsip_int_contrib = -1.0*rsiq_int_contrib;
291
292 if (p == q) {
293 for (int i=0; i<ni; i++) {
294 *rsiq_ptr += *c_pi++ * rsiq_int_contrib;
295 rsiq_ptr += nbasis2;
296 }
297 }
298 else {
299 // p != q
300 for (int i=0; i<ni; i++) {
301 *rsip_ptr += *c_qi++ * rsip_int_contrib;
302 rsip_ptr += nbasis2;
303 *rsiq_ptr += *c_pi++ * rsiq_int_contrib;
304 rsiq_ptr += nbasis2;
305 }
306 }
307
308 }
309 else {
310
311 for (int i=0; i<ni; i++) {
312 *rsiq_ptr += *c_pi++ * rsiq_int_contrib;
313 rsiq_ptr += nbasis2;
314 }
315
316 } // endif bs1_eq_bs2
317 } // endif dtol
318
319 pqrs_ptr++;
320 } // exit bf4 loop
321 } // exit bf3 loop
322 } // exit bf2 loop
323 pqrs_ptr += (nq - qmax - 1) * nrs;
324 } // exit bf1 loop
325 // end of first quarter transformation
326 }
327 timer_->exit("1. q.t.");
328
329 } // exit P loop
330 } // exit Q loop
331#endif // !FAST_BUT_WRONG
332
333#if PRINT1Q
334 {
335 lock_->lock();
336 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
337 for (int i = 0; i<ni; i++) {
338 for (int q = 0; q<nbasis2; q++) {
339 for (int r = 0; r<nr; r++) {
340 int rr = r+r_offset;
341 for (int s = 0; s<ns; s++) {
342 int ss = s+s_offset;
343 double value = rsiq_ints[te_type][q+nbasis2*(i+ni*(s+ns*r))];
344 printf("1Q: type = %d (%d %d|%d %d) = %12.8f\n",
345 te_type,i+i_offset_,q,rr,ss,value);
346 }
347 }
348 }
349 }
350 }
351 lock_->unlock();
352 }
353#endif
354
355 const int nij = ni*rank2;
356 const int niq = ni*nbasis2;
357 double* ij_ints = new double[nij];
358
359 timer_->enter("2. q.t.");
360 // Begin second quarter transformation;
361 // generate (ij|rs) stored as ijrs
362
363 bzerofast(ijrs_contrib, ni*rank2*nrs);
364
365 for(int te_type=0; te_type<num_te_types; te_type++) {
366 const double *rsiq_ptr = rsiq_ints[te_type];
367 double *ijrs_ptr = ijrs_contrib;
368
369 for (int bf3 = 0; bf3 < nr; bf3++) {
370 int smin = (bs3_eq_bs4 && R == S) ? bf3 : 0;
371 rsiq_ptr += smin*niq;
372 ijrs_ptr += smin;
373
374 for (int bf4 = smin; bf4 <ns; bf4++) {
375
376 // second quarter transform
377 // ij = iq * qj
378 const char notransp = 'n';
379 const double one = 1.0;
380 const double zero = 0.0;
381 F77_DGEMM(&notransp,&notransp,&rank2,&ni,&nbasis2,&one,vector2[0],&rank2,
382 rsiq_ptr,&nbasis2,&zero,ij_ints,&rank2);
383
384 // Copy rsij integrals into ijrs integrals
385 const double* ij_src = ij_ints;
386 double* ij_dst = ijrs_ptr;
387 for(int i=0; i<ni; i++)
388 for(int j=0; j<rank2; j++) {
389 *ij_dst = *ij_src++;
390 ij_dst += nrs;
391 }
392
393 rsiq_ptr += niq;
394 ijrs_ptr++;
395
396 }
397 }
398
399 // Send the integrals out
400 double* ijr_ptr = ijrs_contrib;
401 for (int i=0; i<ni; i++) {
402 for (int j=0; j<rank2; j++) {
403
404 int ij_proc = (i*rank2 + j)%nproc;
405 int ij_index = (i*rank2 + j)/nproc;
406 const size_t ijrs_start = (size_t)(num_te_types*ij_index + te_type) * memgrp_blksize;
407 size_t ijr_offset = (size_t)s_offset + r_offset*nbasis4 + ijrs_start;
408
409 for (int bf3 = 0; bf3 < nr; bf3++) {
410 // Sum the ijrs_contrib to the appropriate place
411 mem->sum_reduction_on_node(ijr_ptr, ijr_offset, ns, ij_proc);
412 ijr_offset += nbasis4;
413 ijr_ptr += ns;
414 }
415 }
416 }
417 }
418
419 timer_->exit("2. q.t.");
420
421 delete[] ij_ints;
422
423 } // exit while get_task
424
425 if (debug_) {
426 lock_->lock();
427 ExEnv::outn() << scprintf("%d:%d: done with get_task loop",me,mythread_) << endl;
428 lock_->unlock();
429 }
430
431 for(int te_type=0; te_type<num_te_types; te_type++) {
432 delete[] rsiq_ints[te_type];
433 }
434 delete[] rsiq_ints;
435 mem->free_local_double(ijrs_contrib);
436 delete[] vector1[0]; delete[] vector1;
437 delete[] vector2[0]; delete[] vector2;
438}
439
440////////////////////////////////////////////////////////////////////////////
441
442// Local Variables:
443// mode: c++
444// c-file-style: "CLJ-CONDENSED"
445// End:
Note: See TracBrowser for help on using the repository browser.