1 | //
|
---|
2 | // distsh.cc
|
---|
3 | // based on: mbpt/distsh.cc
|
---|
4 | //
|
---|
5 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
6 | //
|
---|
7 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
8 | // Updated: Edward Valeev <edward.valeev@chemistry.gatech.edu>
|
---|
9 | // Maintainer: LPS
|
---|
10 | //
|
---|
11 | // This file is part of the SC Toolkit.
|
---|
12 | //
|
---|
13 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
14 | // it under the terms of the GNU Library General Public License as published by
|
---|
15 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
16 | // any later version.
|
---|
17 | //
|
---|
18 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
19 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
20 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
21 | // GNU Library General Public License for more details.
|
---|
22 | //
|
---|
23 | // You should have received a copy of the GNU Library General Public License
|
---|
24 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
25 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
26 | //
|
---|
27 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
28 | //
|
---|
29 |
|
---|
30 | #ifdef __GNUC__
|
---|
31 | #pragma implementation
|
---|
32 | #endif
|
---|
33 |
|
---|
34 | #include <math.h>
|
---|
35 |
|
---|
36 | #include <util/misc/formio.h>
|
---|
37 | #include <chemistry/qc/basis/distshpair.h>
|
---|
38 |
|
---|
39 | using namespace std;
|
---|
40 | using namespace sc;
|
---|
41 |
|
---|
42 | // Defining REVERSE_ORDERING does the small tasks first. This would give
|
---|
43 | // poorer load balancing and it included only for debugging (in particular,
|
---|
44 | // for stressing MPI libraries up front instead of at the end of a run).
|
---|
45 | #undef REVERSE_ORDERING
|
---|
46 |
|
---|
47 | /////////////////////////////////////////////////////////////////
|
---|
48 | // Function iquicksort performs a quick sort (larger -> smaller)
|
---|
49 | // of the integer data in item by the integer indices in index;
|
---|
50 | // data in item remain unchanged
|
---|
51 | /////////////////////////////////////////////////////////////////
|
---|
52 | static void
|
---|
53 | iqs(int *item,int *index,int left,int right)
|
---|
54 | {
|
---|
55 | register int i,j;
|
---|
56 | int x,y;
|
---|
57 |
|
---|
58 | i=left; j=right;
|
---|
59 | x=item[index[(left+right)/2]];
|
---|
60 |
|
---|
61 | do {
|
---|
62 | while(item[index[i]]>x && i<right) i++;
|
---|
63 | while(x>item[index[j]] && j>left) j--;
|
---|
64 |
|
---|
65 | if (i<=j) {
|
---|
66 | if (item[index[i]] != item[index[j]]) {
|
---|
67 | y=index[i];
|
---|
68 | index[i]=index[j];
|
---|
69 | index[j]=y;
|
---|
70 | }
|
---|
71 | i++; j--;
|
---|
72 | }
|
---|
73 | } while(i<=j);
|
---|
74 |
|
---|
75 | if (left<j) iqs(item,index,left,j);
|
---|
76 | if (i<right) iqs(item,index,i,right);
|
---|
77 | }
|
---|
78 |
|
---|
79 | static void
|
---|
80 | iquicksort(int *item,int *index,long int n)
|
---|
81 | {
|
---|
82 | int i;
|
---|
83 | if (n<=0) return;
|
---|
84 | for (i=0; i<n; i++) {
|
---|
85 | index[i] = i;
|
---|
86 | }
|
---|
87 | iqs(item,index,0,n-1);
|
---|
88 | }
|
---|
89 |
|
---|
90 | /////////////////////////////////////////////////////////////////
|
---|
91 | // DistShellPair class
|
---|
92 |
|
---|
93 | DistShellPair::DistShellPair(const Ref<MessageGrp> & msg,
|
---|
94 | int nthread, int mythread,
|
---|
95 | const Ref<ThreadLock> & lock,
|
---|
96 | const Ref<GaussianBasisSet> & bs1, const Ref<GaussianBasisSet> & bs2,
|
---|
97 | bool dynamic, SharedData *shared):
|
---|
98 | msg_(msg),
|
---|
99 | nthread_(nthread),
|
---|
100 | mythread_(mythread),
|
---|
101 | lock_(lock),
|
---|
102 | bs1_(bs1), bs2_(bs2),
|
---|
103 | task_dynamic_(dynamic),
|
---|
104 | thread_dynamic_(false),
|
---|
105 | cost_(0),
|
---|
106 | Svec_(0),
|
---|
107 | Rvec_(0),
|
---|
108 | Ivec_(0),
|
---|
109 | shared_(shared)
|
---|
110 | {
|
---|
111 | ncpu_ = nthread_*msg->n();
|
---|
112 | ncpu_less_0_ = nthread_*(msg->n()-1);
|
---|
113 |
|
---|
114 | // Cannot do dynamic load balancing if there's only 1 task
|
---|
115 | if (msg->n() == 1) {
|
---|
116 | task_dynamic_ = false;
|
---|
117 | if (dynamic && shared_ != 0) thread_dynamic_ = true;
|
---|
118 | }
|
---|
119 | debug_ = 0;
|
---|
120 | print_percent_ = 10;
|
---|
121 | bs1_eq_bs2_ = (bs1_ == bs2_);
|
---|
122 | req_type_ = 18101;
|
---|
123 | ans_type_ = 18102;
|
---|
124 |
|
---|
125 | // Only for static case with different basis sets
|
---|
126 | if (!bs1_eq_bs2_) {
|
---|
127 | int nsh2 = bs2_->nshell();
|
---|
128 | incS_ = ncpu_/nsh2;
|
---|
129 | incR_ = ncpu_%nsh2;
|
---|
130 | }
|
---|
131 |
|
---|
132 | init();
|
---|
133 | }
|
---|
134 |
|
---|
135 | DistShellPair::~DistShellPair()
|
---|
136 | {
|
---|
137 | delete[] cost_;
|
---|
138 | delete[] Svec_;
|
---|
139 | delete[] Rvec_;
|
---|
140 | delete[] Ivec_;
|
---|
141 | }
|
---|
142 |
|
---|
143 | void
|
---|
144 | DistShellPair::init()
|
---|
145 | {
|
---|
146 | long int nsh1 = bs1_->nshell();
|
---|
147 | long int nsh2 = bs2_->nshell();
|
---|
148 | long int nshpairs = (bs1_eq_bs2_ ? (nsh1*(nsh1+1))/2 : nsh1*nsh2);
|
---|
149 |
|
---|
150 | if (task_dynamic_ || thread_dynamic_) {
|
---|
151 | ntask_ = nshpairs;
|
---|
152 | init_dynamic_work();
|
---|
153 | }
|
---|
154 | else {
|
---|
155 | ntask_ = nshpairs/ncpu_;
|
---|
156 | // Compute starting S_ and R_ for this thread
|
---|
157 | if (bs1_eq_bs2_) {
|
---|
158 | // This is a slightly nonobvious computation of S_ and R_ from SR = msg_->me()*nthread_ + mythread_
|
---|
159 | // when bs1_eq_bs2 == true
|
---|
160 | S_ = 0;
|
---|
161 | R_ = msg_->me()*nthread_ + mythread_;
|
---|
162 | while (R_ > S_) {
|
---|
163 | S_++;
|
---|
164 | R_ = R_ - S_;
|
---|
165 | }
|
---|
166 | }
|
---|
167 | else {
|
---|
168 | // Things are simple when basis sets are different
|
---|
169 | long int absthreadindex = msg_->me()*nthread_ + mythread_;
|
---|
170 | S_ = absthreadindex/nsh2;
|
---|
171 | R_ = absthreadindex%nsh2;
|
---|
172 | }
|
---|
173 | }
|
---|
174 |
|
---|
175 | current_shellpair_ = 0;
|
---|
176 | set_print_percent(print_percent_);
|
---|
177 | }
|
---|
178 |
|
---|
179 | void
|
---|
180 | DistShellPair::init_dynamic_work()
|
---|
181 | {
|
---|
182 | // initialize work arrays
|
---|
183 | int S,R,index;
|
---|
184 | int nsh1 = bs1_->nshell();
|
---|
185 | int nsh2 = bs2_->nshell();
|
---|
186 | delete[] cost_;
|
---|
187 | delete[] Svec_;
|
---|
188 | delete[] Rvec_;
|
---|
189 | delete[] Ivec_;
|
---|
190 | cost_ = new int[ntask_];
|
---|
191 | Svec_ = new int[ntask_];
|
---|
192 | Rvec_ = new int[ntask_];
|
---|
193 | Ivec_ = new int[ntask_];
|
---|
194 | index = 0;
|
---|
195 | for (S=0; S<nsh1; S++) {
|
---|
196 | int Rmax = (bs1_eq_bs2_ ? S : nsh2-1);
|
---|
197 | for (R=0; R<=Rmax; R++) {
|
---|
198 | cost_[index] = bs1_->shell(S).nfunction() * bs2_->shell(R).nfunction() *
|
---|
199 | bs1_->shell(S).nprimitive() * bs2_->shell(R).nprimitive();
|
---|
200 | #ifdef REVERSE_ORDERING
|
---|
201 | cost_[index] = - cost[index];
|
---|
202 | #endif
|
---|
203 | Svec_[index] = S;
|
---|
204 | Rvec_[index] = R;
|
---|
205 | Ivec_[index] = index;
|
---|
206 | index++;
|
---|
207 | }
|
---|
208 | }
|
---|
209 |
|
---|
210 | // sort work
|
---|
211 | iquicksort(cost_, Ivec_, ntask_);
|
---|
212 | if (debug_ > 1) {
|
---|
213 | ExEnv::outn() << "costs of shell pairs" << endl;
|
---|
214 | for (index=0; index<ntask_; index++) {
|
---|
215 | ExEnv::outn() << scprintf(" (%d %d):%d",Svec_[Ivec_[index]],Rvec_[Ivec_[index]],
|
---|
216 | cost_[Ivec_[index]])
|
---|
217 | << endl;
|
---|
218 | }
|
---|
219 | }
|
---|
220 | }
|
---|
221 |
|
---|
222 | void
|
---|
223 | DistShellPair::serve_tasks()
|
---|
224 | {
|
---|
225 | // process requests
|
---|
226 | long int nreq = ntask_ + ncpu_less_0_;
|
---|
227 | int nreq_left = nreq;
|
---|
228 | while (nreq_left) {
|
---|
229 | int node;
|
---|
230 | msg_->recvt(req_type_,&node,1);
|
---|
231 | int SR[2];
|
---|
232 | if (current_shellpair_ < ntask_) {
|
---|
233 | SR[0] = Svec_[Ivec_[current_shellpair_]];
|
---|
234 | SR[1] = Rvec_[Ivec_[current_shellpair_]];
|
---|
235 | msg_->sendt(node,ans_type_,SR,2);
|
---|
236 | if (current_shellpair_%print_interval_ == 0) {
|
---|
237 | ExEnv::outn() << indent
|
---|
238 | << scprintf("sent shell pair (%3d %3d) to %3d, %6.3f%% complete",
|
---|
239 | SR[0],SR[1],node,(double)current_shellpair_*100.0/nreq)
|
---|
240 | << " (" << current_shellpair_ << " of " << ntask_ << ")"
|
---|
241 | << endl;
|
---|
242 | }
|
---|
243 | current_shellpair_++;
|
---|
244 | }
|
---|
245 | else {
|
---|
246 | SR[0] = -1;
|
---|
247 | SR[1] = -1;
|
---|
248 | msg_->sendt(node,ans_type_,SR,2);
|
---|
249 | if (current_shellpair_%print_interval_ == 0) {
|
---|
250 | ExEnv::outn() << indent
|
---|
251 | << scprintf("sent no more tasks message to %3d, %6.3f%% complete",
|
---|
252 | node,(double)current_shellpair_*100.0/nreq)
|
---|
253 | << endl;
|
---|
254 | }
|
---|
255 | current_shellpair_++;
|
---|
256 | }
|
---|
257 | nreq_left--;
|
---|
258 | }
|
---|
259 |
|
---|
260 | if (debug_) {
|
---|
261 | ExEnv::outn() << "all requests processed" << endl;
|
---|
262 | }
|
---|
263 | }
|
---|
264 |
|
---|
265 | int
|
---|
266 | DistShellPair::get_task(int &S, int &R)
|
---|
267 | {
|
---|
268 | if (task_dynamic_) { // dynamic load balancing
|
---|
269 | int me = msg_->me();
|
---|
270 | if (me == 0) {
|
---|
271 | if (mythread_ == 0) serve_tasks();
|
---|
272 | return 0;
|
---|
273 | }
|
---|
274 | else {
|
---|
275 | int SR[2];
|
---|
276 |
|
---|
277 | lock_->lock();
|
---|
278 | msg_->sendt(0,req_type_,&me,1);
|
---|
279 | msg_->recvt(ans_type_,SR,2);
|
---|
280 | lock_->unlock();
|
---|
281 |
|
---|
282 | S = SR[0];
|
---|
283 | R = SR[1];
|
---|
284 | if (S == -1) return 0;
|
---|
285 | }
|
---|
286 | }
|
---|
287 | else if (thread_dynamic_) {
|
---|
288 | long my_shellpair;
|
---|
289 | lock_->lock();
|
---|
290 | my_shellpair = shared_->shellpair_++;
|
---|
291 | lock_->unlock();
|
---|
292 | if (my_shellpair < ntask_) {
|
---|
293 | S = Svec_[Ivec_[my_shellpair]];
|
---|
294 | R = Rvec_[Ivec_[my_shellpair]];
|
---|
295 | }
|
---|
296 | else {
|
---|
297 | S = R = -1;
|
---|
298 | return 0;
|
---|
299 | }
|
---|
300 | }
|
---|
301 | else { // static load balancing
|
---|
302 | int nsh1 = bs1_->nshell();
|
---|
303 | int nsh2 = bs2_->nshell();
|
---|
304 | if (S_ >= nsh1 || R_ >= nsh2) return 0;
|
---|
305 | S = S_;
|
---|
306 | R = R_;
|
---|
307 | // advance to the next S_, R_
|
---|
308 | if (bs1_eq_bs2_) {
|
---|
309 | R_ += ncpu_;
|
---|
310 | while (R_ > S_) {
|
---|
311 | S_++;
|
---|
312 | R_ = R_ - S_;
|
---|
313 | }
|
---|
314 | }
|
---|
315 | else {
|
---|
316 | S_ += incS_;
|
---|
317 | R_ += incR_;
|
---|
318 | if (R_ >= nsh2) { S_++;
|
---|
319 | R_ -= nsh2;
|
---|
320 | }
|
---|
321 | }
|
---|
322 | if (current_shellpair_%print_interval_ == 0) {
|
---|
323 | if (mythread_ == 0 && msg_->me() == 0) {
|
---|
324 | ExEnv::outn() << indent
|
---|
325 | << scprintf(" working on shell pair (%3d %3d), %6.3f%% complete",
|
---|
326 | S,R,((double)current_shellpair_*100.0)/ntask_)
|
---|
327 | << " (" << current_shellpair_ << " of " << ntask_ << ")"
|
---|
328 | << endl;
|
---|
329 | }
|
---|
330 | }
|
---|
331 | current_shellpair_++;
|
---|
332 | }
|
---|
333 | return 1;
|
---|
334 | }
|
---|
335 |
|
---|
336 | void
|
---|
337 | DistShellPair::set_print_percent(double pi)
|
---|
338 | {
|
---|
339 | print_percent_ = pi;
|
---|
340 | double print_interval = (double) print_percent_ * ntask_ / 100;
|
---|
341 | print_interval_ = (long int)floor(print_interval + 0.5);
|
---|
342 | if (print_interval_ < 1)
|
---|
343 | print_interval_ = 1;
|
---|
344 | }
|
---|
345 |
|
---|
346 | ////////////////////////////////////////////////////////////////////////////
|
---|
347 |
|
---|
348 | // Local Variables:
|
---|
349 | // mode: c++
|
---|
350 | // c-file-style: "CLJ-CONDENSED"
|
---|
351 | // End:
|
---|