[0b990d] | 1 | //
|
---|
| 2 | // distsh.cc
|
---|
| 3 | // based on: mbpt/distsh.cc
|
---|
| 4 | //
|
---|
| 5 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
| 6 | //
|
---|
| 7 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
| 8 | // Updated: Edward Valeev <edward.valeev@chemistry.gatech.edu>
|
---|
| 9 | // Maintainer: LPS
|
---|
| 10 | //
|
---|
| 11 | // This file is part of the SC Toolkit.
|
---|
| 12 | //
|
---|
| 13 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
| 14 | // it under the terms of the GNU Library General Public License as published by
|
---|
| 15 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
| 16 | // any later version.
|
---|
| 17 | //
|
---|
| 18 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
| 19 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 20 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 21 | // GNU Library General Public License for more details.
|
---|
| 22 | //
|
---|
| 23 | // You should have received a copy of the GNU Library General Public License
|
---|
| 24 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
| 25 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 26 | //
|
---|
| 27 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
| 28 | //
|
---|
| 29 |
|
---|
| 30 | #ifdef __GNUC__
|
---|
| 31 | #pragma implementation
|
---|
| 32 | #endif
|
---|
| 33 |
|
---|
| 34 | #include <math.h>
|
---|
| 35 |
|
---|
| 36 | #include <util/misc/formio.h>
|
---|
| 37 | #include <chemistry/qc/basis/distshpair.h>
|
---|
| 38 |
|
---|
| 39 | using namespace std;
|
---|
| 40 | using namespace sc;
|
---|
| 41 |
|
---|
| 42 | // Defining REVERSE_ORDERING does the small tasks first. This would give
|
---|
| 43 | // poorer load balancing and it included only for debugging (in particular,
|
---|
| 44 | // for stressing MPI libraries up front instead of at the end of a run).
|
---|
| 45 | #undef REVERSE_ORDERING
|
---|
| 46 |
|
---|
| 47 | /////////////////////////////////////////////////////////////////
|
---|
| 48 | // Function iquicksort performs a quick sort (larger -> smaller)
|
---|
| 49 | // of the integer data in item by the integer indices in index;
|
---|
| 50 | // data in item remain unchanged
|
---|
| 51 | /////////////////////////////////////////////////////////////////
|
---|
| 52 | static void
|
---|
| 53 | iqs(int *item,int *index,int left,int right)
|
---|
| 54 | {
|
---|
| 55 | register int i,j;
|
---|
| 56 | int x,y;
|
---|
| 57 |
|
---|
| 58 | i=left; j=right;
|
---|
| 59 | x=item[index[(left+right)/2]];
|
---|
| 60 |
|
---|
| 61 | do {
|
---|
| 62 | while(item[index[i]]>x && i<right) i++;
|
---|
| 63 | while(x>item[index[j]] && j>left) j--;
|
---|
| 64 |
|
---|
| 65 | if (i<=j) {
|
---|
| 66 | if (item[index[i]] != item[index[j]]) {
|
---|
| 67 | y=index[i];
|
---|
| 68 | index[i]=index[j];
|
---|
| 69 | index[j]=y;
|
---|
| 70 | }
|
---|
| 71 | i++; j--;
|
---|
| 72 | }
|
---|
| 73 | } while(i<=j);
|
---|
| 74 |
|
---|
| 75 | if (left<j) iqs(item,index,left,j);
|
---|
| 76 | if (i<right) iqs(item,index,i,right);
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 | static void
|
---|
| 80 | iquicksort(int *item,int *index,long int n)
|
---|
| 81 | {
|
---|
| 82 | int i;
|
---|
| 83 | if (n<=0) return;
|
---|
| 84 | for (i=0; i<n; i++) {
|
---|
| 85 | index[i] = i;
|
---|
| 86 | }
|
---|
| 87 | iqs(item,index,0,n-1);
|
---|
| 88 | }
|
---|
| 89 |
|
---|
| 90 | /////////////////////////////////////////////////////////////////
|
---|
| 91 | // DistShellPair class
|
---|
| 92 |
|
---|
| 93 | DistShellPair::DistShellPair(const Ref<MessageGrp> & msg,
|
---|
| 94 | int nthread, int mythread,
|
---|
| 95 | const Ref<ThreadLock> & lock,
|
---|
| 96 | const Ref<GaussianBasisSet> & bs1, const Ref<GaussianBasisSet> & bs2,
|
---|
| 97 | bool dynamic, SharedData *shared):
|
---|
| 98 | msg_(msg),
|
---|
| 99 | nthread_(nthread),
|
---|
| 100 | mythread_(mythread),
|
---|
| 101 | lock_(lock),
|
---|
| 102 | bs1_(bs1), bs2_(bs2),
|
---|
| 103 | task_dynamic_(dynamic),
|
---|
| 104 | thread_dynamic_(false),
|
---|
| 105 | cost_(0),
|
---|
| 106 | Svec_(0),
|
---|
| 107 | Rvec_(0),
|
---|
| 108 | Ivec_(0),
|
---|
| 109 | shared_(shared)
|
---|
| 110 | {
|
---|
| 111 | ncpu_ = nthread_*msg->n();
|
---|
| 112 | ncpu_less_0_ = nthread_*(msg->n()-1);
|
---|
| 113 |
|
---|
| 114 | // Cannot do dynamic load balancing if there's only 1 task
|
---|
| 115 | if (msg->n() == 1) {
|
---|
| 116 | task_dynamic_ = false;
|
---|
| 117 | if (dynamic && shared_ != 0) thread_dynamic_ = true;
|
---|
| 118 | }
|
---|
| 119 | debug_ = 0;
|
---|
| 120 | print_percent_ = 10;
|
---|
| 121 | bs1_eq_bs2_ = (bs1_ == bs2_);
|
---|
| 122 | req_type_ = 18101;
|
---|
| 123 | ans_type_ = 18102;
|
---|
| 124 |
|
---|
| 125 | // Only for static case with different basis sets
|
---|
| 126 | if (!bs1_eq_bs2_) {
|
---|
| 127 | int nsh2 = bs2_->nshell();
|
---|
| 128 | incS_ = ncpu_/nsh2;
|
---|
| 129 | incR_ = ncpu_%nsh2;
|
---|
| 130 | }
|
---|
| 131 |
|
---|
| 132 | init();
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | DistShellPair::~DistShellPair()
|
---|
| 136 | {
|
---|
| 137 | delete[] cost_;
|
---|
| 138 | delete[] Svec_;
|
---|
| 139 | delete[] Rvec_;
|
---|
| 140 | delete[] Ivec_;
|
---|
| 141 | }
|
---|
| 142 |
|
---|
| 143 | void
|
---|
| 144 | DistShellPair::init()
|
---|
| 145 | {
|
---|
| 146 | long int nsh1 = bs1_->nshell();
|
---|
| 147 | long int nsh2 = bs2_->nshell();
|
---|
| 148 | long int nshpairs = (bs1_eq_bs2_ ? (nsh1*(nsh1+1))/2 : nsh1*nsh2);
|
---|
| 149 |
|
---|
| 150 | if (task_dynamic_ || thread_dynamic_) {
|
---|
| 151 | ntask_ = nshpairs;
|
---|
| 152 | init_dynamic_work();
|
---|
| 153 | }
|
---|
| 154 | else {
|
---|
| 155 | ntask_ = nshpairs/ncpu_;
|
---|
| 156 | // Compute starting S_ and R_ for this thread
|
---|
| 157 | if (bs1_eq_bs2_) {
|
---|
| 158 | // This is a slightly nonobvious computation of S_ and R_ from SR = msg_->me()*nthread_ + mythread_
|
---|
| 159 | // when bs1_eq_bs2 == true
|
---|
| 160 | S_ = 0;
|
---|
| 161 | R_ = msg_->me()*nthread_ + mythread_;
|
---|
| 162 | while (R_ > S_) {
|
---|
| 163 | S_++;
|
---|
| 164 | R_ = R_ - S_;
|
---|
| 165 | }
|
---|
| 166 | }
|
---|
| 167 | else {
|
---|
| 168 | // Things are simple when basis sets are different
|
---|
| 169 | long int absthreadindex = msg_->me()*nthread_ + mythread_;
|
---|
| 170 | S_ = absthreadindex/nsh2;
|
---|
| 171 | R_ = absthreadindex%nsh2;
|
---|
| 172 | }
|
---|
| 173 | }
|
---|
| 174 |
|
---|
| 175 | current_shellpair_ = 0;
|
---|
| 176 | set_print_percent(print_percent_);
|
---|
| 177 | }
|
---|
| 178 |
|
---|
| 179 | void
|
---|
| 180 | DistShellPair::init_dynamic_work()
|
---|
| 181 | {
|
---|
| 182 | // initialize work arrays
|
---|
| 183 | int S,R,index;
|
---|
| 184 | int nsh1 = bs1_->nshell();
|
---|
| 185 | int nsh2 = bs2_->nshell();
|
---|
| 186 | delete[] cost_;
|
---|
| 187 | delete[] Svec_;
|
---|
| 188 | delete[] Rvec_;
|
---|
| 189 | delete[] Ivec_;
|
---|
| 190 | cost_ = new int[ntask_];
|
---|
| 191 | Svec_ = new int[ntask_];
|
---|
| 192 | Rvec_ = new int[ntask_];
|
---|
| 193 | Ivec_ = new int[ntask_];
|
---|
| 194 | index = 0;
|
---|
| 195 | for (S=0; S<nsh1; S++) {
|
---|
| 196 | int Rmax = (bs1_eq_bs2_ ? S : nsh2-1);
|
---|
| 197 | for (R=0; R<=Rmax; R++) {
|
---|
| 198 | cost_[index] = bs1_->shell(S).nfunction() * bs2_->shell(R).nfunction() *
|
---|
| 199 | bs1_->shell(S).nprimitive() * bs2_->shell(R).nprimitive();
|
---|
| 200 | #ifdef REVERSE_ORDERING
|
---|
| 201 | cost_[index] = - cost[index];
|
---|
| 202 | #endif
|
---|
| 203 | Svec_[index] = S;
|
---|
| 204 | Rvec_[index] = R;
|
---|
| 205 | Ivec_[index] = index;
|
---|
| 206 | index++;
|
---|
| 207 | }
|
---|
| 208 | }
|
---|
| 209 |
|
---|
| 210 | // sort work
|
---|
| 211 | iquicksort(cost_, Ivec_, ntask_);
|
---|
| 212 | if (debug_ > 1) {
|
---|
| 213 | ExEnv::outn() << "costs of shell pairs" << endl;
|
---|
| 214 | for (index=0; index<ntask_; index++) {
|
---|
| 215 | ExEnv::outn() << scprintf(" (%d %d):%d",Svec_[Ivec_[index]],Rvec_[Ivec_[index]],
|
---|
| 216 | cost_[Ivec_[index]])
|
---|
| 217 | << endl;
|
---|
| 218 | }
|
---|
| 219 | }
|
---|
| 220 | }
|
---|
| 221 |
|
---|
| 222 | void
|
---|
| 223 | DistShellPair::serve_tasks()
|
---|
| 224 | {
|
---|
| 225 | // process requests
|
---|
| 226 | long int nreq = ntask_ + ncpu_less_0_;
|
---|
| 227 | int nreq_left = nreq;
|
---|
| 228 | while (nreq_left) {
|
---|
| 229 | int node;
|
---|
| 230 | msg_->recvt(req_type_,&node,1);
|
---|
| 231 | int SR[2];
|
---|
| 232 | if (current_shellpair_ < ntask_) {
|
---|
| 233 | SR[0] = Svec_[Ivec_[current_shellpair_]];
|
---|
| 234 | SR[1] = Rvec_[Ivec_[current_shellpair_]];
|
---|
| 235 | msg_->sendt(node,ans_type_,SR,2);
|
---|
| 236 | if (current_shellpair_%print_interval_ == 0) {
|
---|
| 237 | ExEnv::outn() << indent
|
---|
| 238 | << scprintf("sent shell pair (%3d %3d) to %3d, %6.3f%% complete",
|
---|
| 239 | SR[0],SR[1],node,(double)current_shellpair_*100.0/nreq)
|
---|
| 240 | << " (" << current_shellpair_ << " of " << ntask_ << ")"
|
---|
| 241 | << endl;
|
---|
| 242 | }
|
---|
| 243 | current_shellpair_++;
|
---|
| 244 | }
|
---|
| 245 | else {
|
---|
| 246 | SR[0] = -1;
|
---|
| 247 | SR[1] = -1;
|
---|
| 248 | msg_->sendt(node,ans_type_,SR,2);
|
---|
| 249 | if (current_shellpair_%print_interval_ == 0) {
|
---|
| 250 | ExEnv::outn() << indent
|
---|
| 251 | << scprintf("sent no more tasks message to %3d, %6.3f%% complete",
|
---|
| 252 | node,(double)current_shellpair_*100.0/nreq)
|
---|
| 253 | << endl;
|
---|
| 254 | }
|
---|
| 255 | current_shellpair_++;
|
---|
| 256 | }
|
---|
| 257 | nreq_left--;
|
---|
| 258 | }
|
---|
| 259 |
|
---|
| 260 | if (debug_) {
|
---|
| 261 | ExEnv::outn() << "all requests processed" << endl;
|
---|
| 262 | }
|
---|
| 263 | }
|
---|
| 264 |
|
---|
| 265 | int
|
---|
| 266 | DistShellPair::get_task(int &S, int &R)
|
---|
| 267 | {
|
---|
| 268 | if (task_dynamic_) { // dynamic load balancing
|
---|
| 269 | int me = msg_->me();
|
---|
| 270 | if (me == 0) {
|
---|
| 271 | if (mythread_ == 0) serve_tasks();
|
---|
| 272 | return 0;
|
---|
| 273 | }
|
---|
| 274 | else {
|
---|
| 275 | int SR[2];
|
---|
| 276 |
|
---|
| 277 | lock_->lock();
|
---|
| 278 | msg_->sendt(0,req_type_,&me,1);
|
---|
| 279 | msg_->recvt(ans_type_,SR,2);
|
---|
| 280 | lock_->unlock();
|
---|
| 281 |
|
---|
| 282 | S = SR[0];
|
---|
| 283 | R = SR[1];
|
---|
| 284 | if (S == -1) return 0;
|
---|
| 285 | }
|
---|
| 286 | }
|
---|
| 287 | else if (thread_dynamic_) {
|
---|
| 288 | long my_shellpair;
|
---|
| 289 | lock_->lock();
|
---|
| 290 | my_shellpair = shared_->shellpair_++;
|
---|
| 291 | lock_->unlock();
|
---|
| 292 | if (my_shellpair < ntask_) {
|
---|
| 293 | S = Svec_[Ivec_[my_shellpair]];
|
---|
| 294 | R = Rvec_[Ivec_[my_shellpair]];
|
---|
| 295 | }
|
---|
| 296 | else {
|
---|
| 297 | S = R = -1;
|
---|
| 298 | return 0;
|
---|
| 299 | }
|
---|
| 300 | }
|
---|
| 301 | else { // static load balancing
|
---|
| 302 | int nsh1 = bs1_->nshell();
|
---|
| 303 | int nsh2 = bs2_->nshell();
|
---|
| 304 | if (S_ >= nsh1 || R_ >= nsh2) return 0;
|
---|
| 305 | S = S_;
|
---|
| 306 | R = R_;
|
---|
| 307 | // advance to the next S_, R_
|
---|
| 308 | if (bs1_eq_bs2_) {
|
---|
| 309 | R_ += ncpu_;
|
---|
| 310 | while (R_ > S_) {
|
---|
| 311 | S_++;
|
---|
| 312 | R_ = R_ - S_;
|
---|
| 313 | }
|
---|
| 314 | }
|
---|
| 315 | else {
|
---|
| 316 | S_ += incS_;
|
---|
| 317 | R_ += incR_;
|
---|
| 318 | if (R_ >= nsh2) { S_++;
|
---|
| 319 | R_ -= nsh2;
|
---|
| 320 | }
|
---|
| 321 | }
|
---|
| 322 | if (current_shellpair_%print_interval_ == 0) {
|
---|
| 323 | if (mythread_ == 0 && msg_->me() == 0) {
|
---|
| 324 | ExEnv::outn() << indent
|
---|
| 325 | << scprintf(" working on shell pair (%3d %3d), %6.3f%% complete",
|
---|
| 326 | S,R,((double)current_shellpair_*100.0)/ntask_)
|
---|
| 327 | << " (" << current_shellpair_ << " of " << ntask_ << ")"
|
---|
| 328 | << endl;
|
---|
| 329 | }
|
---|
| 330 | }
|
---|
| 331 | current_shellpair_++;
|
---|
| 332 | }
|
---|
| 333 | return 1;
|
---|
| 334 | }
|
---|
| 335 |
|
---|
| 336 | void
|
---|
| 337 | DistShellPair::set_print_percent(double pi)
|
---|
| 338 | {
|
---|
| 339 | print_percent_ = pi;
|
---|
| 340 | double print_interval = (double) print_percent_ * ntask_ / 100;
|
---|
| 341 | print_interval_ = (long int)floor(print_interval + 0.5);
|
---|
| 342 | if (print_interval_ < 1)
|
---|
| 343 | print_interval_ = 1;
|
---|
| 344 | }
|
---|
| 345 |
|
---|
| 346 | ////////////////////////////////////////////////////////////////////////////
|
---|
| 347 |
|
---|
| 348 | // Local Variables:
|
---|
| 349 | // mode: c++
|
---|
| 350 | // c-file-style: "CLJ-CONDENSED"
|
---|
| 351 | // End:
|
---|