| 1 | //
 | 
|---|
| 2 | // pregtime.cc
 | 
|---|
| 3 | //
 | 
|---|
| 4 | // Copyright (C) 1996 Limit Point Systems, Inc.
 | 
|---|
| 5 | //
 | 
|---|
| 6 | // Author: Curtis Janssen <cljanss@limitpt.com>
 | 
|---|
| 7 | // Maintainer: LPS
 | 
|---|
| 8 | //
 | 
|---|
| 9 | // This file is part of the SC Toolkit.
 | 
|---|
| 10 | //
 | 
|---|
| 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
 | 
|---|
| 12 | // it under the terms of the GNU Library General Public License as published by
 | 
|---|
| 13 | // the Free Software Foundation; either version 2, or (at your option)
 | 
|---|
| 14 | // any later version.
 | 
|---|
| 15 | //
 | 
|---|
| 16 | // The SC Toolkit is distributed in the hope that it will be useful,
 | 
|---|
| 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
| 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
| 19 | // GNU Library General Public License for more details.
 | 
|---|
| 20 | //
 | 
|---|
| 21 | // You should have received a copy of the GNU Library General Public License
 | 
|---|
| 22 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
 | 
|---|
| 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 | 
|---|
| 24 | //
 | 
|---|
| 25 | // The U.S. Government is granted a limited license as per AL 91-7.
 | 
|---|
| 26 | //
 | 
|---|
| 27 | 
 | 
|---|
| 28 | #ifdef __GNUC__
 | 
|---|
| 29 | #pragma implementation
 | 
|---|
| 30 | #endif
 | 
|---|
| 31 | 
 | 
|---|
| 32 | #ifdef HAVE_CONFIG_H
 | 
|---|
| 33 | #  include <scconfig.h>
 | 
|---|
| 34 | #endif
 | 
|---|
| 35 | 
 | 
|---|
| 36 | #include <iostream>
 | 
|---|
| 37 | #include <iomanip>
 | 
|---|
| 38 | 
 | 
|---|
| 39 | #include <util/misc/formio.h>
 | 
|---|
| 40 | #include <util/group/pregtime.h>
 | 
|---|
| 41 | 
 | 
|---|
| 42 | using namespace std;
 | 
|---|
| 43 | using namespace sc;
 | 
|---|
| 44 | 
 | 
|---|
| 45 | static ClassDesc ParallelRegionTimer_cd(
 | 
|---|
| 46 |   typeid(ParallelRegionTimer),"ParallelRegionTimer",1,"public RegionTimer",
 | 
|---|
| 47 |   0, create<ParallelRegionTimer>, 0);
 | 
|---|
| 48 | 
 | 
|---|
| 49 | ParallelRegionTimer::ParallelRegionTimer(const Ref<KeyVal> &keyval):
 | 
|---|
| 50 |   RegionTimer(keyval)
 | 
|---|
| 51 | {
 | 
|---|
| 52 |   msg_ = MessageGrp::get_default_messagegrp();
 | 
|---|
| 53 | }
 | 
|---|
| 54 | 
 | 
|---|
| 55 | ParallelRegionTimer::ParallelRegionTimer(const Ref<MessageGrp>&msg,
 | 
|---|
| 56 |                                          const char *topname,
 | 
|---|
| 57 |                                          int cpu_time, int wall_time):
 | 
|---|
| 58 |   RegionTimer(topname, cpu_time, wall_time),
 | 
|---|
| 59 |   msg_(msg)
 | 
|---|
| 60 | {
 | 
|---|
| 61 | }
 | 
|---|
| 62 | 
 | 
|---|
| 63 | ParallelRegionTimer::~ParallelRegionTimer()
 | 
|---|
| 64 | {
 | 
|---|
| 65 | }
 | 
|---|
| 66 | 
 | 
|---|
| 67 | static void
 | 
|---|
| 68 | send_string(const Ref<MessageGrp>& msg, int node, const char *s)
 | 
|---|
| 69 | {
 | 
|---|
| 70 |   int l = strlen(s);
 | 
|---|
| 71 |   msg->send(node, l);
 | 
|---|
| 72 |   msg->send(node, s, l);
 | 
|---|
| 73 | }
 | 
|---|
| 74 | 
 | 
|---|
| 75 | static char *
 | 
|---|
| 76 | recv_string(const Ref<MessageGrp>& msg, int node)
 | 
|---|
| 77 | {
 | 
|---|
| 78 |   int l;
 | 
|---|
| 79 |   msg->recv(node, l);
 | 
|---|
| 80 |   char *s = new char[l+1];
 | 
|---|
| 81 |   s[l] = '\0';
 | 
|---|
| 82 |   msg->recv(node, s, l);
 | 
|---|
| 83 |   return s;
 | 
|---|
| 84 | }
 | 
|---|
| 85 | 
 | 
|---|
| 86 | void
 | 
|---|
| 87 | ParallelRegionTimer::send_subregions(int node, const TimedRegion *r) const
 | 
|---|
| 88 | {
 | 
|---|
| 89 |   TimedRegion *subr = r->subregions();
 | 
|---|
| 90 | 
 | 
|---|
| 91 |   // rewind to the beginning
 | 
|---|
| 92 |   if (subr) { while (subr->prev()) subr = subr->prev(); }
 | 
|---|
| 93 | 
 | 
|---|
| 94 |   while (subr) {
 | 
|---|
| 95 |       msg_->send(node, 1);
 | 
|---|
| 96 |       send_string(msg_, node, subr->name());
 | 
|---|
| 97 |       send_subregions(node, subr);
 | 
|---|
| 98 |       subr = subr->next();
 | 
|---|
| 99 |     };
 | 
|---|
| 100 | 
 | 
|---|
| 101 |   msg_->send(node, 0);
 | 
|---|
| 102 | }
 | 
|---|
| 103 | 
 | 
|---|
| 104 | void
 | 
|---|
| 105 | ParallelRegionTimer::recv_subregions(int node, TimedRegion *r) const
 | 
|---|
| 106 | {
 | 
|---|
| 107 |   int has_subregions;
 | 
|---|
| 108 |   msg_->recv(node, has_subregions);
 | 
|---|
| 109 |   while (has_subregions) {
 | 
|---|
| 110 |       char *name = recv_string(msg_, node);
 | 
|---|
| 111 |       TimedRegion *region = r->findinsubregion(name);
 | 
|---|
| 112 |       delete[] name;
 | 
|---|
| 113 |       recv_subregions(node, region);
 | 
|---|
| 114 |       msg_->recv(node, has_subregions);
 | 
|---|
| 115 |     }
 | 
|---|
| 116 | }
 | 
|---|
| 117 | 
 | 
|---|
| 118 | void
 | 
|---|
| 119 | ParallelRegionTimer::all_reduce_regions() const
 | 
|---|
| 120 | {
 | 
|---|
| 121 |   Ref<MachineTopology> topology = msg_->topology();
 | 
|---|
| 122 | 
 | 
|---|
| 123 |   // accumulate all the regions onto node zero
 | 
|---|
| 124 |   Ref<GlobalMsgIter> i_reduce(topology->global_msg_iter(msg_, 0));
 | 
|---|
| 125 |   for (i_reduce->backwards(); !i_reduce->done(); i_reduce->next()) {
 | 
|---|
| 126 |       if (i_reduce->send()) {
 | 
|---|
| 127 |           send_subregions(i_reduce->sendto(), top_);
 | 
|---|
| 128 |         }
 | 
|---|
| 129 |       if (i_reduce->recv()) {
 | 
|---|
| 130 |           recv_subregions(i_reduce->recvfrom(), top_);
 | 
|---|
| 131 |         }
 | 
|---|
| 132 |     }
 | 
|---|
| 133 | 
 | 
|---|
| 134 |   // broadcast the regions to all the nodes
 | 
|---|
| 135 |   Ref<GlobalMsgIter> i_bcast(topology->global_msg_iter(msg_, 0));
 | 
|---|
| 136 |   for (i_bcast->forwards(); !i_bcast->done(); i_bcast->next()) {
 | 
|---|
| 137 |       if (i_bcast->send()) {
 | 
|---|
| 138 |           send_subregions(i_bcast->sendto(), top_);
 | 
|---|
| 139 |         }
 | 
|---|
| 140 |       if (i_bcast->recv()) {
 | 
|---|
| 141 |           recv_subregions(i_bcast->recvfrom(), top_);
 | 
|---|
| 142 |         }
 | 
|---|
| 143 |     }
 | 
|---|
| 144 | }
 | 
|---|
| 145 | 
 | 
|---|
| 146 | void
 | 
|---|
| 147 | ParallelRegionTimer::print(ostream &o) const
 | 
|---|
| 148 | {
 | 
|---|
| 149 |   int i,j;
 | 
|---|
| 150 | 
 | 
|---|
| 151 |   if (msg_->n() == 1) {
 | 
|---|
| 152 |       RegionTimer::print(o);
 | 
|---|
| 153 |       return;
 | 
|---|
| 154 |     }
 | 
|---|
| 155 | 
 | 
|---|
| 156 |   update_top();
 | 
|---|
| 157 | 
 | 
|---|
| 158 |   // make sure all the nodes have the same regions
 | 
|---|
| 159 |   all_reduce_regions();
 | 
|---|
| 160 | 
 | 
|---|
| 161 |   int n = nregion();
 | 
|---|
| 162 | 
 | 
|---|
| 163 |   double *cpu_time = 0;
 | 
|---|
| 164 |   double *wall_time = 0;
 | 
|---|
| 165 |   double *flops = 0;
 | 
|---|
| 166 |   double *min_cpu_time = 0;
 | 
|---|
| 167 |   double *min_wall_time = 0;
 | 
|---|
| 168 |   double *min_flops = 0;
 | 
|---|
| 169 |   double *max_cpu_time = 0;
 | 
|---|
| 170 |   double *max_wall_time = 0;
 | 
|---|
| 171 |   double *max_flops = 0;
 | 
|---|
| 172 |   double *avg_cpu_time = 0;
 | 
|---|
| 173 |   double *avg_wall_time = 0;
 | 
|---|
| 174 |   double *avg_flops = 0;
 | 
|---|
| 175 |   if (cpu_time_) {
 | 
|---|
| 176 |       cpu_time = new double[n];
 | 
|---|
| 177 |       get_cpu_times(cpu_time);
 | 
|---|
| 178 |       min_cpu_time = new double[n];
 | 
|---|
| 179 |       get_cpu_times(min_cpu_time);
 | 
|---|
| 180 |       max_cpu_time = new double[n];
 | 
|---|
| 181 |       get_cpu_times(max_cpu_time);
 | 
|---|
| 182 |       avg_cpu_time = new double[n];
 | 
|---|
| 183 |       get_cpu_times(avg_cpu_time);
 | 
|---|
| 184 |       msg_->max(max_cpu_time,n);
 | 
|---|
| 185 |       msg_->min(min_cpu_time,n);
 | 
|---|
| 186 |       msg_->sum(avg_cpu_time,n);
 | 
|---|
| 187 |       for (i=0; i<n; i++) {
 | 
|---|
| 188 |           avg_cpu_time[i] /= msg_->n();
 | 
|---|
| 189 |         }
 | 
|---|
| 190 |     }
 | 
|---|
| 191 |   if (wall_time_) {
 | 
|---|
| 192 |       wall_time = new double[n];
 | 
|---|
| 193 |       get_wall_times(wall_time);
 | 
|---|
| 194 |       min_wall_time = new double[n];
 | 
|---|
| 195 |       get_wall_times(min_wall_time);
 | 
|---|
| 196 |       max_wall_time = new double[n];
 | 
|---|
| 197 |       get_wall_times(max_wall_time);
 | 
|---|
| 198 |       avg_wall_time = new double[n];
 | 
|---|
| 199 |       get_wall_times(avg_wall_time);
 | 
|---|
| 200 |       msg_->max(max_wall_time,n);
 | 
|---|
| 201 |       msg_->min(min_wall_time,n);
 | 
|---|
| 202 |       msg_->sum(avg_wall_time,n);
 | 
|---|
| 203 |       for (i=0; i<n; i++) {
 | 
|---|
| 204 |           avg_wall_time[i] /= msg_->n();
 | 
|---|
| 205 |         }
 | 
|---|
| 206 |     }
 | 
|---|
| 207 |   const char *flops_name = 0;
 | 
|---|
| 208 |   if (flops_) {
 | 
|---|
| 209 |       flops= new double[n];
 | 
|---|
| 210 |       get_flops(flops);
 | 
|---|
| 211 |       if (cpu_time_) {
 | 
|---|
| 212 |         for (i=0; i<n; i++) {
 | 
|---|
| 213 |           if (fabs(cpu_time[i]) > 1.0e-10) flops[i] /= cpu_time[i]*1000000.;
 | 
|---|
| 214 |           else flops[i] = 0.0;
 | 
|---|
| 215 |           }
 | 
|---|
| 216 |         flops_name = "MFLOP/S";
 | 
|---|
| 217 |         }
 | 
|---|
| 218 |       else if (wall_time_) {
 | 
|---|
| 219 |         for (i=0; i<n; i++) {
 | 
|---|
| 220 |           if (fabs(wall_time[i]) > 1.0e-10) flops[i] /= wall_time[i]*1000000.;
 | 
|---|
| 221 |           else flops[i] = 0.0;
 | 
|---|
| 222 |           }
 | 
|---|
| 223 |         flops_name = "MFLOP/WS";
 | 
|---|
| 224 |         }
 | 
|---|
| 225 |       else {
 | 
|---|
| 226 |         for (i=0; i<n; i++) {
 | 
|---|
| 227 |           flops[i] /= 1000000.;
 | 
|---|
| 228 |           }
 | 
|---|
| 229 |         flops_name = "mflops";
 | 
|---|
| 230 |         }
 | 
|---|
| 231 |       min_flops= new double[n];
 | 
|---|
| 232 |       memcpy(min_flops, flops, sizeof(double)*n);
 | 
|---|
| 233 |       max_flops= new double[n];
 | 
|---|
| 234 |       memcpy(max_flops, flops, sizeof(double)*n);
 | 
|---|
| 235 |       avg_flops= new double[n];
 | 
|---|
| 236 |       memcpy(avg_flops, flops, sizeof(double)*n);
 | 
|---|
| 237 |       msg_->max(max_flops,n);
 | 
|---|
| 238 |       msg_->min(min_flops,n);
 | 
|---|
| 239 |       msg_->sum(avg_flops,n);
 | 
|---|
| 240 |       for (i=0; i<n; i++) {
 | 
|---|
| 241 |           avg_flops[i] /= msg_->n();
 | 
|---|
| 242 |         }
 | 
|---|
| 243 |     }
 | 
|---|
| 244 | 
 | 
|---|
| 245 |   if (msg_->me() == 0) {
 | 
|---|
| 246 |       const char **names = new const char*[n];
 | 
|---|
| 247 |       get_region_names(names);
 | 
|---|
| 248 |       int *depth = new int[n];
 | 
|---|
| 249 |       get_depth(depth);
 | 
|---|
| 250 | 
 | 
|---|
| 251 |       int maxwidth = 0;
 | 
|---|
| 252 |       double maxtime = 0.0;
 | 
|---|
| 253 |       for (i=0; i<n; i++) {
 | 
|---|
| 254 |           int width = strlen(names[i]) + 2 * depth[i] + 2;
 | 
|---|
| 255 |           if (width > maxwidth) maxwidth = width;
 | 
|---|
| 256 |           if (cpu_time_ && max_cpu_time[i] > maxtime)
 | 
|---|
| 257 |               maxtime = max_cpu_time[i];
 | 
|---|
| 258 |           if (wall_time_ && max_wall_time[i] > maxtime)
 | 
|---|
| 259 |               maxtime = max_wall_time[i];
 | 
|---|
| 260 |           if (flops_ && max_flops[i] > maxtime)
 | 
|---|
| 261 |               maxtime = max_flops[i];
 | 
|---|
| 262 |         }
 | 
|---|
| 263 | 
 | 
|---|
| 264 |       int maxtimewidth = 4;
 | 
|---|
| 265 |       while (maxtime >= 10.0) { maxtime/=10.0; maxtimewidth++; }
 | 
|---|
| 266 | 
 | 
|---|
| 267 |       o.setf(ios::right);
 | 
|---|
| 268 | 
 | 
|---|
| 269 |       for (i=0; i<maxwidth; i++) o << " ";
 | 
|---|
| 270 |       if (cpu_time_) {
 | 
|---|
| 271 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
| 272 |           o << setw(maxtimewidth+1) << " CPU";
 | 
|---|
| 273 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
| 274 |         }
 | 
|---|
| 275 |       if (wall_time_) {
 | 
|---|
| 276 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
| 277 |           o << setw(maxtimewidth+1) << " Wall";
 | 
|---|
| 278 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
| 279 |         }
 | 
|---|
| 280 |       if (flops_) {
 | 
|---|
| 281 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
| 282 |           o << " " << setw(maxtimewidth+1) << flops_name;
 | 
|---|
| 283 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
| 284 |         }
 | 
|---|
| 285 |       o << endl;
 | 
|---|
| 286 | 
 | 
|---|
| 287 |       for (i=0; i<maxwidth; i++) o << " ";
 | 
|---|
| 288 |       if (cpu_time_) {
 | 
|---|
| 289 |           o << setw(maxtimewidth+1) << " min";
 | 
|---|
| 290 |           o << setw(maxtimewidth+1) << " max";
 | 
|---|
| 291 |           o << setw(maxtimewidth+1) << " avg";
 | 
|---|
| 292 |         }
 | 
|---|
| 293 |       if (wall_time_) {
 | 
|---|
| 294 |           o << setw(maxtimewidth+1) << " min";
 | 
|---|
| 295 |           o << setw(maxtimewidth+1) << " max";
 | 
|---|
| 296 |           o << setw(maxtimewidth+1) << " avg";
 | 
|---|
| 297 |         }
 | 
|---|
| 298 |       if (flops_) {
 | 
|---|
| 299 |           o << setw(maxtimewidth+1) << " min";
 | 
|---|
| 300 |           o << setw(maxtimewidth+1) << " max";
 | 
|---|
| 301 |           o << setw(maxtimewidth+1) << " avg";
 | 
|---|
| 302 |         }
 | 
|---|
| 303 |       o << endl;
 | 
|---|
| 304 | 
 | 
|---|
| 305 |       o.setf(ios::fixed);
 | 
|---|
| 306 |       o.precision(2);
 | 
|---|
| 307 |       for (i=0; i<n; i++) {
 | 
|---|
| 308 |           int width = strlen(names[i]) + 2 * depth[i] + 2;
 | 
|---|
| 309 |           for (j=0; j<depth[i]; j++) o << "  ";
 | 
|---|
| 310 |           o << names[i] << ": ";
 | 
|---|
| 311 |           for (j=width; j<maxwidth; j++) o << " ";
 | 
|---|
| 312 |           if (cpu_time_) {
 | 
|---|
| 313 |               o << " " << setw(maxtimewidth) << min_cpu_time[i];
 | 
|---|
| 314 |               o << " " << setw(maxtimewidth) << max_cpu_time[i];
 | 
|---|
| 315 |               o << " " << setw(maxtimewidth) << avg_cpu_time[i];
 | 
|---|
| 316 |             }                    
 | 
|---|
| 317 |           if (wall_time_) {
 | 
|---|
| 318 |               o << " " << setw(maxtimewidth) << min_wall_time[i];
 | 
|---|
| 319 |               o << " " << setw(maxtimewidth) << max_wall_time[i];
 | 
|---|
| 320 |               o << " " << setw(maxtimewidth) << avg_wall_time[i];
 | 
|---|
| 321 |             }
 | 
|---|
| 322 |           if (flops_) {
 | 
|---|
| 323 |               o << " " << setw(maxtimewidth) << min_flops[i];
 | 
|---|
| 324 |               o << " " << setw(maxtimewidth) << max_flops[i];
 | 
|---|
| 325 |               o << " " << setw(maxtimewidth) << avg_flops[i];
 | 
|---|
| 326 |             }
 | 
|---|
| 327 |           o << endl;
 | 
|---|
| 328 |         }
 | 
|---|
| 329 | 
 | 
|---|
| 330 |       delete[] names;
 | 
|---|
| 331 |       delete[] depth;
 | 
|---|
| 332 |     }
 | 
|---|
| 333 | 
 | 
|---|
| 334 |   delete[] cpu_time;
 | 
|---|
| 335 |   delete[] min_cpu_time;
 | 
|---|
| 336 |   delete[] max_cpu_time;
 | 
|---|
| 337 |   delete[] avg_cpu_time;
 | 
|---|
| 338 |   delete[] wall_time;
 | 
|---|
| 339 |   delete[] min_wall_time;
 | 
|---|
| 340 |   delete[] max_wall_time;
 | 
|---|
| 341 |   delete[] avg_wall_time;
 | 
|---|
| 342 |   delete[] flops;
 | 
|---|
| 343 |   delete[] min_flops;
 | 
|---|
| 344 |   delete[] max_flops;
 | 
|---|
| 345 |   delete[] avg_flops;
 | 
|---|
| 346 | }
 | 
|---|
| 347 | 
 | 
|---|
| 348 | /////////////////////////////////////////////////////////////////////////////
 | 
|---|
| 349 | 
 | 
|---|
| 350 | // Local Variables:
 | 
|---|
| 351 | // mode: c++
 | 
|---|
| 352 | // c-file-style: "CLJ"
 | 
|---|
| 353 | // End:
 | 
|---|