| [0b990d] | 1 | //
 | 
|---|
 | 2 | // pregtime.cc
 | 
|---|
 | 3 | //
 | 
|---|
 | 4 | // Copyright (C) 1996 Limit Point Systems, Inc.
 | 
|---|
 | 5 | //
 | 
|---|
 | 6 | // Author: Curtis Janssen <cljanss@limitpt.com>
 | 
|---|
 | 7 | // Maintainer: LPS
 | 
|---|
 | 8 | //
 | 
|---|
 | 9 | // This file is part of the SC Toolkit.
 | 
|---|
 | 10 | //
 | 
|---|
 | 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
 | 
|---|
 | 12 | // it under the terms of the GNU Library General Public License as published by
 | 
|---|
 | 13 | // the Free Software Foundation; either version 2, or (at your option)
 | 
|---|
 | 14 | // any later version.
 | 
|---|
 | 15 | //
 | 
|---|
 | 16 | // The SC Toolkit is distributed in the hope that it will be useful,
 | 
|---|
 | 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
 | 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
 | 19 | // GNU Library General Public License for more details.
 | 
|---|
 | 20 | //
 | 
|---|
 | 21 | // You should have received a copy of the GNU Library General Public License
 | 
|---|
 | 22 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
 | 
|---|
 | 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 | 
|---|
 | 24 | //
 | 
|---|
 | 25 | // The U.S. Government is granted a limited license as per AL 91-7.
 | 
|---|
 | 26 | //
 | 
|---|
 | 27 | 
 | 
|---|
 | 28 | #ifdef __GNUC__
 | 
|---|
 | 29 | #pragma implementation
 | 
|---|
 | 30 | #endif
 | 
|---|
 | 31 | 
 | 
|---|
 | 32 | #ifdef HAVE_CONFIG_H
 | 
|---|
 | 33 | #  include <scconfig.h>
 | 
|---|
 | 34 | #endif
 | 
|---|
 | 35 | 
 | 
|---|
 #include <cmath>
 #include <cstring>
 #include <iostream>
 #include <iomanip>
 #include <vector>

 #include <util/misc/formio.h>
 #include <util/group/pregtime.h>
 | 
|---|
 | 41 | 
 | 
|---|
 | 42 | using namespace std;
 | 
|---|
 | 43 | using namespace sc;
 | 
|---|
 | 44 | 
 | 
|---|
 | 45 | static ClassDesc ParallelRegionTimer_cd(
 | 
|---|
 | 46 |   typeid(ParallelRegionTimer),"ParallelRegionTimer",1,"public RegionTimer",
 | 
|---|
 | 47 |   0, create<ParallelRegionTimer>, 0);
 | 
|---|
 | 48 | 
 | 
|---|
 | 49 | ParallelRegionTimer::ParallelRegionTimer(const Ref<KeyVal> &keyval):
 | 
|---|
 | 50 |   RegionTimer(keyval)
 | 
|---|
 | 51 | {
 | 
|---|
 | 52 |   msg_ = MessageGrp::get_default_messagegrp();
 | 
|---|
 | 53 | }
 | 
|---|
 | 54 | 
 | 
|---|
 | 55 | ParallelRegionTimer::ParallelRegionTimer(const Ref<MessageGrp>&msg,
 | 
|---|
 | 56 |                                          const char *topname,
 | 
|---|
 | 57 |                                          int cpu_time, int wall_time):
 | 
|---|
 | 58 |   RegionTimer(topname, cpu_time, wall_time),
 | 
|---|
 | 59 |   msg_(msg)
 | 
|---|
 | 60 | {
 | 
|---|
 | 61 | }
 | 
|---|
 | 62 | 
 | 
|---|
 | 63 | ParallelRegionTimer::~ParallelRegionTimer()
 | 
|---|
 | 64 | {
 | 
|---|
 | 65 | }
 | 
|---|
 | 66 | 
 | 
|---|
 | 67 | static void
 | 
|---|
 | 68 | send_string(const Ref<MessageGrp>& msg, int node, const char *s)
 | 
|---|
 | 69 | {
 | 
|---|
 | 70 |   int l = strlen(s);
 | 
|---|
 | 71 |   msg->send(node, l);
 | 
|---|
 | 72 |   msg->send(node, s, l);
 | 
|---|
 | 73 | }
 | 
|---|
 | 74 | 
 | 
|---|
 | 75 | static char *
 | 
|---|
 | 76 | recv_string(const Ref<MessageGrp>& msg, int node)
 | 
|---|
 | 77 | {
 | 
|---|
 | 78 |   int l;
 | 
|---|
 | 79 |   msg->recv(node, l);
 | 
|---|
 | 80 |   char *s = new char[l+1];
 | 
|---|
 | 81 |   s[l] = '\0';
 | 
|---|
 | 82 |   msg->recv(node, s, l);
 | 
|---|
 | 83 |   return s;
 | 
|---|
 | 84 | }
 | 
|---|
 | 85 | 
 | 
|---|
 | 86 | void
 | 
|---|
 | 87 | ParallelRegionTimer::send_subregions(int node, const TimedRegion *r) const
 | 
|---|
 | 88 | {
 | 
|---|
 | 89 |   TimedRegion *subr = r->subregions();
 | 
|---|
 | 90 | 
 | 
|---|
 | 91 |   // rewind to the beginning
 | 
|---|
 | 92 |   if (subr) { while (subr->prev()) subr = subr->prev(); }
 | 
|---|
 | 93 | 
 | 
|---|
 | 94 |   while (subr) {
 | 
|---|
 | 95 |       msg_->send(node, 1);
 | 
|---|
 | 96 |       send_string(msg_, node, subr->name());
 | 
|---|
 | 97 |       send_subregions(node, subr);
 | 
|---|
 | 98 |       subr = subr->next();
 | 
|---|
 | 99 |     };
 | 
|---|
 | 100 | 
 | 
|---|
 | 101 |   msg_->send(node, 0);
 | 
|---|
 | 102 | }
 | 
|---|
 | 103 | 
 | 
|---|
 | 104 | void
 | 
|---|
 | 105 | ParallelRegionTimer::recv_subregions(int node, TimedRegion *r) const
 | 
|---|
 | 106 | {
 | 
|---|
 | 107 |   int has_subregions;
 | 
|---|
 | 108 |   msg_->recv(node, has_subregions);
 | 
|---|
 | 109 |   while (has_subregions) {
 | 
|---|
 | 110 |       char *name = recv_string(msg_, node);
 | 
|---|
 | 111 |       TimedRegion *region = r->findinsubregion(name);
 | 
|---|
 | 112 |       delete[] name;
 | 
|---|
 | 113 |       recv_subregions(node, region);
 | 
|---|
 | 114 |       msg_->recv(node, has_subregions);
 | 
|---|
 | 115 |     }
 | 
|---|
 | 116 | }
 | 
|---|
 | 117 | 
 | 
|---|
 | 118 | void
 | 
|---|
 | 119 | ParallelRegionTimer::all_reduce_regions() const
 | 
|---|
 | 120 | {
 | 
|---|
 | 121 |   Ref<MachineTopology> topology = msg_->topology();
 | 
|---|
 | 122 | 
 | 
|---|
 | 123 |   // accumulate all the regions onto node zero
 | 
|---|
 | 124 |   Ref<GlobalMsgIter> i_reduce(topology->global_msg_iter(msg_, 0));
 | 
|---|
 | 125 |   for (i_reduce->backwards(); !i_reduce->done(); i_reduce->next()) {
 | 
|---|
 | 126 |       if (i_reduce->send()) {
 | 
|---|
 | 127 |           send_subregions(i_reduce->sendto(), top_);
 | 
|---|
 | 128 |         }
 | 
|---|
 | 129 |       if (i_reduce->recv()) {
 | 
|---|
 | 130 |           recv_subregions(i_reduce->recvfrom(), top_);
 | 
|---|
 | 131 |         }
 | 
|---|
 | 132 |     }
 | 
|---|
 | 133 | 
 | 
|---|
 | 134 |   // broadcast the regions to all the nodes
 | 
|---|
 | 135 |   Ref<GlobalMsgIter> i_bcast(topology->global_msg_iter(msg_, 0));
 | 
|---|
 | 136 |   for (i_bcast->forwards(); !i_bcast->done(); i_bcast->next()) {
 | 
|---|
 | 137 |       if (i_bcast->send()) {
 | 
|---|
 | 138 |           send_subregions(i_bcast->sendto(), top_);
 | 
|---|
 | 139 |         }
 | 
|---|
 | 140 |       if (i_bcast->recv()) {
 | 
|---|
 | 141 |           recv_subregions(i_bcast->recvfrom(), top_);
 | 
|---|
 | 142 |         }
 | 
|---|
 | 143 |     }
 | 
|---|
 | 144 | }
 | 
|---|
 | 145 | 
 | 
|---|
 | 146 | void
 | 
|---|
 | 147 | ParallelRegionTimer::print(ostream &o) const
 | 
|---|
 | 148 | {
 | 
|---|
 | 149 |   int i,j;
 | 
|---|
 | 150 | 
 | 
|---|
 | 151 |   if (msg_->n() == 1) {
 | 
|---|
 | 152 |       RegionTimer::print(o);
 | 
|---|
 | 153 |       return;
 | 
|---|
 | 154 |     }
 | 
|---|
 | 155 | 
 | 
|---|
 | 156 |   update_top();
 | 
|---|
 | 157 | 
 | 
|---|
 | 158 |   // make sure all the nodes have the same regions
 | 
|---|
 | 159 |   all_reduce_regions();
 | 
|---|
 | 160 | 
 | 
|---|
 | 161 |   int n = nregion();
 | 
|---|
 | 162 | 
 | 
|---|
 | 163 |   double *cpu_time = 0;
 | 
|---|
 | 164 |   double *wall_time = 0;
 | 
|---|
 | 165 |   double *flops = 0;
 | 
|---|
 | 166 |   double *min_cpu_time = 0;
 | 
|---|
 | 167 |   double *min_wall_time = 0;
 | 
|---|
 | 168 |   double *min_flops = 0;
 | 
|---|
 | 169 |   double *max_cpu_time = 0;
 | 
|---|
 | 170 |   double *max_wall_time = 0;
 | 
|---|
 | 171 |   double *max_flops = 0;
 | 
|---|
 | 172 |   double *avg_cpu_time = 0;
 | 
|---|
 | 173 |   double *avg_wall_time = 0;
 | 
|---|
 | 174 |   double *avg_flops = 0;
 | 
|---|
 | 175 |   if (cpu_time_) {
 | 
|---|
 | 176 |       cpu_time = new double[n];
 | 
|---|
 | 177 |       get_cpu_times(cpu_time);
 | 
|---|
 | 178 |       min_cpu_time = new double[n];
 | 
|---|
 | 179 |       get_cpu_times(min_cpu_time);
 | 
|---|
 | 180 |       max_cpu_time = new double[n];
 | 
|---|
 | 181 |       get_cpu_times(max_cpu_time);
 | 
|---|
 | 182 |       avg_cpu_time = new double[n];
 | 
|---|
 | 183 |       get_cpu_times(avg_cpu_time);
 | 
|---|
 | 184 |       msg_->max(max_cpu_time,n);
 | 
|---|
 | 185 |       msg_->min(min_cpu_time,n);
 | 
|---|
 | 186 |       msg_->sum(avg_cpu_time,n);
 | 
|---|
 | 187 |       for (i=0; i<n; i++) {
 | 
|---|
 | 188 |           avg_cpu_time[i] /= msg_->n();
 | 
|---|
 | 189 |         }
 | 
|---|
 | 190 |     }
 | 
|---|
 | 191 |   if (wall_time_) {
 | 
|---|
 | 192 |       wall_time = new double[n];
 | 
|---|
 | 193 |       get_wall_times(wall_time);
 | 
|---|
 | 194 |       min_wall_time = new double[n];
 | 
|---|
 | 195 |       get_wall_times(min_wall_time);
 | 
|---|
 | 196 |       max_wall_time = new double[n];
 | 
|---|
 | 197 |       get_wall_times(max_wall_time);
 | 
|---|
 | 198 |       avg_wall_time = new double[n];
 | 
|---|
 | 199 |       get_wall_times(avg_wall_time);
 | 
|---|
 | 200 |       msg_->max(max_wall_time,n);
 | 
|---|
 | 201 |       msg_->min(min_wall_time,n);
 | 
|---|
 | 202 |       msg_->sum(avg_wall_time,n);
 | 
|---|
 | 203 |       for (i=0; i<n; i++) {
 | 
|---|
 | 204 |           avg_wall_time[i] /= msg_->n();
 | 
|---|
 | 205 |         }
 | 
|---|
 | 206 |     }
 | 
|---|
 | 207 |   const char *flops_name = 0;
 | 
|---|
 | 208 |   if (flops_) {
 | 
|---|
 | 209 |       flops= new double[n];
 | 
|---|
 | 210 |       get_flops(flops);
 | 
|---|
 | 211 |       if (cpu_time_) {
 | 
|---|
 | 212 |         for (i=0; i<n; i++) {
 | 
|---|
 | 213 |           if (fabs(cpu_time[i]) > 1.0e-10) flops[i] /= cpu_time[i]*1000000.;
 | 
|---|
 | 214 |           else flops[i] = 0.0;
 | 
|---|
 | 215 |           }
 | 
|---|
 | 216 |         flops_name = "MFLOP/S";
 | 
|---|
 | 217 |         }
 | 
|---|
 | 218 |       else if (wall_time_) {
 | 
|---|
 | 219 |         for (i=0; i<n; i++) {
 | 
|---|
 | 220 |           if (fabs(wall_time[i]) > 1.0e-10) flops[i] /= wall_time[i]*1000000.;
 | 
|---|
 | 221 |           else flops[i] = 0.0;
 | 
|---|
 | 222 |           }
 | 
|---|
 | 223 |         flops_name = "MFLOP/WS";
 | 
|---|
 | 224 |         }
 | 
|---|
 | 225 |       else {
 | 
|---|
 | 226 |         for (i=0; i<n; i++) {
 | 
|---|
 | 227 |           flops[i] /= 1000000.;
 | 
|---|
 | 228 |           }
 | 
|---|
 | 229 |         flops_name = "mflops";
 | 
|---|
 | 230 |         }
 | 
|---|
 | 231 |       min_flops= new double[n];
 | 
|---|
 | 232 |       memcpy(min_flops, flops, sizeof(double)*n);
 | 
|---|
 | 233 |       max_flops= new double[n];
 | 
|---|
 | 234 |       memcpy(max_flops, flops, sizeof(double)*n);
 | 
|---|
 | 235 |       avg_flops= new double[n];
 | 
|---|
 | 236 |       memcpy(avg_flops, flops, sizeof(double)*n);
 | 
|---|
 | 237 |       msg_->max(max_flops,n);
 | 
|---|
 | 238 |       msg_->min(min_flops,n);
 | 
|---|
 | 239 |       msg_->sum(avg_flops,n);
 | 
|---|
 | 240 |       for (i=0; i<n; i++) {
 | 
|---|
 | 241 |           avg_flops[i] /= msg_->n();
 | 
|---|
 | 242 |         }
 | 
|---|
 | 243 |     }
 | 
|---|
 | 244 | 
 | 
|---|
 | 245 |   if (msg_->me() == 0) {
 | 
|---|
 | 246 |       const char **names = new const char*[n];
 | 
|---|
 | 247 |       get_region_names(names);
 | 
|---|
 | 248 |       int *depth = new int[n];
 | 
|---|
 | 249 |       get_depth(depth);
 | 
|---|
 | 250 | 
 | 
|---|
 | 251 |       int maxwidth = 0;
 | 
|---|
 | 252 |       double maxtime = 0.0;
 | 
|---|
 | 253 |       for (i=0; i<n; i++) {
 | 
|---|
 | 254 |           int width = strlen(names[i]) + 2 * depth[i] + 2;
 | 
|---|
 | 255 |           if (width > maxwidth) maxwidth = width;
 | 
|---|
 | 256 |           if (cpu_time_ && max_cpu_time[i] > maxtime)
 | 
|---|
 | 257 |               maxtime = max_cpu_time[i];
 | 
|---|
 | 258 |           if (wall_time_ && max_wall_time[i] > maxtime)
 | 
|---|
 | 259 |               maxtime = max_wall_time[i];
 | 
|---|
 | 260 |           if (flops_ && max_flops[i] > maxtime)
 | 
|---|
 | 261 |               maxtime = max_flops[i];
 | 
|---|
 | 262 |         }
 | 
|---|
 | 263 | 
 | 
|---|
 | 264 |       int maxtimewidth = 4;
 | 
|---|
 | 265 |       while (maxtime >= 10.0) { maxtime/=10.0; maxtimewidth++; }
 | 
|---|
 | 266 | 
 | 
|---|
 | 267 |       o.setf(ios::right);
 | 
|---|
 | 268 | 
 | 
|---|
 | 269 |       for (i=0; i<maxwidth; i++) o << " ";
 | 
|---|
 | 270 |       if (cpu_time_) {
 | 
|---|
 | 271 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
 | 272 |           o << setw(maxtimewidth+1) << " CPU";
 | 
|---|
 | 273 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
 | 274 |         }
 | 
|---|
 | 275 |       if (wall_time_) {
 | 
|---|
 | 276 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
 | 277 |           o << setw(maxtimewidth+1) << " Wall";
 | 
|---|
 | 278 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
 | 279 |         }
 | 
|---|
 | 280 |       if (flops_) {
 | 
|---|
 | 281 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
 | 282 |           o << " " << setw(maxtimewidth+1) << flops_name;
 | 
|---|
 | 283 |           o << setw(maxtimewidth+1) << " ";
 | 
|---|
 | 284 |         }
 | 
|---|
 | 285 |       o << endl;
 | 
|---|
 | 286 | 
 | 
|---|
 | 287 |       for (i=0; i<maxwidth; i++) o << " ";
 | 
|---|
 | 288 |       if (cpu_time_) {
 | 
|---|
 | 289 |           o << setw(maxtimewidth+1) << " min";
 | 
|---|
 | 290 |           o << setw(maxtimewidth+1) << " max";
 | 
|---|
 | 291 |           o << setw(maxtimewidth+1) << " avg";
 | 
|---|
 | 292 |         }
 | 
|---|
 | 293 |       if (wall_time_) {
 | 
|---|
 | 294 |           o << setw(maxtimewidth+1) << " min";
 | 
|---|
 | 295 |           o << setw(maxtimewidth+1) << " max";
 | 
|---|
 | 296 |           o << setw(maxtimewidth+1) << " avg";
 | 
|---|
 | 297 |         }
 | 
|---|
 | 298 |       if (flops_) {
 | 
|---|
 | 299 |           o << setw(maxtimewidth+1) << " min";
 | 
|---|
 | 300 |           o << setw(maxtimewidth+1) << " max";
 | 
|---|
 | 301 |           o << setw(maxtimewidth+1) << " avg";
 | 
|---|
 | 302 |         }
 | 
|---|
 | 303 |       o << endl;
 | 
|---|
 | 304 | 
 | 
|---|
 | 305 |       o.setf(ios::fixed);
 | 
|---|
 | 306 |       o.precision(2);
 | 
|---|
 | 307 |       for (i=0; i<n; i++) {
 | 
|---|
 | 308 |           int width = strlen(names[i]) + 2 * depth[i] + 2;
 | 
|---|
 | 309 |           for (j=0; j<depth[i]; j++) o << "  ";
 | 
|---|
 | 310 |           o << names[i] << ": ";
 | 
|---|
 | 311 |           for (j=width; j<maxwidth; j++) o << " ";
 | 
|---|
 | 312 |           if (cpu_time_) {
 | 
|---|
 | 313 |               o << " " << setw(maxtimewidth) << min_cpu_time[i];
 | 
|---|
 | 314 |               o << " " << setw(maxtimewidth) << max_cpu_time[i];
 | 
|---|
 | 315 |               o << " " << setw(maxtimewidth) << avg_cpu_time[i];
 | 
|---|
 | 316 |             }                    
 | 
|---|
 | 317 |           if (wall_time_) {
 | 
|---|
 | 318 |               o << " " << setw(maxtimewidth) << min_wall_time[i];
 | 
|---|
 | 319 |               o << " " << setw(maxtimewidth) << max_wall_time[i];
 | 
|---|
 | 320 |               o << " " << setw(maxtimewidth) << avg_wall_time[i];
 | 
|---|
 | 321 |             }
 | 
|---|
 | 322 |           if (flops_) {
 | 
|---|
 | 323 |               o << " " << setw(maxtimewidth) << min_flops[i];
 | 
|---|
 | 324 |               o << " " << setw(maxtimewidth) << max_flops[i];
 | 
|---|
 | 325 |               o << " " << setw(maxtimewidth) << avg_flops[i];
 | 
|---|
 | 326 |             }
 | 
|---|
 | 327 |           o << endl;
 | 
|---|
 | 328 |         }
 | 
|---|
 | 329 | 
 | 
|---|
 | 330 |       delete[] names;
 | 
|---|
 | 331 |       delete[] depth;
 | 
|---|
 | 332 |     }
 | 
|---|
 | 333 | 
 | 
|---|
 | 334 |   delete[] cpu_time;
 | 
|---|
 | 335 |   delete[] min_cpu_time;
 | 
|---|
 | 336 |   delete[] max_cpu_time;
 | 
|---|
 | 337 |   delete[] avg_cpu_time;
 | 
|---|
 | 338 |   delete[] wall_time;
 | 
|---|
 | 339 |   delete[] min_wall_time;
 | 
|---|
 | 340 |   delete[] max_wall_time;
 | 
|---|
 | 341 |   delete[] avg_wall_time;
 | 
|---|
 | 342 |   delete[] flops;
 | 
|---|
 | 343 |   delete[] min_flops;
 | 
|---|
 | 344 |   delete[] max_flops;
 | 
|---|
 | 345 |   delete[] avg_flops;
 | 
|---|
 | 346 | }
 | 
|---|
 | 347 | 
 | 
|---|
 | 348 | /////////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 349 | 
 | 
|---|
 | 350 | // Local Variables:
 | 
|---|
 | 351 | // mode: c++
 | 
|---|
 | 352 | // c-file-style: "CLJ"
 | 
|---|
 | 353 | // End:
 | 
|---|