| [0b990d] | 1 | //
 | 
|---|
 | 2 | // reduce.cc
 | 
|---|
 | 3 | //
 | 
|---|
 | 4 | // Copyright (C) 1996 Limit Point Systems, Inc.
 | 
|---|
 | 5 | //
 | 
|---|
 | 6 | // Author: Curtis Janssen <cljanss@limitpt.com>
 | 
|---|
 | 7 | // Maintainer: LPS
 | 
|---|
 | 8 | //
 | 
|---|
 | 9 | // This file is part of the SC Toolkit.
 | 
|---|
 | 10 | //
 | 
|---|
 | 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
 | 
|---|
 | 12 | // it under the terms of the GNU Library General Public License as published by
 | 
|---|
 | 13 | // the Free Software Foundation; either version 2, or (at your option)
 | 
|---|
 | 14 | // any later version.
 | 
|---|
 | 15 | //
 | 
|---|
 | 16 | // The SC Toolkit is distributed in the hope that it will be useful,
 | 
|---|
 | 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
 | 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
 | 19 | // GNU Library General Public License for more details.
 | 
|---|
 | 20 | //
 | 
|---|
 | 21 | // You should have received a copy of the GNU Library General Public License
 | 
|---|
 | 22 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
 | 
|---|
 | 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 | 
|---|
 | 24 | //
 | 
|---|
 | 25 | // The U.S. Government is granted a limited license as per AL 91-7.
 | 
|---|
 | 26 | //
 | 
|---|
 | 27 | 
 | 
|---|
 | 28 | #ifdef HAVE_CONFIG_H
 | 
|---|
 | 29 | #include <scconfig.h>
 | 
|---|
 | 30 | #endif
 | 
|---|
 | 31 | #include <util/group/message.h>
 | 
|---|
 | 32 | 
 | 
|---|
 | 33 | using namespace sc;
 | 
|---|
 | 34 | 
 | 
|---|
 | 35 | /////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 36 | // instantiate templates
 | 
|---|
 | 37 | 
 | 
|---|
 #ifdef EXPLICIT_TEMPLATE_INSTANTIATION
 // Explicit instantiations of the reduction class templates, emitted only
 // when the build defines EXPLICIT_TEMPLATE_INSTANTIATION.  The two helper
 // macros below spell out the element-type lists once; they are local to
 // this section and undefined again at its end.

 // Instantiate Reducer for every supported element type.
 #define SC_INSTANTIATE_FOR_ALL_TYPES(Reducer) \
     template class Reducer<double>; \
     template class Reducer<unsigned int>; \
     template class Reducer<int>; \
     template class Reducer<long>; \
     template class Reducer<float>; \
     template class Reducer<short>; \
     template class Reducer<char>; \
     template class Reducer<unsigned char>; \
     template class Reducer<signed char>

 // Instantiate Reducer for the integer element types only (used for the
 // or/and/xor reductions, which are not instantiated for float or double).
 #define SC_INSTANTIATE_FOR_INTEGER_TYPES(Reducer) \
     template class Reducer<unsigned int>; \
     template class Reducer<int>; \
     template class Reducer<long>; \
     template class Reducer<short>; \
     template class Reducer<char>; \
     template class Reducer<unsigned char>; \
     template class Reducer<signed char>

 SC_INSTANTIATE_FOR_ALL_TYPES(GrpReduce);

 SC_INSTANTIATE_FOR_ALL_TYPES(GrpFunctionReduce);

 SC_INSTANTIATE_FOR_ALL_TYPES(GrpMinReduce);

 SC_INSTANTIATE_FOR_ALL_TYPES(GrpMaxReduce);

 SC_INSTANTIATE_FOR_ALL_TYPES(GrpSumReduce);

 SC_INSTANTIATE_FOR_ALL_TYPES(GrpProductReduce);

 SC_INSTANTIATE_FOR_INTEGER_TYPES(GrpArithmeticOrReduce);

 SC_INSTANTIATE_FOR_INTEGER_TYPES(GrpArithmeticAndReduce);

 SC_INSTANTIATE_FOR_INTEGER_TYPES(GrpArithmeticXOrReduce);

 #undef SC_INSTANTIATE_FOR_INTEGER_TYPES
 #undef SC_INSTANTIATE_FOR_ALL_TYPES
 #endif
 | 
|---|
 | 123 | 
 | 
|---|
 | 124 | /////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 125 | // sum reduction members
 | 
|---|
 | 126 | 
 | 
|---|
 | 127 | template <class T>
 | 
|---|
 | 128 | void
 | 
|---|
 | 129 | do_sum(MessageGrp* grp, T* data, int n, T* tmp, int target)
 | 
|---|
 | 130 | {
 | 
|---|
 | 131 |   GrpSumReduce<T> gred;
 | 
|---|
 | 132 |   grp->reduce(data, n, gred, tmp, target);
 | 
|---|
 | 133 | }
 | 
|---|
 | 134 | 
 | 
|---|
 | 135 | void
 | 
|---|
 | 136 | MessageGrp::sum(double* data, int n, double* tmp, int target)
 | 
|---|
 | 137 | {
 | 
|---|
 | 138 |   do_sum(this, data, n, tmp, target);
 | 
|---|
 | 139 | }
 | 
|---|
 | 140 | 
 | 
|---|
 | 141 | void
 | 
|---|
 | 142 | MessageGrp::sum(unsigned int* data, int n, unsigned int* tmp, int target)
 | 
|---|
 | 143 | {
 | 
|---|
 | 144 |   do_sum(this, data, n, tmp, target);
 | 
|---|
 | 145 | }
 | 
|---|
 | 146 | 
 | 
|---|
 | 147 | void
 | 
|---|
 | 148 | MessageGrp::sum(int* data, int n, int* tmp, int target)
 | 
|---|
 | 149 | {
 | 
|---|
 | 150 |   do_sum(this, data, n, tmp, target);
 | 
|---|
 | 151 | }
 | 
|---|
 | 152 | 
 | 
|---|
 | 153 | void
 | 
|---|
 | 154 | MessageGrp::sum(char* data, int n, char* tmp, int target)
 | 
|---|
 | 155 | {
 | 
|---|
 | 156 |   do_sum(this, data, n, tmp, target);
 | 
|---|
 | 157 | }
 | 
|---|
 | 158 | 
 | 
|---|
 | 159 | void
 | 
|---|
 | 160 | MessageGrp::sum(unsigned char* data, int n, unsigned char* tmp, int target)
 | 
|---|
 | 161 | {
 | 
|---|
 | 162 |   do_sum(this, data, n, tmp, target);
 | 
|---|
 | 163 | }
 | 
|---|
 | 164 | 
 | 
|---|
 | 165 | void
 | 
|---|
 | 166 | MessageGrp::sum(signed char* data, int n, signed char* tmp, int target)
 | 
|---|
 | 167 | {
 | 
|---|
 | 168 |   do_sum(this, data, n, tmp, target);
 | 
|---|
 | 169 | }
 | 
|---|
 | 170 | 
 | 
|---|
 | 171 | /////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 172 | // max reduction members
 | 
|---|
 | 173 | 
 | 
|---|
 | 174 | template <class T>
 | 
|---|
 | 175 | void
 | 
|---|
 | 176 | do_max(MessageGrp* grp, T* data, int n, T* tmp, int target)
 | 
|---|
 | 177 | {
 | 
|---|
 | 178 |   GrpMaxReduce<T> gred;
 | 
|---|
 | 179 |   grp->reduce(data, n, gred, tmp, target);
 | 
|---|
 | 180 | }
 | 
|---|
 | 181 | 
 | 
|---|
 | 182 | void
 | 
|---|
 | 183 | MessageGrp::max(double* data, int n, double* tmp, int target)
 | 
|---|
 | 184 | {
 | 
|---|
 | 185 |   do_max(this, data, n, tmp, target);
 | 
|---|
 | 186 | }
 | 
|---|
 | 187 | 
 | 
|---|
 | 188 | void
 | 
|---|
 | 189 | MessageGrp::max(unsigned int* data, int n, unsigned int* tmp, int target)
 | 
|---|
 | 190 | {
 | 
|---|
 | 191 |   do_max(this, data, n, tmp, target);
 | 
|---|
 | 192 | }
 | 
|---|
 | 193 | 
 | 
|---|
 | 194 | void
 | 
|---|
 | 195 | MessageGrp::max(int* data, int n, int* tmp, int target)
 | 
|---|
 | 196 | {
 | 
|---|
 | 197 |   do_max(this, data, n, tmp, target);
 | 
|---|
 | 198 | }
 | 
|---|
 | 199 | 
 | 
|---|
 | 200 | void
 | 
|---|
 | 201 | MessageGrp::max(char* data, int n, char* tmp, int target)
 | 
|---|
 | 202 | {
 | 
|---|
 | 203 |   do_max(this, data, n, tmp, target);
 | 
|---|
 | 204 | }
 | 
|---|
 | 205 | 
 | 
|---|
 | 206 | void
 | 
|---|
 | 207 | MessageGrp::max(unsigned char* data, int n, unsigned char* tmp, int target)
 | 
|---|
 | 208 | {
 | 
|---|
 | 209 |   do_max(this, data, n, tmp, target);
 | 
|---|
 | 210 | }
 | 
|---|
 | 211 | 
 | 
|---|
 | 212 | void
 | 
|---|
 | 213 | MessageGrp::max(signed char* data, int n, signed char* tmp, int target)
 | 
|---|
 | 214 | {
 | 
|---|
 | 215 |   do_max(this, data, n, tmp, target);
 | 
|---|
 | 216 | }
 | 
|---|
 | 217 | 
 | 
|---|
 | 218 | /////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 219 | // min reduction members
 | 
|---|
 | 220 | 
 | 
|---|
 | 221 | template <class T>
 | 
|---|
 | 222 | void
 | 
|---|
 | 223 | do_min(MessageGrp* grp, T* data, int n, T* tmp, int target)
 | 
|---|
 | 224 | {
 | 
|---|
 | 225 |   GrpMinReduce<T> gred;
 | 
|---|
 | 226 |   grp->reduce(data, n, gred, tmp, target);
 | 
|---|
 | 227 | }
 | 
|---|
 | 228 | 
 | 
|---|
 | 229 | void
 | 
|---|
 | 230 | MessageGrp::min(double* data, int n, double* tmp, int target)
 | 
|---|
 | 231 | {
 | 
|---|
 | 232 |   do_min(this, data, n, tmp, target);
 | 
|---|
 | 233 | }
 | 
|---|
 | 234 | 
 | 
|---|
 | 235 | void
 | 
|---|
 | 236 | MessageGrp::min(unsigned int* data, int n, unsigned int* tmp, int target)
 | 
|---|
 | 237 | {
 | 
|---|
 | 238 |   do_min(this, data, n, tmp, target);
 | 
|---|
 | 239 | }
 | 
|---|
 | 240 | 
 | 
|---|
 | 241 | void
 | 
|---|
 | 242 | MessageGrp::min(int* data, int n, int* tmp, int target)
 | 
|---|
 | 243 | {
 | 
|---|
 | 244 |   do_min(this, data, n, tmp, target);
 | 
|---|
 | 245 | }
 | 
|---|
 | 246 | 
 | 
|---|
 | 247 | void
 | 
|---|
 | 248 | MessageGrp::min(char* data, int n, char* tmp, int target)
 | 
|---|
 | 249 | {
 | 
|---|
 | 250 |   do_min(this, data, n, tmp, target);
 | 
|---|
 | 251 | }
 | 
|---|
 | 252 | 
 | 
|---|
 | 253 | void
 | 
|---|
 | 254 | MessageGrp::min(unsigned char* data, int n, unsigned char* tmp, int target)
 | 
|---|
 | 255 | {
 | 
|---|
 | 256 |   do_min(this, data, n, tmp, target);
 | 
|---|
 | 257 | }
 | 
|---|
 | 258 | 
 | 
|---|
 | 259 | void
 | 
|---|
 | 260 | MessageGrp::min(signed char* data, int n, signed char* tmp, int target)
 | 
|---|
 | 261 | {
 | 
|---|
 | 262 |   do_min(this, data, n, tmp, target);
 | 
|---|
 | 263 | }
 | 
|---|
 | 264 | 
 | 
|---|
 | 265 | /////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 266 | // generic reduction
 | 
|---|
 | 267 | 
 | 
|---|
 | 268 | void
 | 
|---|
 | 269 | MessageGrp::reduce(double* data, int n, GrpReduce<double>& red,
 | 
|---|
 | 270 |                    double* scratch, int target)
 | 
|---|
 | 271 | {
 | 
|---|
 | 272 |   int tgop_max = gop_max_/sizeof(double);
 | 
|---|
 | 273 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 274 | 
 | 
|---|
 | 275 |   int passed_scratch;
 | 
|---|
 | 276 |   if (!scratch) {
 | 
|---|
 | 277 |       scratch = new double[n>tgop_max?tgop_max:n];
 | 
|---|
 | 278 |       passed_scratch = 0;
 | 
|---|
 | 279 |     }
 | 
|---|
 | 280 |   else passed_scratch = 1;
 | 
|---|
 | 281 | 
 | 
|---|
 | 282 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 283 |                                                     (target== -1?0:target)));
 | 
|---|
 | 284 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 285 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 286 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 287 |           if (i->send()) {
 | 
|---|
 | 288 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 289 |             }
 | 
|---|
 | 290 |           if (i->recv()) {
 | 
|---|
 | 291 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 292 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 293 |             }
 | 
|---|
 | 294 |         }
 | 
|---|
 | 295 |       if (n > tgop_max) sync();
 | 
|---|
 | 296 |     }
 | 
|---|
 | 297 | 
 | 
|---|
 | 298 |   if (target == -1) {
 | 
|---|
 | 299 |       bcast(data, n, 0);
 | 
|---|
 | 300 |     }
 | 
|---|
 | 301 | 
 | 
|---|
 | 302 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 303 | }
 | 
|---|
 | 304 | 
 | 
|---|
 | 305 | void
 | 
|---|
 | 306 | MessageGrp::reduce(unsigned int* data, int n, GrpReduce<unsigned int>& red,
 | 
|---|
 | 307 |                    unsigned int* scratch, int target)
 | 
|---|
 | 308 | {
 | 
|---|
 | 309 |   int tgop_max = gop_max_/sizeof(unsigned int);
 | 
|---|
 | 310 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 311 | 
 | 
|---|
 | 312 |   int passed_scratch;
 | 
|---|
 | 313 |   if (!scratch) {
 | 
|---|
 | 314 |       scratch = new unsigned int[n>tgop_max?tgop_max:n];
 | 
|---|
 | 315 |       passed_scratch = 0;
 | 
|---|
 | 316 |     }
 | 
|---|
 | 317 |   else passed_scratch = 1;
 | 
|---|
 | 318 | 
 | 
|---|
 | 319 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 320 |                                                     (target== -1?0:target)));
 | 
|---|
 | 321 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 322 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 323 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 324 |           if (i->send()) {
 | 
|---|
 | 325 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 326 |             }
 | 
|---|
 | 327 |           if (i->recv()) {
 | 
|---|
 | 328 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 329 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 330 |             }
 | 
|---|
 | 331 |         }
 | 
|---|
 | 332 |       if (n > tgop_max) sync();
 | 
|---|
 | 333 |     }
 | 
|---|
 | 334 | 
 | 
|---|
 | 335 |   if (target == -1) {
 | 
|---|
 | 336 |       bcast(data, n, 0);
 | 
|---|
 | 337 |     }
 | 
|---|
 | 338 | 
 | 
|---|
 | 339 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 340 | }
 | 
|---|
 | 341 | 
 | 
|---|
 | 342 | void
 | 
|---|
 | 343 | MessageGrp::reduce(int* data, int n, GrpReduce<int>& red,
 | 
|---|
 | 344 |                    int* scratch, int target)
 | 
|---|
 | 345 | {
 | 
|---|
 | 346 |   int tgop_max = gop_max_/sizeof(int);
 | 
|---|
 | 347 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 348 | 
 | 
|---|
 | 349 |   int passed_scratch;
 | 
|---|
 | 350 |   if (!scratch) {
 | 
|---|
 | 351 |       scratch = new int[n>tgop_max?tgop_max:n];
 | 
|---|
 | 352 |       passed_scratch = 0;
 | 
|---|
 | 353 |     }
 | 
|---|
 | 354 |   else passed_scratch = 1;
 | 
|---|
 | 355 | 
 | 
|---|
 | 356 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 357 |                                                     (target== -1?0:target)));
 | 
|---|
 | 358 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 359 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 360 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 361 |           if (i->send()) {
 | 
|---|
 | 362 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 363 |             }
 | 
|---|
 | 364 |           if (i->recv()) {
 | 
|---|
 | 365 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 366 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 367 |             }
 | 
|---|
 | 368 |         }
 | 
|---|
 | 369 |       if (n > tgop_max) sync();
 | 
|---|
 | 370 |     }
 | 
|---|
 | 371 | 
 | 
|---|
 | 372 |   if (target == -1) {
 | 
|---|
 | 373 |       bcast(data, n, 0);
 | 
|---|
 | 374 |     }
 | 
|---|
 | 375 | 
 | 
|---|
 | 376 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 377 | }
 | 
|---|
 | 378 | 
 | 
|---|
 | 379 | void
 | 
|---|
 | 380 | MessageGrp::reduce(char* data, int n, GrpReduce<char>& red,
 | 
|---|
 | 381 |                    char* scratch, int target)
 | 
|---|
 | 382 | {
 | 
|---|
 | 383 |   int tgop_max = gop_max_/sizeof(char);
 | 
|---|
 | 384 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 385 | 
 | 
|---|
 | 386 |   int passed_scratch;
 | 
|---|
 | 387 |   if (!scratch) {
 | 
|---|
 | 388 |       scratch = new char[n>tgop_max?tgop_max:n];
 | 
|---|
 | 389 |       passed_scratch = 0;
 | 
|---|
 | 390 |     }
 | 
|---|
 | 391 |   else passed_scratch = 1;
 | 
|---|
 | 392 | 
 | 
|---|
 | 393 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 394 |                                                     (target== -1?0:target)));
 | 
|---|
 | 395 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 396 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 397 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 398 |           if (i->send()) {
 | 
|---|
 | 399 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 400 |             }
 | 
|---|
 | 401 |           if (i->recv()) {
 | 
|---|
 | 402 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 403 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 404 |             }
 | 
|---|
 | 405 |         }
 | 
|---|
 | 406 |       if (n > tgop_max) sync();
 | 
|---|
 | 407 |     }
 | 
|---|
 | 408 | 
 | 
|---|
 | 409 |   if (target == -1) {
 | 
|---|
 | 410 |       bcast(data, n, 0);
 | 
|---|
 | 411 |     }
 | 
|---|
 | 412 | 
 | 
|---|
 | 413 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 414 | }
 | 
|---|
 | 415 | 
 | 
|---|
 | 416 | void
 | 
|---|
 | 417 | MessageGrp::reduce(unsigned char* data, int n, GrpReduce<unsigned char>& red,
 | 
|---|
 | 418 |                    unsigned char* scratch, int target)
 | 
|---|
 | 419 | {
 | 
|---|
 | 420 |   int tgop_max = gop_max_/sizeof(unsigned char);
 | 
|---|
 | 421 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 422 | 
 | 
|---|
 | 423 |   int passed_scratch;
 | 
|---|
 | 424 |   if (!scratch) {
 | 
|---|
 | 425 |       scratch = new unsigned char[n>tgop_max?tgop_max:n];
 | 
|---|
 | 426 |       passed_scratch = 0;
 | 
|---|
 | 427 |     }
 | 
|---|
 | 428 |   else passed_scratch = 1;
 | 
|---|
 | 429 | 
 | 
|---|
 | 430 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 431 |                                                     (target== -1?0:target)));
 | 
|---|
 | 432 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 433 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 434 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 435 |           if (i->send()) {
 | 
|---|
 | 436 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 437 |             }
 | 
|---|
 | 438 |           if (i->recv()) {
 | 
|---|
 | 439 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 440 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 441 |             }
 | 
|---|
 | 442 |         }
 | 
|---|
 | 443 |       if (n > tgop_max) sync();
 | 
|---|
 | 444 |     }
 | 
|---|
 | 445 | 
 | 
|---|
 | 446 |   if (target == -1) {
 | 
|---|
 | 447 |       bcast(data, n, 0);
 | 
|---|
 | 448 |     }
 | 
|---|
 | 449 | 
 | 
|---|
 | 450 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 451 | }
 | 
|---|
 | 452 | 
 | 
|---|
 | 453 | void
 | 
|---|
 | 454 | MessageGrp::reduce(signed char* data, int n, GrpReduce<signed char>& red,
 | 
|---|
 | 455 |                    signed char* scratch, int target)
 | 
|---|
 | 456 | {
 | 
|---|
 | 457 |   int tgop_max = gop_max_/sizeof(signed char);
 | 
|---|
 | 458 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 459 | 
 | 
|---|
 | 460 |   int passed_scratch;
 | 
|---|
 | 461 |   if (!scratch) {
 | 
|---|
 | 462 |       scratch = new signed char[n>tgop_max?tgop_max:n];
 | 
|---|
 | 463 |       passed_scratch = 0;
 | 
|---|
 | 464 |     }
 | 
|---|
 | 465 |   else passed_scratch = 1;
 | 
|---|
 | 466 | 
 | 
|---|
 | 467 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 468 |                                                     (target== -1?0:target)));
 | 
|---|
 | 469 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 470 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 471 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 472 |           if (i->send()) {
 | 
|---|
 | 473 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 474 |             }
 | 
|---|
 | 475 |           if (i->recv()) {
 | 
|---|
 | 476 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 477 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 478 |             }
 | 
|---|
 | 479 |         }
 | 
|---|
 | 480 |       if (n > tgop_max) sync();
 | 
|---|
 | 481 |     }
 | 
|---|
 | 482 | 
 | 
|---|
 | 483 |   if (target == -1) {
 | 
|---|
 | 484 |       bcast(data, n, 0);
 | 
|---|
 | 485 |     }
 | 
|---|
 | 486 | 
 | 
|---|
 | 487 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 488 | }
 | 
|---|
 | 489 | 
 | 
|---|
 | 490 | void
 | 
|---|
 | 491 | MessageGrp::reduce(short* data, int n, GrpReduce<short>& red,
 | 
|---|
 | 492 |                    short* scratch, int target)
 | 
|---|
 | 493 | {
 | 
|---|
 | 494 |   int tgop_max = gop_max_/sizeof(short);
 | 
|---|
 | 495 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 496 | 
 | 
|---|
 | 497 |   int passed_scratch;
 | 
|---|
 | 498 |   if (!scratch) {
 | 
|---|
 | 499 |       scratch = new short[n>tgop_max?tgop_max:n];
 | 
|---|
 | 500 |       passed_scratch = 0;
 | 
|---|
 | 501 |     }
 | 
|---|
 | 502 |   else passed_scratch = 1;
 | 
|---|
 | 503 | 
 | 
|---|
 | 504 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 505 |                                                     (target== -1?0:target)));
 | 
|---|
 | 506 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 507 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 508 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 509 |           if (i->send()) {
 | 
|---|
 | 510 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 511 |             }
 | 
|---|
 | 512 |           if (i->recv()) {
 | 
|---|
 | 513 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 514 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 515 |             }
 | 
|---|
 | 516 |         }
 | 
|---|
 | 517 |       if (n > tgop_max) sync();
 | 
|---|
 | 518 |     }
 | 
|---|
 | 519 | 
 | 
|---|
 | 520 |   if (target == -1) {
 | 
|---|
 | 521 |       bcast(data, n, 0);
 | 
|---|
 | 522 |     }
 | 
|---|
 | 523 | 
 | 
|---|
 | 524 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 525 | }
 | 
|---|
 | 526 | 
 | 
|---|
 | 527 | void
 | 
|---|
 | 528 | MessageGrp::reduce(float* data, int n, GrpReduce<float>& red,
 | 
|---|
 | 529 |                    float* scratch, int target)
 | 
|---|
 | 530 | {
 | 
|---|
 | 531 |   int tgop_max = gop_max_/sizeof(float);
 | 
|---|
 | 532 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 533 | 
 | 
|---|
 | 534 |   int passed_scratch;
 | 
|---|
 | 535 |   if (!scratch) {
 | 
|---|
 | 536 |       scratch = new float[n>tgop_max?tgop_max:n];
 | 
|---|
 | 537 |       passed_scratch = 0;
 | 
|---|
 | 538 |     }
 | 
|---|
 | 539 |   else passed_scratch = 1;
 | 
|---|
 | 540 | 
 | 
|---|
 | 541 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 542 |                                                     (target== -1?0:target)));
 | 
|---|
 | 543 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 544 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 545 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 546 |           if (i->send()) {
 | 
|---|
 | 547 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 548 |             }
 | 
|---|
 | 549 |           if (i->recv()) {
 | 
|---|
 | 550 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 551 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 552 |             }
 | 
|---|
 | 553 |         }
 | 
|---|
 | 554 |       if (n > tgop_max) sync();
 | 
|---|
 | 555 |     }
 | 
|---|
 | 556 | 
 | 
|---|
 | 557 |   if (target == -1) {
 | 
|---|
 | 558 |       bcast(data, n, 0);
 | 
|---|
 | 559 |     }
 | 
|---|
 | 560 | 
 | 
|---|
 | 561 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 562 | }
 | 
|---|
 | 563 | 
 | 
|---|
 | 564 | void
 | 
|---|
 | 565 | MessageGrp::reduce(long* data, int n, GrpReduce<long>& red,
 | 
|---|
 | 566 |                    long* scratch, int target)
 | 
|---|
 | 567 | {
 | 
|---|
 | 568 |   int tgop_max = gop_max_/sizeof(long);
 | 
|---|
 | 569 |   if (tgop_max == 0) tgop_max = gop_max_?1:n;
 | 
|---|
 | 570 | 
 | 
|---|
 | 571 |   int passed_scratch;
 | 
|---|
 | 572 |   if (!scratch) {
 | 
|---|
 | 573 |       scratch = new long[n>tgop_max?tgop_max:n];
 | 
|---|
 | 574 |       passed_scratch = 0;
 | 
|---|
 | 575 |     }
 | 
|---|
 | 576 |   else passed_scratch = 1;
 | 
|---|
 | 577 | 
 | 
|---|
 | 578 |   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
 | 
|---|
 | 579 |                                                     (target== -1?0:target)));
 | 
|---|
 | 580 |   for (i->backwards(); !i->done(); i->next()) {
 | 
|---|
 | 581 |       for (int idat=0; idat<n; idat+=tgop_max) {
 | 
|---|
 | 582 |           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
 | 
|---|
 | 583 |           if (i->send()) {
 | 
|---|
 | 584 |               send(i->sendto(), &data[idat], ndat);
 | 
|---|
 | 585 |             }
 | 
|---|
 | 586 |           if (i->recv()) {
 | 
|---|
 | 587 |               recv(i->recvfrom(), scratch, ndat);
 | 
|---|
 | 588 |               red.reduce(&data[idat], scratch, ndat);
 | 
|---|
 | 589 |             }
 | 
|---|
 | 590 |         }
 | 
|---|
 | 591 |       if (n > tgop_max) sync();
 | 
|---|
 | 592 |     }
 | 
|---|
 | 593 | 
 | 
|---|
 | 594 |   if (target == -1) {
 | 
|---|
 | 595 |       bcast(data, n, 0);
 | 
|---|
 | 596 |     }
 | 
|---|
 | 597 | 
 | 
|---|
 | 598 |   if (!passed_scratch) delete[] scratch;
 | 
|---|
 | 599 | }
 | 
|---|
 | 600 | 
 | 
|---|
 #ifdef EXPLICIT_TEMPLATE_INSTANTIATION
 // Explicit instantiations of the do_sum/do_max/do_min helper templates for
 // the element types used by the MessageGrp::sum/max/min overloads in this
 // file.  Emitted only when EXPLICIT_TEMPLATE_INSTANTIATION is defined.

 // Instantiates one helper for one element type.
 #define INSTANTIATE_DO_X(func,type) \
     template void func(MessageGrp*, type *, int, type *, int)

 // Instantiates one helper for the full list of element types.
 #define INSTANTIATE_DO_X_ALL_TYPES(func) \
     INSTANTIATE_DO_X(func,unsigned int); \
     INSTANTIATE_DO_X(func,int); \
     INSTANTIATE_DO_X(func,double); \
     INSTANTIATE_DO_X(func,char); \
     INSTANTIATE_DO_X(func,unsigned char); \
     INSTANTIATE_DO_X(func,signed char)

 INSTANTIATE_DO_X_ALL_TYPES(do_sum);

 INSTANTIATE_DO_X_ALL_TYPES(do_max);

 INSTANTIATE_DO_X_ALL_TYPES(do_min);

 #undef INSTANTIATE_DO_X_ALL_TYPES

 #endif
 | 
|---|
 | 627 | 
 | 
|---|
 | 628 | /////////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 629 | 
 | 
|---|
 | 630 | // Local Variables:
 | 
|---|
 | 631 | // mode: c++
 | 
|---|
 | 632 | // c-file-style: "CLJ"
 | 
|---|
 | 633 | // End:
 | 
|---|