| [0b990d] | 1 | //
 | 
|---|
 | 2 | // memarmci.cc
 | 
|---|
 | 3 | // based on memshm.cc
 | 
|---|
 | 4 | //
 | 
|---|
 | 5 | // Copyright (C) 1996 Limit Point Systems, Inc.
 | 
|---|
 | 6 | //
 | 
|---|
 | 7 | // Author: Curtis Janssen <cljanss@ca.sandia.gov>
 | 
|---|
 | 8 | // Maintainer: SNL
 | 
|---|
 | 9 | //
 | 
|---|
 | 10 | // This file is part of the SC Toolkit.
 | 
|---|
 | 11 | //
 | 
|---|
 | 12 | // The SC Toolkit is free software; you can redistribute it and/or modify
 | 
|---|
 | 13 | // it under the terms of the GNU Library General Public License as published by
 | 
|---|
 | 14 | // the Free Software Foundation; either version 2, or (at your option)
 | 
|---|
 | 15 | // any later version.
 | 
|---|
 | 16 | //
 | 
|---|
 | 17 | // The SC Toolkit is distributed in the hope that it will be useful,
 | 
|---|
 | 18 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
 | 19 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
 | 20 | // GNU Library General Public License for more details.
 | 
|---|
 | 21 | //
 | 
|---|
 | 22 | // You should have received a copy of the GNU Library General Public License
 | 
|---|
 | 23 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
 | 
|---|
 | 24 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 | 
|---|
 | 25 | //
 | 
|---|
 | 26 | // The U.S. Government is granted a limited license as per AL 91-7.
 | 
|---|
 | 27 | //
 | 
|---|
 | 28 | 
 | 
|---|
 | 29 | #ifndef _util_group_memarmci_cc
 | 
|---|
 | 30 | #define _util_group_memarmci_cc
 | 
|---|
 | 31 | 
 | 
|---|
 | 32 | #ifdef __GNUC__
 | 
|---|
 | 33 | #pragma implementation
 | 
|---|
 | 34 | #endif
 | 
|---|
 | 35 | 
 | 
|---|
 | 36 | extern "C" {
 | 
|---|
 | 37 | #include <armci.h>
 | 
|---|
 | 38 | }
 | 
|---|
 | 39 | 
 | 
|---|
 | 40 | #include <stdexcept>
 | 
|---|
 | 41 | 
 | 
|---|
 | 42 | #include <util/misc/formio.h>
 | 
|---|
 | 43 | #include <util/class/scexception.h>
 | 
|---|
 | 44 | #include <util/group/memarmci.h>
 | 
|---|
 | 45 | 
 | 
|---|
 | 46 | using namespace sc;
 | 
|---|
 | 47 | 
 | 
|---|
 | 48 | static ClassDesc ARMCIMemoryGrp_cd(
 | 
|---|
 | 49 |   typeid(ARMCIMemoryGrp),"ARMCIMemoryGrp",1,"public RDMAMemoryGrp",
 | 
|---|
 | 50 |   0, create<ARMCIMemoryGrp>, 0);
 | 
|---|
 | 51 | 
 | 
|---|
 | 52 | ARMCIMemoryGrp::ARMCIMemoryGrp(const Ref<MessageGrp>& msg):
 | 
|---|
 | 53 |   RDMAMemoryGrp(msg)
 | 
|---|
 | 54 | {
 | 
|---|
 | 55 |   init();
 | 
|---|
 | 56 | }
 | 
|---|
 | 57 | 
 | 
|---|
 | 58 | ARMCIMemoryGrp::ARMCIMemoryGrp(const Ref<KeyVal>& keyval):
 | 
|---|
 | 59 |   RDMAMemoryGrp(keyval)
 | 
|---|
 | 60 | {
 | 
|---|
 | 61 |   init();
 | 
|---|
 | 62 | }
 | 
|---|
 | 63 | 
 | 
|---|
 | 64 | void
 | 
|---|
 | 65 | ARMCIMemoryGrp::init()
 | 
|---|
 | 66 | {
 | 
|---|
 | 67 |   armci_lock_ = ThreadGrp::get_default_threadgrp()->new_lock();
 | 
|---|
 | 68 |   //debug_ = 1;
 | 
|---|
 | 69 |   all_data_ = 0;
 | 
|---|
 | 70 |   ARMCI_Init();
 | 
|---|
 | 71 | }
 | 
|---|
 | 72 | 
 | 
|---|
 | 73 | void
 | 
|---|
 | 74 | ARMCIMemoryGrp::finalize()
 | 
|---|
 | 75 | {
 | 
|---|
 | 76 |   set_localsize(0);
 | 
|---|
 | 77 |   ARMCI_Finalize();
 | 
|---|
 | 78 | }
 | 
|---|
 | 79 | 
 | 
|---|
 | 80 | void
 | 
|---|
 | 81 | ARMCIMemoryGrp::set_localsize(size_t localsize)
 | 
|---|
 | 82 | {
 | 
|---|
 | 83 |   ARMCI_AllFence();
 | 
|---|
 | 84 | 
 | 
|---|
 | 85 |   // this will initialize the offsets_ array
 | 
|---|
 | 86 |   RDMAMemoryGrp::set_localsize(localsize);
 | 
|---|
 | 87 | 
 | 
|---|
 | 88 |   if (all_data_) {
 | 
|---|
 | 89 |       ARMCI_Free(data_);
 | 
|---|
 | 90 |       delete[] all_data_;
 | 
|---|
 | 91 |       all_data_ = 0;
 | 
|---|
 | 92 |       data_ = 0;
 | 
|---|
 | 93 |       ARMCI_Destroy_mutexes();
 | 
|---|
 | 94 |     }
 | 
|---|
 | 95 | 
 | 
|---|
 | 96 |   if (localsize == 0) return;
 | 
|---|
 | 97 | 
 | 
|---|
 | 98 |   all_data_ = new void*[n()];
 | 
|---|
 | 99 |   int r;
 | 
|---|
 | 100 |   r = ARMCI_Malloc(all_data_, localsize);
 | 
|---|
 | 101 |   data_ = reinterpret_cast<char*>(all_data_[me()]);
 | 
|---|
 | 102 | 
 | 
|---|
 | 103 |   if (debug_) {
 | 
|---|
 | 104 |     for (int i=0; i<n(); i++) {
 | 
|---|
 | 105 |       std::cout << me() << ": all_data[" << i
 | 
|---|
 | 106 |                 << "] = " << all_data_[i] << std::endl;
 | 
|---|
 | 107 |     }
 | 
|---|
 | 108 |   }
 | 
|---|
 | 109 | 
 | 
|---|
 | 110 |   ARMCI_Create_mutexes(1);
 | 
|---|
 | 111 | }
 | 
|---|
 | 112 | 
 | 
|---|
 | 113 | void
 | 
|---|
 | 114 | ARMCIMemoryGrp::retrieve_data(void *data, int node, int offset,
 | 
|---|
 | 115 |                               int size, int lock)
 | 
|---|
 | 116 | {
 | 
|---|
 | 117 |   if (armci_lock_.nonnull()) armci_lock_->lock();
 | 
|---|
 | 118 |   if (lock) ARMCI_Lock(0, node);
 | 
|---|
 | 119 |   ARMCI_Get(reinterpret_cast<char*>(all_data_[node])+offset, data, size, node);
 | 
|---|
 | 120 |   if (armci_lock_.nonnull()) armci_lock_->unlock();
 | 
|---|
 | 121 | }
 | 
|---|
 | 122 | 
 | 
|---|
 | 123 | void
 | 
|---|
 | 124 | ARMCIMemoryGrp::replace_data(void *data, int node, int offset,
 | 
|---|
 | 125 |                              int size, int unlock)
 | 
|---|
 | 126 | {
 | 
|---|
 | 127 |   if (armci_lock_.nonnull()) armci_lock_->lock();
 | 
|---|
 | 128 |   ARMCI_Put(data, reinterpret_cast<char*>(all_data_[node])+offset, size, node);
 | 
|---|
 | 129 |   if (unlock) {
 | 
|---|
 | 130 |       ARMCI_Fence(node);
 | 
|---|
 | 131 |       ARMCI_Unlock(0, node);
 | 
|---|
 | 132 |     }
 | 
|---|
 | 133 |   if (armci_lock_.nonnull()) armci_lock_->unlock();
 | 
|---|
 | 134 | }
 | 
|---|
 | 135 | 
 | 
|---|
 | 136 | void
 | 
|---|
 | 137 | ARMCIMemoryGrp::sum_data(double *data, int node, int offset, int size)
 | 
|---|
 | 138 | {
 | 
|---|
 | 139 |   int doffset = offset/sizeof(double);
 | 
|---|
 | 140 |   int dsize = size/sizeof(double);
 | 
|---|
 | 141 | 
 | 
|---|
 | 142 |   void *src = data;
 | 
|---|
 | 143 |   void *dst = reinterpret_cast<double*>(all_data_[node])+doffset;
 | 
|---|
 | 144 | 
 | 
|---|
 | 145 |   armci_giov_t acc_dat;
 | 
|---|
 | 146 |   acc_dat.src_ptr_array = &src;
 | 
|---|
 | 147 |   acc_dat.dst_ptr_array = &dst;
 | 
|---|
 | 148 |   acc_dat.bytes = dsize * sizeof(double);
 | 
|---|
 | 149 |   acc_dat.ptr_array_len = 1;
 | 
|---|
 | 150 |   double scale = 1.0;
 | 
|---|
 | 151 | 
 | 
|---|
 | 152 |   if (debug_) {
 | 
|---|
 | 153 |       std::cout << me() << ": summing " << dsize
 | 
|---|
 | 154 |                 << " doubles from "
 | 
|---|
 | 155 |                 << (void*)src
 | 
|---|
 | 156 |                 << " to "
 | 
|---|
 | 157 |                 << (void*)dst
 | 
|---|
 | 158 |                 << " on " << node
 | 
|---|
 | 159 |                 << " (base dest=" << (void*)all_data_[node] << ")"
 | 
|---|
 | 160 |                 << std::endl;
 | 
|---|
 | 161 |       for (int i=0; i<dsize; i++) {
 | 
|---|
 | 162 |           std::cout << me() << ": src[" << i << "] = "
 | 
|---|
 | 163 |                     << data[i] << std::endl;
 | 
|---|
 | 164 |         }
 | 
|---|
 | 165 | //        for (int i=0; i<dsize; i++) {
 | 
|---|
 | 166 | //            std::cout << me() << ": dst[" << i << "] = "
 | 
|---|
 | 167 | //                      << ((double*)(all_data_[node]))[doffset+i]
 | 
|---|
 | 168 | //                      << std::endl;
 | 
|---|
 | 169 | //          }
 | 
|---|
 | 170 |     }
 | 
|---|
 | 171 | 
 | 
|---|
 | 172 |   if (armci_lock_.nonnull()) armci_lock_->lock();
 | 
|---|
 | 173 |   // Original code sending all data at once:
 | 
|---|
 | 174 |   // ARMCI_AccV(ARMCI_ACC_DBL, &scale, &acc_dat, 1, node);
 | 
|---|
 | 175 |   // Hack to send smaller chunks to not overflow buffers in ARMCI:
 | 
|---|
 | 176 |   int incr = 32768;
 | 
|---|
 | 177 |   for (int i=0; i<size; i+=incr) {
 | 
|---|
 | 178 |       void *tsrc = (&(((char*)src)[i]));
 | 
|---|
 | 179 |       void *tdst = (&(((char*)dst)[i]));
 | 
|---|
 | 180 |       acc_dat.src_ptr_array = &tsrc;
 | 
|---|
 | 181 |       acc_dat.dst_ptr_array = &tdst;
 | 
|---|
 | 182 |       if (size - i > incr) acc_dat.bytes = incr;
 | 
|---|
 | 183 |       else acc_dat.bytes = (size-i);
 | 
|---|
 | 184 |       acc_dat.ptr_array_len = 1;
 | 
|---|
 | 185 |       ARMCI_AccV(ARMCI_ACC_DBL, &scale, &acc_dat, 1, node);
 | 
|---|
 | 186 |     }
 | 
|---|
 | 187 |   // Send data all at once using the contiguous routine (which does not exist):
 | 
|---|
 | 188 |   // ARMCI_Acc(ARMCI_ACC_DBL, &scale, src, dst, size, node);
 | 
|---|
 | 189 |   if (armci_lock_.nonnull()) armci_lock_->unlock();
 | 
|---|
 | 190 | }
 | 
|---|
 | 191 | 
 | 
|---|
 | 192 | void
 | 
|---|
 | 193 | ARMCIMemoryGrp::sync()
 | 
|---|
 | 194 | {
 | 
|---|
 | 195 |   ARMCI_Barrier();
 | 
|---|
 | 196 | }
 | 
|---|
 | 197 | 
 | 
|---|
 | 198 | void
 | 
|---|
 | 199 | ARMCIMemoryGrp::deactivate()
 | 
|---|
 | 200 | {
 | 
|---|
 | 201 |   // Really, this is still active after deactivate is called.
 | 
|---|
 | 202 |   // However, we'll at least make sure that all outstanding
 | 
|---|
 | 203 |   // requests are finished.
 | 
|---|
 | 204 |   ARMCI_AllFence();
 | 
|---|
 | 205 | }
 | 
|---|
 | 206 | 
 | 
|---|
 | 207 | void*
 | 
|---|
 | 208 | ARMCIMemoryGrp::malloc_local(size_t nbyte)
 | 
|---|
 | 209 | {
 | 
|---|
 | 210 |   void* buf = ARMCI_Malloc_local(nbyte);
 | 
|---|
 | 211 |   if (buf == NULL)
 | 
|---|
 | 212 |     throw MemAllocFailed("malloc_local -- failed to allocate memory",
 | 
|---|
 | 213 |                          __FILE__, __LINE__, nbyte, this->class_desc());
 | 
|---|
 | 214 |   return buf;
 | 
|---|
 | 215 | }
 | 
|---|
 | 216 | 
 | 
|---|
 | 217 | void
 | 
|---|
 | 218 | ARMCIMemoryGrp::free_local(void *data)
 | 
|---|
 | 219 | {
 | 
|---|
 | 220 |   ARMCI_Free_local(data);
 | 
|---|
 | 221 | }
 | 
|---|
 | 222 | 
 | 
|---|
 | 223 | ARMCIMemoryGrp::~ARMCIMemoryGrp()
 | 
|---|
 | 224 | {
 | 
|---|
 | 225 |   finalize();
 | 
|---|
 | 226 | }
 | 
|---|
 | 227 | 
 | 
|---|
 | 228 | void
 | 
|---|
 | 229 | ARMCIMemoryGrp::print(std::ostream &o) const
 | 
|---|
 | 230 | {
 | 
|---|
 | 231 |   RDMAMemoryGrp::print(o);
 | 
|---|
 | 232 | }
 | 
|---|
 | 233 | 
 | 
|---|
 | 234 | #endif
 | 
|---|
 | 235 | 
 | 
|---|
 | 236 | /////////////////////////////////////////////////////////////////////////////
 | 
|---|
 | 237 | 
 | 
|---|
 | 238 | // Local Variables:
 | 
|---|
 | 239 | // mode: c++
 | 
|---|
 | 240 | // c-file-style: "CLJ"
 | 
|---|
 | 241 | // End:
 | 
|---|