| [0b990d] | 1 | // | 
|---|
|  | 2 | // memarmci.cc | 
|---|
|  | 3 | // based on memshm.cc | 
|---|
|  | 4 | // | 
|---|
|  | 5 | // Copyright (C) 1996 Limit Point Systems, Inc. | 
|---|
|  | 6 | // | 
|---|
|  | 7 | // Author: Curtis Janssen <cljanss@ca.sandia.gov> | 
|---|
|  | 8 | // Maintainer: SNL | 
|---|
|  | 9 | // | 
|---|
|  | 10 | // This file is part of the SC Toolkit. | 
|---|
|  | 11 | // | 
|---|
|  | 12 | // The SC Toolkit is free software; you can redistribute it and/or modify | 
|---|
|  | 13 | // it under the terms of the GNU Library General Public License as published by | 
|---|
|  | 14 | // the Free Software Foundation; either version 2, or (at your option) | 
|---|
|  | 15 | // any later version. | 
|---|
|  | 16 | // | 
|---|
|  | 17 | // The SC Toolkit is distributed in the hope that it will be useful, | 
|---|
|  | 18 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
|  | 19 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
|  | 20 | // GNU Library General Public License for more details. | 
|---|
|  | 21 | // | 
|---|
|  | 22 | // You should have received a copy of the GNU Library General Public License | 
|---|
|  | 23 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to | 
|---|
|  | 24 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | 
|---|
|  | 25 | // | 
|---|
|  | 26 | // The U.S. Government is granted a limited license as per AL 91-7. | 
|---|
|  | 27 | // | 
|---|
|  | 28 |  | 
|---|
|  | 29 | #ifndef _util_group_memarmci_cc | 
|---|
|  | 30 | #define _util_group_memarmci_cc | 
|---|
|  | 31 |  | 
|---|
|  | 32 | #ifdef __GNUC__ | 
|---|
|  | 33 | #pragma implementation | 
|---|
|  | 34 | #endif | 
|---|
|  | 35 |  | 
|---|
|  | 36 | extern "C" { | 
|---|
|  | 37 | #include <armci.h> | 
|---|
|  | 38 | } | 
|---|
|  | 39 |  | 
|---|
|  | 40 | #include <stdexcept> | 
|---|
|  | 41 |  | 
|---|
|  | 42 | #include <util/misc/formio.h> | 
|---|
|  | 43 | #include <util/class/scexception.h> | 
|---|
|  | 44 | #include <util/group/memarmci.h> | 
|---|
|  | 45 |  | 
|---|
|  | 46 | using namespace sc; | 
|---|
|  | 47 |  | 
|---|
|  | 48 | static ClassDesc ARMCIMemoryGrp_cd( | 
|---|
|  | 49 | typeid(ARMCIMemoryGrp),"ARMCIMemoryGrp",1,"public RDMAMemoryGrp", | 
|---|
|  | 50 | 0, create<ARMCIMemoryGrp>, 0); | 
|---|
|  | 51 |  | 
|---|
|  | 52 | ARMCIMemoryGrp::ARMCIMemoryGrp(const Ref<MessageGrp>& msg): | 
|---|
|  | 53 | RDMAMemoryGrp(msg) | 
|---|
|  | 54 | { | 
|---|
|  | 55 | init(); | 
|---|
|  | 56 | } | 
|---|
|  | 57 |  | 
|---|
|  | 58 | ARMCIMemoryGrp::ARMCIMemoryGrp(const Ref<KeyVal>& keyval): | 
|---|
|  | 59 | RDMAMemoryGrp(keyval) | 
|---|
|  | 60 | { | 
|---|
|  | 61 | init(); | 
|---|
|  | 62 | } | 
|---|
|  | 63 |  | 
|---|
|  | 64 | void | 
|---|
|  | 65 | ARMCIMemoryGrp::init() | 
|---|
|  | 66 | { | 
|---|
|  | 67 | armci_lock_ = ThreadGrp::get_default_threadgrp()->new_lock(); | 
|---|
|  | 68 | //debug_ = 1; | 
|---|
|  | 69 | all_data_ = 0; | 
|---|
|  | 70 | ARMCI_Init(); | 
|---|
|  | 71 | } | 
|---|
|  | 72 |  | 
|---|
|  | 73 | void | 
|---|
|  | 74 | ARMCIMemoryGrp::finalize() | 
|---|
|  | 75 | { | 
|---|
|  | 76 | set_localsize(0); | 
|---|
|  | 77 | ARMCI_Finalize(); | 
|---|
|  | 78 | } | 
|---|
|  | 79 |  | 
|---|
|  | 80 | void | 
|---|
|  | 81 | ARMCIMemoryGrp::set_localsize(size_t localsize) | 
|---|
|  | 82 | { | 
|---|
|  | 83 | ARMCI_AllFence(); | 
|---|
|  | 84 |  | 
|---|
|  | 85 | // this will initialize the offsets_ array | 
|---|
|  | 86 | RDMAMemoryGrp::set_localsize(localsize); | 
|---|
|  | 87 |  | 
|---|
|  | 88 | if (all_data_) { | 
|---|
|  | 89 | ARMCI_Free(data_); | 
|---|
|  | 90 | delete[] all_data_; | 
|---|
|  | 91 | all_data_ = 0; | 
|---|
|  | 92 | data_ = 0; | 
|---|
|  | 93 | ARMCI_Destroy_mutexes(); | 
|---|
|  | 94 | } | 
|---|
|  | 95 |  | 
|---|
|  | 96 | if (localsize == 0) return; | 
|---|
|  | 97 |  | 
|---|
|  | 98 | all_data_ = new void*[n()]; | 
|---|
|  | 99 | int r; | 
|---|
|  | 100 | r = ARMCI_Malloc(all_data_, localsize); | 
|---|
|  | 101 | data_ = reinterpret_cast<char*>(all_data_[me()]); | 
|---|
|  | 102 |  | 
|---|
|  | 103 | if (debug_) { | 
|---|
|  | 104 | for (int i=0; i<n(); i++) { | 
|---|
|  | 105 | std::cout << me() << ": all_data[" << i | 
|---|
|  | 106 | << "] = " << all_data_[i] << std::endl; | 
|---|
|  | 107 | } | 
|---|
|  | 108 | } | 
|---|
|  | 109 |  | 
|---|
|  | 110 | ARMCI_Create_mutexes(1); | 
|---|
|  | 111 | } | 
|---|
|  | 112 |  | 
|---|
|  | 113 | void | 
|---|
|  | 114 | ARMCIMemoryGrp::retrieve_data(void *data, int node, int offset, | 
|---|
|  | 115 | int size, int lock) | 
|---|
|  | 116 | { | 
|---|
|  | 117 | if (armci_lock_.nonnull()) armci_lock_->lock(); | 
|---|
|  | 118 | if (lock) ARMCI_Lock(0, node); | 
|---|
|  | 119 | ARMCI_Get(reinterpret_cast<char*>(all_data_[node])+offset, data, size, node); | 
|---|
|  | 120 | if (armci_lock_.nonnull()) armci_lock_->unlock(); | 
|---|
|  | 121 | } | 
|---|
|  | 122 |  | 
|---|
|  | 123 | void | 
|---|
|  | 124 | ARMCIMemoryGrp::replace_data(void *data, int node, int offset, | 
|---|
|  | 125 | int size, int unlock) | 
|---|
|  | 126 | { | 
|---|
|  | 127 | if (armci_lock_.nonnull()) armci_lock_->lock(); | 
|---|
|  | 128 | ARMCI_Put(data, reinterpret_cast<char*>(all_data_[node])+offset, size, node); | 
|---|
|  | 129 | if (unlock) { | 
|---|
|  | 130 | ARMCI_Fence(node); | 
|---|
|  | 131 | ARMCI_Unlock(0, node); | 
|---|
|  | 132 | } | 
|---|
|  | 133 | if (armci_lock_.nonnull()) armci_lock_->unlock(); | 
|---|
|  | 134 | } | 
|---|
|  | 135 |  | 
|---|
|  | 136 | void | 
|---|
|  | 137 | ARMCIMemoryGrp::sum_data(double *data, int node, int offset, int size) | 
|---|
|  | 138 | { | 
|---|
|  | 139 | int doffset = offset/sizeof(double); | 
|---|
|  | 140 | int dsize = size/sizeof(double); | 
|---|
|  | 141 |  | 
|---|
|  | 142 | void *src = data; | 
|---|
|  | 143 | void *dst = reinterpret_cast<double*>(all_data_[node])+doffset; | 
|---|
|  | 144 |  | 
|---|
|  | 145 | armci_giov_t acc_dat; | 
|---|
|  | 146 | acc_dat.src_ptr_array = &src; | 
|---|
|  | 147 | acc_dat.dst_ptr_array = &dst; | 
|---|
|  | 148 | acc_dat.bytes = dsize * sizeof(double); | 
|---|
|  | 149 | acc_dat.ptr_array_len = 1; | 
|---|
|  | 150 | double scale = 1.0; | 
|---|
|  | 151 |  | 
|---|
|  | 152 | if (debug_) { | 
|---|
|  | 153 | std::cout << me() << ": summing " << dsize | 
|---|
|  | 154 | << " doubles from " | 
|---|
|  | 155 | << (void*)src | 
|---|
|  | 156 | << " to " | 
|---|
|  | 157 | << (void*)dst | 
|---|
|  | 158 | << " on " << node | 
|---|
|  | 159 | << " (base dest=" << (void*)all_data_[node] << ")" | 
|---|
|  | 160 | << std::endl; | 
|---|
|  | 161 | for (int i=0; i<dsize; i++) { | 
|---|
|  | 162 | std::cout << me() << ": src[" << i << "] = " | 
|---|
|  | 163 | << data[i] << std::endl; | 
|---|
|  | 164 | } | 
|---|
|  | 165 | //        for (int i=0; i<dsize; i++) { | 
|---|
|  | 166 | //            std::cout << me() << ": dst[" << i << "] = " | 
|---|
|  | 167 | //                      << ((double*)(all_data_[node]))[doffset+i] | 
|---|
|  | 168 | //                      << std::endl; | 
|---|
|  | 169 | //          } | 
|---|
|  | 170 | } | 
|---|
|  | 171 |  | 
|---|
|  | 172 | if (armci_lock_.nonnull()) armci_lock_->lock(); | 
|---|
|  | 173 | // Original code sending all data at once: | 
|---|
|  | 174 | // ARMCI_AccV(ARMCI_ACC_DBL, &scale, &acc_dat, 1, node); | 
|---|
|  | 175 | // Hack to send smaller chunks to not overflow buffers in ARMCI: | 
|---|
|  | 176 | int incr = 32768; | 
|---|
|  | 177 | for (int i=0; i<size; i+=incr) { | 
|---|
|  | 178 | void *tsrc = (&(((char*)src)[i])); | 
|---|
|  | 179 | void *tdst = (&(((char*)dst)[i])); | 
|---|
|  | 180 | acc_dat.src_ptr_array = &tsrc; | 
|---|
|  | 181 | acc_dat.dst_ptr_array = &tdst; | 
|---|
|  | 182 | if (size - i > incr) acc_dat.bytes = incr; | 
|---|
|  | 183 | else acc_dat.bytes = (size-i); | 
|---|
|  | 184 | acc_dat.ptr_array_len = 1; | 
|---|
|  | 185 | ARMCI_AccV(ARMCI_ACC_DBL, &scale, &acc_dat, 1, node); | 
|---|
|  | 186 | } | 
|---|
|  | 187 | // Send data all at once using the contiguous routine (which does not exist): | 
|---|
|  | 188 | // ARMCI_Acc(ARMCI_ACC_DBL, &scale, src, dst, size, node); | 
|---|
|  | 189 | if (armci_lock_.nonnull()) armci_lock_->unlock(); | 
|---|
|  | 190 | } | 
|---|
|  | 191 |  | 
|---|
|  | 192 | void | 
|---|
|  | 193 | ARMCIMemoryGrp::sync() | 
|---|
|  | 194 | { | 
|---|
|  | 195 | ARMCI_Barrier(); | 
|---|
|  | 196 | } | 
|---|
|  | 197 |  | 
|---|
|  | 198 | void | 
|---|
|  | 199 | ARMCIMemoryGrp::deactivate() | 
|---|
|  | 200 | { | 
|---|
|  | 201 | // Really, this is still active after deactivate is called. | 
|---|
|  | 202 | // However, we'll at least make sure that all outstanding | 
|---|
|  | 203 | // requests are finished. | 
|---|
|  | 204 | ARMCI_AllFence(); | 
|---|
|  | 205 | } | 
|---|
|  | 206 |  | 
|---|
|  | 207 | void* | 
|---|
|  | 208 | ARMCIMemoryGrp::malloc_local(size_t nbyte) | 
|---|
|  | 209 | { | 
|---|
|  | 210 | void* buf = ARMCI_Malloc_local(nbyte); | 
|---|
|  | 211 | if (buf == NULL) | 
|---|
|  | 212 | throw MemAllocFailed("malloc_local -- failed to allocate memory", | 
|---|
|  | 213 | __FILE__, __LINE__, nbyte, this->class_desc()); | 
|---|
|  | 214 | return buf; | 
|---|
|  | 215 | } | 
|---|
|  | 216 |  | 
|---|
|  | 217 | void | 
|---|
|  | 218 | ARMCIMemoryGrp::free_local(void *data) | 
|---|
|  | 219 | { | 
|---|
|  | 220 | ARMCI_Free_local(data); | 
|---|
|  | 221 | } | 
|---|
|  | 222 |  | 
|---|
|  | 223 | ARMCIMemoryGrp::~ARMCIMemoryGrp() | 
|---|
|  | 224 | { | 
|---|
|  | 225 | finalize(); | 
|---|
|  | 226 | } | 
|---|
|  | 227 |  | 
|---|
|  | 228 | void | 
|---|
|  | 229 | ARMCIMemoryGrp::print(std::ostream &o) const | 
|---|
|  | 230 | { | 
|---|
|  | 231 | RDMAMemoryGrp::print(o); | 
|---|
|  | 232 | } | 
|---|
|  | 233 |  | 
|---|
|  | 234 | #endif | 
|---|
|  | 235 |  | 
|---|
|  | 236 | ///////////////////////////////////////////////////////////////////////////// | 
|---|
|  | 237 |  | 
|---|
|  | 238 | // Local Variables: | 
|---|
|  | 239 | // mode: c++ | 
|---|
|  | 240 | // c-file-style: "CLJ" | 
|---|
|  | 241 | // End: | 
|---|