1 | //
|
---|
2 | // memory.h
|
---|
3 | //
|
---|
4 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
5 | //
|
---|
6 | // Author: Curtis Janssen <cljanss@limitpt.com>
|
---|
7 | // Maintainer: LPS
|
---|
8 | //
|
---|
9 | // This file is part of the SC Toolkit.
|
---|
10 | //
|
---|
11 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
12 | // it under the terms of the GNU Library General Public License as published by
|
---|
13 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
14 | // any later version.
|
---|
15 | //
|
---|
16 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | // GNU Library General Public License for more details.
|
---|
20 | //
|
---|
21 | // You should have received a copy of the GNU Library General Public License
|
---|
22 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
24 | //
|
---|
25 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
26 | //
|
---|
27 |
|
---|
28 | #ifdef __GNUC__
|
---|
29 | #pragma interface
|
---|
30 | #endif
|
---|
31 |
|
---|
32 | #ifndef _util_group_memory_h
|
---|
33 | #define _util_group_memory_h
|
---|
34 |
|
---|
35 | #include <iostream>
|
---|
36 |
|
---|
37 | #include <scconfig.h>
|
---|
38 | #include <util/class/class.h>
|
---|
39 | #include <util/group/thread.h>
|
---|
40 |
|
---|
41 | namespace sc {
|
---|
42 |
|
---|
// distsize_t is the unsigned integer type used for offsets and sizes in the
// global (distributed) address space.  distsize_to_size() converts such a
// value to size_t.  Exactly one of the three branches below is compiled.

#if 0 // this can be used to catch accidental conversions to int
// Debugging stand-in for distsize_t: a class wrapping an unsigned long long
// that only converts to size_t through distsize_to_size().  Enable this
// branch temporarily to let the compiler flag implicit conversions.
// NOTE(review): unlike the other two branches, this one does not define
// distssize_t.
class distsize_t {
    friend size_t distsize_to_size(const distsize_t &a);
    friend distsize_t operator *(const int &a,const distsize_t &b);
    friend distsize_t operator +(const int &a,const distsize_t &b);
    friend distsize_t operator -(const int &a,const distsize_t &b);
    friend distsize_t operator /(const int &a,const distsize_t &b);
    friend distsize_t operator %(const int &a,const distsize_t &b);
    // std:: qualified: this header has no using-directive for namespace std.
    friend std::ostream& operator <<(std::ostream& o, const distsize_t &s);
  private:
    unsigned long long s;
  public:
    // A default-constructed value holds a large, recognizable number
    // (presumably so that use of an unset size stands out).
    distsize_t(): s(999999999999999LL) {}
    distsize_t(int a): s(a) {}
    distsize_t(unsigned int a): s(a) {}
    distsize_t(unsigned long long a): s(a) {}
    distsize_t &operator =(const distsize_t &a)
    { s=a.s; return *this; }
    distsize_t &operator +=(const distsize_t &a)
    { s+=a.s; return *this; }
    distsize_t operator *(const distsize_t &a) const
    { return s*a.s; }
    distsize_t operator +(const distsize_t &a) const
    { return s+a.s; }
    distsize_t operator -(const distsize_t &a) const
    { return s-a.s; }
    distsize_t operator /(const distsize_t &a) const
    { return s/a.s; }
    distsize_t operator %(const distsize_t &a) const
    { return s%a.s; }
    bool operator <(const distsize_t &a) const
    { return s<a.s; }
    bool operator <=(const distsize_t &a) const
    { return s<=a.s; }
    bool operator >(const distsize_t &a) const
    { return s>a.s; }
    bool operator >=(const distsize_t &a) const
    { return s>=a.s; }
    bool operator ==(const distsize_t &a) const
    { return s==a.s; }
    distsize_t operator *(const int &a) const
    { return s*a; }
    distsize_t operator +(const int &a) const
    { return s+a; }
    distsize_t operator -(const int &a) const
    { return s-a; }
    distsize_t operator /(const int &a) const
    { return s/a; }
    distsize_t operator %(const int &a) const
    { return s%a; }
};
inline distsize_t operator *(const int &a,const distsize_t &b)
{ return a*b.s; }
inline distsize_t operator +(const int &a,const distsize_t &b)
{ return a+b.s; }
inline distsize_t operator -(const int &a,const distsize_t &b)
{ return a-b.s; }
inline distsize_t operator /(const int &a,const distsize_t &b)
{ return a/b.s; }
inline distsize_t operator %(const int &a,const distsize_t &b)
{ return a%b.s; }
inline std::ostream& operator <<(std::ostream& o, const distsize_t &s)
{ return o<<s.s; }
inline size_t distsize_to_size(const distsize_t &a) {return a.s;}
#elif defined(HAVE_LONG_LONG)
// Preferred definition: long long is available.
typedef unsigned long long distsize_t;
typedef long long distssize_t;
// Implicit integer conversion to size_t; no range check is made.
inline size_t distsize_to_size(const distsize_t &a) {return a;}
#else
// Fallback when HAVE_LONG_LONG is not defined.
typedef unsigned long distsize_t;
typedef long distssize_t;
// Implicit integer conversion to size_t; no range check is made.
inline size_t distsize_to_size(const distsize_t &a) {return a;}
#endif
115 |
|
---|
116 | /** The MemoryGrp abstract class provides a way of accessing distributed
|
---|
117 | memory in a parallel machine. Several specializations are available. For
|
---|
118 | one processor, ProcMemoryGrp provides a simple stub implementation.
|
---|
119 | Parallel specializations include ShmMemoryGrp, MTMPIMemoryGrp, and
|
---|
120 | ARMCIMemoryGrp. The particular specializations that work depend highly on
|
---|
121 | the target hardware and software environment.
|
---|
122 |
|
---|
123 | */
|
---|
124 | class MemoryGrp: public DescribedClass {
|
---|
125 | private:
|
---|
126 | Ref<ThreadLock> *locks_;
|
---|
127 | int nlock_;
|
---|
128 |
|
---|
129 | void init_locks();
|
---|
130 |
|
---|
131 |
|
---|
132 | protected:
|
---|
133 | // derived classes must fill in all these
|
---|
134 | // ~MemoryGrp deletes the arrays
|
---|
135 | int me_;
|
---|
136 | int n_;
|
---|
137 | distsize_t *offsets_; // offsets_[n_] is the fence for all data
|
---|
138 |
|
---|
139 | // set to nonzero for debugging information
|
---|
140 | int debug_;
|
---|
141 |
|
---|
142 | void obtain_local_lock(size_t start, size_t fence);
|
---|
143 | void release_local_lock(size_t start, size_t fence);
|
---|
144 | public:
|
---|
145 | MemoryGrp();
|
---|
146 | MemoryGrp(const Ref<KeyVal>&);
|
---|
147 | virtual ~MemoryGrp();
|
---|
148 |
|
---|
149 | /// Returns who I am.
|
---|
150 | int me() const { return me_; }
|
---|
151 | /// Returns how many nodes there are.
|
---|
152 | int n() const { return n_; }
|
---|
153 |
|
---|
154 | /** Set the size of locally held memory.
|
---|
155 | When memory is accessed using a global offset counting
|
---|
156 | starts at node 0 and proceeds up to node n() - 1. */
|
---|
157 | virtual void set_localsize(size_t) = 0;
|
---|
158 | /// Returns the amount of memory residing locally on me().
|
---|
159 | size_t localsize() { return distsize_to_size(offsets_[me_+1]-offsets_[me_]); }
|
---|
160 | /// Returns a pointer to the local data.
|
---|
161 | virtual void *localdata() = 0;
|
---|
162 | /// Returns the global offset to this node's memory.
|
---|
163 | distsize_t localoffset() { return offsets_[me_]; }
|
---|
164 | /// Returns the amount of memory residing on node.
|
---|
165 | int size(int node)
|
---|
166 | { return distsize_to_size(offsets_[node+1] - offsets_[node]); }
|
---|
167 | /// Returns the global offset to node's memory.
|
---|
168 | distsize_t offset(int node) { return offsets_[node]; }
|
---|
169 | /// Returns the sum of all memory allocated on all nodes.
|
---|
170 | distsize_t totalsize() { return offsets_[n_]; }
|
---|
171 |
|
---|
172 | /// Activate is called before the memory is to be used.
|
---|
173 | virtual void activate();
|
---|
174 | /// Deactivate is called after the memory has been used.
|
---|
175 | virtual void deactivate();
|
---|
176 |
|
---|
177 | /// This gives write access to the memory location. No locking is done.
|
---|
178 | virtual void *obtain_writeonly(distsize_t offset, int size) = 0;
|
---|
179 | /** Only one thread can have an unreleased obtain_readwrite at a time.
|
---|
180 | The actual memory region locked can be larger than that requested.
|
---|
181 | If the memory region is already locked this will block. For this
|
---|
182 | reason, data should be held as read/write for as short a time as
|
---|
183 | possible. */
|
---|
184 | virtual void *obtain_readwrite(distsize_t offset, int size) = 0;
|
---|
185 | /// This gives read access to the memory location. No locking is done.
|
---|
186 | virtual void *obtain_readonly(distsize_t offset, int size) = 0;
|
---|
187 | /// This is called when read access is no longer needed.
|
---|
188 | virtual void release_readonly(void *data, distsize_t offset, int size) = 0;
|
---|
189 | /// This is called when write access is no longer needed.
|
---|
190 | virtual void release_writeonly(void *data, distsize_t offset, int size)=0;
|
---|
191 | /** This is called when read/write access is no longer needed.
|
---|
192 | The memory will be unlocked. */
|
---|
193 | virtual void release_readwrite(void *data, distsize_t offset, int size)=0;
|
---|
194 |
|
---|
195 | virtual void sum_reduction(double *data, distsize_t doffset, int dsize);
|
---|
196 | virtual void sum_reduction_on_node(double *data, size_t doffset, int dsize,
|
---|
197 | int node = -1);
|
---|
198 |
|
---|
199 | /** Synchronizes all the nodes. This is useful after remote memory
|
---|
200 | writes to be certain that all of the writes have completed and the
|
---|
201 | data can be accessed locally, for example. */
|
---|
202 | virtual void sync() = 0;
|
---|
203 |
|
---|
204 | /** Allocate data that will be accessed locally only. Using this
|
---|
205 | for data that will be used for global operation can improve
|
---|
206 | efficiency. Data allocated in this way must be freed with
|
---|
207 | free_local_double. */
|
---|
208 | virtual void* malloc_local(size_t nbyte);
|
---|
209 | virtual double* malloc_local_double(size_t ndouble);
|
---|
210 |
|
---|
211 | /** Free data that was allocated with malloc_local_double. */
|
---|
212 | virtual void free_local(void *data);
|
---|
213 | virtual void free_local_double(double *data);
|
---|
214 |
|
---|
215 | /** Processes outstanding requests. Some memory group implementations
|
---|
216 | don't have access to real shared memory or even active messages.
|
---|
217 | Instead, requests are processed whenever certain memory group
|
---|
218 | routines are called. This can cause large latencies and buffer
|
---|
219 | overflows. If this is a problem, then the catchup member can be
|
---|
220 | called to process all outstanding requests. */
|
---|
221 | virtual void catchup();
|
---|
222 |
|
---|
223 | /// Prints out information about the object.
|
---|
224 | virtual void print(std::ostream &o = ExEnv::out0()) const;
|
---|
225 |
|
---|
226 | /** Create a memory group. This routine looks for a -memorygrp
|
---|
227 | argument, and then the environmental variable MEMORYGRP to decide
|
---|
228 | which specialization of MemoryGrp would be appropriate. The
|
---|
229 | argument to -memorygrp or the value of the environmental variable
|
---|
230 | should be either string for a ParsedKeyVal constructor or a
|
---|
231 | classname. The default ThreadGrp and MessageGrp objects should be
|
---|
232 | initialized before this is called. */
|
---|
233 | static MemoryGrp* initial_memorygrp(int &argc, char** argv);
|
---|
234 | static MemoryGrp* initial_memorygrp();
|
---|
235 | /** The default memory group contains the primary memory group to
|
---|
236 | be used by an application. */
|
---|
237 | static void set_default_memorygrp(const Ref<MemoryGrp>&);
|
---|
238 | /** Returns the default memory group. If the default memory
|
---|
239 | group has not yet been set, then one is created.
|
---|
240 | The particular specialization used is determined by
|
---|
241 | configuration options and which specializations are being
|
---|
242 | used for MessageGrp and ThreadGrp. */
|
---|
243 | static MemoryGrp* get_default_memorygrp();
|
---|
244 | };
|
---|
245 |
|
---|
246 |
|
---|
247 | /** The MemoryGrpBuf class provides access to pieces of the
|
---|
248 | global shared memory that have been obtained with MemoryGrp.
|
---|
249 | MemoryGrpBuf is a template class that is parameterized on
|
---|
250 | data_t. All lengths and offsets of given in terms
|
---|
251 | of sizeof(data_t). */
|
---|
252 | template <class data_t>
|
---|
253 | class MemoryGrpBuf {
|
---|
254 | Ref<MemoryGrp> grp_;
|
---|
255 | enum AccessType { None, Read, Write, ReadWrite };
|
---|
256 | AccessType accesstype_;
|
---|
257 | data_t *data_;
|
---|
258 | distsize_t offset_;
|
---|
259 | int length_;
|
---|
260 | public:
|
---|
261 | /** Creates a new MemoryGrpBuf given a MemoryGrp
|
---|
262 | reference. This is a template class parameterized on
|
---|
263 | data_t. */
|
---|
264 | MemoryGrpBuf(const Ref<MemoryGrp> &);
|
---|
265 | /** Request write only access to global memory at the global address
|
---|
266 | offset and with size length. Writing the same bit of memory twice
|
---|
267 | without an intervening sync of the MemoryGrp will have undefined
|
---|
268 | results. */
|
---|
269 | data_t *writeonly(distsize_t offset, int length);
|
---|
270 | /** Request read write access to global memory at the global address
|
---|
271 | offset and with size length. This will lock the memory it uses
|
---|
272 | until release is called unless locking has been turned off in the
|
---|
273 | MemoryGrp object. */
|
---|
274 | data_t *readwrite(distsize_t offset, int length);
|
---|
275 | /** Request read only access to global memory at the global address
|
---|
276 | offset and with size length. Writing to the
|
---|
277 | specified region without an intervening sync of the MemoryGrp
|
---|
278 | will have undefined results. */
|
---|
279 | const data_t *readonly(distsize_t offset, int length);
|
---|
280 | /** These behave like writeonly, readwrite, and readonly, except the
|
---|
281 | offset is local to the node specified by node. If node = -1, then
|
---|
282 | the local node is used. */
|
---|
283 | data_t *writeonly_on_node(size_t offset, int length, int node = -1);
|
---|
284 | data_t *readwrite_on_node(size_t offset, int length, int node = -1);
|
---|
285 | const data_t *readonly_on_node(size_t offset, int length, int node = -1);
|
---|
286 | /** Release the access to the chunk of global memory that was obtained
|
---|
287 | with writeonly, readwrite, readonly, writeonly_on_node,
|
---|
288 | readwrite_on_node, and readonly_on_node. */
|
---|
289 | void release();
|
---|
290 | /// The length of the current bit of memory.
|
---|
291 | int length() const { return length_; }
|
---|
292 | };
|
---|
293 |
|
---|
294 | //////////////////////////////////////////////////////////////////////
|
---|
295 | // MemoryGrpBuf members
|
---|
296 |
|
---|
297 | template <class data_t>
|
---|
298 | MemoryGrpBuf<data_t>::MemoryGrpBuf(const Ref<MemoryGrp> & grp)
|
---|
299 | {
|
---|
300 | grp_ = grp;
|
---|
301 | accesstype_ = None;
|
---|
302 | }
|
---|
303 |
|
---|
304 | template <class data_t>
|
---|
305 | data_t *
|
---|
306 | MemoryGrpBuf<data_t>::writeonly(distsize_t offset, int length)
|
---|
307 | {
|
---|
308 | if (accesstype_ != None) release();
|
---|
309 | data_ = (data_t *) grp_->obtain_writeonly(sizeof(data_t)*offset,
|
---|
310 | sizeof(data_t)*length);
|
---|
311 | offset_ = offset;
|
---|
312 | length_ = length;
|
---|
313 | accesstype_ = Write;
|
---|
314 | return data_;
|
---|
315 | }
|
---|
316 |
|
---|
317 | template <class data_t>
|
---|
318 | data_t *
|
---|
319 | MemoryGrpBuf<data_t>::readwrite(distsize_t offset, int length)
|
---|
320 | {
|
---|
321 | if (accesstype_ != None) release();
|
---|
322 | data_ = (data_t *) grp_->obtain_readwrite(sizeof(data_t)*offset,
|
---|
323 | sizeof(data_t)*length);
|
---|
324 | offset_ = offset;
|
---|
325 | length_ = length;
|
---|
326 | accesstype_ = ReadWrite;
|
---|
327 | return data_;
|
---|
328 | }
|
---|
329 |
|
---|
330 | template <class data_t>
|
---|
331 | const data_t *
|
---|
332 | MemoryGrpBuf<data_t>::readonly(distsize_t offset, int length)
|
---|
333 | {
|
---|
334 | if (accesstype_ != None) release();
|
---|
335 | data_ = (data_t *) grp_->obtain_readonly(sizeof(data_t)*offset,
|
---|
336 | sizeof(data_t)*length);
|
---|
337 | offset_ = offset;
|
---|
338 | length_ = length;
|
---|
339 | accesstype_ = Read;
|
---|
340 | return data_;
|
---|
341 | }
|
---|
342 |
|
---|
343 | template <class data_t>
|
---|
344 | data_t *
|
---|
345 | MemoryGrpBuf<data_t>::writeonly_on_node(size_t offset, int length, int node)
|
---|
346 | {
|
---|
347 | if (node == -1) node = grp_->me();
|
---|
348 | return writeonly(offset + grp_->offset(node)/sizeof(data_t), length);
|
---|
349 | }
|
---|
350 |
|
---|
351 | template <class data_t>
|
---|
352 | data_t *
|
---|
353 | MemoryGrpBuf<data_t>::readwrite_on_node(size_t offset, int length, int node)
|
---|
354 | {
|
---|
355 | if (node == -1) node = grp_->me();
|
---|
356 | return readwrite(offset + grp_->offset(node)/sizeof(data_t), length);
|
---|
357 | }
|
---|
358 |
|
---|
359 | template <class data_t>
|
---|
360 | const data_t *
|
---|
361 | MemoryGrpBuf<data_t>::readonly_on_node(size_t offset, int length, int node)
|
---|
362 | {
|
---|
363 | if (node == -1) node = grp_->me();
|
---|
364 | return readonly(offset + grp_->offset(node)/sizeof(data_t), length);
|
---|
365 | }
|
---|
366 |
|
---|
367 | template <class data_t>
|
---|
368 | void
|
---|
369 | MemoryGrpBuf<data_t>::release()
|
---|
370 | {
|
---|
371 | if (accesstype_ == Write)
|
---|
372 | grp_->release_writeonly((data_t *)data_,
|
---|
373 | sizeof(data_t)*offset_, sizeof(data_t)*length_);
|
---|
374 | if (accesstype_ == Read)
|
---|
375 | grp_->release_readonly(data_, sizeof(data_t)*offset_,
|
---|
376 | sizeof(data_t)*length_);
|
---|
377 | if (accesstype_ == ReadWrite)
|
---|
378 | grp_->release_readwrite(data_, sizeof(data_t)*offset_,
|
---|
379 | sizeof(data_t)*length_);
|
---|
380 |
|
---|
381 | accesstype_ = None;
|
---|
382 | }
|
---|
383 |
|
---|
384 | }
|
---|
385 |
|
---|
386 | #endif
|
---|
387 |
|
---|
388 | // Local Variables:
|
---|
389 | // mode: c++
|
---|
390 | // c-file-style: "CLJ"
|
---|
391 | // End:
|
---|