Skip to content

Commit ef1c135

Browse files
committed
Cleanup & Support very large arrays > 100 GB
1 parent 7149a55 commit ef1c135

38 files changed

+351
-365
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,8 @@ endmacro()
3131
# the final executable name
3232
set(EXE_NAME babelstream)
3333

34-
# for chrono and some basic CXX features, models can overwrite this if required
35-
set(CMAKE_CXX_STANDARD 11)
34+
# for chrono, make_unique, and some basic CXX features, models can overwrite this if required
35+
set(CMAKE_CXX_STANDARD 14)
3636

3737
if (NOT CMAKE_BUILD_TYPE)
3838
message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'")

src/Stream.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,8 @@ class Stream
3535
// Copy memory between host and device
3636
virtual void init_arrays(T initA, T initB, T initC) = 0;
3737
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) = 0;
38-
3938
};
4039

41-
4240
// Implementation specific device functions
4341
void listDevices(void);
4442
std::string getDeviceName(const int);

src/StreamModels.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
#endif
3737

3838
template <typename T>
39-
std::unique_ptr<Stream<T>> make_stream(int ARRAY_SIZE, unsigned int deviceIndex) {
39+
std::unique_ptr<Stream<T>> make_stream(size_t ARRAY_SIZE, size_t deviceIndex) {
4040
#if defined(CUDA)
4141
// Use the CUDA implementation
4242
return std::make_unique<CUDAStream<T>>(ARRAY_SIZE, deviceIndex);

src/acc/ACCStream.cpp

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,12 @@
88
#include "ACCStream.h"
99

1010
template <class T>
11-
ACCStream<T>::ACCStream(const int ARRAY_SIZE, int device)
11+
ACCStream<T>::ACCStream(const size_t ARRAY_SIZE, int device)
12+
: array_size{ARRAY_SIZE}
1213
{
1314
acc_device_t device_type = acc_get_device_type();
1415
acc_set_device_num(device, device_type);
1516

16-
array_size = ARRAY_SIZE;
17-
1817
// Set up data region on device
1918
this->a = new T[array_size];
2019
this->b = new T[array_size];
@@ -32,7 +31,7 @@ template <class T>
3231
ACCStream<T>::~ACCStream()
3332
{
3433
// End data region on device
35-
int array_size = this->array_size;
34+
size_t array_size = this->array_size;
3635

3736
T * restrict a = this->a;
3837
T * restrict b = this->b;
@@ -49,12 +48,12 @@ ACCStream<T>::~ACCStream()
4948
template <class T>
5049
void ACCStream<T>::init_arrays(T initA, T initB, T initC)
5150
{
52-
int array_size = this->array_size;
51+
size_t array_size = this->array_size;
5352
T * restrict a = this->a;
5453
T * restrict b = this->b;
5554
T * restrict c = this->c;
5655
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
57-
for (int i = 0; i < array_size; i++)
56+
for (size_t i = 0; i < array_size; i++)
5857
{
5958
a[i] = initA;
6059
b[i] = initB;
@@ -75,11 +74,11 @@ void ACCStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::ve
7574
template <class T>
7675
void ACCStream<T>::copy()
7776
{
78-
int array_size = this->array_size;
77+
size_t array_size = this->array_size;
7978
T * restrict a = this->a;
8079
T * restrict c = this->c;
8180
#pragma acc parallel loop present(a[0:array_size], c[0:array_size]) wait
82-
for (int i = 0; i < array_size; i++)
81+
for (size_t i = 0; i < array_size; i++)
8382
{
8483
c[i] = a[i];
8584
}
@@ -90,11 +89,11 @@ void ACCStream<T>::mul()
9089
{
9190
const T scalar = startScalar;
9291

93-
int array_size = this->array_size;
92+
size_t array_size = this->array_size;
9493
T * restrict b = this->b;
9594
T * restrict c = this->c;
9695
#pragma acc parallel loop present(b[0:array_size], c[0:array_size]) wait
97-
for (int i = 0; i < array_size; i++)
96+
for (size_t i = 0; i < array_size; i++)
9897
{
9998
b[i] = scalar * c[i];
10099
}
@@ -103,12 +102,12 @@ void ACCStream<T>::mul()
103102
template <class T>
104103
void ACCStream<T>::add()
105104
{
106-
int array_size = this->array_size;
105+
size_t array_size = this->array_size;
107106
T * restrict a = this->a;
108107
T * restrict b = this->b;
109108
T * restrict c = this->c;
110109
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
111-
for (int i = 0; i < array_size; i++)
110+
for (size_t i = 0; i < array_size; i++)
112111
{
113112
c[i] = a[i] + b[i];
114113
}
@@ -119,12 +118,12 @@ void ACCStream<T>::triad()
119118
{
120119
const T scalar = startScalar;
121120

122-
int array_size = this->array_size;
121+
size_t array_size = this->array_size;
123122
T * restrict a = this->a;
124123
T * restrict b = this->b;
125124
T * restrict c = this->c;
126125
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
127-
for (int i = 0; i < array_size; i++)
126+
for (size_t i = 0; i < array_size; i++)
128127
{
129128
a[i] = b[i] + scalar * c[i];
130129
}
@@ -135,12 +134,12 @@ void ACCStream<T>::nstream()
135134
{
136135
const T scalar = startScalar;
137136

138-
int array_size = this->array_size;
137+
size_t array_size = this->array_size;
139138
T * restrict a = this->a;
140139
T * restrict b = this->b;
141140
T * restrict c = this->c;
142141
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
143-
for (int i = 0; i < array_size; i++)
142+
for (size_t i = 0; i < array_size; i++)
144143
{
145144
a[i] += b[i] + scalar * c[i];
146145
}
@@ -151,11 +150,11 @@ T ACCStream<T>::dot()
151150
{
152151
T sum{};
153152

154-
int array_size = this->array_size;
153+
size_t array_size = this->array_size;
155154
T * restrict a = this->a;
156155
T * restrict b = this->b;
157156
#pragma acc parallel loop reduction(+:sum) present(a[0:array_size], b[0:array_size]) wait
158-
for (int i = 0; i < array_size; i++)
157+
for (size_t i = 0; i < array_size; i++)
159158
{
160159
sum += a[i] * b[i];
161160
}

src/acc/ACCStream.h

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,23 +20,23 @@ template <class T>
2020
class ACCStream : public Stream<T>
2121
{
2222

23-
struct A{
24-
T *a;
25-
T *b;
26-
T *c;
27-
};
23+
struct A{
24+
T *a;
25+
T *b;
26+
T *c;
27+
};
2828

2929
protected:
3030
// Size of arrays
31-
int array_size;
31+
size_t array_size;
3232
A aa;
3333
// Device side pointers
3434
T *a;
3535
T *b;
3636
T *c;
3737

3838
public:
39-
ACCStream(const int, int);
39+
ACCStream(const size_t, int);
4040
~ACCStream();
4141

4242
virtual void copy() override;
@@ -48,7 +48,4 @@ class ACCStream : public Stream<T>
4848

4949
virtual void init_arrays(T initA, T initB, T initC) override;
5050
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
51-
52-
53-
5451
};

0 commit comments

Comments
 (0)