8
8
#include " ACCStream.h"
9
9
10
10
template <class T >
11
- ACCStream<T>::ACCStream(const int ARRAY_SIZE, int device)
11
+ ACCStream<T>::ACCStream(const size_t ARRAY_SIZE, int device)
12
+ : array_size{ARRAY_SIZE}
12
13
{
13
14
acc_device_t device_type = acc_get_device_type ();
14
15
acc_set_device_num (device, device_type);
15
16
16
- array_size = ARRAY_SIZE;
17
-
18
17
// Set up data region on device
19
18
this ->a = new T[array_size];
20
19
this ->b = new T[array_size];
@@ -32,7 +31,7 @@ template <class T>
32
31
ACCStream<T>::~ACCStream ()
33
32
{
34
33
// End data region on device
35
- int array_size = this ->array_size ;
34
+ size_t array_size = this ->array_size ;
36
35
37
36
T * restrict a = this ->a ;
38
37
T * restrict b = this ->b ;
@@ -49,12 +48,12 @@ ACCStream<T>::~ACCStream()
49
48
template <class T >
50
49
void ACCStream<T>::init_arrays(T initA, T initB, T initC)
51
50
{
52
- int array_size = this ->array_size ;
51
+ size_t array_size = this ->array_size ;
53
52
T * restrict a = this ->a ;
54
53
T * restrict b = this ->b ;
55
54
T * restrict c = this ->c ;
56
55
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
57
- for (int i = 0 ; i < array_size; i++)
56
+ for (size_t i = 0 ; i < array_size; i++)
58
57
{
59
58
a[i] = initA;
60
59
b[i] = initB;
@@ -75,11 +74,11 @@ void ACCStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::ve
75
74
template <class T >
76
75
void ACCStream<T>::copy()
77
76
{
78
- int array_size = this ->array_size ;
77
+ size_t array_size = this ->array_size ;
79
78
T * restrict a = this ->a ;
80
79
T * restrict c = this ->c ;
81
80
#pragma acc parallel loop present(a[0:array_size], c[0:array_size]) wait
82
- for (int i = 0 ; i < array_size; i++)
81
+ for (size_t i = 0 ; i < array_size; i++)
83
82
{
84
83
c[i] = a[i];
85
84
}
@@ -90,11 +89,11 @@ void ACCStream<T>::mul()
90
89
{
91
90
const T scalar = startScalar;
92
91
93
- int array_size = this ->array_size ;
92
+ size_t array_size = this ->array_size ;
94
93
T * restrict b = this ->b ;
95
94
T * restrict c = this ->c ;
96
95
#pragma acc parallel loop present(b[0:array_size], c[0:array_size]) wait
97
- for (int i = 0 ; i < array_size; i++)
96
+ for (size_t i = 0 ; i < array_size; i++)
98
97
{
99
98
b[i] = scalar * c[i];
100
99
}
@@ -103,12 +102,12 @@ void ACCStream<T>::mul()
103
102
template <class T >
104
103
void ACCStream<T>::add()
105
104
{
106
- int array_size = this ->array_size ;
105
+ size_t array_size = this ->array_size ;
107
106
T * restrict a = this ->a ;
108
107
T * restrict b = this ->b ;
109
108
T * restrict c = this ->c ;
110
109
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
111
- for (int i = 0 ; i < array_size; i++)
110
+ for (size_t i = 0 ; i < array_size; i++)
112
111
{
113
112
c[i] = a[i] + b[i];
114
113
}
@@ -119,12 +118,12 @@ void ACCStream<T>::triad()
119
118
{
120
119
const T scalar = startScalar;
121
120
122
- int array_size = this ->array_size ;
121
+ size_t array_size = this ->array_size ;
123
122
T * restrict a = this ->a ;
124
123
T * restrict b = this ->b ;
125
124
T * restrict c = this ->c ;
126
125
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
127
- for (int i = 0 ; i < array_size; i++)
126
+ for (size_t i = 0 ; i < array_size; i++)
128
127
{
129
128
a[i] = b[i] + scalar * c[i];
130
129
}
@@ -135,12 +134,12 @@ void ACCStream<T>::nstream()
135
134
{
136
135
const T scalar = startScalar;
137
136
138
- int array_size = this ->array_size ;
137
+ size_t array_size = this ->array_size ;
139
138
T * restrict a = this ->a ;
140
139
T * restrict b = this ->b ;
141
140
T * restrict c = this ->c ;
142
141
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
143
- for (int i = 0 ; i < array_size; i++)
142
+ for (size_t i = 0 ; i < array_size; i++)
144
143
{
145
144
a[i] += b[i] + scalar * c[i];
146
145
}
@@ -151,11 +150,11 @@ T ACCStream<T>::dot()
151
150
{
152
151
T sum{};
153
152
154
- int array_size = this ->array_size ;
153
+ size_t array_size = this ->array_size ;
155
154
T * restrict a = this ->a ;
156
155
T * restrict b = this ->b ;
157
156
#pragma acc parallel loop reduction(+:sum) present(a[0:array_size], b[0:array_size]) wait
158
- for (int i = 0 ; i < array_size; i++)
157
+ for (size_t i = 0 ; i < array_size; i++)
159
158
{
160
159
sum += a[i] * b[i];
161
160
}
0 commit comments