@@ -27,16 +27,16 @@ unsigned int num_times = 100;
27
27
unsigned int deviceIndex = 0 ;
28
28
bool use_float = false ;
29
29
bool output_as_csv = false ;
30
- Unit unit = MegaByte;
30
+ Unit unit{Unit::Kind:: MegaByte} ;
31
31
bool silence_errors = false ;
32
32
std::string csv_separator = " ," ;
33
33
34
34
// Benchmarks:
35
35
constexpr size_t num_benchmarks = 6 ;
36
- array<char const *, num_benchmarks> labels = {" Copy" , " Add" , " Mul" , " Triad" , " Dot" , " Nstream" };
36
+ std:: array<char const *, num_benchmarks> labels = {" Copy" , " Add" , " Mul" , " Triad" , " Dot" , " Nstream" };
37
37
// Weights data moved by benchmark & therefore achieved BW:
38
38
// bytes = weight * sizeof(T) * ARRAY_SIZE -> bw = bytes / dur
39
- array<size_t , num_benchmarks> weight = {/* Copy:*/ 2 , /* Add:*/ 2 , /* Mul:*/ 3 , /* Triad:*/ 3 , /* Dot:*/ 2 , /* Nstream:*/ 4 };
39
+ std:: array<size_t , num_benchmarks> weight = {/* Copy:*/ 2 , /* Add:*/ 2 , /* Mul:*/ 3 , /* Triad:*/ 3 , /* Dot:*/ 2 , /* Nstream:*/ 4 };
40
40
41
41
// Options for running the benchmark:
42
42
// - Classic 5 kernels (Copy, Add, Mul, Triad, Dot).
@@ -63,19 +63,17 @@ bool run_benchmark(int id) {
63
63
// Prints all available benchmark labels:
64
64
template <typename OStream>
65
65
void print_labels (OStream& os) {
66
- for (int i = 0 ; i < num_benchmarks; ++i) {
66
+ for (size_t i = 0 ; i < num_benchmarks; ++i) {
67
67
os << labels[i];
68
68
if (i != (num_benchmarks - 1 )) os << " ," ;
69
69
}
70
70
}
71
71
72
- // Clock and duration types:
73
- using clk_t = chrono::high_resolution_clock;
74
- using dur_t = chrono::duration<double >;
75
-
76
72
// Returns duration of executing function f:
77
73
template <typename F>
78
74
double time (F&& f) {
75
+ using clk_t = std::chrono::high_resolution_clock;
76
+ using dur_t = std::chrono::duration<double >;
79
77
auto start = clk_t::now ();
80
78
f ();
81
79
return dur_t (clk_t::now () - start).count ();
@@ -107,7 +105,7 @@ int main(int argc, char *argv[])
107
105
else
108
106
run<double >();
109
107
110
- return 0 ;
108
+ return EXIT_SUCCESS ;
111
109
}
112
110
113
111
// Run specified kernels
@@ -163,7 +161,7 @@ void run()
163
161
<< " num_times" << csv_separator
164
162
<< " n_elements" << csv_separator
165
163
<< " sizeof" << csv_separator
166
- << " max_" << unit.lower () << " _per_sec" << csv_separator
164
+ << " max_" << unit.str () << " _per_sec" << csv_separator
167
165
<< " min_runtime" << csv_separator
168
166
<< " max_runtime" << csv_separator
169
167
<< " avg_runtime" << std::endl;
@@ -182,10 +180,10 @@ void run()
182
180
auto fmt_cli = [](char const * function, double bandwidth, double dt_min, double dt_max, double dt_avg) {
183
181
std::cout
184
182
<< std::left << std::setw (12 ) << function
185
- << std::left << std::setw (12 ) << setprecision (3 ) << bandwidth
186
- << std::left << std::setw (12 ) << setprecision (5 ) << dt_min
187
- << std::left << std::setw (12 ) << setprecision (5 ) << dt_max
188
- << std::left << std::setw (12 ) << setprecision (5 ) << dt_avg
183
+ << std::left << std::setw (12 ) << std:: setprecision (3 ) << bandwidth
184
+ << std::left << std::setw (12 ) << std:: setprecision (5 ) << dt_min
185
+ << std::left << std::setw (12 ) << std:: setprecision (5 ) << dt_max
186
+ << std::left << std::setw (12 ) << std:: setprecision (5 ) << dt_avg
189
187
<< std::endl;
190
188
};
191
189
auto fmt_result = [&](char const * function, size_t num_times, size_t num_elements,
@@ -213,15 +211,15 @@ void run()
213
211
std::cout.precision (ss);
214
212
}
215
213
216
- auto stream = construct_stream <T>(ARRAY_SIZE, deviceIndex);
214
+ auto stream = make_stream <T>(ARRAY_SIZE, deviceIndex);
217
215
auto initElapsedS = time ([&] { stream->init_arrays (startA, startB, startC); });
218
216
219
217
// Result of the Dot kernel, if used.
220
218
T sum{};
221
- vector<vector<double >> timings = run_all<T>(stream, sum);
219
+ std:: vector<std:: vector<double >> timings = run_all<T>(stream, sum);
222
220
223
221
// Create & read host vectors:
224
- vector<T> a (ARRAY_SIZE), b (ARRAY_SIZE), c (ARRAY_SIZE);
222
+ std:: vector<T> a (ARRAY_SIZE), b (ARRAY_SIZE), c (ARRAY_SIZE);
225
223
auto readElapsedS = time ([&] { stream->read_arrays (a, b, c); });
226
224
227
225
check_solution<T>(num_times, a, b, c, sum);
@@ -250,7 +248,7 @@ void run()
250
248
<< std::left << std::setw (12 ) << " Max"
251
249
<< std::left << std::setw (12 ) << " Average"
252
250
<< std::endl
253
- << fixed;
251
+ << std:: fixed;
254
252
}
255
253
256
254
for (int i = 0 ; i < num_benchmarks; ++i)
@@ -280,13 +278,13 @@ void check_solution(const unsigned int ntimes, std::vector<T>& a, std::vector<T>
280
278
281
279
const T scalar = startScalar;
282
280
283
- for (int b = 0 ; b < num_benchmarks; ++b)
281
+ for (size_t b = 0 ; b < num_benchmarks; ++b)
284
282
{
285
283
if (!run_benchmark (b)) continue ;
286
284
287
285
for (unsigned int i = 0 ; i < ntimes; i++)
288
286
{
289
- switch (( Benchmark)b ) {
287
+ switch (static_cast < Benchmark>(b) ) {
290
288
case Benchmark::Copy: goldC = goldA; break ;
291
289
case Benchmark::Mul: goldB = scalar * goldC; break ;
292
290
case Benchmark::Add: goldC = goldA + goldB; break ;
@@ -307,7 +305,7 @@ void check_solution(const unsigned int ntimes, std::vector<T>& a, std::vector<T>
307
305
errC /= c.size ();
308
306
long double errSum = std::fabs ((sum - goldSum)/goldSum);
309
307
310
- long double epsi = std::numeric_limits<T>::epsilon () * 100 .0 ;
308
+ long double epsi = std::numeric_limits<T>::epsilon () * 1000 .0 ;
311
309
312
310
bool failed = false ;
313
311
if (errA > epsi) {
@@ -425,7 +423,7 @@ void parseArguments(int argc, char *argv[])
425
423
}
426
424
else
427
425
{
428
- auto p = find_if (labels.begin (), labels.end (), [&](char const * label) {
426
+ auto p = std:: find_if (labels.begin (), labels.end (), [&](char const * label) {
429
427
return std::string (label) == key;
430
428
});
431
429
if (p == labels.end ()) {
@@ -435,7 +433,7 @@ void parseArguments(int argc, char *argv[])
435
433
std::cerr << std::endl;
436
434
std::exit (EXIT_FAILURE);
437
435
}
438
- selection = (Benchmark)(distance (labels.begin (), p));
436
+ selection = (Benchmark)(std:: distance (labels.begin (), p));
439
437
}
440
438
}
441
439
else if (!std::string (" --csv" ).compare (argv[i]))
@@ -444,19 +442,27 @@ void parseArguments(int argc, char *argv[])
444
442
}
445
443
else if (!std::string (" --mibibytes" ).compare (argv[i]))
446
444
{
447
- unit = Unit (MibiByte);
445
+ unit = Unit (Unit::Kind:: MibiByte);
448
446
}
449
447
else if (!std::string (" --megabytes" ).compare (argv[i]))
450
448
{
451
- unit = Unit (MegaByte);
449
+ unit = Unit (Unit::Kind:: MegaByte);
452
450
}
453
451
else if (!std::string (" --gibibytes" ).compare (argv[i]))
454
452
{
455
- unit = Unit (GibiByte);
453
+ unit = Unit (Unit::Kind:: GibiByte);
456
454
}
457
455
else if (!std::string (" --gigabytes" ).compare (argv[i]))
458
456
{
459
- unit = Unit (GigaByte);
457
+ unit = Unit (Unit::Kind::GigaByte);
458
+ }
459
+ else if (!std::string (" --tebibytes" ).compare (argv[i]))
460
+ {
461
+ unit = Unit (Unit::Kind::TebiByte);
462
+ }
463
+ else if (!std::string (" --terabytes" ).compare (argv[i]))
464
+ {
465
+ unit = Unit (Unit::Kind::TeraByte);
460
466
}
461
467
else if (!std::string (" --silence-errors" ).compare (argv[i]))
462
468
{
@@ -481,6 +487,8 @@ void parseArguments(int argc, char *argv[])
481
487
std::cout << " --mibibytes Use MiB=2^20 for bandwidth calculation (default MB=10^6)" << std::endl;
482
488
std::cout << " --gigibytes Use GiB=2^30 for bandwidth calculation (default MB=10^6)" << std::endl;
483
489
std::cout << " --gigabytes Use GB=10^9 for bandwidth calculation (default MB=10^6)" << std::endl;
490
+ std::cout << " --tebibytes Use TiB=2^40 for bandwidth calculation (default MB=10^6)" << std::endl;
491
+ std::cout << " --terabytes Use TB=10^12 for bandwidth calculation (default MB=10^6)" << std::endl;
484
492
std::cout << " --silence-errors Ignores validation errors." << std::endl;
485
493
std::cout << std::endl;
486
494
std::exit (EXIT_SUCCESS);
0 commit comments