Skip to content

Commit 6dadc3d

Browse files
authored
[SingleSource/Vectorizer] Add unit tests for the vplan-native path. (#20)
This patch adds unit tests for LLVM's VPlan-native path, as requested in https://reviews.llvm.org/D157484. The new Vectorizer/VPlanNativePath subdirectory is only enabled if the compiler is Clang. For all source files in that directory, the flags "-mllvm -enable-vplan-native-path" are added. Four different scenarios are tested for outer-loop vectorization:
- Matrix multiplication, where the second of three loops is vectorized.
- A test where the vectorized loop has an auxiliary induction variable.
- A test for indirect and strided memory accesses.
- A nesting of three loops where the outer-most one is vectorized.
1 parent 5372532 commit 6dadc3d

File tree

4 files changed

+155
-0
lines changed

4 files changed

+155
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
11
llvm_singlesource()
22
# Build the runtime-checks target with CXX_STANDARD 17.
set_property(TARGET runtime-checks PROPERTY CXX_STANDARD 17)
3+
4+
# The VPlan-native path is specific to LLVM, so the VPlanNativePath
# subdirectory is only added when the C compiler ID matches "Clang".
5+
if ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
6+
add_subdirectory(VPlanNativePath)
7+
endif()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Enable the VPlan-native path for outer-loop vectorization. Disable
2+
# vectorization in general (`#pragma clang loop vectorize(enable)` overrides
3+
# this) because the two code paths do not mix well.
# The flags are appended before llvm_singlesource() is called below:
#   -mllvm -enable-vplan-native-path : forwards the option to LLVM.
#   -fno-vectorize                   : turns off loop vectorization by default.
4+
list(APPEND CXXFLAGS "-mllvm" "-enable-vplan-native-path" "-fno-vectorize")
5+
6+
llvm_singlesource()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#include <cstddef>
2+
#include <cstdint>
3+
#include <iostream>
4+
#include <memory>
5+
6+
#include "../common.h"
7+
8+
// Tests for outer-loop vectorization in LLVM's VPlan-native path.
9+
10+
// Defines two capture-less lambdas, ScalarFn and VectorFn, in the scope where
// the macro is used. Both take the parenthesised parameter list `Args` and
// have `Loop` as their whole body; they differ only in the loop pragma that is
// placed directly in front of `Loop`:
//   ScalarFn: clang loop vectorize(disable) interleave_count(1)
//   VectorFn: clang loop vectorize(enable)
// `Loop` should therefore begin with the loop statement the pragma is meant
// to apply to (the outer loop of the nest being tested).
#define DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV(Args, Loop) \
11+
auto ScalarFn = [] Args { \
12+
_Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
13+
}; \
14+
auto VectorFn = [] Args { \
15+
_Pragma("clang loop vectorize(enable)") Loop \
16+
};
17+
18+
// Like DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV, but each lambda wraps `Loop` in an
// additional outer-most loop `for (size_t I = 0; I < N; I++)`. The pragma is
// placed in front of `Loop` inside that wrapper, so it applies to the second
// loop level rather than to the outer-most one.
// The wrapper reads `N`, and the lambdas capture nothing, so `Args` is
// expected to declare a parameter named N; `Loop` may use the index `I`.
#define DEFINE_SCALAR_AND_VECTOR_FN_FOR_NESTED_OLV(Args, Loop) \
19+
auto ScalarFn = [] Args { \
20+
for (size_t I = 0; I < N; I++) { \
21+
_Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
22+
} \
23+
}; \
24+
auto VectorFn = [] Args { \
25+
for (size_t I = 0; I < N; I++) { \
26+
_Pragma("clang loop vectorize(enable)") Loop \
27+
} \
28+
};
29+
30+
// Runs four outer-loop-vectorization scenarios. Each scenario lives in its own
// scope, defines ScalarFn and VectorFn through one of the macros above, runs
// both on the same inputs and hands the two result buffers to check().
// NOTE(review): rng, init_data and check are not defined in this file
// (presumably they come from ../common.h); check presumably compares the two
// buffers element by element -- confirm.
int main() {
31+
// Seed the generator with a fixed value (15).
// NOTE(review): presumably init_data draws from rng, which would make the
// inputs reproducible -- confirm against common.h.
rng = std::mt19937(15);
32+
33+
{
34+
// A matrix-multiplication where the second loop of the triple loop nest
35+
// is vectorized (the macro adds the outer-most loop).
// Per the index expressions, B is N x M, C is M x L and A is N x L, all
// stored row-major in flat arrays.
// NOTE(review): X accumulates int32_t products in signed arithmetic; whether
// this can overflow depends on the value range init_data produces -- confirm.
36+
DEFINE_SCALAR_AND_VECTOR_FN_FOR_NESTED_OLV(
37+
(size_t N, size_t M, size_t L,
38+
int32_t *__restrict__ A, const int32_t *B, const int32_t *C),
39+
for (size_t J = 0; J < L; J++) {
40+
int32_t X = 0;
41+
for (size_t K = 0; K < M; K++)
42+
X += B[I * M + K] * C[K * L + J];
43+
A[I * L + J] = X;
44+
});
45+
46+
std::cout << "Checking matrix-multiplication\n";
47+
48+
size_t N = 100, M = 100, L = 100;
49+
std::unique_ptr<int32_t[]> A_Reference(new int32_t[N * L]);
50+
std::unique_ptr<int32_t[]> A_ToCheck(new int32_t[N * L]);
51+
std::unique_ptr<int32_t[]> B(new int32_t[N * M]);
52+
std::unique_ptr<int32_t[]> C(new int32_t[M * L]);
53+
init_data(B, N * M);
54+
init_data(C, M * L);
55+
56+
ScalarFn(N, M, L, &A_Reference[0], &B[0], &C[0]);
57+
VectorFn(N, M, L, &A_ToCheck[0], &B[0], &C[0]);
58+
check(A_Reference, A_ToCheck, N*L);
59+
}
60+
61+
{
62+
// A test where the vectorized loop itself has an auxiliary IV.
// AuxIV is declared together with I, so it is a size_t; it starts at 333 and
// advances by 12 per outer iteration. Because AuxIV is unsigned,
// AuxIV * B[J] and the additions to X are evaluated as size_t and then
// narrowed back to int32_t.
63+
DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV(
64+
(size_t N, int32_t *__restrict__ A, const int32_t *B),
65+
for (size_t I = 0, AuxIV = 333; I < N; I++, AuxIV += 12) {
66+
int32_t X = B[I];
67+
for (size_t J = 0; J < N; J++) {
68+
X += AuxIV * B[J];
69+
}
70+
A[I] = AuxIV + X;
71+
});
72+
73+
std::cout << "Checking loop with auxiliary IV\n";
74+
75+
size_t N = 123;
76+
std::unique_ptr<int32_t[]> A_Reference(new int32_t[N]);
77+
std::unique_ptr<int32_t[]> A_ToCheck(new int32_t[N]);
78+
std::unique_ptr<int32_t[]> B(new int32_t[N]);
79+
init_data(B, N);
80+
81+
ScalarFn(N, &A_Reference[0], &B[0]);
82+
VectorFn(N, &A_ToCheck[0], &B[0]);
83+
check(A_Reference, A_ToCheck, N);
84+
}
85+
86+
{
87+
// A test for irregular memory access patterns.
// C is read with a stride of two elements (C[J * 2]) and the loaded value is
// used as an index into B (B[Idx % N]). Idx is an int32_t and N a size_t, so
// Idx is converted to size_t before the modulo and the index stays below N.
// The inner loop does not depend on I, so every A[I] receives the same sum.
// NOTE(review): X += B[...] is signed int32_t addition; whether it can
// overflow depends on the value range init_data produces -- confirm.
88+
DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV(
89+
(size_t N, size_t M,
90+
int32_t *__restrict__ A, const int32_t *B, const int32_t *C),
91+
for (size_t I = 0; I < N; I++) {
92+
int32_t X = 0;
93+
for (size_t J = 0; J < M / 2; J++) {
94+
int32_t Idx = C[J * 2];
95+
X += B[Idx % N];
96+
}
97+
A[I] = X;
98+
})
99+
100+
std::cout << "Checking loop with indirect memory accesses\n";
101+
102+
size_t N = 123, M = 456;
103+
std::unique_ptr<int32_t[]> A_Reference(new int32_t[N]);
104+
std::unique_ptr<int32_t[]> A_ToCheck(new int32_t[N]);
105+
std::unique_ptr<int32_t[]> B(new int32_t[N]);
106+
std::unique_ptr<int32_t[]> C(new int32_t[M]);
107+
init_data(B, N);
108+
init_data(C, M);
109+
110+
ScalarFn(N, M, &A_Reference[0], &B[0], &C[0]);
111+
VectorFn(N, M, &A_ToCheck[0], &B[0], &C[0]);
112+
check(A_Reference, A_ToCheck, N);
113+
}
114+
115+
{
116+
// A test where the vectorized loop contains a loop which contains
117+
// another loop itself.
// A is treated as a flattened N x M x L array; element (I, J, K) is set to
// the product I * J * K (computed as size_t, then narrowed to int32_t).
// This scenario has no input arrays, so init_data is not called.
118+
DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV(
119+
(size_t N, size_t M, size_t L, int32_t *__restrict__ A),
120+
for (size_t I = 0; I < N; I++) {
121+
for (size_t J = 0; J < M; J++) {
122+
for (size_t K = 0; K < L; K++) {
123+
A[I * (M * L) + J * L + K] = I * J * K;
124+
}
125+
}
126+
});
127+
128+
std::cout << "Checking triple-loop-nest\n";
129+
130+
size_t N = 123, M = 45, L = 67;
131+
std::unique_ptr<int32_t[]> A_Reference(new int32_t[N * M * L]);
132+
std::unique_ptr<int32_t[]> A_ToCheck(new int32_t[N * M * L]);
133+
134+
ScalarFn(N, M, L, &A_Reference[0]);
135+
VectorFn(N, M, L, &A_ToCheck[0]);
136+
check(A_Reference, A_ToCheck, N * M * L);
137+
}
138+
139+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Checking matrix-multiplication
2+
Checking loop with auxiliary IV
3+
Checking loop with indirect memory accesses
4+
Checking triple-loop-nest
5+
exit 0

0 commit comments

Comments
 (0)