Skip to content

Commit 510d6c7

Browse files
committed
add some PTX tests for load/store ordering
1 parent 6647cef commit 510d6c7

File tree

2 files changed

+125
-8
lines changed

2 files changed

+125
-8
lines changed

src/driver.jl

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,7 @@ end
151151

152152
const __llvm_initialized = Ref(false)
153153

154-
@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
155-
# XXX: remove on next major version
156-
if !isempty(kwargs)
157-
Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
158-
config = CompilerConfig(job.config; kwargs...)
159-
job = CompilerJob(job.source, config)
160-
end
161-
154+
function initialize_llvm()
162155
if !__llvm_initialized[]
163156
InitializeAllTargets()
164157
InitializeAllTargetInfos()
@@ -167,6 +160,17 @@ const __llvm_initialized = Ref(false)
167160
InitializeAllTargetMCs()
168161
__llvm_initialized[] = true
169162
end
163+
end
164+
165+
@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
166+
# XXX: remove on next major version
167+
if !isempty(kwargs)
168+
Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
169+
config = CompilerConfig(job.config; kwargs...)
170+
job = CompilerJob(job.source, config)
171+
end
172+
173+
initialize_llvm()
170174

171175
@tracepoint "IR generation" begin
172176
ir, compiled = irgen(job)

test/atomics.jl

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
    run_pass(backend, pass, mod)

Parse the textual LLVM IR in `mod` into an `LLVM.Module`, apply `pass(job, ir)` to it,
lower whatever `pass` returns to assembly with `GPUCompiler.emit_asm`, and write that
assembly to `stdout`.

# Arguments
- `backend`: object providing `create_job`; it is called as
  `backend.create_job(identity, (Int,))` to obtain a placeholder job.
- `pass`: callable invoked as `pass(job, ir)`; its return value is what gets lowered.
- `mod`: textual LLVM IR, handed to `parse(LLVM.Module, mod)`.

# Returns
The value returned by `write(stdout, asm)`.
"""
function run_pass(backend, pass, mod)
    GPUCompiler.initialize_llvm()

    # Placeholder job built from `identity(::Int)`; only the first element of what
    # `create_job` returns is used.
    fake_job, _ = backend.create_job(identity, (Int,))

    # TODO: Set DL? (presumably the module's data layout -- confirm)
    asm, _ = JuliaContext(; opaque_pointers=true) do _
        parsed = parse(LLVM.Module, mod)
        transformed = pass(fake_job, parsed)
        GPUCompiler.emit_asm(fake_job, transformed, LLVM.API.LLVMAssemblyFile)
    end

    # Emit the generated assembly on stdout so that the caller can inspect it.
    return write(stdout, asm)
end
14+
15+
@testset "PTX" begin
# Every case below builds a small LLVM IR module made of one atomic load followed by
# one atomic store, and passes it to `run_pass` with the `PTX` backend and a pass that
# returns the module unchanged. The `check"..."` lines state what is expected in the
# resulting output.
# NOTE(review): `@filecheck` and `check"..."` are defined elsewhere; presumably they
# match the captured output of the block against the given patterns -- confirm.
16+
# The PTX backend doesn't support atomics wider than i64: the i128 seq_cst case is
# expected to report an undefined `__sync_val_compare_and_swap_16` symbol.
17+
@test @filecheck begin
18+
mod = """define void @test(ptr %a) nounwind {
19+
%1 = load atomic i128, ptr %a seq_cst, align 16
20+
store atomic i128 %1, ptr %a seq_cst, align 16
21+
ret void
22+
}
23+
"""
24+
check"CHECK: LLVM error: Undefined external symbol \"__sync_val_compare_and_swap_16\""
25+
26+
run_pass(PTX, (_, ir)-> ir, mod)
27+
end
28+
29+
# Note: `unordered` accesses get eliminated here
# (presumably why no `unordered` case is tested below -- confirm).
30+
31+
# i64, monotonic: expects `.target sm_70` and volatile 64-bit load/store instructions.
@test @filecheck begin
32+
mod = """define void @test(ptr %a) nounwind {
33+
%1 = load atomic i64, ptr %a monotonic, align 8
34+
store atomic i64 %1, ptr %a monotonic, align 8
35+
ret void
36+
}
37+
"""
38+
check"CHECK: .target sm_70"
39+
check"CHECK: ld.volatile.u64"
40+
check"CHECK: st.volatile.u64"
41+
42+
run_pass(PTX, (_, ir)-> ir, mod)
43+
end
44+
45+
# Note: the PTX backend doesn't support `release` stores yet; the acquire/release case
# is expected to fail with a "Cannot select" error on the atomic store.
46+
@test @filecheck begin
47+
mod = """define void @test(ptr %a) nounwind {
48+
%1 = load atomic i64, ptr %a acquire, align 8
49+
store atomic i64 %1, ptr %a release, align 8
50+
ret void
51+
}
52+
"""
53+
check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store release (s64)"
54+
55+
run_pass(PTX, (_, ir)-> ir, mod)
56+
end
57+
58+
# Note: the PTX backend doesn't support `seq_cst` yet; this case is expected to fail
# with a "Cannot select" error on the atomic store.
59+
@test @filecheck begin
60+
mod = """define void @test(ptr %a) nounwind {
61+
%1 = load atomic i64, ptr %a seq_cst, align 8
62+
store atomic i64 %1, ptr %a seq_cst, align 8
63+
ret void
64+
}
65+
"""
66+
check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store seq_cst (s64)"
67+
68+
run_pass(PTX, (_, ir)-> ir, mod)
69+
end
70+
71+
# i32, monotonic: expects `.target sm_70` and volatile 32-bit load/store instructions.
@test @filecheck begin
72+
mod = """define void @test(ptr %a) nounwind {
73+
%1 = load atomic i32, ptr %a monotonic, align 4
74+
store atomic i32 %1, ptr %a monotonic, align 4
75+
ret void
76+
}
77+
"""
78+
check"CHECK: .target sm_70"
79+
check"CHECK: ld.volatile.u32"
80+
check"CHECK: st.volatile.u32"
81+
82+
run_pass(PTX, (_, ir)-> ir, mod)
83+
end
84+
85+
# i16, monotonic: expects `.target sm_70` and volatile 16-bit load/store instructions.
@test @filecheck begin
86+
mod = """define void @test(ptr %a) nounwind {
87+
%1 = load atomic i16, ptr %a monotonic, align 2
88+
store atomic i16 %1, ptr %a monotonic, align 2
89+
ret void
90+
}
91+
"""
92+
check"CHECK: .target sm_70"
93+
check"CHECK: ld.volatile.u16"
94+
check"CHECK: st.volatile.u16"
95+
96+
run_pass(PTX, (_, ir)-> ir, mod)
97+
end
98+
99+
# i8, monotonic: expects `.target sm_70` and volatile 8-bit load/store instructions.
@test @filecheck begin
100+
mod = """define void @test(ptr %a) nounwind {
101+
%1 = load atomic i8, ptr %a monotonic, align 1
102+
store atomic i8 %1, ptr %a monotonic, align 1
103+
ret void
104+
}
105+
"""
106+
check"CHECK: .target sm_70"
107+
check"CHECK: ld.volatile.u8"
108+
check"CHECK: st.volatile.u8"
109+
110+
run_pass(PTX, (_, ir)-> ir, mod)
111+
end
112+
113+
end # PTX

0 commit comments

Comments
 (0)