diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aa9032ea2574c..9089bfd2a0381 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21413,7 +21413,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } // Turn 'store undef, Ptr' -> nothing. - if (Value.isUndef() && ST->isUnindexed()) + if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile()) return Chain; // Try to infer better alignment information than the store already has. diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index ae20ab1de3a2d..e53653408feb4 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -255,7 +255,6 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) # %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr - store volatile i64 %dispatch.id, ptr addrspace(1) %ptr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll index 80732d5de1e20..ca339938161bd 100644 --- a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -73,7 +73,7 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace( define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1 %bc = bitcast i64 %undef to <2 x i32> - store volatile <2 x i32> %bc, ptr addrspace(1) %out + store <2 x i32> %bc, ptr addrspace(1) %out ret void } @@ -83,7 +83,7 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractel %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1 %bc = bitcast i64 %undef to <2 x i32> %elt1 = extractelement <2 x i32> %bc, i32 1 - store volatile i32 %elt1, ptr addrspace(1) %out + store i32 %elt1, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll index 3b3e107a62967..397502711283e 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -98,8 +98,8 @@ define void @func_use_lds_global() { ret void } -; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function -define void @func_use_lds_global_constexpr_cast() { +; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function +define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) { ; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast: ; GFX8-SDAG: ; %bb.0: ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -153,7 +153,7 @@ define void @func_use_lds_global_constexpr_cast() { ; GISEL-NEXT: s_setpc_b64 s[30:31] ; GISEL-NEXT: .LBB1_2: ; GISEL-NEXT: s_endpgm - store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4 + store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll index 8009f917aef5a..f90753652baa5 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s @@ -126,10 +126,21 @@ define amdgpu_cs void @caller() { declare amdgpu_gfx void @callee(i32) define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) { -; GFX9-LABEL: workgroup_ids_gfx: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: workgroup_ids_gfx: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: global_store_dword v[0:1], v0, off +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9-SDAG-NEXT: global_store_dword v[2:3], v0, off +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9-SDAG-NEXT: global_store_dword v[4:5], v0, off +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: workgroup_ids_gfx: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx: ; GFX9ARCH-SDAG: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll index dd892ec3d59b3..99090da4da513 100644 --- a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll +++ b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll @@ -9,64 +9,64 @@ declare hidden i32 @strnlen(ptr nocapture, i32) #1 declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1 -; ERROR: error: :0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr): unsupported call to function memcmp +; ERROR: error: :0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr, ptr addrspace(1)): unsupported call to function memcmp ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12 -define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 { +define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p, ptr addrspace(1) %out) #0 { entry: %cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2) - store volatile i32 %cmp, ptr addrspace(1) undef + store i32 %cmp, ptr addrspace(1) %out ret void } -; ERROR: error: :0:0: in function test_memchr void (ptr addrspace(1), i32, i64): unsupported call to function memchr +; ERROR: error: :0:0: in function test_memchr void (ptr addrspace(1), i32, i64, ptr addrspace(1)): unsupported call to function memchr ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12 -define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 { +define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len, ptr addrspace(1) %out) #0 { %res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len) - store volatile ptr addrspace(1) %res, ptr addrspace(1) undef + store ptr addrspace(1) %res, ptr addrspace(1) %out ret void } -; ERROR: error: :0:0: in function test_strcpy void (ptr, ptr): unsupported call to function strcpy +; ERROR: error: :0:0: in function test_strcpy void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcpy ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12 -define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 { +define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src, ptr addrspace(1) %out) #0 { %res = call ptr @strcpy(ptr %dst, ptr %src) - store volatile ptr %res, ptr addrspace(1) undef + store ptr %res, ptr addrspace(1) %out ret void } -; ERROR: error: :0:0: in function test_strcmp void (ptr, ptr): unsupported call to function strcmp +; ERROR: error: :0:0: in function test_strcmp void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcmp ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12 -define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 { +define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1, ptr addrspace(1) %out) #0 { %res = call i32 @strcmp(ptr %src0, ptr %src1) - store volatile i32 %res, ptr addrspace(1) undef + store i32 %res, ptr addrspace(1) %out ret void } -; ERROR: error: :0:0: in function test_strlen void (ptr): unsupported call to function strlen +; ERROR: error: :0:0: in function test_strlen void (ptr, ptr addrspace(1)): unsupported call to function strlen ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12 -define amdgpu_kernel void @test_strlen(ptr %src) #0 { +define amdgpu_kernel void @test_strlen(ptr %src, ptr addrspace(1) %out) #0 { %res = call i32 @strlen(ptr %src) - store volatile i32 %res, ptr addrspace(1) undef + store i32 %res, ptr addrspace(1) %out ret void } -; ERROR: error: :0:0: in function test_strnlen void (ptr, i32): unsupported call to function strnlen +; ERROR: error: :0:0: in function test_strnlen void (ptr, i32, ptr addrspace(1)): unsupported call to function strnlen ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12 -define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 { +define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size, ptr addrspace(1) %out) #0 { %res = call i32 @strnlen(ptr %src, i32 %size) - store volatile i32 %res, ptr addrspace(1) undef + store i32 %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll index 4941760a78c79..109d28a3e3c59 100644 --- a/llvm/test/CodeGen/NVPTX/store-undef.ll +++ b/llvm/test/CodeGen/NVPTX/store-undef.ll @@ -90,3 +90,61 @@ define void @test_store_def(i64 %param0, i32 %param1, ptr %out) { store %struct.T %S2, ptr %out ret void } + +define void @test_store_volatile_undef(ptr %out, <8 x i32> %vec) { +; CHECK-LABEL: test_store_volatile_undef( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<23>; +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_undef_param_0]; +; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6}; +; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2; +; CHECK-NEXT: ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_store_volatile_undef_param_1]; +; CHECK-NEXT: ld.param.v4.u32 {%r11, %r12, %r13, %r14}, [test_store_volatile_undef_param_1+16]; +; CHECK-NEXT: st.volatile.v4.u32 [%rd3], {%r11, %r12, %r13, %r14}; +; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r7, %r8, %r9, %r10}; +; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r15, %r16, %r17, %r18}; +; CHECK-NEXT: st.volatile.v4.u32 [%rd1], {%r19, %r20, %r21, %r22}; +; CHECK-NEXT: ret; + store volatile %struct.T undef, ptr %out + store volatile <8 x i32> %vec, ptr undef + store volatile <8 x i32> undef, ptr %out + ret void +} + +define void @test_store_volatile_of_poison(ptr %out) { +; CHECK-LABEL: test_store_volatile_of_poison( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_of_poison_param_0]; +; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6}; +; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2; +; CHECK-NEXT: ret; + store volatile %struct.T poison, ptr %out + ret void +} + +define void @test_store_volatile_to_poison(%struct.T %param) { +; CHECK-LABEL: test_store_volatile_to_poison( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_to_poison_param_0]; +; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_store_volatile_to_poison_param_0+8]; +; CHECK-NEXT: ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_store_volatile_to_poison_param_0+16]; +; CHECK-NEXT: st.volatile.v4.u32 [%rd2], {%r3, %r4, %r5, %r6}; +; CHECK-NEXT: st.volatile.v2.u32 [%rd3], {%r1, %r2}; +; CHECK-NEXT: st.volatile.u64 [%rd4], %rd1; +; CHECK-NEXT: ret; + store volatile %struct.T %param, ptr poison + ret void +}