From 71f39f98c10a82cde5efe4a4b84391bb580c1d7e Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Wed, 24 Jul 2024 09:21:03 -0400 Subject: [PATCH] Revert "[SelectionDAG] Preserve volatile undef stores. (#99918)" This reverts commit 404071b059818ba51b6a832e5e09a93a4a8b579e. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- .../abi-attribute-hints-undefined-behavior.ll | 1 + .../CodeGen/AMDGPU/bitcast-vector-extract.ll | 4 +- .../AMDGPU/lds-global-non-entry-func.ll | 6 +- .../lower-work-group-id-intrinsics-pal.ll | 23 ++------ llvm/test/CodeGen/AMDGPU/mem-builtins.ll | 36 ++++++------ llvm/test/CodeGen/NVPTX/store-undef.ll | 58 ------------------- 7 files changed, 31 insertions(+), 99 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 060e66175d965..cd0440077f526 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21413,7 +21413,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } // Turn 'store undef, Ptr' -> nothing. - if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile()) + if (Value.isUndef() && ST->isUnindexed()) return Chain; // Try to infer better alignment information than the store already has. diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index e53653408feb4..ae20ab1de3a2d 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -255,6 +255,7 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) # %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr + store volatile i64 %dispatch.id, ptr addrspace(1) %ptr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll index ca339938161bd..80732d5de1e20 100644 --- a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -73,7 +73,7 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace( define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1 %bc = bitcast i64 %undef to <2 x i32> - store <2 x i32> %bc, ptr addrspace(1) %out + store volatile <2 x i32> %bc, ptr addrspace(1) %out ret void } @@ -83,7 +83,7 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractel %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1 %bc = bitcast i64 %undef to <2 x i32> %elt1 = extractelement <2 x i32> %bc, i32 1 - store i32 %elt1, ptr addrspace(1) %out + store volatile i32 %elt1, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll index 397502711283e..3b3e107a62967 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -98,8 +98,8 @@ define void @func_use_lds_global() { ret void } -; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function -define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) { +; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function +define void @func_use_lds_global_constexpr_cast() { ; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast: ; GFX8-SDAG: ; %bb.0: ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -153,7 +153,7 @@ define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) { ; GISEL-NEXT: s_setpc_b64 s[30:31] ; GISEL-NEXT: .LBB1_2: ; GISEL-NEXT: s_endpgm - store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4 + store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll index f90753652baa5..8009f917aef5a 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s @@ -126,21 +126,10 @@ define amdgpu_cs void @caller() { declare amdgpu_gfx void @callee(i32) define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) { -; GFX9-SDAG-LABEL: workgroup_ids_gfx: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: global_store_dword v[0:1], v0, off -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: global_store_dword v[2:3], v0, off -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: global_store_dword v[4:5], v0, off -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: workgroup_ids_gfx: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: workgroup_ids_gfx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx: ; GFX9ARCH-SDAG: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll index 99090da4da513..dd892ec3d59b3 100644 --- a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll +++ b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll @@ -9,64 +9,64 @@ declare hidden i32 @strnlen(ptr nocapture, i32) #1 declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1 -; ERROR: error: :0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr, ptr addrspace(1)): unsupported call to function memcmp +; ERROR: error: :0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr): unsupported call to function memcmp ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12 -define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p, ptr addrspace(1) %out) #0 { +define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 { entry: %cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2) - store i32 %cmp, ptr addrspace(1) %out + store volatile i32 %cmp, ptr addrspace(1) undef ret void } -; ERROR: error: :0:0: in function test_memchr void (ptr addrspace(1), i32, i64, ptr addrspace(1)): unsupported call to function memchr +; ERROR: error: :0:0: in function test_memchr void (ptr addrspace(1), i32, i64): unsupported call to function memchr ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12 -define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len, ptr addrspace(1) %out) #0 { +define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 { %res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len) - store ptr addrspace(1) %res, ptr addrspace(1) %out + store volatile ptr addrspace(1) %res, ptr addrspace(1) undef ret void } -; ERROR: error: :0:0: in function test_strcpy void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcpy +; ERROR: error: :0:0: in function test_strcpy void (ptr, ptr): unsupported call to function strcpy ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12 -define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src, ptr addrspace(1) %out) #0 { +define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 { %res = call ptr @strcpy(ptr %dst, ptr %src) - store ptr %res, ptr addrspace(1) %out + store volatile ptr %res, ptr addrspace(1) undef ret void } -; ERROR: error: :0:0: in function test_strcmp void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcmp +; ERROR: error: :0:0: in function test_strcmp void (ptr, ptr): unsupported call to function strcmp ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12 -define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1, ptr addrspace(1) %out) #0 { +define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 { %res = call i32 @strcmp(ptr %src0, ptr %src1) - store i32 %res, ptr addrspace(1) %out + store volatile i32 %res, ptr addrspace(1) undef ret void } -; ERROR: error: :0:0: in function test_strlen void (ptr, ptr addrspace(1)): unsupported call to function strlen +; ERROR: error: :0:0: in function test_strlen void (ptr): unsupported call to function strlen ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12 -define amdgpu_kernel void @test_strlen(ptr %src, ptr addrspace(1) %out) #0 { +define amdgpu_kernel void @test_strlen(ptr %src) #0 { %res = call i32 @strlen(ptr %src) - store i32 %res, ptr addrspace(1) %out + store volatile i32 %res, ptr addrspace(1) undef ret void } -; ERROR: error: :0:0: in function test_strnlen void (ptr, i32, ptr addrspace(1)): unsupported call to function strnlen +; ERROR: error: :0:0: in function test_strnlen void (ptr, i32): unsupported call to function strnlen ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12 -define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size, ptr addrspace(1) %out) #0 { +define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 { %res = call i32 @strnlen(ptr %src, i32 %size) - store i32 %res, ptr addrspace(1) %out + store volatile i32 %res, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll index 109d28a3e3c59..4941760a78c79 100644 --- a/llvm/test/CodeGen/NVPTX/store-undef.ll +++ b/llvm/test/CodeGen/NVPTX/store-undef.ll @@ -90,61 +90,3 @@ define void @test_store_def(i64 %param0, i32 %param1, ptr %out) { store %struct.T %S2, ptr %out ret void } - -define void @test_store_volatile_undef(ptr %out, <8 x i32> %vec) { -; CHECK-LABEL: test_store_volatile_undef( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<23>; -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_undef_param_0]; -; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4}; -; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6}; -; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2; -; CHECK-NEXT: ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_store_volatile_undef_param_1]; -; CHECK-NEXT: ld.param.v4.u32 {%r11, %r12, %r13, %r14}, [test_store_volatile_undef_param_1+16]; -; CHECK-NEXT: st.volatile.v4.u32 [%rd3], {%r11, %r12, %r13, %r14}; -; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r7, %r8, %r9, %r10}; -; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r15, %r16, %r17, %r18}; -; CHECK-NEXT: st.volatile.v4.u32 [%rd1], {%r19, %r20, %r21, %r22}; -; CHECK-NEXT: ret; - store volatile %struct.T undef, ptr %out - store volatile <8 x i32> %vec, ptr undef - store volatile <8 x i32> undef, ptr %out - ret void -} - -define void @test_store_volatile_of_poison(ptr %out) { -; CHECK-LABEL: test_store_volatile_of_poison( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<7>; -; CHECK-NEXT: .reg .b64 %rd<3>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_of_poison_param_0]; -; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4}; -; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6}; -; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2; -; CHECK-NEXT: ret; - store volatile %struct.T poison, ptr %out - ret void -} - -define void @test_store_volatile_to_poison(%struct.T %param) { -; CHECK-LABEL: test_store_volatile_to_poison( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<7>; -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_to_poison_param_0]; -; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_store_volatile_to_poison_param_0+8]; -; CHECK-NEXT: ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_store_volatile_to_poison_param_0+16]; -; CHECK-NEXT: st.volatile.v4.u32 [%rd2], {%r3, %r4, %r5, %r6}; -; CHECK-NEXT: st.volatile.v2.u32 [%rd3], {%r1, %r2}; -; CHECK-NEXT: st.volatile.u64 [%rd4], %rd1; -; CHECK-NEXT: ret; - store volatile %struct.T %param, ptr poison - ret void -}