Skip to content

Revert "[SelectionDAG] Preserve volatile undef stores." #100374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21413,7 +21413,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}

// Turn 'store undef, Ptr' -> nothing.
if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
if (Value.isUndef() && ST->isUnindexed())
return Chain;

// Try to infer better alignment information than the store already has.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #
%queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
%implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
%dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
ret void
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
%bc = bitcast i64 %undef to <2 x i32>
store <2 x i32> %bc, ptr addrspace(1) %out
store volatile <2 x i32> %bc, ptr addrspace(1) %out
ret void
}

Expand All @@ -83,7 +83,7 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractel
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
%bc = bitcast i64 %undef to <2 x i32>
%elt1 = extractelement <2 x i32> %bc, i32 1
store i32 %elt1, ptr addrspace(1) %out
store volatile i32 %elt1, ptr addrspace(1) %out
ret void
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ define void @func_use_lds_global() {
ret void
}

; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) {
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast() {
; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -153,7 +153,7 @@ define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) {
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: .LBB1_2:
; GISEL-NEXT: s_endpgm
store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4
store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4
ret void
}

Expand Down
23 changes: 6 additions & 17 deletions llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
Expand Down Expand Up @@ -126,21 +126,10 @@ define amdgpu_cs void @caller() {
declare amdgpu_gfx void @callee(i32)

define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) {
; GFX9-SDAG-LABEL: workgroup_ids_gfx:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[0:1], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[2:3], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[4:5], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: workgroup_ids_gfx:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX9-LABEL: workgroup_ids_gfx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
; GFX9ARCH-SDAG: ; %bb.0:
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/mem-builtins.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,64 @@ declare hidden i32 @strnlen(ptr nocapture, i32) #1
declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1


; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr, ptr addrspace(1)): unsupported call to function memcmp
; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr): unsupported call to function memcmp

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p, ptr addrspace(1) %out) #0 {
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 {
entry:
%cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2)
store i32 %cmp, ptr addrspace(1) %out
store volatile i32 %cmp, ptr addrspace(1) undef
ret void
}

; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64, ptr addrspace(1)): unsupported call to function memchr
; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64): unsupported call to function memchr

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len, ptr addrspace(1) %out) #0 {
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 {
%res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len)
store ptr addrspace(1) %res, ptr addrspace(1) %out
store volatile ptr addrspace(1) %res, ptr addrspace(1) undef
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcpy
; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr): unsupported call to function strcpy

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src, ptr addrspace(1) %out) #0 {
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 {
%res = call ptr @strcpy(ptr %dst, ptr %src)
store ptr %res, ptr addrspace(1) %out
store volatile ptr %res, ptr addrspace(1) undef
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcmp
; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr): unsupported call to function strcmp

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1, ptr addrspace(1) %out) #0 {
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 {
%res = call i32 @strcmp(ptr %src0, ptr %src1)
store i32 %res, ptr addrspace(1) %out
store volatile i32 %res, ptr addrspace(1) undef
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr, ptr addrspace(1)): unsupported call to function strlen
; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr): unsupported call to function strlen

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12
define amdgpu_kernel void @test_strlen(ptr %src, ptr addrspace(1) %out) #0 {
define amdgpu_kernel void @test_strlen(ptr %src) #0 {
%res = call i32 @strlen(ptr %src)
store i32 %res, ptr addrspace(1) %out
store volatile i32 %res, ptr addrspace(1) undef
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32, ptr addrspace(1)): unsupported call to function strnlen
; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32): unsupported call to function strnlen

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size, ptr addrspace(1) %out) #0 {
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 {
%res = call i32 @strnlen(ptr %src, i32 %size)
store i32 %res, ptr addrspace(1) %out
store volatile i32 %res, ptr addrspace(1) undef
ret void
}

Expand Down
58 changes: 0 additions & 58 deletions llvm/test/CodeGen/NVPTX/store-undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,61 +90,3 @@ define void @test_store_def(i64 %param0, i32 %param1, ptr %out) {
store %struct.T %S2, ptr %out
ret void
}

define void @test_store_volatile_undef(ptr %out, <8 x i32> %vec) {
; CHECK-LABEL: test_store_volatile_undef(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<23>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_undef_param_0];
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2;
; CHECK-NEXT: ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_store_volatile_undef_param_1];
; CHECK-NEXT: ld.param.v4.u32 {%r11, %r12, %r13, %r14}, [test_store_volatile_undef_param_1+16];
; CHECK-NEXT: st.volatile.v4.u32 [%rd3], {%r11, %r12, %r13, %r14};
; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r7, %r8, %r9, %r10};
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r15, %r16, %r17, %r18};
; CHECK-NEXT: st.volatile.v4.u32 [%rd1], {%r19, %r20, %r21, %r22};
; CHECK-NEXT: ret;
store volatile %struct.T undef, ptr %out
store volatile <8 x i32> %vec, ptr undef
store volatile <8 x i32> undef, ptr %out
ret void
}

define void @test_store_volatile_of_poison(ptr %out) {
; CHECK-LABEL: test_store_volatile_of_poison(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_of_poison_param_0];
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2;
; CHECK-NEXT: ret;
store volatile %struct.T poison, ptr %out
ret void
}

define void @test_store_volatile_to_poison(%struct.T %param) {
; CHECK-LABEL: test_store_volatile_to_poison(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_to_poison_param_0];
; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_store_volatile_to_poison_param_0+8];
; CHECK-NEXT: ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_store_volatile_to_poison_param_0+16];
; CHECK-NEXT: st.volatile.v4.u32 [%rd2], {%r3, %r4, %r5, %r6};
; CHECK-NEXT: st.volatile.v2.u32 [%rd3], {%r1, %r2};
; CHECK-NEXT: st.volatile.u64 [%rd4], %rd1;
; CHECK-NEXT: ret;
store volatile %struct.T %param, ptr poison
ret void
}
Loading