-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[X86] Remove extra MOV after widening atomic load #138635
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/spr/main/2894ccd1
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: None (jofrn) Changes: This change adds patterns to optimize out an extra MOV. Stack:
Full diff: https://github.com/llvm/llvm-project/pull/138635.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 167e27eddd71e..8ad8a0a6194d6 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)))))),
+ (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16>
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
+ (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>
+
// Floating point loads/stores.
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 9ee8b4fc5ac7f..935d058a52f8f 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -149,8 +149,7 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
define <2 x i8> @atomic_vec2_i8(ptr %x) {
; CHECK3-LABEL: atomic_vec2_i8:
; CHECK3: ## %bb.0:
-; CHECK3-NEXT: movzwl (%rdi), %eax
-; CHECK3-NEXT: movd %eax, %xmm0
+; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK3-NEXT: retq
;
; CHECK0-LABEL: atomic_vec2_i8:
@@ -165,11 +164,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) {
}
define <2 x i16> @atomic_vec2_i16(ptr %x) {
-; CHECK-LABEL: atomic_vec2_i16:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: retq
+; CHECK3-LABEL: atomic_vec2_i16:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_i16:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT: retq
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
ret <2 x i16> %ret
}
@@ -177,8 +180,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) {
define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
; CHECK-LABEL: atomic_vec2_ptr270:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq (%rdi), %xmm0
; CHECK-NEXT: retq
%ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
ret <2 x ptr addrspace(270)> %ret
@@ -187,8 +189,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
; CHECK-LABEL: atomic_vec2_i32_align:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq (%rdi), %xmm0
; CHECK-NEXT: retq
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
ret <2 x i32> %ret
@@ -197,8 +198,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
define <2 x float> @atomic_vec2_float_align(ptr %x) {
; CHECK-LABEL: atomic_vec2_float_align:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq (%rdi), %xmm0
; CHECK-NEXT: retq
%ret = load atomic <2 x float>, ptr %x acquire, align 8
ret <2 x float> %ret
@@ -354,11 +354,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
}
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_i8:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: retq
+; CHECK3-LABEL: atomic_vec4_i8:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec4_i8:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT: retq
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
ret <4 x i8> %ret
}
@@ -366,8 +370,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
; CHECK-LABEL: atomic_vec4_i16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq (%rdi), %xmm0
; CHECK-NEXT: retq
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
ret <4 x i16> %ret
|
This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503
4383732
to
0fcd430
Compare
0b8a020
to
561e379
Compare
@@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;

def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)))))),
          (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this will dereference 32-bits
This change adds patterns to optimize out an extra MOV
present after widening the atomic load.
Stack: