Skip to content

Commit 64927af

Browse files
committed
[PhaseOrdering][X86] Add better SSE/AVX test coverage for add-sub tests
1 parent 9a9b70a commit 64927af

File tree

2 files changed

+64
-30
lines changed

2 files changed

+64
-30
lines changed

llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll

+32-15
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -O3 -S | FileCheck %s
3-
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
4-
5-
target triple = "x86_64--"
6-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE
3+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
4+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
5+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
6+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE
7+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
8+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
9+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
710

811
; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
912
; That may require some coordination between VectorCombine, SLP, and other passes.
@@ -100,16 +103,30 @@ define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %
100103

101104
; PR58139
102105
define <2 x double> @_mm_complexmult_pd_naive(<2 x double> %a, <2 x double> %b) {
103-
; CHECK-LABEL: @_mm_complexmult_pd_naive(
104-
; CHECK-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1
105-
; CHECK-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
106-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
107-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
108-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0
109-
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
110-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer
111-
; CHECK-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]])
112-
; CHECK-NEXT: ret <2 x double> [[TMP7]]
106+
; SSE-LABEL: @_mm_complexmult_pd_naive(
107+
; SSE-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1
108+
; SSE-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
109+
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
110+
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
111+
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0
112+
; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
113+
; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer
114+
; SSE-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]])
115+
; SSE-NEXT: ret <2 x double> [[TMP7]]
116+
;
117+
; AVX-LABEL: @_mm_complexmult_pd_naive(
118+
; AVX-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
119+
; AVX-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i64 1
120+
; AVX-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
121+
; AVX-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i64 1
122+
; AVX-NEXT: [[MUL10:%.*]] = fmul double [[A1]], [[B0]]
123+
; AVX-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
124+
; AVX-NEXT: [[NEG11:%.*]] = fmul double [[A1]], [[TMP1]]
125+
; AVX-NEXT: [[MADD0:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[NEG11]])
126+
; AVX-NEXT: [[MADD1:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B1]], double [[MUL10]])
127+
; AVX-NEXT: [[RES0:%.*]] = insertelement <2 x double> poison, double [[MADD0]], i64 0
128+
; AVX-NEXT: [[RES1:%.*]] = insertelement <2 x double> [[RES0]], double [[MADD1]], i64 1
129+
; AVX-NEXT: ret <2 x double> [[RES1]]
113130
;
114131
%a0 = extractelement <2 x double> %a, i32 0
115132
%a1 = extractelement <2 x double> %a, i32 1

llvm/test/Transforms/PhaseOrdering/X86/addsub.ll

+32-15
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -O3 -S | FileCheck %s
3-
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
4-
5-
target triple = "x86_64--"
6-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE
3+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
4+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
5+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
6+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE
7+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
8+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
9+
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
710

811
; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
912
; That may require some coordination between VectorCombine, SLP, and other passes.
@@ -100,16 +103,30 @@ define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %
100103

101104
; PR58139
102105
define <2 x double> @_mm_complexmult_pd_naive(<2 x double> %a, <2 x double> %b) {
103-
; CHECK-LABEL: @_mm_complexmult_pd_naive(
104-
; CHECK-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1
105-
; CHECK-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
106-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
107-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
108-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0
109-
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
110-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer
111-
; CHECK-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]])
112-
; CHECK-NEXT: ret <2 x double> [[TMP7]]
106+
; SSE-LABEL: @_mm_complexmult_pd_naive(
107+
; SSE-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1
108+
; SSE-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
109+
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
110+
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
111+
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0
112+
; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
113+
; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer
114+
; SSE-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]])
115+
; SSE-NEXT: ret <2 x double> [[TMP7]]
116+
;
117+
; AVX-LABEL: @_mm_complexmult_pd_naive(
118+
; AVX-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
119+
; AVX-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i64 1
120+
; AVX-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
121+
; AVX-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i64 1
122+
; AVX-NEXT: [[MUL10:%.*]] = fmul double [[A1]], [[B0]]
123+
; AVX-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
124+
; AVX-NEXT: [[NEG11:%.*]] = fmul double [[A1]], [[TMP1]]
125+
; AVX-NEXT: [[MADD0:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[NEG11]])
126+
; AVX-NEXT: [[MADD1:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B1]], double [[MUL10]])
127+
; AVX-NEXT: [[RES0:%.*]] = insertelement <2 x double> poison, double [[MADD0]], i64 0
128+
; AVX-NEXT: [[RES1:%.*]] = insertelement <2 x double> [[RES0]], double [[MADD1]], i64 1
129+
; AVX-NEXT: ret <2 x double> [[RES1]]
113130
;
114131
%a0 = extractelement <2 x double> %a, i32 0
115132
%a1 = extractelement <2 x double> %a, i32 1

0 commit comments

Comments (0)