|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2 |
| -; RUN: opt < %s -O3 -S | FileCheck %s |
3 |
| -; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s |
4 |
| - |
5 |
| -target triple = "x86_64--" |
6 |
| -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| 2 | +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE |
| 3 | +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE |
| 4 | +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX |
| 5 | +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX |
| 6 | +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE |
| 7 | +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE |
| 8 | +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX |
| 9 | +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX |
7 | 10 |
|
8 | 11 | ; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
|
9 | 12 | ; That may require some coordination between VectorCombine, SLP, and other passes.
|
@@ -100,16 +103,30 @@ define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %
|
100 | 103 |
|
101 | 104 | ; PR58139
|
102 | 105 | define <2 x double> @_mm_complexmult_pd_naive(<2 x double> %a, <2 x double> %b) {
|
103 |
| -; CHECK-LABEL: @_mm_complexmult_pd_naive( |
104 |
| -; CHECK-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1 |
105 |
| -; CHECK-NEXT: [[TMP1:%.*]] = fneg double [[B1]] |
106 |
| -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1> |
107 |
| -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0> |
108 |
| -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0 |
109 |
| -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] |
110 |
| -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer |
111 |
| -; CHECK-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]]) |
112 |
| -; CHECK-NEXT: ret <2 x double> [[TMP7]] |
| 106 | +; SSE-LABEL: @_mm_complexmult_pd_naive( |
| 107 | +; SSE-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1 |
| 108 | +; SSE-NEXT: [[TMP1:%.*]] = fneg double [[B1]] |
| 109 | +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1> |
| 110 | +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0> |
| 111 | +; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0 |
| 112 | +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] |
| 113 | +; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer |
| 114 | +; SSE-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]]) |
| 115 | +; SSE-NEXT: ret <2 x double> [[TMP7]] |
| 116 | +; |
| 117 | +; AVX-LABEL: @_mm_complexmult_pd_naive( |
| 118 | +; AVX-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 |
| 119 | +; AVX-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i64 1 |
| 120 | +; AVX-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 |
| 121 | +; AVX-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i64 1 |
| 122 | +; AVX-NEXT: [[MUL10:%.*]] = fmul double [[A1]], [[B0]] |
| 123 | +; AVX-NEXT: [[TMP1:%.*]] = fneg double [[B1]] |
| 124 | +; AVX-NEXT: [[NEG11:%.*]] = fmul double [[A1]], [[TMP1]] |
| 125 | +; AVX-NEXT: [[MADD0:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[NEG11]]) |
| 126 | +; AVX-NEXT: [[MADD1:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B1]], double [[MUL10]]) |
| 127 | +; AVX-NEXT: [[RES0:%.*]] = insertelement <2 x double> poison, double [[MADD0]], i64 0 |
| 128 | +; AVX-NEXT: [[RES1:%.*]] = insertelement <2 x double> [[RES0]], double [[MADD1]], i64 1 |
| 129 | +; AVX-NEXT: ret <2 x double> [[RES1]] |
113 | 130 | ;
|
114 | 131 | %a0 = extractelement <2 x double> %a, i32 0
|
115 | 132 | %a1 = extractelement <2 x double> %a, i32 1
|
|
0 commit comments