Skip to content

Commit 1897bf6

Browse files
authored
[LoongArch] Enable FeatureExtLSX for generic-la64 processor (#113421)
This commit makes the `generic` target support FP and LSX, as discussed in #110211. It thereby allows 128-bit vectors to be enabled by default in the loongarch64 backend.
1 parent d5cdc26 commit 1897bf6

25 files changed

+640
-1041
lines changed

llvm/lib/Target/LoongArch/LoongArch.td

+3-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ include "LoongArchInstrInfo.td"
135135
//===----------------------------------------------------------------------===//
136136

137137
def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>;
138-
def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>;
138+
def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit,
139+
FeatureUAL,
140+
FeatureExtLSX]>;
139141

140142
// Generic 64-bit processor with double-precision floating-point support.
141143
def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit,

llvm/test/CodeGen/LoongArch/calling-conv-common.ll

+8-10
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,12 @@ define i64 @caller_large_scalars() nounwind {
123123
; CHECK-NEXT: addi.d $sp, $sp, -80
124124
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
125125
; CHECK-NEXT: st.d $zero, $sp, 24
126-
; CHECK-NEXT: st.d $zero, $sp, 16
127-
; CHECK-NEXT: st.d $zero, $sp, 8
126+
; CHECK-NEXT: vrepli.b $vr0, 0
127+
; CHECK-NEXT: vst $vr0, $sp, 8
128128
; CHECK-NEXT: ori $a0, $zero, 2
129129
; CHECK-NEXT: st.d $a0, $sp, 0
130130
; CHECK-NEXT: st.d $zero, $sp, 56
131-
; CHECK-NEXT: st.d $zero, $sp, 48
132-
; CHECK-NEXT: st.d $zero, $sp, 40
131+
; CHECK-NEXT: vst $vr0, $sp, 40
133132
; CHECK-NEXT: ori $a2, $zero, 1
134133
; CHECK-NEXT: addi.d $a0, $sp, 32
135134
; CHECK-NEXT: addi.d $a1, $sp, 0
@@ -182,14 +181,13 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
182181
; CHECK-NEXT: ori $a0, $zero, 9
183182
; CHECK-NEXT: st.d $a0, $sp, 0
184183
; CHECK-NEXT: st.d $zero, $sp, 40
185-
; CHECK-NEXT: st.d $zero, $sp, 32
186-
; CHECK-NEXT: st.d $zero, $sp, 24
184+
; CHECK-NEXT: vrepli.b $vr0, 0
185+
; CHECK-NEXT: vst $vr0, $sp, 24
187186
; CHECK-NEXT: ori $a0, $zero, 10
188187
; CHECK-NEXT: st.d $a0, $sp, 16
189188
; CHECK-NEXT: st.d $zero, $sp, 72
190-
; CHECK-NEXT: st.d $zero, $sp, 64
191-
; CHECK-NEXT: st.d $zero, $sp, 56
192-
; CHECK-NEXT: ori $t0, $zero, 8
189+
; CHECK-NEXT: ori $a0, $zero, 8
190+
; CHECK-NEXT: st.d $a0, $sp, 48
193191
; CHECK-NEXT: ori $a0, $zero, 1
194192
; CHECK-NEXT: ori $a1, $zero, 2
195193
; CHECK-NEXT: ori $a2, $zero, 3
@@ -198,7 +196,7 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
198196
; CHECK-NEXT: ori $a5, $zero, 6
199197
; CHECK-NEXT: ori $a6, $zero, 7
200198
; CHECK-NEXT: addi.d $a7, $sp, 48
201-
; CHECK-NEXT: st.d $t0, $sp, 48
199+
; CHECK-NEXT: vst $vr0, $sp, 56
202200
; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs)
203201
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
204202
; CHECK-NEXT: addi.d $sp, $sp, 96

llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll

+9-20
Original file line numberDiff line numberDiff line change
@@ -63,26 +63,17 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind {
6363
; CHECK: # %bb.0:
6464
; CHECK-NEXT: addi.d $sp, $sp, -16
6565
; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
66-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
67-
; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI3_0)
68-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
69-
; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_1)
70-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
71-
; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI3_2)
72-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
73-
; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_3)
74-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
75-
; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_4)
76-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
77-
; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_5)
78-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
79-
; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI3_6)
80-
; CHECK-NEXT: addi.d $a0, $zero, 1
81-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
82-
; CHECK-NEXT: ffint.d.l $fa0, $fa0
8366
; CHECK-NEXT: ori $a0, $zero, 0
8467
; CHECK-NEXT: lu32i.d $a0, 131072
8568
; CHECK-NEXT: lu52i.d $a0, $a0, 1026
69+
; CHECK-NEXT: vldi $vr0, -912
70+
; CHECK-NEXT: vldi $vr1, -1024
71+
; CHECK-NEXT: vldi $vr2, -1016
72+
; CHECK-NEXT: vldi $vr3, -1008
73+
; CHECK-NEXT: vldi $vr4, -1004
74+
; CHECK-NEXT: vldi $vr5, -1000
75+
; CHECK-NEXT: vldi $vr6, -996
76+
; CHECK-NEXT: vldi $vr7, -992
8677
; CHECK-NEXT: bl %plt(callee_double_in_gpr_exhausted_fprs)
8778
; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
8879
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -98,9 +89,7 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind {
9889
define double @callee_double_ret() nounwind {
9990
; CHECK-LABEL: callee_double_ret:
10091
; CHECK: # %bb.0:
101-
; CHECK-NEXT: addi.d $a0, $zero, 1
102-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
103-
; CHECK-NEXT: ffint.d.l $fa0, $fa0
92+
; CHECK-NEXT: vldi $vr0, -912
10493
; CHECK-NEXT: ret
10594
ret double 1.0
10695
}

llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s
2+
; RUN: llc --mtriple=loongarch64 --target-abi=lp64s --mattr=-f < %s | FileCheck %s
33

44
;; This file contains specific tests for the lp64s ABI.
55

llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll

+19-70
Original file line numberDiff line numberDiff line change
@@ -175,16 +175,11 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
175175
;
176176
; LA64-LABEL: test_ctpop_i8:
177177
; LA64: # %bb.0:
178-
; LA64-NEXT: srli.d $a1, $a0, 1
179-
; LA64-NEXT: andi $a1, $a1, 85
180-
; LA64-NEXT: sub.d $a0, $a0, $a1
181-
; LA64-NEXT: andi $a1, $a0, 51
182-
; LA64-NEXT: srli.d $a0, $a0, 2
183-
; LA64-NEXT: andi $a0, $a0, 51
184-
; LA64-NEXT: add.d $a0, $a1, $a0
185-
; LA64-NEXT: srli.d $a1, $a0, 4
186-
; LA64-NEXT: add.d $a0, $a0, $a1
187-
; LA64-NEXT: andi $a0, $a0, 15
178+
; LA64-NEXT: andi $a0, $a0, 255
179+
; LA64-NEXT: vldi $vr0, 0
180+
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
181+
; LA64-NEXT: vpcnt.d $vr0, $vr0
182+
; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
188183
; LA64-NEXT: ret
189184
%1 = call i8 @llvm.ctpop.i8(i8 %a)
190185
ret i8 %1
@@ -213,22 +208,11 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
213208
;
214209
; LA64-LABEL: test_ctpop_i16:
215210
; LA64: # %bb.0:
216-
; LA64-NEXT: srli.d $a1, $a0, 1
217-
; LA64-NEXT: lu12i.w $a2, 5
218-
; LA64-NEXT: ori $a2, $a2, 1365
219-
; LA64-NEXT: and $a1, $a1, $a2
220-
; LA64-NEXT: sub.d $a0, $a0, $a1
221-
; LA64-NEXT: lu12i.w $a1, 3
222-
; LA64-NEXT: ori $a1, $a1, 819
223-
; LA64-NEXT: and $a2, $a0, $a1
224-
; LA64-NEXT: srli.d $a0, $a0, 2
225-
; LA64-NEXT: and $a0, $a0, $a1
226-
; LA64-NEXT: add.d $a0, $a2, $a0
227-
; LA64-NEXT: srli.d $a1, $a0, 4
228-
; LA64-NEXT: add.d $a0, $a0, $a1
229-
; LA64-NEXT: bstrpick.d $a1, $a0, 11, 8
230-
; LA64-NEXT: andi $a0, $a0, 15
231-
; LA64-NEXT: add.d $a0, $a0, $a1
211+
; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
212+
; LA64-NEXT: vldi $vr0, 0
213+
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
214+
; LA64-NEXT: vpcnt.d $vr0, $vr0
215+
; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
232216
; LA64-NEXT: ret
233217
%1 = call i16 @llvm.ctpop.i16(i16 %a)
234218
ret i16 %1
@@ -261,26 +245,11 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
261245
;
262246
; LA64-LABEL: test_ctpop_i32:
263247
; LA64: # %bb.0:
264-
; LA64-NEXT: srli.d $a1, $a0, 1
265-
; LA64-NEXT: lu12i.w $a2, 349525
266-
; LA64-NEXT: ori $a2, $a2, 1365
267-
; LA64-NEXT: and $a1, $a1, $a2
268-
; LA64-NEXT: sub.d $a0, $a0, $a1
269-
; LA64-NEXT: lu12i.w $a1, 209715
270-
; LA64-NEXT: ori $a1, $a1, 819
271-
; LA64-NEXT: and $a2, $a0, $a1
272-
; LA64-NEXT: srli.d $a0, $a0, 2
273-
; LA64-NEXT: and $a0, $a0, $a1
274-
; LA64-NEXT: add.d $a0, $a2, $a0
275-
; LA64-NEXT: srli.d $a1, $a0, 4
276-
; LA64-NEXT: add.d $a0, $a0, $a1
277-
; LA64-NEXT: lu12i.w $a1, 61680
278-
; LA64-NEXT: ori $a1, $a1, 3855
279-
; LA64-NEXT: and $a0, $a0, $a1
280-
; LA64-NEXT: lu12i.w $a1, 4112
281-
; LA64-NEXT: ori $a1, $a1, 257
282-
; LA64-NEXT: mul.d $a0, $a0, $a1
283-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24
248+
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
249+
; LA64-NEXT: vldi $vr0, 0
250+
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
251+
; LA64-NEXT: vpcnt.d $vr0, $vr0
252+
; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
284253
; LA64-NEXT: ret
285254
%1 = call i32 @llvm.ctpop.i32(i32 %a)
286255
ret i32 %1
@@ -327,30 +296,10 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
327296
;
328297
; LA64-LABEL: test_ctpop_i64:
329298
; LA64: # %bb.0:
330-
; LA64-NEXT: srli.d $a1, $a0, 1
331-
; LA64-NEXT: lu12i.w $a2, 349525
332-
; LA64-NEXT: ori $a2, $a2, 1365
333-
; LA64-NEXT: bstrins.d $a2, $a2, 62, 32
334-
; LA64-NEXT: and $a1, $a1, $a2
335-
; LA64-NEXT: sub.d $a0, $a0, $a1
336-
; LA64-NEXT: lu12i.w $a1, 209715
337-
; LA64-NEXT: ori $a1, $a1, 819
338-
; LA64-NEXT: bstrins.d $a1, $a1, 61, 32
339-
; LA64-NEXT: and $a2, $a0, $a1
340-
; LA64-NEXT: srli.d $a0, $a0, 2
341-
; LA64-NEXT: and $a0, $a0, $a1
342-
; LA64-NEXT: add.d $a0, $a2, $a0
343-
; LA64-NEXT: srli.d $a1, $a0, 4
344-
; LA64-NEXT: add.d $a0, $a0, $a1
345-
; LA64-NEXT: lu12i.w $a1, 61680
346-
; LA64-NEXT: ori $a1, $a1, 3855
347-
; LA64-NEXT: bstrins.d $a1, $a1, 59, 32
348-
; LA64-NEXT: and $a0, $a0, $a1
349-
; LA64-NEXT: lu12i.w $a1, 4112
350-
; LA64-NEXT: ori $a1, $a1, 257
351-
; LA64-NEXT: bstrins.d $a1, $a1, 56, 32
352-
; LA64-NEXT: mul.d $a0, $a0, $a1
353-
; LA64-NEXT: srli.d $a0, $a0, 56
299+
; LA64-NEXT: vldi $vr0, 0
300+
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
301+
; LA64-NEXT: vpcnt.d $vr0, $vr0
302+
; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
354303
; LA64-NEXT: ret
355304
%1 = call i64 @llvm.ctpop.i64(i64 %a)
356305
ret i64 %1

llvm/test/CodeGen/LoongArch/double-imm.ll

+2-6
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,7 @@ define double @f64_add_fimm1(double %a) nounwind {
5959
;
6060
; LA64-LABEL: f64_add_fimm1:
6161
; LA64: # %bb.0:
62-
; LA64-NEXT: addi.d $a0, $zero, 1
63-
; LA64-NEXT: movgr2fr.d $fa1, $a0
64-
; LA64-NEXT: ffint.d.l $fa1, $fa1
62+
; LA64-NEXT: vldi $vr1, -912
6563
; LA64-NEXT: fadd.d $fa0, $fa0, $fa1
6664
; LA64-NEXT: ret
6765
%1 = fadd double %a, 1.0
@@ -79,9 +77,7 @@ define double @f64_positive_fimm1() nounwind {
7977
;
8078
; LA64-LABEL: f64_positive_fimm1:
8179
; LA64: # %bb.0:
82-
; LA64-NEXT: addi.d $a0, $zero, 1
83-
; LA64-NEXT: movgr2fr.d $fa0, $a0
84-
; LA64-NEXT: ffint.d.l $fa0, $fa0
80+
; LA64-NEXT: vldi $vr0, -912
8581
; LA64-NEXT: ret
8682
ret double 1.0
8783
}

llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll

+7-8
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,13 @@ define double @fdiv_d(double %x, double %y) {
6666
;
6767
; LA64D-FRECIPE-LABEL: fdiv_d:
6868
; LA64D-FRECIPE: # %bb.0:
69-
; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
70-
; LA64D-FRECIPE-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0)
71-
; LA64D-FRECIPE-NEXT: frecipe.d $fa3, $fa1
72-
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa1, $fa3, $fa2
73-
; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa2, $fa3, $fa3
74-
; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2
75-
; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0
76-
; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3
69+
; LA64D-FRECIPE-NEXT: frecipe.d $fa2, $fa1
70+
; LA64D-FRECIPE-NEXT: vldi $vr3, -784
71+
; LA64D-FRECIPE-NEXT: fmadd.d $fa3, $fa1, $fa2, $fa3
72+
; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa3, $fa2, $fa2
73+
; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2
74+
; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0
75+
; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3
7776
; LA64D-FRECIPE-NEXT: ret
7877
%div = fdiv fast double %x, %y
7978
ret double %div

llvm/test/CodeGen/LoongArch/frame.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ define i32 @test() nounwind {
1212
; CHECK-NEXT: addi.d $sp, $sp, -32
1313
; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
1414
; CHECK-NEXT: st.w $zero, $sp, 16
15-
; CHECK-NEXT: st.d $zero, $sp, 8
16-
; CHECK-NEXT: st.d $zero, $sp, 0
15+
; CHECK-NEXT: vrepli.b $vr0, 0
16+
; CHECK-NEXT: vst $vr0, $sp, 0
1717
; CHECK-NEXT: addi.d $a0, $sp, 4
1818
; CHECK-NEXT: bl %plt(test1)
1919
; CHECK-NEXT: move $a0, $zero

0 commit comments

Comments
 (0)