Skip to content

Commit ef465bf

Browse files
authored
[ARM] Fix arm32be softfp mode miscompilation for neon sdiv (llvm#97883)
Related issue: llvm#97782
1 parent 5ab9e00 commit ef465bf

File tree

2 files changed

+107
-1
lines changed

2 files changed

+107
-1
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -14798,7 +14798,7 @@ static SDValue PerformORCombine(SDNode *N,
1479814798
N0->getOperand(1),
1479914799
N0->getOperand(0),
1480014800
N1->getOperand(0));
14801-
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
14801+
return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
1480214802
}
1480314803
}
1480414804
}

llvm/test/CodeGen/ARM/sdiv_shl.ll

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
; RUN: llc -mtriple armv7-linux -mattr=+neon %s -o - | FileCheck %s --check-prefix=LE
2+
; RUN: llc -mtriple armebv7-linux -mattr=+neon %s -o - | FileCheck %s --check-prefix=BE
3+
4+
; The key point is that the last vrev64 should be vrev64.16, not vrev64.32
5+
6+
; Divide <8 x i16> lanes by per-lane powers of two (1,4,2,8,16,64,32,128), so
; sdiv lowers to the shift/add sequence (vshr/vshl/vadd) checked below.
; CHECK lines are auto-generated-style; keep them byte-exact for FileCheck.
define void @sdiv_shl(ptr %x, ptr %y) nounwind {
7+
; LE-LABEL: sdiv_shl:
8+
; LE: @ %bb.0: @ %entry
9+
; LE-NEXT: adr r2, .LCPI0_0
10+
; LE-NEXT: vld1.64 {d18, d19}, [r1]
11+
; LE-NEXT: adr r1, .LCPI0_1
12+
; LE-NEXT: vld1.64 {d16, d17}, [r2:128]
13+
; LE-NEXT: vshr.s16 q10, q9, #15
14+
; LE-NEXT: vneg.s16 q8, q8
15+
; LE-NEXT: vld1.64 {d22, d23}, [r1:128]
16+
; LE-NEXT: adr r1, .LCPI0_2
17+
; LE-NEXT: vshl.u16 q8, q10, q8
18+
; LE-NEXT: vneg.s16 q10, q11
19+
; LE-NEXT: vadd.i16 q8, q9, q8
20+
; LE-NEXT: vshl.s16 q8, q8, q10
21+
; LE-NEXT: vld1.64 {d20, d21}, [r1:128]
22+
; LE-NEXT: vbit q8, q9, q10
23+
; LE-NEXT: vst1.64 {d16, d17}, [r0]
24+
; LE: .LCPI0_0:
25+
; LE-NEXT: .short 16 @ 0x10
26+
; LE-NEXT: .short 14 @ 0xe
27+
; LE-NEXT: .short 15 @ 0xf
28+
; LE-NEXT: .short 13 @ 0xd
29+
; LE-NEXT: .short 12 @ 0xc
30+
; LE-NEXT: .short 10 @ 0xa
31+
; LE-NEXT: .short 11 @ 0xb
32+
; LE-NEXT: .short 9 @ 0x9
33+
; LE-NEXT: .LCPI0_1:
34+
; LE-NEXT: .short 0 @ 0x0
35+
; LE-NEXT: .short 2 @ 0x2
36+
; LE-NEXT: .short 1 @ 0x1
37+
; LE-NEXT: .short 3 @ 0x3
38+
; LE-NEXT: .short 4 @ 0x4
39+
; LE-NEXT: .short 6 @ 0x6
40+
; LE-NEXT: .short 5 @ 0x5
41+
; LE-NEXT: .short 7 @ 0x7
42+
; LE-NEXT: .LCPI0_2:
43+
; LE-NEXT: .short 65535 @ 0xffff
44+
; LE-NEXT: .short 0 @ 0x0
45+
; LE-NEXT: .short 0 @ 0x0
46+
; LE-NEXT: .short 0 @ 0x0
47+
; LE-NEXT: .short 0 @ 0x0
48+
; LE-NEXT: .short 0 @ 0x0
49+
; LE-NEXT: .short 0 @ 0x0
50+
; LE-NEXT: .short 0 @ 0x0
51+
;
52+
; BE-LABEL: sdiv_shl:
53+
; BE: @ %bb.0: @ %entry
54+
; BE-NEXT: adr r2, .LCPI0_0
55+
; BE-NEXT: vld1.64 {d18, d19}, [r1]
56+
; BE-NEXT: adr r1, .LCPI0_1
57+
; BE-NEXT: vld1.64 {d16, d17}, [r2:128]
58+
; BE-NEXT: vrev64.16 q8, q8
59+
; BE-NEXT: vrev64.16 q9, q9
60+
; BE-NEXT: vneg.s16 q8, q8
61+
; BE-NEXT: vld1.64 {d20, d21}, [r1:128]
62+
; BE-NEXT: adr r1, .LCPI0_2
63+
; BE-NEXT: vshr.s16 q11, q9, #15
64+
; BE-NEXT: vrev64.16 q10, q10
65+
; BE-NEXT: vshl.u16 q8, q11, q8
66+
; BE-NEXT: vld1.64 {d22, d23}, [r1:128]
67+
; BE-NEXT: vneg.s16 q10, q10
68+
; BE-NEXT: vrev64.16 q11, q11
69+
; BE-NEXT: vadd.i16 q8, q9, q8
70+
; BE-NEXT: vshl.s16 q8, q8, q10
71+
; BE-NEXT: vbit q8, q9, q11
72+
; BE-NEXT: vrev64.16 q8, q8
73+
; BE-NEXT: vst1.64 {d16, d17}, [r0]
74+
; BE: .LCPI0_0:
75+
; BE-NEXT: .short 16 @ 0x10
76+
; BE-NEXT: .short 14 @ 0xe
77+
; BE-NEXT: .short 15 @ 0xf
78+
; BE-NEXT: .short 13 @ 0xd
79+
; BE-NEXT: .short 12 @ 0xc
80+
; BE-NEXT: .short 10 @ 0xa
81+
; BE-NEXT: .short 11 @ 0xb
82+
; BE-NEXT: .short 9 @ 0x9
83+
; BE-NEXT: .LCPI0_1:
84+
; BE-NEXT: .short 0 @ 0x0
85+
; BE-NEXT: .short 2 @ 0x2
86+
; BE-NEXT: .short 1 @ 0x1
87+
; BE-NEXT: .short 3 @ 0x3
88+
; BE-NEXT: .short 4 @ 0x4
89+
; BE-NEXT: .short 6 @ 0x6
90+
; BE-NEXT: .short 5 @ 0x5
91+
; BE-NEXT: .short 7 @ 0x7
92+
; BE-NEXT: .LCPI0_2:
93+
; BE-NEXT: .short 65535 @ 0xffff
94+
; BE-NEXT: .short 0 @ 0x0
95+
; BE-NEXT: .short 0 @ 0x0
96+
; BE-NEXT: .short 0 @ 0x0
97+
; BE-NEXT: .short 0 @ 0x0
98+
; BE-NEXT: .short 0 @ 0x0
99+
; BE-NEXT: .short 0 @ 0x0
100+
; BE-NEXT: .short 0 @ 0x0
101+
; Load the vector, divide each lane by its power-of-two divisor, store back.
entry:
102+
%0 = load <8 x i16>, ptr %y, align 8
103+
%div = sdiv <8 x i16> %0, <i16 1, i16 4, i16 2, i16 8, i16 16, i16 64, i16 32, i16 128>
104+
store <8 x i16> %div, ptr %x, align 8
105+
ret void
106+
}

0 commit comments

Comments
 (0)