3
3
4
4
; Named struct type with a single field: an array of eight i64 values.
; It is the array element type indexed by the getelementptr in @issue63986.
%"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64 ] }
5
5
6
- define void @issue63986 (i64 %0 , i64 %idxprom ) {
6
+ define void @issue63986 (i64 %0 , i64 %idxprom , ptr inreg %ptr ) {
7
7
; CHECK-LABEL: issue63986:
8
8
; CHECK: ; %bb.0: ; %entry
9
9
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10
10
; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
11
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
12
+ ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s16, v4
13
+ ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v6, v5, vcc
11
14
; CHECK-NEXT: s_mov_b64 s[4:5], 0
12
15
; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
13
16
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
14
17
; CHECK-NEXT: v_mov_b32_e32 v7, s5
15
18
; CHECK-NEXT: v_mov_b32_e32 v6, s4
16
- ; CHECK-NEXT: flat_load_dwordx4 v[6:9 ], v[6:7]
17
- ; CHECK-NEXT: v_add_co_u32_e32 v10 , vcc, s4, v4
19
+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13 ], v[6:7]
20
+ ; CHECK-NEXT: v_add_co_u32_e32 v6 , vcc, s4, v8
18
21
; CHECK-NEXT: s_add_u32 s4, s4, 16
19
- ; CHECK-NEXT: v_mov_b32_e32 v11, s5
20
22
; CHECK-NEXT: s_addc_u32 s5, s5, 0
21
23
; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
22
- ; CHECK-NEXT: v_addc_co_u32_e32 v11 , vcc, v5, v11 , vcc
24
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
23
25
; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
24
26
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
25
- ; CHECK-NEXT: flat_store_dwordx4 v[10:11 ], v[6:9 ]
27
+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7 ], v[10:13 ]
26
28
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
27
29
; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
28
30
; CHECK-NEXT: s_branch .LBB0_4
@@ -31,110 +33,116 @@ define void @issue63986(i64 %0, i64 %idxprom) {
31
33
; CHECK-NEXT: s_branch .LBB0_5
32
34
; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
33
35
; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
34
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_7
36
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_8
35
37
; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
36
- ; CHECK-NEXT: v_or_b32_e32 v2, 32, v4
37
- ; CHECK-NEXT: v_mov_b32_e32 v3, v5
38
+ ; CHECK-NEXT: s_add_u32 s4, s16, 32
39
+ ; CHECK-NEXT: s_addc_u32 s5, s17, 0
40
+ ; CHECK-NEXT: v_mov_b32_e32 v3, s5
41
+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4
42
+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
38
43
; CHECK-NEXT: s_mov_b64 s[4:5], 0
39
44
; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
40
45
; CHECK-NEXT: s_add_u32 s6, 32, s4
41
46
; CHECK-NEXT: s_addc_u32 s7, 0, s5
42
47
; CHECK-NEXT: v_mov_b32_e32 v6, s6
43
48
; CHECK-NEXT: v_mov_b32_e32 v7, s7
44
49
; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
45
- ; CHECK-NEXT: v_mov_b32_e32 v9, s5
46
- ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s4, v2
47
- ; CHECK-NEXT: v_mov_b32_e32 v7, v5
48
- ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v9, vcc
50
+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
51
+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v2
52
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
49
53
; CHECK-NEXT: s_add_u32 s4, s4, 1
50
- ; CHECK-NEXT: v_mov_b32_e32 v6, v4
51
54
; CHECK-NEXT: s_addc_u32 s5, s5, 0
52
55
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
53
- ; CHECK-NEXT: flat_store_byte v[8:9], v10
54
- ; CHECK-NEXT: .LBB0_7: ; %post-loop-memcpy-expansion
56
+ ; CHECK-NEXT: flat_store_byte v[6:7], v10
57
+ ; CHECK-NEXT: ; %bb.7:
58
+ ; CHECK-NEXT: v_mov_b32_e32 v7, v5
59
+ ; CHECK-NEXT: v_mov_b32_e32 v6, v4
60
+ ; CHECK-NEXT: .LBB0_8: ; %post-loop-memcpy-expansion
55
61
; CHECK-NEXT: v_and_b32_e32 v2, 15, v0
56
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0
57
62
; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
63
+ ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, v6, v0
64
+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
65
+ ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v7, v1, vcc
58
66
; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
59
67
; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
60
- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0
61
- ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v1, vcc
62
- ; CHECK-NEXT: s_branch .LBB0_10
63
- ; CHECK-NEXT: .LBB0_8: ; %Flow14
64
- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
68
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
69
+ ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, s16, v4
70
+ ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
71
+ ; CHECK-NEXT: s_branch .LBB0_11
72
+ ; CHECK-NEXT: .LBB0_9: ; %Flow14
73
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
65
74
; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
66
75
; CHECK-NEXT: s_mov_b64 s[8:9], 0
67
- ; CHECK-NEXT: .LBB0_9 : ; %Flow16
68
- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
76
+ ; CHECK-NEXT: .LBB0_10 : ; %Flow16
77
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
69
78
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
70
- ; CHECK-NEXT: s_cbranch_vccz .LBB0_18
71
- ; CHECK-NEXT: .LBB0_10 : ; %while.cond
79
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_19
80
+ ; CHECK-NEXT: .LBB0_11 : ; %while.cond
72
81
; CHECK-NEXT: ; =>This Loop Header: Depth=1
73
- ; CHECK-NEXT: ; Child Loop BB0_12 Depth 2
74
- ; CHECK-NEXT: ; Child Loop BB0_16 Depth 2
82
+ ; CHECK-NEXT: ; Child Loop BB0_13 Depth 2
83
+ ; CHECK-NEXT: ; Child Loop BB0_17 Depth 2
75
84
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
76
- ; CHECK-NEXT: s_cbranch_execz .LBB0_13
77
- ; CHECK-NEXT: ; %bb.11 : ; %loop-memcpy-expansion2.preheader
78
- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
85
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_14
86
+ ; CHECK-NEXT: ; %bb.12 : ; %loop-memcpy-expansion2.preheader
87
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
79
88
; CHECK-NEXT: s_mov_b64 s[10:11], 0
80
89
; CHECK-NEXT: s_mov_b64 s[12:13], 0
81
- ; CHECK-NEXT: .LBB0_12 : ; %loop-memcpy-expansion2
82
- ; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
90
+ ; CHECK-NEXT: .LBB0_13 : ; %loop-memcpy-expansion2
91
+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
83
92
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
84
- ; CHECK-NEXT: v_mov_b32_e32 v8, s12
85
- ; CHECK-NEXT: v_mov_b32_e32 v9, s13
86
- ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
87
- ; CHECK-NEXT: v_mov_b32_e32 v13, s13
88
- ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s12, v4
93
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s12
94
+ ; CHECK-NEXT: v_mov_b32_e32 v7, s13
95
+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
96
+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s12, v8
89
97
; CHECK-NEXT: s_add_u32 s12, s12, 16
90
- ; CHECK-NEXT: v_addc_co_u32_e32 v13 , vcc, v5, v13 , vcc
98
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
91
99
; CHECK-NEXT: s_addc_u32 s13, s13, 0
92
100
; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
93
101
; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
94
102
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
95
- ; CHECK-NEXT: flat_store_dwordx4 v[12:13 ], v[8:11 ]
103
+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7 ], v[10:13 ]
96
104
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
97
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_12
98
- ; CHECK-NEXT: .LBB0_13 : ; %Flow15
99
- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
105
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_13
106
+ ; CHECK-NEXT: .LBB0_14 : ; %Flow15
107
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
100
108
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
101
109
; CHECK-NEXT: s_mov_b64 s[8:9], -1
102
- ; CHECK-NEXT: s_cbranch_execz .LBB0_9
103
- ; CHECK-NEXT: ; %bb.14 : ; %loop-memcpy-residual-header5
104
- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
110
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_10
111
+ ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual-header5
112
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
105
113
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
106
114
; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
107
- ; CHECK-NEXT: s_cbranch_execz .LBB0_8
108
- ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual4.preheader
109
- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
115
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_9
116
+ ; CHECK-NEXT: ; %bb.16 : ; %loop-memcpy-residual4.preheader
117
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
110
118
; CHECK-NEXT: s_mov_b64 s[12:13], 0
111
119
; CHECK-NEXT: s_mov_b64 s[14:15], 0
112
- ; CHECK-NEXT: .LBB0_16 : ; %loop-memcpy-residual4
113
- ; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
120
+ ; CHECK-NEXT: .LBB0_17 : ; %loop-memcpy-residual4
121
+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
114
122
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
115
123
; CHECK-NEXT: v_mov_b32_e32 v10, s15
116
- ; CHECK-NEXT: v_add_co_u32_e32 v8 , vcc, s14, v0
117
- ; CHECK-NEXT: v_addc_co_u32_e32 v9 , vcc, v1, v10, vcc
118
- ; CHECK-NEXT: flat_load_ubyte v11, v[8:9 ]
119
- ; CHECK-NEXT: v_add_co_u32_e32 v8 , vcc, s14, v6
124
+ ; CHECK-NEXT: v_add_co_u32_e32 v6 , vcc, s14, v0
125
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v1, v10, vcc
126
+ ; CHECK-NEXT: flat_load_ubyte v11, v[6:7 ]
127
+ ; CHECK-NEXT: v_add_co_u32_e32 v6 , vcc, s14, v4
120
128
; CHECK-NEXT: s_add_u32 s14, s14, 1
121
129
; CHECK-NEXT: s_addc_u32 s15, s15, 0
122
130
; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3]
123
- ; CHECK-NEXT: v_addc_co_u32_e32 v9 , vcc, v7 , v10, vcc
131
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v5 , v10, vcc
124
132
; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
125
133
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
126
- ; CHECK-NEXT: flat_store_byte v[8:9 ], v11
134
+ ; CHECK-NEXT: flat_store_byte v[6:7 ], v11
127
135
; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
128
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_16
129
- ; CHECK-NEXT: ; %bb.17 : ; %Flow
130
- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
136
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_17
137
+ ; CHECK-NEXT: ; %bb.18 : ; %Flow
138
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
131
139
; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
132
- ; CHECK-NEXT: s_branch .LBB0_8
133
- ; CHECK-NEXT: .LBB0_18 : ; %DummyReturnBlock
140
+ ; CHECK-NEXT: s_branch .LBB0_9
141
+ ; CHECK-NEXT: .LBB0_19 : ; %DummyReturnBlock
134
142
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
135
143
; CHECK-NEXT: s_setpc_b64 s[30:31]
136
144
entry:
137
- %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer" ], ptr null , i64 0 , i64 %idxprom
145
+ %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer" ], ptr %ptr , i64 0 , i64 %idxprom
138
146
%spec.select = tail call i64 @llvm.umin.i64 (i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace (4 ) inttoptr (i64 32 to ptr addrspace (4 )) to ptr ) to i64 ), i64 ptrtoint (ptr addrspacecast (ptr addrspace (4 ) null to ptr ) to i64 )), i64 56 )
139
147
tail call void @llvm.memcpy.p0.p0.i64 (ptr %arrayidx , ptr null , i64 %spec.select , i1 false )
140
148
br label %while.cond
0 commit comments