Skip to content

Commit 0934f6d

Browse files
MacDue authored and tru committed
Precommit vscale-fixups.ll test (NFC)
Precommit test for llvm#100080. (cherry picked from commit c1b70fa)
1 parent 3651ae0 commit 0934f6d

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll

+47
Original file line number | Diff line number | Diff line change
@@ -384,4 +384,51 @@ for.exit:
384384
ret void
385385
}
386386

387+
;; This test demonstrates an incorrect MUL VL address calculation. Here there
388+
;; are two writes that should be `16 * vscale * vscale` floats apart; however,
389+
;; loop-strength-reduce has ignored the second `vscale` and offset the second
390+
;; write by `#4, mul vl`, i.e. only `16 * vscale` floats, dropping a vscale.
391+
;; Loops %i over [0, 4 * vscale). Each iteration performs two masked stores of
;; splatted floats relative to %alloc: 4.0 at float index %i * (4 * vscale) and
;; 8.0 at float index (%i + 4 * vscale) * (4 * vscale).
define void @vscale_squared_offset(ptr %alloc) #0 {
392+
; COMMON-LABEL: vscale_squared_offset:
393+
; COMMON: // %bb.0: // %entry
394+
; COMMON-NEXT: fmov z0.s, #4.00000000
395+
; COMMON-NEXT: mov x8, xzr
396+
; COMMON-NEXT: cntw x9
397+
; COMMON-NEXT: fmov z1.s, #8.00000000
398+
; COMMON-NEXT: ptrue p0.s, vl1
399+
; COMMON-NEXT: cmp x8, x9
400+
; COMMON-NEXT: b.ge .LBB6_2
401+
; COMMON-NEXT: .LBB6_1: // %for.body
402+
; COMMON-NEXT: // =>This Inner Loop Header: Depth=1
403+
; COMMON-NEXT: st1w { z0.s }, p0, [x0]
404+
; COMMON-NEXT: add x8, x8, #1
405+
; COMMON-NEXT: st1w { z1.s }, p0, [x0, #4, mul vl]
406+
; COMMON-NEXT: addvl x0, x0, #1
407+
; COMMON-NEXT: cmp x8, x9
408+
; COMMON-NEXT: b.lt .LBB6_1
409+
; COMMON-NEXT: .LBB6_2: // %for.exit
410+
; COMMON-NEXT: ret
411+
entry:
412+
%vscale = call i64 @llvm.vscale.i64()
413+
;; 4 * vscale is used both as the loop bound and as the multiplier in the two
;; store offsets below.
%c4_vscale = mul i64 %vscale, 4
414+
br label %for.check
415+
for.check:
416+
;; Induction variable: starts at 0 on entry and is bumped by 1 in %for.body.
%i = phi i64 [ %next_i, %for.body ], [ 0, %entry ]
417+
;; Signed comparison: keep iterating while %i < 4 * vscale.
%is_lt = icmp slt i64 %i, %c4_vscale
418+
br i1 %is_lt, label %for.body, label %for.exit
419+
for.body:
420+
;; One predicate shared by both stores; the expected assembly above shows it
;; materialised as `ptrue p0.s, vl1`.
%mask = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 0, i64 1)
421+
;; First store address: %alloc + %i * (4 * vscale) floats.
%upper_offset = mul i64 %i, %c4_vscale
422+
%upper_ptr = getelementptr float, ptr %alloc, i64 %upper_offset
423+
;; Masked store of a splat of 4.0 to %upper_ptr.
call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 4.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), ptr %upper_ptr, i32 4, <vscale x 4 x i1> %mask)
424+
;; Second store address: %alloc + (%i + 4 * vscale) * (4 * vscale) floats, which
;; is 16 * vscale * vscale floats past %upper_ptr.
%lower_i = add i64 %i, %c4_vscale
425+
%lower_offset = mul i64 %lower_i, %c4_vscale
426+
%lower_ptr = getelementptr float, ptr %alloc, i64 %lower_offset
427+
;; Masked store of a splat of 8.0 to %lower_ptr.
call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 8.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), ptr %lower_ptr, i32 4, <vscale x 4 x i1> %mask)
428+
%next_i = add i64 %i, 1
429+
br label %for.check
430+
for.exit:
431+
ret void
432+
}
433+
387434
;; Attribute group #0: enables the SVE2 target feature and declares that vscale
;; lies in the range [1, 16].
attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }

0 commit comments

Comments
 (0)