Commit c98e41f

[LegalizeVectorTypes] Always widen fabs (#111298)
fabs and fneg are similar nodes in that they can always be expanded to integer ops, but currently they diverge when widened.

If the widened vector fabs is marked as expand (and the corresponding scalar type is too), LegalizeVectorTypes assumes it may be turned into a libcall and unrolls it to avoid the overhead on the undef elements. However, unlike the other ops in that list (fsin, fround, flog, etc.), an fabs marked as expand is never legalized into a libcall; like fneg, it can always be expanded into an integer op.

This moves the FABS case below unrollExpandedOp to bring it in line with fneg, which fixes an issue on RISC-V where f16 fabs was unexpectedly scalarized when zfhmin is not available.
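Background note (not part of the commit message): fabs only has to clear the IEEE sign bit and fneg only has to flip it, which is why neither ever needs a libcall. A minimal, self-contained C++ sketch of that integer expansion on raw f16 bit patterns (illustrative helper names, not LLVM code):

#include <cstdint>
#include <cstdio>

// Operate directly on the IEEE-754 half-precision bit pattern, the same way
// the expanded integer ops do.
static uint16_t fabs_f16_bits(uint16_t bits) { return bits & 0x7FFFu; } // clear the sign bit
static uint16_t fneg_f16_bits(uint16_t bits) { return bits ^ 0x8000u; } // flip the sign bit

int main() {
  uint16_t neg_two = 0xC000; // bit pattern of -2.0 as an IEEE half
  std::printf("fabs(0x%04X) = 0x%04X\n", (unsigned)neg_two, (unsigned)fabs_f16_bits(neg_two)); // 0x4000 == +2.0
  std::printf("fneg(0x%04X) = 0x%04X\n", (unsigned)neg_two, (unsigned)fneg_f16_bits(neg_two)); // 0x4000 == +2.0
  return 0;
}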
1 parent 3137b6a commit c98e41f

File tree

2 files changed: +12 -257 lines

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+1, -2)
@@ -4679,7 +4679,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     Res = WidenVecRes_XROUND(N);
     break;
 
-  case ISD::FABS:
   case ISD::FACOS:
   case ISD::FASIN:
   case ISD::FATAN:
@@ -4727,7 +4726,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::VP_CTTZ_ZERO_UNDEF:
   case ISD::FNEG: case ISD::VP_FNEG:
-  case ISD::VP_FABS:
+  case ISD::FABS: case ISD::VP_FABS:
   case ISD::VP_SQRT:
   case ISD::VP_FCEIL:
   case ISD::VP_FFLOOR:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+11, -255)
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32-ZVFHMIN
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64-ZVFHMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 define void @fadd_v8f16(ptr %x, ptr %y) {
 ; ZVFH-LABEL: fadd_v8f16:
@@ -484,259 +484,15 @@ define void @fabs_v6f16(ptr %x) {
 ; ZVFH-NEXT: vse16.v v8, (a0)
 ; ZVFH-NEXT: ret
 ;
-; RV32-ZVFHMIN-LABEL: fabs_v6f16:
-; RV32-ZVFHMIN: # %bb.0:
-; RV32-ZVFHMIN-NEXT: addi sp, sp, -48
-; RV32-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZVFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZVFHMIN-NEXT: .cfi_offset s0, -8
-; RV32-ZVFHMIN-NEXT: .cfi_offset s1, -12
-; RV32-ZVFHMIN-NEXT: .cfi_offset fs0, -24
-; RV32-ZVFHMIN-NEXT: csrr a1, vlenb
-; RV32-ZVFHMIN-NEXT: slli a1, a1, 1
-; RV32-ZVFHMIN-NEXT: sub sp, sp, a1
-; RV32-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV32-ZVFHMIN-NEXT: mv s0, a0
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vle16.v v8, (a0)
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fmv.s fs0, fa0
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa5, fs0
-; RV32-ZVFHMIN-NEXT: fmv.x.w s1, fa0
-; RV32-ZVFHMIN-NEXT: fmv.s fa0, fa5
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT: vse16.v v8, (s0)
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: slli a0, a0, 1
-; RV32-ZVFHMIN-NEXT: add sp, sp, a0
-; RV32-ZVFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: addi sp, sp, 48
-; RV32-ZVFHMIN-NEXT: ret
-;
-; RV64-ZVFHMIN-LABEL: fabs_v6f16:
-; RV64-ZVFHMIN: # %bb.0:
-; RV64-ZVFHMIN-NEXT: addi sp, sp, -48
-; RV64-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZVFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64-ZVFHMIN-NEXT: .cfi_offset s1, -24
-; RV64-ZVFHMIN-NEXT: .cfi_offset fs0, -32
-; RV64-ZVFHMIN-NEXT: csrr a1, vlenb
-; RV64-ZVFHMIN-NEXT: slli a1, a1, 1
-; RV64-ZVFHMIN-NEXT: sub sp, sp, a1
-; RV64-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV64-ZVFHMIN-NEXT: mv s0, a0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vle16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fmv.s fs0, fa0
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fs0
-; RV64-ZVFHMIN-NEXT: fmv.x.w s1, fa0
-; RV64-ZVFHMIN-NEXT: fmv.s fa0, fa5
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa0, fa0
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT: vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT: add sp, sp, a0
-; RV64-ZVFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT: ret
+; ZVFHMIN-LABEL: fabs_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
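
The new ZVFHMIN output above implements the widened fabs as a single integer mask: lui a1, 8 sets a1 to 0x8000, addi a1, a1, -1 turns that into 0x7fff, and vand.vx clears the sign bit of every f16 element. A small standalone C++ sketch of the per-element computation (illustration only, not generated by the test):

#include <cstdint>
#include <cstdio>

int main() {
  // lui a1, 8 ; addi a1, a1, -1  ->  (8 << 12) - 1 = 0x7fff
  unsigned mask = (8u << 12) - 1;
  uint16_t elem = 0xC500;                 // -5.0 as an IEEE half
  uint16_t abs = (uint16_t)(elem & mask); // what vand.vx does to each element
  std::printf("mask=0x%04X  |0x%04X| = 0x%04X\n", mask, (unsigned)elem, (unsigned)abs); // 0x4500 == +5.0
  return 0;
}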
