-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Generalize ignoreSignBitOfZero/NaN to handle more cases #141015
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch was originally part of #139861. It generalizes BTW, I find it mitigates performance regressions caused by #141010 (IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2365/files). We don't need to propagate FMF from fcmp into select, since we can infer demanded properties from the user of select. Full diff: https://github.com/llvm/llvm-project/pull/141015.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2ef233bc25d72..13cbf7b215557 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2783,15 +2783,41 @@ static bool ignoreSignBitOfZero(Instruction &I) {
if (!I.hasOneUse())
return false;
Instruction *User = I.user_back();
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
+ if (FPOp->hasNoSignedZeros())
+ return true;
+ }
- // fcmp treats both positive and negative zero as equal.
- if (User->getOpcode() == Instruction::FCmp)
+ switch (User->getOpcode()) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
return true;
-
- if (auto *FPOp = dyn_cast<FPMathOperator>(User))
- return FPOp->hasNoSignedZeros();
-
- return false;
+ case Instruction::FCmp:
+ // fcmp treats both positive and negative zero as equal.
+ return true;
+ case Instruction::Call:
+ if (auto *II = dyn_cast<IntrinsicInst>(User)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ return true;
+ case Intrinsic::copysign:
+ return II->getArgOperand(0) == &I;
+ case Intrinsic::is_fpclass:
+ case Intrinsic::vp_is_fpclass: {
+ auto Test =
+ static_cast<FPClassTest>(
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()) &
+ FPClassTest::fcZero;
+ return Test == FPClassTest::fcZero || Test == FPClassTest::fcNone;
+ }
+ default:
+ return false;
+ }
+ }
+ return false;
+ default:
+ return false;
+ }
}
/// Return true if the sign bit of result can be ignored when the result is NaN.
@@ -2803,15 +2829,67 @@ static bool ignoreSignBitOfNaN(Instruction &I) {
if (!I.hasOneUse())
return false;
Instruction *User = I.user_back();
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
+ if (FPOp->hasNoNaNs())
+ return true;
+ }
- // fcmp ignores the sign bit of NaN.
- if (User->getOpcode() == Instruction::FCmp)
+ switch (User->getOpcode()) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
return true;
+ // Proper FP math operations ignore the sign bit of NaN.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FCmp:
+ return true;
+ // Bitwise FP operations should preserve the sign bit of NaN.
+ case Instruction::FNeg:
+ case Instruction::Select:
+ case Instruction::PHI:
+ return false;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ if (auto *II = dyn_cast<IntrinsicInst>(User)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ return true;
+ case Intrinsic::copysign:
+ return II->getArgOperand(0) == &I;
+ // Other proper FP math intrinsics ignore the sign bit of NaN.
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ case Intrinsic::maximum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximumnum:
+ case Intrinsic::minimumnum:
+ case Intrinsic::canonicalize:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::sqrt:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::fptoui_sat:
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::is_fpclass:
+ return true;
+ default:
+ return false;
+ }
+ }
- if (auto *FPOp = dyn_cast<FPMathOperator>(User))
- return FPOp->hasNoNaNs();
-
- return false;
+ FPClassTest NoFPClass = cast<CallBase>(User)->getParamNoFPClass(
+ I.uses().begin()->getOperandNo());
+ return NoFPClass & FPClassTest::fcNan;
+ }
+ default:
+ return false;
+ }
}
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index ab4376bf78a67..b889f6c2c028c 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -1329,12 +1329,9 @@ define float @test_fabs_fsub_used_by_fpop_nnan(float %x, float %y) {
ret float %add
}
-; TODO: fadd ignores the sign bit of NaN.
define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
; CHECK-LABEL: @test_fabs_used_by_fpop_nsz(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: [[ADD:%.*]] = fadd nsz float [[SEL]], [[Y:%.*]]
; CHECK-NEXT: ret float [[ADD]]
;
@@ -1345,13 +1342,9 @@ define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
ret float %add
}
-; TODO: copysign ignores the sign bit of NaN magnitude.
define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
; CHECK-LABEL: @test_fabs_used_by_fcopysign_mag(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X1:%.*]], 0.000000e+00
-; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X1]]
-; CHECK-NEXT: [[X:%.*]] = select i1 [[CMP]], float [[X1]], float [[NEG]]
-; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[Y:%.*]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]])
; CHECK-NEXT: ret float [[COPYSIGN]]
;
%cmp = fcmp oge float %x, 0.000000e+00
@@ -1361,6 +1354,94 @@ define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
ret float %copysign
}
+define float @test_fabs_nsz_used_by_canonicalize(float %x) {
+; CHECK-LABEL: @test_fabs_nsz_used_by_canonicalize(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[CANON:%.*]] = call float @llvm.canonicalize.f32(float [[SEL]])
+; CHECK-NEXT: ret float [[CANON]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ %canon = call float @llvm.canonicalize.f32(float %sel)
+ ret float %canon
+}
+
+define void @test_fabs_used_by_nofpclass_nan(float %x) {
+; CHECK-LABEL: @test_fabs_used_by_nofpclass_nan(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: call void @use(float nofpclass(nan) [[SEL]])
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ call void @use(float nofpclass(nan) %sel)
+ ret void
+}
+
+define i32 @test_fabs_used_fptosi(float %x) {
+; CHECK-LABEL: @test_fabs_used_fptosi(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[FPTOSI:%.*]] = fptosi float [[SEL]] to i32
+; CHECK-NEXT: ret i32 [[FPTOSI]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %fptosi = fptosi float %sel to i32
+ ret i32 %fptosi
+}
+
+define i32 @test_fabs_used_fptoui(float %x) {
+; CHECK-LABEL: @test_fabs_used_fptoui(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[FPTOSI:%.*]] = fptoui float [[SEL]] to i32
+; CHECK-NEXT: ret i32 [[FPTOSI]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %fptosi = fptoui float %sel to i32
+ ret i32 %fptosi
+}
+
+define float @test_fabs_nsz_used_by_maxnum(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_nsz_used_by_maxnum(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[Y:%.*]], float [[SEL]])
+; CHECK-NEXT: ret float [[MAX]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ %max = call float @llvm.maxnum.f32(float %y, float %sel)
+ ret float %max
+}
+
+define i1 @test_fabs_used_is_fpclass_pnorm_or_nan(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_pnorm_or_nan(
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 267)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 259)
+ ret i1 %is_fpclass
+}
+
+define i1 @test_fabs_used_is_fpclass_zero_or_pinf(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_zero_or_pinf(
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 612)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 608)
+ ret i1 %is_fpclass
+}
; Negative tests
@@ -1455,3 +1536,18 @@ define float @test_fabs_used_by_select(float %x, i1 %cond) {
%sel2 = select i1 %cond, float %sel, float 0.000000e+00
ret float %sel2
}
+
+define i1 @test_fabs_used_is_fpclass_pzero(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_pzero(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[SEL]], i32 64)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 64)
+ ret i1 %is_fpclass
+}
|
Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=5a3776af521b8ddc14a19d1954af64e2960d2397&to=bf96e741b99107ce14ef6e32751f73d98f8eb821&stat=instructions%3Au Note that stage2-O3 compile-time overhead is high (+0.07%). It is mainly caused by moving these two helpers into ValueTracking. |
This patch was originally part of #139861. It generalizes
ignoreSignBitOfZero/NaN
to handle more instructions/intrinsics.BTW, I find it mitigates performance regressions caused by #141010 (IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2365/files). We don't need to propagate FMF from fcmp into select, since we can infer demanded properties from the user of select.