From 8fa972b1ae83a21510d7c23f651c0ada2eb833e9 Mon Sep 17 00:00:00 2001 From: Malcolm Jestadt Date: Sun, 15 Dec 2024 13:05:28 -0500 Subject: [PATCH] SPU LLVM: Small FCGT optimization --- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 72f589192ecc..469d52d43c66 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -6234,14 +6234,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const value_t ab[2]{a, b}; std::bitset<2> safe_int_compare(0); - std::bitset<2> safe_nonzero_compare(0); + std::bitset<2> safe_finite_compare(0); for (u32 i = 0; i < 2; i++) { if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok) { safe_int_compare.set(i); - safe_nonzero_compare.set(i); + safe_finite_compare.set(i); for (u32 j = 0; j < 4; j++) { @@ -6256,7 +6256,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // we don't produce "extended range" values the same way as real hardware, it's not safe to apply // this optimization for values outside of the range of x86 floating point hardware. safe_int_compare.reset(i); - if (!exponent) safe_nonzero_compare.reset(i); + if ((value & 0x7fffffffu) >= 0x7f7ffffeu) safe_finite_compare.reset(i); } } } @@ -6267,17 +6267,20 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return eval(sext(bitcast(a) > bitcast(b))); } - const auto ai = eval(bitcast(a)); - const auto bi = eval(bitcast(b)); - - if (!safe_nonzero_compare.any()) + if (safe_finite_compare.test(1)) { - return eval(sext(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi))); + return eval(sext(fcmp_uno(clamp_negative_smax(a) > b))); } - else + + if (safe_finite_compare.test(0)) { - return eval(sext(select((ai & bi) >= 0, ai > bi, ai < bi))); + return eval(sext(fcmp_ord(a > clamp_smax(b)))); } + + const auto ai = eval(bitcast(a)); + const auto bi = eval(bitcast(b)); + + return eval(sext(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi))); }); set_vr(op.rt, fcgt(get_vr(op.ra), get_vr(op.rb)));