Skip to content

SPU LLVM: Small FCGT optimization #16740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on Feb 22, 2025 (source and target branch names were not captured)
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6234,14 +6234,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const value_t<f32[4]> ab[2]{a, b};

std::bitset<2> safe_int_compare(0);
std::bitset<2> safe_nonzero_compare(0);
std::bitset<2> safe_finite_compare(0);

for (u32 i = 0; i < 2; i++)
{
if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok)
{
safe_int_compare.set(i);
safe_nonzero_compare.set(i);
safe_finite_compare.set(i);

for (u32 j = 0; j < 4; j++)
{
Expand All @@ -6256,7 +6256,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// we don't produce "extended range" values the same way as real hardware, it's not safe to apply
// this optimization for values outside of the range of x86 floating point hardware.
safe_int_compare.reset(i);
if (!exponent) safe_nonzero_compare.reset(i);
if ((value & 0x7fffffffu) >= 0x7f7ffffeu) safe_finite_compare.reset(i);
}
}
}
Expand All @@ -6267,17 +6267,20 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}

const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));

if (!safe_nonzero_compare.any())
if (safe_finite_compare.test(1))
{
return eval(sext<s32[4]>(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi)));
return eval(sext<s32[4]>(fcmp_uno(clamp_negative_smax(a) > b)));
}
else

if (safe_finite_compare.test(0))
{
return eval(sext<s32[4]>(select((ai & bi) >= 0, ai > bi, ai < bi)));
return eval(sext<s32[4]>(fcmp_ord(a > clamp_smax(b))));
}

const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));

return eval(sext<s32[4]>(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi)));
});

set_vr(op.rt, fcgt(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
Expand Down