Skip to content

8353686: Optimize Math.cbrt for x86 64 bit platforms #24470

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2949,6 +2949,16 @@ void Assembler::mov(Register dst, Register src) {
LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// Emit MOVAPD dst, m128 (encoding 66 0F 28 /r): load 128 bits of aligned
// packed double-precision floating-point data from memory into an XMM register.
void Assembler::movapd(XMMRegister dst, Address src) {
// MOVAPD is an SSE2 instruction; on 32-bit builds, assert SSE2 support.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
// 128-bit operation; rex_w is set only when EVEX is available, and the
// W bit is then reverted below — presumably to select the correct
// 64-bit-element EVEX form, mirroring the reg-reg movapd; TODO confirm.
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// 0x66 mandatory prefix + 0F opcode map, then opcode byte 0x28.
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x28);
// Memory operand; trailing 0 = no immediate bytes follow the operand
// (per the emit_operand convention — verify against its declaration).
emit_operand(dst, src, 0);
}

void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
Expand Down Expand Up @@ -8290,6 +8300,14 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
emit_operand(dst, src, 0);
}

// Emit ORPD dst, src (encoding 66 0F 56 /r): bitwise logical OR of packed
// double-precision floating-point values, register-to-register form.
// Note: destructive two-operand form — dst is both source and destination.
void Assembler::orpd(XMMRegister dst, XMMRegister src) {
// ORPD is an SSE2 instruction; on 32-bit builds, assert SSE2 support.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// rex_w / legacy_mode are driven by _legacy_mode_dq — presumably because the
// EVEX form of ORPD requires AVX-512DQ; falls back to legacy encoding
// otherwise. TODO confirm against the EVEX feature tables.
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// dst is passed as both destination and first source (two-operand semantics).
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
// Opcode 0x56 followed by the ModRM byte; 0xC0 selects register-direct mode.
emit_int16(0x56, (0xC0 | encode));
}

void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,7 @@ class Assembler : public AbstractAssembler {
// New cpus require use of movaps and movapd to avoid partial register stall
// when moving between registers.
void movaps(XMMRegister dst, XMMRegister src);
void movapd(XMMRegister dst, Address src);
void movapd(XMMRegister dst, XMMRegister src);

// End avoid using directly
Expand Down Expand Up @@ -2673,6 +2674,9 @@ class Assembler : public AbstractAssembler {
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Bitwise Logical OR of Packed Floating-Point Values
void orpd(XMMRegister dst, XMMRegister src);

void unpckhpd(XMMRegister dst, XMMRegister src);
void unpcklpd(XMMRegister dst, XMMRegister src);

Expand Down
16 changes: 11 additions & 5 deletions src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,7 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
x->id() == vmIntrinsics::_dlog10
#ifdef _LP64
|| x->id() == vmIntrinsics::_dtanh
|| x->id() == vmIntrinsics::_dtanh || x->id() == vmIntrinsics::_dcbrt
#endif
) {
do_LibmIntrinsic(x);
Expand Down Expand Up @@ -888,7 +888,7 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
}
break;
case vmIntrinsics::_dpow:
if (StubRoutines::dpow() != nullptr) {
if (StubRoutines::dpow() != nullptr) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
Expand All @@ -909,18 +909,24 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != nullptr) {
if (StubRoutines::dtan() != nullptr) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtanh:
assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found");
if (StubRoutines::dtanh() != nullptr) {
assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found");
if (StubRoutines::dtanh() != nullptr) {
__ call_runtime_leaf(StubRoutines::dtanh(), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dcbrt:
assert(StubRoutines::dcbrt() != nullptr, "cbrt intrinsic not found");
if (StubRoutines::dcbrt() != nullptr) {
__ call_runtime_leaf(StubRoutines::dcbrt(), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}

Expand Down
10 changes: 10 additions & 0 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2920,6 +2920,16 @@ void MacroAssembler::evmovdqaq(XMMRegister dst, AddressLiteral src, int vector_l
}
}

// Load 128 aligned bits of packed double-precision data from an
// AddressLiteral into dst via MOVAPD. When the literal is not directly
// reachable as an Address, its address is first materialized in rscratch,
// so callers must supply a scratch register in that case (asserted below).
void MacroAssembler::movapd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");

if (!reachable(src)) {
// Target too far for a direct reference: form its address in the scratch
// register and load through it with zero displacement.
lea(rscratch, src);
Assembler::movapd(dst, Address(rscratch, 0));
} else {
Assembler::movapd(dst, as_Address(src));
}
}

void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
Expand Down
13 changes: 12 additions & 1 deletion src/hotspot/cpu/x86/macroAssembler_x86.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -1102,6 +1102,8 @@ class MacroAssembler: public Assembler {
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
void andpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

void andnpd(XMMRegister dst, XMMRegister src) { Assembler::andnpd(dst, src); }

void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
Expand Down Expand Up @@ -1135,6 +1137,8 @@ class MacroAssembler: public Assembler {
void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
#endif // !_LP64

void orpd(XMMRegister dst, XMMRegister src) { Assembler::orpd(dst, src); }

void cmp32_mxcsr_std(Address mxcsr_save, Register tmp, Register rscratch = noreg);
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
void ldmxcsr(AddressLiteral src, Register rscratch = noreg);
Expand Down Expand Up @@ -1424,6 +1428,10 @@ class MacroAssembler: public Assembler {
void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
void evmovdqaq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);

void movapd(XMMRegister dst, XMMRegister src) { Assembler::movapd(dst, src); }
void movapd(XMMRegister dst, Address src) { Assembler::movapd(dst, src); }
void movapd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

// Move Aligned Double Quadword
void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
Expand Down Expand Up @@ -1493,6 +1501,9 @@ class MacroAssembler: public Assembler {
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);

void unpckhpd(XMMRegister dst, XMMRegister src) { Assembler::unpckhpd(dst, src); }
void unpcklpd(XMMRegister dst, XMMRegister src) { Assembler::unpcklpd(dst, src); }

// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3692,6 +3692,9 @@ void StubGenerator::generate_libm_stubs() {
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtanh)) {
StubRoutines::_dtanh = generate_libmTanh(); // from stubGenerator_x86_64_tanh.cpp
}
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcbrt)) {
StubRoutines::_dcbrt = generate_libmCbrt(); // from stubGenerator_x86_64_cbrt.cpp
}
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
StubRoutines::_dexp = generate_libmExp(); // from stubGenerator_x86_64_exp.cpp
}
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ class StubGenerator: public StubCodeGenerator {
address generate_libmCos();
address generate_libmTan();
address generate_libmTanh();
address generate_libmCbrt();
address generate_libmExp();
address generate_libmPow();
address generate_libmLog();
Expand Down
Loading