diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
index 800e771892109..598490fae1a6c 100644
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
@@ -89,6 +89,10 @@ define_pd_global(intx, InlineSmallCode, 1000);
           "Use CRC32 instructions for CRC32 computation")              \
   product(bool, UseCryptoPmullForCRC32, false,                         \
           "Use Crypto PMULL instructions for CRC32 computation")       \
+  product(uint, CryptoPmullForCRC32LowLimit, 256, DIAGNOSTIC,          \
+          "Minimum size in bytes when Crypto PMULL will be used. "     \
+          "Value must be a multiple of 128.")                          \
+          range(256, max_juint)                                        \
   product(bool, UseSIMDForMemoryOps, false,                            \
           "Use SIMD instructions in generated memory move code")       \
   product(bool, UseSIMDForArrayEquals, true,                           \
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index a277a68928040..f40ce4e1bfbe0 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -4331,8 +4331,9 @@ void MacroAssembler::kernel_crc32_using_crypto_pmull(Register crc, Register buf,
         Register len, Register tmp0, Register tmp1, Register tmp2, Register tmp3) {
   Label CRC_by4_loop, CRC_by1_loop, CRC_less128, CRC_by128_pre, CRC_by32_loop, CRC_less32, L_exit;
   assert_different_registers(crc, buf, len, tmp0, tmp1, tmp2);
+  assert(CryptoPmullForCRC32LowLimit >= 256, "must be for CRC_by128_loop");
 
-  subs(tmp0, len, 384);
+  subs(tmp0, len, CryptoPmullForCRC32LowLimit);
   mvnw(crc, crc);
   br(Assembler::GE, CRC_by128_pre);
   BIND(CRC_less128);
@@ -4346,13 +4347,13 @@ void MacroAssembler::kernel_crc32_using_crypto_pmull(Register crc, Register buf,
     b(L_exit);
 
   BIND(CRC_by32_loop);
-    ldp(tmp0, tmp1, Address(buf));
+    ldp(tmp0, tmp1, Address(post(buf, 16)));
+    subs(len, len, 32);
     crc32x(crc, crc, tmp0);
-    ldp(tmp2, tmp3, Address(buf, 16));
+    ldr(tmp2, Address(post(buf, 8)));
     crc32x(crc, crc, tmp1);
-    add(buf, buf, 32);
+    ldr(tmp3, Address(post(buf, 8)));
     crc32x(crc, crc, tmp2);
-    subs(len, len, 32);
     crc32x(crc, crc, tmp3);
     br(Assembler::GE, CRC_by32_loop);
     cmn(len, (u1)32);
@@ -4696,8 +4697,9 @@ void MacroAssembler::kernel_crc32c_using_crypto_pmull(Register crc, Register buf,
         Register len, Register tmp0, Register tmp1, Register tmp2, Register tmp3) {
   Label CRC_by4_loop, CRC_by1_loop, CRC_less128, CRC_by128_pre, CRC_by32_loop, CRC_less32, L_exit;
   assert_different_registers(crc, buf, len, tmp0, tmp1, tmp2);
+  assert(CryptoPmullForCRC32LowLimit >= 256, "must be for CRC_by128_loop");
 
-  subs(tmp0, len, 384);
+  subs(tmp0, len, CryptoPmullForCRC32LowLimit);
   br(Assembler::GE, CRC_by128_pre);
   BIND(CRC_less128);
   subs(len, len, 32);
@@ -4710,14 +4712,13 @@ void MacroAssembler::kernel_crc32c_using_crypto_pmull(Register crc, Register buf,
     b(L_exit);
 
   BIND(CRC_by32_loop);
-    ldp(tmp0, tmp1, Address(buf));
+    ldp(tmp0, tmp1, Address(post(buf, 16)));
+    subs(len, len, 32);
     crc32cx(crc, crc, tmp0);
-    ldr(tmp2, Address(buf, 16));
+    ldr(tmp2, Address(post(buf, 8)));
     crc32cx(crc, crc, tmp1);
-    ldr(tmp3, Address(buf, 24));
+    ldr(tmp3, Address(post(buf, 8)));
     crc32cx(crc, crc, tmp2);
-    add(buf, buf, 32);
-    subs(len, len, 32);
     crc32cx(crc, crc, tmp3);
     br(Assembler::GE, CRC_by32_loop);
     cmn(len, (u1)32);
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
index 6ed7a6be58552..896eecde94d7f 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@@ -120,6 +120,11 @@ void VM_Version::initialize() {
     ContendedPaddingWidth = dcache_line;
   }
 
+  if (!(is_aligned(CryptoPmullForCRC32LowLimit, 128))) {
+    warning("CryptoPmullForCRC32LowLimit must be a multiple of 128");
+    CryptoPmullForCRC32LowLimit = align_down(CryptoPmullForCRC32LowLimit, 128);
+  }
+
   if (os::supports_map_sync()) {
     // if dcpop is available publish data cache line flush size via
     // generic field, otherwise let if default to zero thereby
@@ -148,6 +153,9 @@ void VM_Version::initialize() {
   if (_cpu == CPU_AMPERE && ((_model == CPU_MODEL_AMPERE_1) ||
                              (_model == CPU_MODEL_AMPERE_1A) ||
                              (_model == CPU_MODEL_AMPERE_1B))) {
+    if (FLAG_IS_DEFAULT(UseCryptoPmullForCRC32)) {
+      FLAG_SET_DEFAULT(UseCryptoPmullForCRC32, true);
+    }
     if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) {
       FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true);
     }
@@ -265,6 +273,9 @@ void VM_Version::initialize() {
     if (FLAG_IS_DEFAULT(UseCryptoPmullForCRC32)) {
       FLAG_SET_DEFAULT(UseCryptoPmullForCRC32, true);
    }
+    if (FLAG_IS_DEFAULT(CryptoPmullForCRC32LowLimit)) {
+      FLAG_SET_DEFAULT(CryptoPmullForCRC32LowLimit, 384);
+    }
     if (FLAG_IS_DEFAULT(CodeEntryAlignment)) {
       FLAG_SET_DEFAULT(CodeEntryAlignment, 32);
     }
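
Note (illustration only, not part of the patch): the CRC_by32_loop rewrite folds the pointer bump into post-indexed loads and issues the subs right after the first load, so each crc32x/crc32cx can start as soon as its operand arrives and the loop-back branch no longer waits on a trailing add/subs pair. The new diagnostic flag only moves the cutover point between that scalar loop and the PMULL-based by-128 kernel: inputs shorter than CryptoPmullForCRC32LowLimit keep taking the scalar path.

A minimal Java harness that exercises the intrinsified java.util.zip.CRC32 path under the defaults above; the class name and buffer sizes are arbitrary choices for illustration:

import java.util.zip.CRC32;

public class CrcPmullDemo {
    public static void main(String[] args) {
        byte[] small = new byte[128];   // below the 256-byte low limit: scalar crc32x loops
        byte[] large = new byte[4096];  // at/above the limit: eligible for the PMULL by-128 kernel
        CRC32 crc = new CRC32();
        crc.update(small, 0, small.length);
        crc.update(large, 0, large.length);
        System.out.printf("crc=%08x%n", crc.getValue());
    }
}

Because the flag is DIAGNOSTIC, setting it explicitly requires unlocking diagnostic options, e.g.:

    java -XX:+UnlockDiagnosticVMOptions -XX:+UseCryptoPmullForCRC32 \
         -XX:CryptoPmullForCRC32LowLimit=384 CrcPmullDemo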