Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-99108: add support for SIMD-accelerated HMAC-BLAKE2 #132120

Merged
merged 2 commits into from
Apr 7, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 75 additions & 4 deletions Modules/hmacmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,21 @@
#include "pycore_hashtable.h"
#include "pycore_strhex.h" // _Py_strhex()

/*
* Taken from blake2module.c. In the future, detection of SIMD support
* should be delegated to https://github.com/python/cpython/pull/125011.
*/
#if defined(__x86_64__) && defined(__GNUC__)
# include <cpuid.h>
#elif defined(_M_X64)
# include <intrin.h>
#endif

#if defined(__APPLE__) && defined(__arm64__)
# undef HACL_CAN_COMPILE_SIMD128
# undef HACL_CAN_COMPILE_SIMD256
#endif

// Small mismatch between the variable names Python defines as part of configure
// at the ones HACL* expects to be set in order to enable those headers.
#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
Expand Down Expand Up @@ -1667,17 +1682,73 @@ hmacmodule_init_strings(hmacmodule_state *state)
static void
hmacmodule_init_cpu_features(hmacmodule_state *state)
{
int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0;
int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
#if defined(__x86_64__) && defined(__GNUC__)
__cpuid_count(1, 0, eax1, ebx1, ecx1, edx1);
__cpuid_count(7, 0, eax7, ebx7, ecx7, edx7);
#elif defined(_M_X64)
int info1[4] = { 0 };
__cpuidex(info1, 1, 0);
eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3];

int info7[4] = { 0 };
__cpuidex(info7, 7, 0);
eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3];
#endif
// fmt: off
(void)eax1; (void)ebx1; (void)ecx1; (void)edx1;
(void)eax7; (void)ebx7; (void)ecx7; (void)edx7;
// fmt: on

#define EBX_AVX2 (1 << 5)
#define ECX_SSE3 (1 << 0)
#define ECX_SSSE3 (1 << 9)
#define ECX_SSE4_1 (1 << 19)
#define ECX_SSE4_2 (1 << 20)
#define ECX_AVX (1 << 28)
#define EDX_SSE (1 << 25)
#define EDX_SSE2 (1 << 26)
#define EDX_CMOV (1 << 15)

bool avx = (ecx1 & ECX_AVX) != 0;
bool avx2 = (ebx7 & EBX_AVX2) != 0;

bool sse = (edx1 & EDX_SSE) != 0;
bool sse2 = (edx1 & EDX_SSE2) != 0;
bool cmov = (edx1 & EDX_CMOV) != 0;

bool sse3 = (ecx1 & ECX_SSE3) != 0;
bool sse41 = (ecx1 & ECX_SSE4_1) != 0;
bool sse42 = (ecx1 & ECX_SSE4_2) != 0;

#undef EDX_CMOV
#undef EDX_SSE2
#undef EDX_SSE
#undef ECX_AVX
#undef ECX_SSE4_2
#undef ECX_SSE4_1
#undef ECX_SSSE3
#undef ECX_SSE3
#undef EBX_AVX2

#if HACL_CAN_COMPILE_SIMD128
// TODO: use py_cpuid_features (gh-125022) to deduce what we want
state->can_run_simd128 = false;
// TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov;
#else
// fmt: off
(void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov;
// fmt: on
state->can_run_simd128 = false;
#endif

#if HACL_CAN_COMPILE_SIMD256
// TODO: use py_cpuid_features (gh-125022) to deduce what we want
state->can_run_simd256 = false;
// TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
state->can_run_simd256 = state->can_run_simd128 && avx && avx2;
#else
// fmt: off
(void)avx; (void)avx2;
// fmt: on
state->can_run_simd256 = false;
#endif
}
Expand Down
Loading