@@ -529,7 +529,11 @@ SIMSIMD_PUBLIC simsimd_capability_t _simsimd_capabilities_arm(void) {
529
529
// AdvSIMD, bits [23:20] of ID_AA64PFR0_EL1 can be used to check for `fp16` support
530
530
// - 0b0000: integers, single, double precision arithmetic
531
531
// - 0b0001: includes support for half-precision floating-point arithmetic
532
- unsigned supports_fp16 = ((id_aa64pfr0_el1 >> 20 ) & 0xF ) == 1 ;
532
+ // - 0b1111: Advanced SIMD (NEON) is not implemented
533
+ // That's a really weird way to encode lack of NEON support, but it's important to
534
+ // check in case we are running on R-profile CPUs.
535
+ unsigned supports_fp16 = ((id_aa64pfr0_el1 >> 20 ) & 0xF ) == 0x1 ;
536
+ unsigned supports_neon = ((id_aa64pfr0_el1 >> 20 ) & 0xF ) != 0xF ;
533
537
534
538
// Now let's unpack the status flags from ID_AA64ZFR0_EL1
535
539
// https://developer.arm.com/documentation/ddi0601/2024-03/AArch64-Registers/ID-AA64ZFR0-EL1--SVE-Feature-ID-Register-0?lang=en
@@ -545,7 +549,6 @@ SIMSIMD_PUBLIC simsimd_capability_t _simsimd_capabilities_arm(void) {
545
549
// This value must match the existing indicator obtained from ID_AA64PFR0_EL1:
546
550
unsigned supports_sve2 = ((id_aa64zfr0_el1 ) & 0xF ) >= 1 ;
547
551
unsigned supports_sve2p1 = ((id_aa64zfr0_el1 ) & 0xF ) >= 2 ;
548
- unsigned supports_neon = 1 ; // NEON is always supported
549
552
550
553
return (simsimd_capability_t )( //
551
554
(simsimd_cap_neon_k * (supports_neon )) | //
@@ -1575,7 +1578,7 @@ SIMSIMD_PUBLIC void simsimd_find_kernel_punned( //
1575
1578
*/
1576
1579
SIMSIMD_PUBLIC void simsimd_dot_i8 (simsimd_i8_t const * a , simsimd_i8_t const * b , simsimd_size_t n ,
1577
1580
simsimd_distance_t * d ) {
1578
- #if SIMSIMD_TARGET_NEON_F16
1581
+ #if SIMSIMD_TARGET_NEON_I8
1579
1582
simsimd_dot_i8_neon (a , b , n , d );
1580
1583
#elif SIMSIMD_TARGET_ICE
1581
1584
simsimd_dot_i8_ice (a , b , n , d );
@@ -1699,7 +1702,7 @@ SIMSIMD_PUBLIC void simsimd_vdot_f16c(simsimd_f16c_t const *a, simsimd_f16c_t co
1699
1702
simsimd_distance_t * d ) {
1700
1703
#if SIMSIMD_TARGET_SVE
1701
1704
simsimd_vdot_f16c_sve (a , b , n , d );
1702
- #elif SIMSIMD_TARGET_NEON
1705
+ #elif SIMSIMD_TARGET_NEON_F16
1703
1706
simsimd_dot_f16c_neon (a , b , n , d );
1704
1707
#elif SIMSIMD_TARGET_SAPPHIRE
1705
1708
simsimd_dot_f16c_sapphire (a , b , n , d );
@@ -1759,7 +1762,7 @@ SIMSIMD_PUBLIC void simsimd_vdot_f64c(simsimd_f64c_t const *a, simsimd_f64c_t co
1759
1762
*/
1760
1763
SIMSIMD_PUBLIC void simsimd_cos_i8 (simsimd_i8_t const * a , simsimd_i8_t const * b , simsimd_size_t n ,
1761
1764
simsimd_distance_t * d ) {
1762
- #if SIMSIMD_TARGET_NEON
1765
+ #if SIMSIMD_TARGET_NEON_I8
1763
1766
simsimd_cos_i8_neon (a , b , n , d );
1764
1767
#elif SIMSIMD_TARGET_ICE
1765
1768
simsimd_cos_i8_ice (a , b , n , d );
@@ -1771,7 +1774,7 @@ SIMSIMD_PUBLIC void simsimd_cos_i8(simsimd_i8_t const *a, simsimd_i8_t const *b,
1771
1774
}
1772
1775
SIMSIMD_PUBLIC void simsimd_cos_u8 (simsimd_u8_t const * a , simsimd_u8_t const * b , simsimd_size_t n ,
1773
1776
simsimd_distance_t * d ) {
1774
- #if SIMSIMD_TARGET_NEON
1777
+ #if SIMSIMD_TARGET_NEON_I8
1775
1778
simsimd_cos_u8_neon (a , b , n , d );
1776
1779
#elif SIMSIMD_TARGET_ICE
1777
1780
simsimd_cos_u8_ice (a , b , n , d );
@@ -1837,7 +1840,7 @@ SIMSIMD_PUBLIC void simsimd_cos_f64(simsimd_f64_t const *a, simsimd_f64_t const
1837
1840
}
1838
1841
SIMSIMD_PUBLIC void simsimd_l2sq_i8 (simsimd_i8_t const * a , simsimd_i8_t const * b , simsimd_size_t n ,
1839
1842
simsimd_distance_t * d ) {
1840
- #if SIMSIMD_TARGET_NEON
1843
+ #if SIMSIMD_TARGET_NEON_I8
1841
1844
simsimd_l2sq_i8_neon (a , b , n , d );
1842
1845
#elif SIMSIMD_TARGET_ICE
1843
1846
simsimd_l2sq_i8_ice (a , b , n , d );
@@ -1849,7 +1852,7 @@ SIMSIMD_PUBLIC void simsimd_l2sq_i8(simsimd_i8_t const *a, simsimd_i8_t const *b
1849
1852
}
1850
1853
SIMSIMD_PUBLIC void simsimd_l2sq_u8 (simsimd_u8_t const * a , simsimd_u8_t const * b , simsimd_size_t n ,
1851
1854
simsimd_distance_t * d ) {
1852
- #if SIMSIMD_TARGET_NEON
1855
+ #if SIMSIMD_TARGET_NEON_I8
1853
1856
simsimd_l2sq_u8_neon (a , b , n , d );
1854
1857
#elif SIMSIMD_TARGET_ICE
1855
1858
simsimd_l2sq_u8_ice (a , b , n , d );
@@ -1915,7 +1918,7 @@ SIMSIMD_PUBLIC void simsimd_l2sq_f64(simsimd_f64_t const *a, simsimd_f64_t const
1915
1918
}
1916
1919
SIMSIMD_PUBLIC void simsimd_l2_i8 (simsimd_i8_t const * a , simsimd_i8_t const * b , simsimd_size_t n ,
1917
1920
simsimd_distance_t * d ) {
1918
- #if SIMSIMD_TARGET_NEON
1921
+ #if SIMSIMD_TARGET_NEON_I8
1919
1922
simsimd_l2_i8_neon (a , b , n , d );
1920
1923
#elif SIMSIMD_TARGET_ICE
1921
1924
simsimd_l2_i8_ice (a , b , n , d );
@@ -1927,7 +1930,7 @@ SIMSIMD_PUBLIC void simsimd_l2_i8(simsimd_i8_t const *a, simsimd_i8_t const *b,
1927
1930
}
1928
1931
SIMSIMD_PUBLIC void simsimd_l2_u8 (simsimd_u8_t const * a , simsimd_u8_t const * b , simsimd_size_t n ,
1929
1932
simsimd_distance_t * d ) {
1930
- #if SIMSIMD_TARGET_NEON
1933
+ #if SIMSIMD_TARGET_NEON_I8
1931
1934
simsimd_l2_u8_neon (a , b , n , d );
1932
1935
#elif SIMSIMD_TARGET_ICE
1933
1936
simsimd_l2_u8_ice (a , b , n , d );
@@ -2050,7 +2053,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8(simsimd_b8_t const *a, simsimd_b8_t const
2050
2053
*/
2051
2054
SIMSIMD_PUBLIC void simsimd_kl_f16 (simsimd_f16_t const * a , simsimd_f16_t const * b , simsimd_size_t n ,
2052
2055
simsimd_distance_t * d ) {
2053
- #if SIMSIMD_TARGET_NEON
2056
+ #if SIMSIMD_TARGET_NEON_F16
2054
2057
simsimd_kl_f16_neon (a , b , n , d );
2055
2058
#elif SIMSIMD_TARGET_HASWELL
2056
2059
simsimd_kl_f16_haswell (a , b , n , d );
@@ -2078,7 +2081,7 @@ SIMSIMD_PUBLIC void simsimd_kl_f64(simsimd_f64_t const *a, simsimd_f64_t const *
2078
2081
}
2079
2082
SIMSIMD_PUBLIC void simsimd_js_f16 (simsimd_f16_t const * a , simsimd_f16_t const * b , simsimd_size_t n ,
2080
2083
simsimd_distance_t * d ) {
2081
- #if SIMSIMD_TARGET_NEON
2084
+ #if SIMSIMD_TARGET_NEON_F16
2082
2085
simsimd_js_f16_neon (a , b , n , d );
2083
2086
#elif SIMSIMD_TARGET_HASWELL
2084
2087
simsimd_js_f16_haswell (a , b , n , d );
@@ -2209,7 +2212,7 @@ SIMSIMD_PUBLIC void simsimd_bilinear_f16(simsimd_f16_t const *a, simsimd_f16_t c
2209
2212
simsimd_bilinear_f16_sapphire (a , b , c , n , d );
2210
2213
#elif SIMSIMD_TARGET_HASWELL
2211
2214
simsimd_bilinear_f16_haswell (a , b , c , n , d );
2212
- #elif SIMSIMD_TARGET_NEON
2215
+ #elif SIMSIMD_TARGET_NEON_F16
2213
2216
simsimd_bilinear_f16_neon (a , b , c , n , d );
2214
2217
#else
2215
2218
simsimd_bilinear_f16_serial (a , b , c , n , d );
@@ -2221,7 +2224,7 @@ SIMSIMD_PUBLIC void simsimd_bilinear_bf16(simsimd_bf16_t const *a, simsimd_bf16_
2221
2224
simsimd_bilinear_bf16_genoa (a , b , c , n , d );
2222
2225
#elif SIMSIMD_TARGET_HASWELL
2223
2226
simsimd_bilinear_bf16_haswell (a , b , c , n , d );
2224
- #elif SIMSIMD_TARGET_NEON
2227
+ #elif SIMSIMD_TARGET_NEON_BF16
2225
2228
simsimd_bilinear_bf16_neon (a , b , c , n , d );
2226
2229
#else
2227
2230
simsimd_bilinear_bf16_serial (a , b , c , n , d );
@@ -2249,7 +2252,7 @@ SIMSIMD_PUBLIC void simsimd_bilinear_f16c(simsimd_f16c_t const *a, simsimd_f16c_
2249
2252
simsimd_size_t n , simsimd_distance_t * d ) {
2250
2253
#if SIMSIMD_TARGET_SAPPHIRE
2251
2254
simsimd_bilinear_f16c_sapphire (a , b , c , n , d );
2252
- #elif SIMSIMD_TARGET_NEON
2255
+ #elif SIMSIMD_TARGET_NEON_F16
2253
2256
simsimd_bilinear_f16c_neon (a , b , c , n , d );
2254
2257
#else
2255
2258
simsimd_bilinear_f16c_serial (a , b , c , n , d );
@@ -2259,7 +2262,7 @@ SIMSIMD_PUBLIC void simsimd_bilinear_bf16c(simsimd_bf16c_t const *a, simsimd_bf1
2259
2262
simsimd_size_t n , simsimd_distance_t * d ) {
2260
2263
#if SIMSIMD_TARGET_GENOA
2261
2264
simsimd_bilinear_bf16c_genoa (a , b , c , n , d );
2262
- #elif SIMSIMD_TARGET_NEON
2265
+ #elif SIMSIMD_TARGET_NEON_BF16
2263
2266
simsimd_bilinear_bf16c_neon (a , b , c , n , d );
2264
2267
#else
2265
2268
simsimd_bilinear_bf16c_serial (a , b , c , n , d );
@@ -2289,7 +2292,7 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f16(simsimd_f16_t const *a, simsimd_f16_
2289
2292
simsimd_mahalanobis_f16_sapphire (a , b , c , n , d );
2290
2293
#elif SIMSIMD_TARGET_HASWELL
2291
2294
simsimd_mahalanobis_f16_haswell (a , b , c , n , d );
2292
- #elif SIMSIMD_TARGET_NEON
2295
+ #elif SIMSIMD_TARGET_NEON_F16
2293
2296
simsimd_mahalanobis_f16_neon (a , b , c , n , d );
2294
2297
#else
2295
2298
simsimd_mahalanobis_f16_serial (a , b , c , n , d );
@@ -2301,7 +2304,7 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16(simsimd_bf16_t const *a, simsimd_bf
2301
2304
simsimd_mahalanobis_bf16_genoa (a , b , c , n , d );
2302
2305
#elif SIMSIMD_TARGET_HASWELL
2303
2306
simsimd_mahalanobis_bf16_haswell (a , b , c , n , d );
2304
- #elif SIMSIMD_TARGET_NEON
2307
+ #elif SIMSIMD_TARGET_NEON_BF16
2305
2308
simsimd_mahalanobis_bf16_neon (a , b , c , n , d );
2306
2309
#else
2307
2310
simsimd_mahalanobis_bf16_serial (a , b , c , n , d );
@@ -2348,7 +2351,7 @@ SIMSIMD_PUBLIC void simsimd_wsum_bf16(simsimd_bf16_t const *a, simsimd_bf16_t co
2348
2351
simsimd_wsum_bf16_skylake (a , b , n , alpha , beta , r );
2349
2352
#elif SIMSIMD_TARGET_HASWELL
2350
2353
simsimd_wsum_bf16_haswell (a , b , n , alpha , beta , r );
2351
- #elif SIMSIMD_TARGET_NEON
2354
+ #elif SIMSIMD_TARGET_NEON_BF16
2352
2355
simsimd_wsum_bf16_neon (a , b , n , alpha , beta , r );
2353
2356
#else
2354
2357
simsimd_wsum_bf16_serial (a , b , n , alpha , beta , r );
@@ -2427,7 +2430,7 @@ SIMSIMD_PUBLIC void simsimd_fma_bf16(simsimd_bf16_t const *a, simsimd_bf16_t con
2427
2430
simsimd_fma_bf16_skylake (a , b , c , n , alpha , beta , r );
2428
2431
#elif SIMSIMD_TARGET_HASWELL
2429
2432
simsimd_fma_bf16_haswell (a , b , c , n , alpha , beta , r );
2430
- #elif SIMSIMD_TARGET_NEON
2433
+ #elif SIMSIMD_TARGET_NEON_BF16
2431
2434
simsimd_fma_bf16_neon (a , b , c , n , alpha , beta , r );
2432
2435
#else
2433
2436
simsimd_fma_bf16_serial (a , b , c , n , alpha , beta , r );
0 commit comments