Skip to content

Commit 03f1afb

Browse files
committed
Revert "Improve: Call target directly from unsafe_ifunc macro whenever possible."
This reverts commit da7bef6.
1 parent da7bef6 commit 03f1afb

File tree

2 files changed

+57
-81
lines changed

2 files changed

+57
-81
lines changed

src/arch/x86_64/memchr.rs

Lines changed: 53 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,12 @@ macro_rules! unsafe_ifunc {
6767
) => {{
6868
#![allow(unused_unsafe)]
6969

70-
// Define reusable functions for AVX2, SSE2, and fallback.
70+
use core::sync::atomic::{AtomicPtr, Ordering};
71+
72+
type Fn = *mut ();
73+
type RealFn = $fnty;
74+
static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);
75+
7176
#[cfg(target_feature = "sse2")]
7277
#[target_feature(enable = "sse2", enable = "avx2")]
7378
unsafe fn find_avx2(
@@ -98,87 +103,58 @@ macro_rules! unsafe_ifunc {
98103
$hay_end: *const u8,
99104
) -> $retty {
100105
use crate::arch::all::memchr::$memchrty;
101-
$memchrty::new($($needle),+)
102-
.$memchrfind($hay_start, $hay_end)
103-
}
104-
105-
// If `std` is disabled and CPU features are known at compile-time,
106-
// directly call the appropriate implementation without function pointer indirection.
107-
#[cfg(all(not(feature = "std"), target_feature = "avx2"))]
108-
unsafe {
109-
find_avx2($($needle),+, $hay_start, $hay_end)
106+
$memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
110107
}
111108

112-
#[cfg(all(not(feature = "std"), target_feature = "sse2", not(target_feature = "avx2")))]
113-
unsafe {
114-
find_sse2($($needle),+, $hay_start, $hay_end)
115-
}
116-
117-
#[cfg(all(not(feature = "std"), not(target_feature = "sse2")))]
118-
unsafe {
119-
find_fallback($($needle),+, $hay_start, $hay_end)
120-
}
121-
122-
// For runtime detection when `std` is enabled or features are not known at compile-time.
123-
#[cfg(any(feature = "std", not(any(target_feature = "avx2", target_feature = "sse2"))))]
124-
{
125-
use core::sync::atomic::{AtomicPtr, Ordering};
126-
127-
type Fn = *mut ();
128-
type RealFn = $fnty;
129-
static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);
130-
131-
// Function to perform runtime detection of CPU features and choose the appropriate implementation.
132-
unsafe fn detect(
133-
$($needle: u8),+,
134-
$hay_start: *const u8,
135-
$hay_end: *const u8,
136-
) -> $retty {
137-
let fun = {
138-
#[cfg(not(target_feature = "sse2"))]
139-
{
140-
debug!(
141-
"no sse2 feature available, using fallback for {}",
142-
stringify!($memchrty),
143-
);
109+
unsafe fn detect(
110+
$($needle: u8),+,
111+
$hay_start: *const u8,
112+
$hay_end: *const u8,
113+
) -> $retty {
114+
let fun = {
115+
#[cfg(not(target_feature = "sse2"))]
116+
{
117+
debug!(
118+
"no sse2 feature available, using fallback for {}",
119+
stringify!($memchrty),
120+
);
121+
find_fallback as RealFn
122+
}
123+
#[cfg(target_feature = "sse2")]
124+
{
125+
use crate::arch::x86_64::{sse2, avx2};
126+
if avx2::memchr::$memchrty::is_available() {
127+
debug!("chose AVX2 for {}", stringify!($memchrty));
128+
find_avx2 as RealFn
129+
} else if sse2::memchr::$memchrty::is_available() {
130+
debug!("chose SSE2 for {}", stringify!($memchrty));
131+
find_sse2 as RealFn
132+
} else {
133+
debug!("chose fallback for {}", stringify!($memchrty));
144134
find_fallback as RealFn
145135
}
146-
#[cfg(target_feature = "sse2")]
147-
{
148-
use crate::arch::x86_64::{sse2, avx2};
149-
if avx2::memchr::$memchrty::is_available() {
150-
debug!("chose AVX2 for {}", stringify!($memchrty));
151-
find_avx2 as RealFn
152-
} else if sse2::memchr::$memchrty::is_available() {
153-
debug!("chose SSE2 for {}", stringify!($memchrty));
154-
find_sse2 as RealFn
155-
} else {
156-
debug!("chose fallback for {}", stringify!($memchrty));
157-
find_fallback as RealFn
158-
}
159-
}
160-
};
161-
FN.store(fun as Fn, Ordering::Relaxed);
162-
// SAFETY: The only thing we need to uphold here is the
163-
// `#[target_feature]` requirements. Since we check is_available
164-
// above before using the corresponding implementation, we are
165-
// guaranteed to only call code that is supported on the current
166-
// CPU.
167-
fun($($needle),+, $hay_start, $hay_end)
168-
}
136+
}
137+
};
138+
FN.store(fun as Fn, Ordering::Relaxed);
139+
// SAFETY: The only thing we need to uphold here is the
140+
// `#[target_feature]` requirements. Since we check is_available
141+
// above before using the corresponding implementation, we are
142+
// guaranteed to only call code that is supported on the current
143+
// CPU.
144+
fun($($needle),+, $hay_start, $hay_end)
145+
}
169146

170-
// SAFETY: By virtue of the caller contract, RealFn is a function
171-
// pointer, which is always safe to transmute with a *mut (). Also,
172-
// since we use $memchrty::is_available, it is guaranteed to be safe
173-
// to call $memchrty::$memchrfind.
174-
unsafe {
175-
let fun = FN.load(Ordering::Relaxed);
176-
core::mem::transmute::<Fn, RealFn>(fun)(
177-
$($needle),+,
178-
$hay_start,
179-
$hay_end
180-
)
181-
}
147+
// SAFETY: By virtue of the caller contract, RealFn is a function
148+
// pointer, which is always safe to transmute with a *mut (). Also,
149+
// since we use $memchrty::is_available, it is guaranteed to be safe
150+
// to call $memchrty::$memchrfind.
151+
unsafe {
152+
let fun = FN.load(Ordering::Relaxed);
153+
core::mem::transmute::<Fn, RealFn>(fun)(
154+
$($needle),+,
155+
$hay_start,
156+
$hay_end,
157+
)
182158
}
183159
}};
184160
}

src/memchr.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -508,10 +508,10 @@ unsafe fn memchr_raw(
508508
) -> Option<*const u8> {
509509
#[cfg(target_arch = "x86_64")]
510510
{
511-
// x86_64 does CPU feature detection at runtime (when std feature enabled)
512-
// in order to use AVX2 instructions even when the `avx2` feature isn't
513-
// enabled at compile time. This function also handles using a fallback if
514-
// neither AVX2 nor SSE2 (unusual) are available.
511+
// x86_64 does CPU feature detection at runtime in order to use AVX2
512+
// instructions even when the `avx2` feature isn't enabled at compile
513+
// time. This function also handles using a fallback if neither AVX2
514+
// nor SSE2 (unusual) is available.
515515
crate::arch::x86_64::memchr::memchr_raw(needle, start, end)
516516
}
517517
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]

0 commit comments

Comments
 (0)