Skip to content

Commit c369ea4

Browse files
committed
str various refactor and perf
1 parent 9382058 commit c369ea4

21 files changed

+668
-514
lines changed

Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ pyo3-ffi = { version = "^0.22", default-features = false, features = ["extension
7171
ryu = { version = "1", default-features = false }
7272
serde = { version = "1", default-features = false }
7373
serde_json = { version = "1", default-features = false, features = ["std", "float_roundtrip"] }
74-
simdutf8 = { version = "0.1", default-features = false, features = ["std", "aarch64_neon"] }
74+
simdutf8 = { version = "0.1", default-features = false, features = ["std", "public_imp", "aarch64_neon"] }
7575
smallvec = { version = "^1.11", default-features = false, features = ["union", "write"] }
7676
unwinding = { version = "0.2", features = ["unwinder"], optional = true }
7777
xxhash-rust = { version = "^0.8", default-features = false, features = ["xxh3"] }

build.rs

-4
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,6 @@ fn main() {
4141
if env::var("ORJSON_DISABLE_SIMD").is_err() {
4242
if let Some(true) = version_check::supports_feature("portable_simd") {
4343
println!("cargo:rustc-cfg=feature=\"unstable-simd\"");
44-
#[cfg(all(target_arch = "x86_64", target_feature = "avx512vl"))]
45-
if env::var("ORJSON_DISABLE_AVX512").is_err() {
46-
println!("cargo:rustc-cfg=feature=\"avx512\"");
47-
}
4844
}
4945
}
5046

src/deserialize/utf8.rs

+4-4
Original file line number | Diff line number | Diff line change
@@ -8,16 +8,16 @@ use crate::util::INVALID_STR;
88
use core::ffi::c_char;
99
use std::borrow::Cow;
1010

11-
#[cfg(all(target_arch = "x86_64", not(target_feature = "sse4.2")))]
11+
#[cfg(all(target_arch = "x86_64", not(target_feature = "avx2")))]
1212
fn is_valid_utf8(buf: &[u8]) -> bool {
13-
if std::is_x86_feature_detected!("sse4.2") {
14-
simdutf8::basic::from_utf8(buf).is_ok()
13+
if std::is_x86_feature_detected!("avx2") {
14+
unsafe { simdutf8::basic::imp::x86::avx2::validate_utf8(buf).is_ok() }
1515
} else {
1616
encoding_rs::Encoding::utf8_valid_up_to(buf) == buf.len()
1717
}
1818
}
1919

20-
#[cfg(all(target_arch = "x86_64", target_feature = "sse4.2"))]
20+
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
2121
fn is_valid_utf8(buf: &[u8]) -> bool {
2222
simdutf8::basic::from_utf8(buf).is_ok()
2323
}

src/lib.rs

+2-8
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,7 @@
11
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
22

3-
#![cfg_attr(
4-
all(target_arch = "x86_64", feature = "avx512"),
5-
feature(avx512_target_feature)
6-
)]
7-
#![cfg_attr(
8-
all(target_arch = "x86_64", feature = "avx512"),
9-
feature(stdarch_x86_avx512)
10-
)]
3+
#![cfg_attr(feature = "avx512", feature(avx512_target_feature))]
4+
#![cfg_attr(feature = "avx512", feature(stdarch_x86_avx512))]
115
#![cfg_attr(feature = "intrinsics", feature(core_intrinsics))]
126
#![cfg_attr(feature = "optimize", feature(optimize_attribute))]
137
#![cfg_attr(feature = "strict_provenance", feature(strict_provenance))]

src/serialize/per_type/unicode.rs

+9-6
Original file line number | Diff line number | Diff line change
@@ -17,16 +17,19 @@ impl StrSerializer {
1717
}
1818

1919
impl Serialize for StrSerializer {
20-
#[inline]
20+
#[inline(always)]
2121
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2222
where
2323
S: Serializer,
2424
{
25-
let uni = unicode_to_str(self.ptr);
26-
if unlikely!(uni.is_none()) {
27-
err!(SerializeError::InvalidStr)
28-
}
29-
serializer.serialize_str(uni.unwrap())
25+
let uni = {
26+
let tmp = unicode_to_str(self.ptr);
27+
if unlikely!(tmp.is_none()) {
28+
err!(SerializeError::InvalidStr)
29+
};
30+
tmp.unwrap()
31+
};
32+
serializer.serialize_str(uni)
3033
}
3134
}
3235

src/serialize/writer/byteswriter.rs

+10
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,11 @@ pub trait WriteExt: std::io::Write {
113113
let _ = len;
114114
}
115115

116+
#[inline]
117+
fn has_capacity(&mut self, _len: usize) -> bool {
118+
false
119+
}
120+
116121
#[inline]
117122
fn set_written(&mut self, len: usize) {
118123
let _ = len;
@@ -157,6 +162,11 @@ impl WriteExt for &mut BytesWriter {
157162
}
158163
}
159164

165+
#[inline]
166+
fn has_capacity(&mut self, len: usize) -> bool {
167+
return self.len + len <= self.cap;
168+
}
169+
160170
#[inline(always)]
161171
fn set_written(&mut self, len: usize) {
162172
self.len += len;

src/serialize/writer/formatter.rs

+11-5
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,12 @@
44
use crate::serialize::writer::WriteExt;
55
use std::io;
66

7+
macro_rules! debug_assert_has_capacity {
8+
($writer:expr) => {
9+
debug_assert!($writer.has_capacity(4))
10+
};
11+
}
12+
713
pub trait Formatter {
814
#[inline]
915
fn write_null<W>(&mut self, writer: &mut W) -> io::Result<()>
@@ -196,7 +202,7 @@ pub trait Formatter {
196202
where
197203
W: ?Sized + io::Write + WriteExt,
198204
{
199-
reserve_minimum!(writer);
205+
debug_assert_has_capacity!(writer);
200206
unsafe { writer.write_reserved_punctuation(b']').unwrap() };
201207
Ok(())
202208
}
@@ -206,7 +212,7 @@ pub trait Formatter {
206212
where
207213
W: ?Sized + io::Write + WriteExt,
208214
{
209-
reserve_minimum!(writer);
215+
debug_assert_has_capacity!(writer);
210216
if !first {
211217
unsafe { writer.write_reserved_punctuation(b',').unwrap() }
212218
}
@@ -238,7 +244,7 @@ pub trait Formatter {
238244
where
239245
W: ?Sized + io::Write + WriteExt,
240246
{
241-
reserve_minimum!(writer);
247+
debug_assert_has_capacity!(writer);
242248
unsafe {
243249
writer.write_reserved_punctuation(b'}').unwrap();
244250
}
@@ -250,7 +256,7 @@ pub trait Formatter {
250256
where
251257
W: ?Sized + io::Write + WriteExt,
252258
{
253-
reserve_minimum!(writer);
259+
debug_assert_has_capacity!(writer);
254260
if !first {
255261
unsafe {
256262
writer.write_reserved_punctuation(b',').unwrap();
@@ -272,7 +278,7 @@ pub trait Formatter {
272278
where
273279
W: ?Sized + io::Write + WriteExt,
274280
{
275-
reserve_minimum!(writer);
281+
debug_assert_has_capacity!(writer);
276282
unsafe { writer.write_reserved_punctuation(b':') }
277283
}
278284

src/serialize/writer/json.rs

+23-3
Original file line number | Diff line number | Diff line change
@@ -572,9 +572,29 @@ macro_rules! reserve_str {
572572
};
573573
}
574574

575+
#[cfg(all(feature = "unstable-simd", not(target_arch = "x86_64")))]
576+
#[inline(always)]
577+
fn format_escaped_str<W>(writer: &mut W, value: &str)
578+
where
579+
W: ?Sized + io::Write + WriteExt,
580+
{
581+
unsafe {
582+
reserve_str!(writer, value);
583+
584+
let written = format_escaped_str_impl_generic_128(
585+
writer.as_mut_buffer_ptr(),
586+
value.as_bytes().as_ptr(),
587+
value.len(),
588+
);
589+
590+
writer.set_written(written);
591+
}
592+
}
593+
575594
#[cfg(all(
576595
feature = "unstable-simd",
577-
any(not(target_arch = "x86_64"), not(feature = "avx512"))
596+
target_arch = "x86_64",
597+
not(feature = "avx512")
578598
))]
579599
#[inline(always)]
580600
fn format_escaped_str<W>(writer: &mut W, value: &str)
@@ -584,7 +604,7 @@ where
584604
unsafe {
585605
reserve_str!(writer, value);
586606

587-
let written = format_escaped_str_impl_128(
607+
let written = format_escaped_str_impl_generic_128(
588608
writer.as_mut_buffer_ptr(),
589609
value.as_bytes().as_ptr(),
590610
value.len(),
@@ -611,7 +631,7 @@ where
611631
);
612632
writer.set_written(written);
613633
} else {
614-
let written = format_escaped_str_impl_128(
634+
let written = format_escaped_str_impl_generic_128(
615635
writer.as_mut_buffer_ptr(),
616636
value.as_bytes().as_ptr(),
617637
value.len(),

src/serialize/writer/mod.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// SPDX-License-Identifier: Apache-2.0
1+
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
22

33
mod byteswriter;
44
mod formatter;

0 commit comments

Comments
 (0)