Skip to content

Commit cf752ff

Browse files
committed
compare with jetscii aarch64 simd
1 parent ceef3c9 commit cf752ff

File tree

9 files changed

+137
-4
lines changed

9 files changed

+137
-4
lines changed

benchmarks/definitions/memchr/sherlock/common.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ regex = ['a', 't', 'o']
4545
haystack = { path = "sherlock/huge.txt" }
4646
count = 109_064
4747
engines = [
48+
"rust/jetscii/memchr3",
4849
"rust/memchr/memchr3",
4950
"rust/memchr/memchr3/fallback",
5051
"rust/memchr/memchr3/naive",
@@ -101,6 +102,7 @@ regex = ['a', 'h', 'i']
101102
haystack = { path = "sherlock/small.txt" }
102103
count = 113
103104
engines = [
105+
"rust/jetscii/memchr3",
104106
"rust/memchr/memchr3",
105107
"rust/memchr/memchr3/fallback",
106108
"rust/memchr/memchr3/naive",

benchmarks/definitions/memchr/sherlock/never.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ regex = ['<', '>', '=']
4545
haystack = { path = "sherlock/huge.txt" }
4646
count = 0
4747
engines = [
48+
"rust/jetscii/memchr3",
4849
"rust/memchr/memchr3",
4950
"rust/memchr/memchr3/fallback",
5051
"rust/memchr/memchr3/naive",
@@ -101,6 +102,7 @@ regex = ['<', '>', '=']
101102
haystack = { path = "sherlock/small.txt" }
102103
count = 0
103104
engines = [
105+
"rust/jetscii/memchr3",
104106
"rust/memchr/memchr3",
105107
"rust/memchr/memchr3/fallback",
106108
"rust/memchr/memchr3/naive",
@@ -157,6 +159,7 @@ regex = ['<', '>', '=']
157159
haystack = { path = "sherlock/tiny.txt" }
158160
count = 0
159161
engines = [
162+
"rust/jetscii/memchr3",
160163
"rust/memchr/memchr3",
161164
"rust/memchr/memchr3/fallback",
162165
"rust/memchr/memchr3/naive",
@@ -213,6 +216,7 @@ regex = ['<', '>', '=']
213216
haystack = ''
214217
count = 0
215218
engines = [
219+
"rust/jetscii/memchr3",
216220
"rust/memchr/memchr3",
217221
"rust/memchr/memchr3/fallback",
218222
"rust/memchr/memchr3/naive",

benchmarks/definitions/memchr/sherlock/rare.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ regex = ['z', 'R', 'J']
4444
haystack = { path = "sherlock/huge.txt" }
4545
count = 546
4646
engines = [
47+
"rust/jetscii/memchr3",
4748
"rust/memchr/memchr3",
4849
"rust/memchr/memchr3/fallback",
4950
"rust/memchr/memchr3/naive",
@@ -100,6 +101,7 @@ regex = ['R', 'P', 'T']
100101
haystack = { path = "sherlock/small.txt" }
101102
count = 3
102103
engines = [
104+
"rust/jetscii/memchr3",
103105
"rust/memchr/memchr3",
104106
"rust/memchr/memchr3/fallback",
105107
"rust/memchr/memchr3/naive",
@@ -156,6 +158,7 @@ regex = ['.', 'H', 'M']
156158
haystack = { path = "sherlock/tiny.txt" }
157159
count = 3
158160
engines = [
161+
"rust/jetscii/memchr3",
159162
"rust/memchr/memchr3",
160163
"rust/memchr/memchr3/fallback",
161164
"rust/memchr/memchr3/naive",

benchmarks/definitions/memchr/sherlock/uncommon.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ regex = ['b', 'p', '.']
4545
haystack = { path = "sherlock/huge.txt" }
4646
count = 19_538
4747
engines = [
48+
"rust/jetscii/memchr3",
4849
"rust/memchr/memchr3",
4950
"rust/memchr/memchr3/fallback",
5051
"rust/memchr/memchr3/naive",
@@ -101,6 +102,7 @@ regex = ['b', 'g', 'p']
101102
haystack = { path = "sherlock/small.txt" }
102103
count = 24
103104
engines = [
105+
"rust/jetscii/memchr3",
104106
"rust/memchr/memchr3",
105107
"rust/memchr/memchr3/fallback",
106108
"rust/memchr/memchr3/naive",
@@ -157,6 +159,7 @@ regex = ['l', 's', 'e']
157159
haystack = { path = "sherlock/tiny.txt" }
158160
count = 16
159161
engines = [
162+
"rust/jetscii/memchr3",
160163
"rust/memchr/memchr3",
161164
"rust/memchr/memchr3/fallback",
162165
"rust/memchr/memchr3/naive",

benchmarks/definitions/memchr/sherlock/verycommon.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ engines = [
88
"libc/memchr/oneshot",
99
"rust/bytecount/memchr/oneshot",
1010
"rust/bytecount/memchr/oneshot/wasm32",
11+
"rust/jetscii/memchr",
12+
"rust/jetscii/memchr/onlycount",
1113
"rust/memchr/memchr/onlycount",
1214
"rust/memchr/memchr/onlycount/wasm32",
1315
"rust/memchr/memchr/oneshot",
@@ -32,6 +34,8 @@ engines = [
3234
"libc/memchr/oneshot",
3335
"rust/bytecount/memchr/oneshot",
3436
"rust/bytecount/memchr/oneshot/wasm32",
37+
"rust/jetscii/memchr",
38+
"rust/jetscii/memchr/onlycount",
3539
"rust/memchr/memchr/onlycount",
3640
"rust/memchr/memchr/onlycount/wasm32",
3741
"rust/memchr/memchr/oneshot",

benchmarks/engines.toml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,70 @@
121121
bin = "cargo"
122122
args = ["clean"]
123123

124+
[[engine]]
125+
name = "rust/jetscii/memchr"
126+
cwd = "./engines/rust-jetscii"
127+
[engine.version]
128+
bin = "./target/release/main"
129+
args = ["--version"]
130+
[engine.run]
131+
bin = "./target/release/main"
132+
args = ["memchr"]
133+
[[engine.build]]
134+
bin = "cargo"
135+
args = ["build", "--release"]
136+
[[engine.clean]]
137+
bin = "cargo"
138+
args = ["clean"]
139+
140+
[[engine]]
141+
name = "rust/jetscii/memchr/onlycount"
142+
cwd = "./engines/rust-jetscii"
143+
[engine.version]
144+
bin = "./target/release/main"
145+
args = ["--version"]
146+
[engine.run]
147+
bin = "./target/release/main"
148+
args = ["memchr-onlycount"]
149+
[[engine.build]]
150+
bin = "cargo"
151+
args = ["build", "--release"]
152+
[[engine.clean]]
153+
bin = "cargo"
154+
args = ["clean"]
155+
156+
[[engine]]
157+
name = "rust/jetscii/memchr3"
158+
cwd = "./engines/rust-jetscii"
159+
[engine.version]
160+
bin = "./target/release/main"
161+
args = ["--version"]
162+
[engine.run]
163+
bin = "./target/release/main"
164+
args = ["memchr3"]
165+
[[engine.build]]
166+
bin = "cargo"
167+
args = ["build", "--release"]
168+
[[engine.clean]]
169+
bin = "cargo"
170+
args = ["clean"]
171+
172+
[[engine]]
173+
name = "rust/jetscii/memchr3/onlycount"
174+
cwd = "./engines/rust-jetscii"
175+
[engine.version]
176+
bin = "./target/release/main"
177+
args = ["--version"]
178+
[engine.run]
179+
bin = "./target/release/main"
180+
args = ["memchr3-onlycount"]
181+
[[engine.build]]
182+
bin = "cargo"
183+
args = ["build", "--release"]
184+
[[engine.clean]]
185+
bin = "cargo"
186+
args = ["clean"]
187+
124188
# Engines based on the `sliceslice` Rust crate.
125189
#
126190
# Note that this crate only supports "does the haystack match or not." It does

benchmarks/engines/rust-jetscii/Cargo.lock

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/engines/rust-jetscii/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ edition = "2021"
88

99
[dependencies]
1010
anyhow = "1.0.72"
11-
jetscii = "=0.5.3"
11+
# TODO: This fork of jetscii (which adds aarch64 SIMD support) is temporary; switch back to a crates.io release when possible.
12+
jetscii = { git = "https://github.com/Dr-Emann/jetscii.git", branch = "aarch64_simd" }
1213

1314
[dependencies.shared]
1415
path = "../../shared"

benchmarks/engines/rust-jetscii/main.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ fn main() -> anyhow::Result<()> {
1818
let samples = match (&*engine, &*b.model) {
1919
("memmem-prebuilt", "count") => memmem_prebuilt_count(&b)?,
2020
("memmem-oneshot", "count") => memmem_oneshot_count(&b)?,
21+
("memchr", "count-bytes") => memchr_count(&b)?,
22+
("memchr-onlycount", "count-bytes") => memchr_only_count(&b)?,
23+
("memchr3", "count-bytes") => memchr3_count(&b)?,
24+
("memchr3-onlycount", "count-bytes") => memchr3_only_count(&b)?,
2125
(engine, model) => {
2226
anyhow::bail!("unrecognized engine '{engine}' and model '{model}'")
2327
}
@@ -47,3 +51,52 @@ fn memmem_oneshot_count(b: &Benchmark) -> anyhow::Result<Vec<Sample>> {
4751
}))
4852
})
4953
}
54+
55+
/// Benchmark body for the `memchr` engine: one needle byte, counted by
/// stepping through the jetscii iterator one item at a time.
///
/// The haystack and the single needle byte are read from `b`, and the
/// `jetscii::bytes!` searcher is built once, outside the closure handed to
/// `shared::run`. The closure's result is the number of items produced by
/// `finder.iter(haystack)`, obtained via `count_slow` so that any specialized
/// `count` on the iterator is bypassed.
///
/// # Errors
///
/// Propagates the error from `b.one_needle_byte()`; otherwise returns
/// whatever `shared::run` returns.
fn memchr_count(b: &Benchmark) -> anyhow::Result<Vec<Sample>> {
56+
let haystack = &b.haystack;
57+
let needle = b.one_needle_byte()?;
58+
let finder = jetscii::bytes!(needle);
59+
shared::run(b, || Ok(finder.iter(haystack).count_slow()))
60+
}
61+
62+
/// Benchmark body for the `memchr-onlycount` engine: one needle byte, counted
/// with the iterator's own `count`.
///
/// Identical in setup to `memchr_count` (haystack and needle byte come from
/// `b`, the `jetscii::bytes!` searcher is built once up front), but the
/// closure calls `count()` directly instead of `count_slow`, so any `count`
/// specialization the jetscii iterator provides is what gets measured.
///
/// # Errors
///
/// Propagates the error from `b.one_needle_byte()`; otherwise returns
/// whatever `shared::run` returns.
fn memchr_only_count(b: &Benchmark) -> anyhow::Result<Vec<Sample>> {
63+
let haystack = &b.haystack;
64+
let needle = b.one_needle_byte()?;
65+
let finder = jetscii::bytes!(needle);
66+
shared::run(b, || Ok(finder.iter(haystack).count()))
67+
}
68+
69+
/// Benchmark body for the `memchr3` engine: three needle bytes, counted by
/// stepping through the jetscii iterator one item at a time.
///
/// The haystack and the three needle bytes are read from `b`, and a single
/// `jetscii::bytes!` searcher covering all three bytes is built once, outside
/// the closure handed to `shared::run`. The closure's result is the number of
/// items produced by `finder.iter(haystack)`, obtained via `count_slow` so
/// that any specialized `count` on the iterator is bypassed.
///
/// # Errors
///
/// Propagates the error from `b.three_needle_bytes()`; otherwise returns
/// whatever `shared::run` returns.
fn memchr3_count(b: &Benchmark) -> anyhow::Result<Vec<Sample>> {
70+
let haystack = &b.haystack;
71+
let (n1, n2, n3) = b.three_needle_bytes()?;
72+
let finder = jetscii::bytes!(n1, n2, n3);
73+
shared::run(b, || Ok(finder.iter(haystack).count_slow()))
74+
}
75+
76+
/// Benchmark body for the `memchr3-onlycount` engine: three needle bytes,
/// counted with the iterator's own `count`.
///
/// Identical in setup to `memchr3_count` (haystack and the three needle bytes
/// come from `b`, one `jetscii::bytes!` searcher is built up front), but the
/// closure calls `count()` directly instead of `count_slow`, so any `count`
/// specialization the jetscii iterator provides is what gets measured.
///
/// # Errors
///
/// Propagates the error from `b.three_needle_bytes()`; otherwise returns
/// whatever `shared::run` returns.
fn memchr3_only_count(b: &Benchmark) -> anyhow::Result<Vec<Sample>> {
77+
let haystack = &b.haystack;
78+
let (n1, n2, n3) = b.three_needle_bytes()?;
79+
let finder = jetscii::bytes!(n1, n2, n3);
80+
shared::run(b, || Ok(finder.iter(haystack).count()))
81+
}
82+
83+
/// Extension trait that adds `count_slow` to iterators, used by the
/// non-`onlycount` benchmark functions in this file.
trait IteratorExt: Iterator {
84+
/// Like `Iterator::count`, but guarantees that it gets the count by
85+
/// iterating over each element without taking any specialized shortcuts.
86+
///
87+
/// We do this because the jetscii crate may specialize `count` in certain
88+
/// circumstances, and we'd generally like to measure how long it takes
89+
/// to find all occurrences of a needle and not just the number of them.
90+
fn count_slow(mut self) -> usize
91+
where
92+
Self: Sized,
93+
{
94+
let mut count = 0;
95+
// Drive the iterator explicitly through `next` so that every element
// is actually produced before it is counted.
while let Some(_) = self.next() {
96+
count += 1;
97+
}
98+
count
99+
}
100+
}
101+
102+
// Blanket impl: every `Iterator` picks up the default `count_slow` method.
impl<I: Iterator> IteratorExt for I {}

0 commit comments

Comments
 (0)