Skip to content

perf: remove heap allocation in parse_query #1020

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions url/benches/parse_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ fn long(bench: &mut Bencher) {
bench.iter(|| black_box(url).parse::<Url>().unwrap());
}

/// Benchmark parsing a URL that carries both a query string and a fragment,
/// exercising the query/fragment fast paths.
fn fragment(bench: &mut Bencher) {
    let input = "https://example.com/parkbench?tre=es&st=uff#fragment";
    bench.bytes = input.len() as u64;
    bench.iter(|| black_box(input).parse::<Url>().unwrap());
}

fn plain(bench: &mut Bencher) {
let url = "https://example.com/";

Expand Down Expand Up @@ -86,6 +93,7 @@ benchmark_group!(
benches,
short,
long,
fragment,
plain,
hyphen,
leading_digit,
Expand Down
184 changes: 138 additions & 46 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@
}
}

/// Expands to the pattern `'\t' | '\n' | '\r'` — the "ASCII tab or newline"
/// character class that the URL parser strips from input. This is a macro
/// rather than a function so the same character class can be used directly
/// in `match` arms, where a function call cannot appear in pattern position.
macro_rules! ascii_tab_or_new_line_pattern {
() => {
'\t' | '\n' | '\r'
};
}

// `std::error::Error` lives in `std`, so this impl is gated on the `std`
// feature; without it `ParseError` simply does not implement the trait.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}

Expand Down Expand Up @@ -207,7 +213,7 @@
if input.len() < original_input.len() {
vfn(SyntaxViolation::C0SpaceIgnored)
}
if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
if input.chars().any(ascii_tab_or_new_line) {

Check warning on line 216 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L216

Added line #L216 was not covered by tests
vfn(SyntaxViolation::TabOrNewlineIgnored)
}
}
Expand All @@ -225,7 +231,7 @@
if input.len() < original_input.len() {
vfn(SyntaxViolation::C0SpaceIgnored)
}
if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
if input.chars().any(ascii_tab_or_new_line) {
vfn(SyntaxViolation::TabOrNewlineIgnored)
}
}
Expand Down Expand Up @@ -281,7 +287,7 @@
let utf8 = self.chars.as_str();
match self.chars.next() {
Some(c) => {
if !matches!(c, '\t' | '\n' | '\r') {
if !ascii_tab_or_new_line(c) {
return Some((c, &utf8[..c.len_utf8()]));
}
}
Expand Down Expand Up @@ -321,9 +327,7 @@
/// Iterating over an `Input` yields its characters with ASCII tab and
/// newline characters (which the URL spec says to ignore) silently skipped.
//
// NOTE(review): the page scrape retained both the old closure body
// (`matches!(c, '\t' | '\n' | '\r')`) and the new one; only the new body —
// which reuses the shared `ascii_tab_or_new_line` helper — is kept here.
impl Iterator for Input<'_> {
    type Item = char;
    fn next(&mut self) -> Option<char> {
        // Skip ignored characters; `find` advances `self.chars` past them.
        self.chars.by_ref().find(|&c| !ascii_tab_or_new_line(c))
    }
}

Expand Down Expand Up @@ -995,7 +999,7 @@
':' if !inside_square_brackets => break,
'\\' if scheme_type.is_special() => break,
'/' | '?' | '#' => break,
'\t' | '\n' | '\r' => {
ascii_tab_or_new_line_pattern!() => {
has_ignored_chars = true;
}
'[' => {
Expand Down Expand Up @@ -1077,7 +1081,7 @@
for c in input_str.chars() {
match c {
'/' | '\\' | '?' | '#' => break,
'\t' | '\n' | '\r' => has_ignored_chars = true,
ascii_tab_or_new_line_pattern!() => has_ignored_chars = true,

Check warning on line 1084 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1084

Added line #L1084 was not covered by tests
_ => non_ignored_chars += 1,
}
bytes += c.len_utf8();
Expand Down Expand Up @@ -1473,37 +1477,81 @@
&mut self,
scheme_type: SchemeType,
scheme_end: u32,
mut input: Input<'i>,
input: Input<'i>,
) -> Option<Input<'i>> {
let len = input.chars.as_str().len();
let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead
let mut remaining = None;
while let Some(c) = input.next() {
if c == '#' && self.context == Context::UrlParser {
remaining = Some(input);
break;
} else {
self.check_url_code_point(c, &input);
query.push(c);
struct QueryPartIter<'i, 'p> {
is_url_parser: bool,
input: Input<'i>,
violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
}

impl<'i> Iterator for QueryPartIter<'i, '_> {

Check warning on line 1488 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1488

Added line #L1488 was not covered by tests
type Item = (&'i str, bool);

fn next(&mut self) -> Option<Self::Item> {
let start = self.input.chars.as_str();
// bypass self.input.next() in order to get string slices
// which are faster to operate on
while let Some(c) = self.input.chars.next() {
match c {
ascii_tab_or_new_line_pattern!() => {

Check warning on line 1497 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1497

Added line #L1497 was not covered by tests
return Some((
&start[..start.len() - self.input.chars.as_str().len() - 1],
false,

Check warning on line 1500 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1500

Added line #L1500 was not covered by tests
));
}
'#' if self.is_url_parser => {
return Some((
&start[..start.len() - self.input.chars.as_str().len() - 1],
true,

Check warning on line 1506 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1506

Added line #L1506 was not covered by tests
));
}
c => {

Check warning on line 1509 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1509

Added line #L1509 was not covered by tests
if let Some(vfn) = &self.violation_fn {
check_url_code_point(vfn, c, &self.input);

Check warning on line 1511 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1511

Added line #L1511 was not covered by tests
}
}
}
}
if start.is_empty() {
None
} else {
Some((start, false))
}
}
}

let encoding = match &self.serialization[..scheme_end as usize] {
"http" | "https" | "file" | "ftp" => self.query_encoding_override,
_ => None,
};
let query_bytes = if let Some(o) = encoding {
o(&query)
} else {
query.as_bytes().into()
let mut part_iter = QueryPartIter {
is_url_parser: self.context == Context::UrlParser,
input,
violation_fn: self.violation_fn,
};
let set = if scheme_type.is_special() {
SPECIAL_QUERY
} else {
QUERY
};
self.serialization.extend(percent_encode(&query_bytes, set));
remaining
let query_encoding_override = self.query_encoding_override.filter(|_| {
matches!(
&self.serialization[..scheme_end as usize],
"http" | "https" | "file" | "ftp"

Check warning on line 1537 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1535-L1537

Added lines #L1535 - L1537 were not covered by tests
)
});

while let Some((part, is_finished)) = part_iter.next() {
match query_encoding_override {
// slightly faster to be repetitive and not convert text to Cow
Some(o) => self.serialization.extend(percent_encode(&o(part), set)),

Check warning on line 1544 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1544

Added line #L1544 was not covered by tests
None => self
.serialization

Check warning on line 1546 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1546

Added line #L1546 was not covered by tests
.extend(percent_encode(part.as_bytes(), set)),
}
if is_finished {
return Some(part_iter.input);
}
}

None
}

fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult<Url> {
Expand All @@ -1526,31 +1574,75 @@
})
}

pub fn parse_fragment(&mut self, mut input: Input<'_>) {
while let Some((c, utf8_c)) = input.next_utf8() {
if c == '\0' {
self.log_violation(SyntaxViolation::NullInFragment)
} else {
self.check_url_code_point(c, &input);
pub fn parse_fragment(&mut self, input: Input<'_>) {
struct FragmentPartIter<'i, 'p> {
input: Input<'i>,
violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
}

impl<'i> Iterator for FragmentPartIter<'i, '_> {

Check warning on line 1583 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1583

Added line #L1583 was not covered by tests
type Item = &'i str;

fn next(&mut self) -> Option<Self::Item> {
let start = self.input.chars.as_str();
// bypass self.input.next() in order to get string slices
// which are faster to operate on
while let Some(c) = self.input.chars.next() {
match c {
ascii_tab_or_new_line_pattern!() => {

Check warning on line 1592 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1592

Added line #L1592 was not covered by tests
return Some(
&start[..start.len() - self.input.chars.as_str().len() - 1],
);
}
'\0' => {

Check warning on line 1597 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1597

Added line #L1597 was not covered by tests
if let Some(vfn) = &self.violation_fn {
vfn(SyntaxViolation::NullInFragment);
}
}
c => {

Check warning on line 1602 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1602

Added line #L1602 was not covered by tests
if let Some(vfn) = &self.violation_fn {
check_url_code_point(vfn, c, &self.input);
}
}
}
}
if start.is_empty() {
None
} else {
Some(start)
}
}
}

let part_iter = FragmentPartIter {
input,
violation_fn: self.violation_fn,
};

for part in part_iter {
self.serialization
.extend(utf8_percent_encode(utf8_c, FRAGMENT));
.extend(utf8_percent_encode(part, FRAGMENT));
}
}

#[inline]
fn check_url_code_point(&self, c: char, input: &Input<'_>) {
if let Some(vfn) = self.violation_fn {
if c == '%' {
let mut input = input.clone();
if !matches!((input.next(), input.next()), (Some(a), Some(b))
check_url_code_point(vfn, c, input)
}
}
}

fn check_url_code_point(vfn: &dyn Fn(SyntaxViolation), c: char, input: &Input<'_>) {
if c == '%' {
let mut input = input.clone();
if !matches!((input.next(), input.next()), (Some(a), Some(b))
if a.is_ascii_hexdigit() && b.is_ascii_hexdigit())
{
vfn(SyntaxViolation::PercentDecode)
}
} else if !is_url_code_point(c) {
vfn(SyntaxViolation::NonUrlCodePoint)
}
{
vfn(SyntaxViolation::PercentDecode)
}
} else if !is_url_code_point(c) {
vfn(SyntaxViolation::NonUrlCodePoint)
}
}

Expand Down Expand Up @@ -1589,7 +1681,7 @@
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
#[inline]
fn ascii_tab_or_new_line(ch: char) -> bool {
matches!(ch, '\t' | '\r' | '\n')
matches!(ch, ascii_tab_or_new_line_pattern!())
}

/// https://url.spec.whatwg.org/#ascii-alpha
Expand Down
Loading