Skip to content

perf: remove heap allocation in parse_query #1020

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions url/benches/parse_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ fn long(bench: &mut Bencher) {
bench.iter(|| black_box(url).parse::<Url>().unwrap());
}

fn fragment(bench: &mut Bencher) {
let url = "https://example.com/parkbench?tre=es&st=uff#fragment";

bench.bytes = url.len() as u64;
bench.iter(|| black_box(url).parse::<Url>().unwrap());
}

fn plain(bench: &mut Bencher) {
let url = "https://example.com/";

Expand Down Expand Up @@ -86,6 +93,7 @@ benchmark_group!(
benches,
short,
long,
fragment,
plain,
hyphen,
leading_digit,
Expand Down
162 changes: 125 additions & 37 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1473,37 +1473,81 @@
&mut self,
scheme_type: SchemeType,
scheme_end: u32,
mut input: Input<'i>,
input: Input<'i>,
) -> Option<Input<'i>> {
let len = input.chars.as_str().len();
let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead
let mut remaining = None;
while let Some(c) = input.next() {
if c == '#' && self.context == Context::UrlParser {
remaining = Some(input);
break;
} else {
self.check_url_code_point(c, &input);
query.push(c);
struct QueryPartIter<'i, 'p> {
is_url_parser: bool,
input: Input<'i>,
violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
}

impl<'i> Iterator for QueryPartIter<'i, '_> {

Check warning on line 1484 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1484

Added line #L1484 was not covered by tests
type Item = (&'i str, bool);

fn next(&mut self) -> Option<Self::Item> {
let start = self.input.chars.as_str();
// bypass self.input.next() in order to get string slices
// which are faster to operate on
while let Some(c) = self.input.chars.next() {
match c {
'\t' | '\n' | '\r' => {

Check warning on line 1493 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1493

Added line #L1493 was not covered by tests
return Some((
&start[..start.len() - self.input.chars.as_str().len() - 1],
false,

Check warning on line 1496 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1496

Added line #L1496 was not covered by tests
));
}
'#' if self.is_url_parser => {
return Some((
&start[..start.len() - self.input.chars.as_str().len() - 1],
true,

Check warning on line 1502 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1502

Added line #L1502 was not covered by tests
));
}
c => {

Check warning on line 1505 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1505

Added line #L1505 was not covered by tests
if let Some(vfn) = &self.violation_fn {
check_url_code_point(vfn, c, &self.input);

Check warning on line 1507 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1507

Added line #L1507 was not covered by tests
}
}
}
}
if start.is_empty() {
None
} else {
Some((start, false))
}
}
}

let encoding = match &self.serialization[..scheme_end as usize] {
"http" | "https" | "file" | "ftp" => self.query_encoding_override,
_ => None,
};
let query_bytes = if let Some(o) = encoding {
o(&query)
} else {
query.as_bytes().into()
let mut part_iter = QueryPartIter {
is_url_parser: self.context == Context::UrlParser,
input,
violation_fn: self.violation_fn,
};
let set = if scheme_type.is_special() {
SPECIAL_QUERY
} else {
QUERY
};
self.serialization.extend(percent_encode(&query_bytes, set));
remaining
let query_encoding_override = self.query_encoding_override.filter(|_| {
matches!(
&self.serialization[..scheme_end as usize],
"http" | "https" | "file" | "ftp"

Check warning on line 1533 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1531-L1533

Added lines #L1531 - L1533 were not covered by tests
)
});

while let Some((part, is_finished)) = part_iter.next() {
match query_encoding_override {
// slightly faster to be repetitive and not convert text to Cow
Some(o) => self.serialization.extend(percent_encode(&o(part), set)),

Check warning on line 1540 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1540

Added line #L1540 was not covered by tests
None => self
.serialization

Check warning on line 1542 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1542

Added line #L1542 was not covered by tests
.extend(percent_encode(part.as_bytes(), set)),
}
if is_finished {
return Some(part_iter.input);
}
}

None
}

fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult<Url> {
Expand All @@ -1526,31 +1570,75 @@
})
}

pub fn parse_fragment(&mut self, mut input: Input<'_>) {
while let Some((c, utf8_c)) = input.next_utf8() {
if c == '\0' {
self.log_violation(SyntaxViolation::NullInFragment)
} else {
self.check_url_code_point(c, &input);
pub fn parse_fragment(&mut self, input: Input<'_>) {
struct FragmentPartIter<'i, 'p> {
input: Input<'i>,
violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
}

impl<'i> Iterator for FragmentPartIter<'i, '_> {

Check warning on line 1579 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1579

Added line #L1579 was not covered by tests
type Item = &'i str;

fn next(&mut self) -> Option<Self::Item> {
let start = self.input.chars.as_str();
// bypass self.input.next() in order to get string slices
// which are faster to operate on
while let Some(c) = self.input.chars.next() {
match c {
'\t' | '\n' | '\r' => {

Check warning on line 1588 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1588

Added line #L1588 was not covered by tests
return Some(
&start[..start.len() - self.input.chars.as_str().len() - 1],
);
}
'\0' => {

Check warning on line 1593 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1593

Added line #L1593 was not covered by tests
if let Some(vfn) = &self.violation_fn {
vfn(SyntaxViolation::NullInFragment);
}
}
c => {

Check warning on line 1598 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1598

Added line #L1598 was not covered by tests
if let Some(vfn) = &self.violation_fn {
check_url_code_point(vfn, c, &self.input);
}
}
}
}
if start.is_empty() {
None
} else {
Some(start)
}
}
}

let part_iter = FragmentPartIter {
input,
violation_fn: self.violation_fn,
};

for part in part_iter {
self.serialization
.extend(utf8_percent_encode(utf8_c, FRAGMENT));
.extend(utf8_percent_encode(part, FRAGMENT));
}
}

#[inline]
fn check_url_code_point(&self, c: char, input: &Input<'_>) {
if let Some(vfn) = self.violation_fn {
if c == '%' {
let mut input = input.clone();
if !matches!((input.next(), input.next()), (Some(a), Some(b))
check_url_code_point(vfn, c, input)
}
}
}

fn check_url_code_point(vfn: &dyn Fn(SyntaxViolation), c: char, input: &Input<'_>) {
if c == '%' {
let mut input = input.clone();
if !matches!((input.next(), input.next()), (Some(a), Some(b))
if a.is_ascii_hexdigit() && b.is_ascii_hexdigit())
{
vfn(SyntaxViolation::PercentDecode)
}
} else if !is_url_code_point(c) {
vfn(SyntaxViolation::NonUrlCodePoint)
}
{
vfn(SyntaxViolation::PercentDecode)
}
} else if !is_url_code_point(c) {
vfn(SyntaxViolation::NonUrlCodePoint)
}
}

Expand Down
Loading