Skip to content

Commit 18640a0

Browse files
committed
perf: remove heap allocation in parse_host
1 parent ffca1ef commit 18640a0

File tree

4 files changed

+75
-42
lines changed

4 files changed

+75
-42
lines changed

url/src/host.rs

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use crate::net::{Ipv4Addr, Ipv6Addr};
1010
use alloc::borrow::Cow;
1111
use alloc::borrow::ToOwned;
1212
use alloc::string::String;
13-
use alloc::string::ToString;
1413
use alloc::vec::Vec;
1514
use core::cmp;
1615
use core::fmt::{self, Formatter};
@@ -30,8 +29,8 @@ pub(crate) enum HostInternal {
3029
Ipv6(Ipv6Addr),
3130
}
3231

33-
impl From<Host<String>> for HostInternal {
34-
fn from(host: Host<String>) -> HostInternal {
32+
impl From<Host<Cow<'_, str>>> for HostInternal {
33+
fn from(host: Host<Cow<'_, str>>) -> HostInternal {
3534
match host {
3635
Host::Domain(ref s) if s.is_empty() => HostInternal::None,
3736
Host::Domain(_) => HostInternal::Domain,
@@ -75,20 +74,24 @@ impl Host<&str> {
7574
}
7675
}
7776

78-
impl Host<String> {
79-
/// Parse a host: either an IPv6 address in [] square brackets, or a domain.
80-
///
81-
/// <https://url.spec.whatwg.org/#host-parsing>
82-
pub fn parse(input: &str) -> Result<Self, ParseError> {
77+
impl<'a> Host<Cow<'a, str>> {
78+
pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
8379
if input.starts_with('[') {
8480
if !input.ends_with(']') {
8581
return Err(ParseError::InvalidIpv6Address);
8682
}
8783
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
8884
}
8985
let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
86+
let domain: Cow<'a, [u8]> = match domain {
87+
Cow::Borrowed(_) => match input {
88+
Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()),
89+
Cow::Owned(input) => Cow::Owned(input.into_bytes()),
90+
},
91+
Cow::Owned(v) => Cow::Owned(v),
92+
};
9093

91-
let domain = Self::domain_to_ascii(&domain)?;
94+
let domain = domain_to_ascii(domain)?;
9295

9396
if domain.is_empty() {
9497
return Err(ParseError::EmptyHost);
@@ -98,12 +101,11 @@ impl Host<String> {
98101
let address = parse_ipv4addr(&domain)?;
99102
Ok(Host::Ipv4(address))
100103
} else {
101-
Ok(Host::Domain(domain.to_string()))
104+
Ok(Host::Domain(domain))
102105
}
103106
}
104107

105-
// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
106-
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
108+
pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
107109
if input.starts_with('[') {
108110
if !input.ends_with(']') {
109111
return Err(ParseError::InvalidIpv6Address);
@@ -137,17 +139,49 @@ impl Host<String> {
137139
Err(ParseError::InvalidDomainCharacter)
138140
} else {
139141
Ok(Host::Domain(
140-
utf8_percent_encode(input, CONTROLS).to_string(),
142+
match utf8_percent_encode(&input, CONTROLS).into() {
143+
Cow::Owned(v) => Cow::Owned(v),
144+
Cow::Borrowed(_) => input,
145+
},
141146
))
142147
}
143148
}
144149

145-
/// convert domain with idna
146-
fn domain_to_ascii(domain: &[u8]) -> Result<Cow<'_, str>, ParseError> {
147-
idna::domain_to_ascii_cow(domain, idna::AsciiDenyList::URL).map_err(Into::into)
150+
pub(crate) fn into_owned(self) -> Host<String> {
151+
match self {
152+
Host::Domain(s) => Host::Domain(s.into_owned()),
153+
Host::Ipv4(ip) => Host::Ipv4(ip),
154+
Host::Ipv6(ip) => Host::Ipv6(ip),
155+
}
148156
}
149157
}
150158

159+
impl Host<String> {
160+
/// Parse a host: either an IPv6 address in [] square brackets, or a domain.
161+
///
162+
/// <https://url.spec.whatwg.org/#host-parsing>
163+
pub fn parse(input: &str) -> Result<Self, ParseError> {
164+
Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned())
165+
}
166+
167+
// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
168+
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
169+
Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned())
170+
}
171+
}
172+
173+
/// convert domain with idna
174+
fn domain_to_ascii(domain: Cow<'_, [u8]>) -> Result<Cow<'_, str>, ParseError> {
175+
let value = idna::domain_to_ascii_cow(&domain, idna::AsciiDenyList::URL)?;
176+
Ok(match value {
177+
Cow::Owned(value) => Cow::Owned(value),
178+
Cow::Borrowed(_) => match domain {
179+
Cow::Borrowed(value) => unsafe { Cow::Borrowed(std::str::from_utf8_unchecked(value)) },
180+
Cow::Owned(value) => unsafe { String::from_utf8_unchecked(value).into() },
181+
},
182+
})
183+
}
184+
151185
impl<S: AsRef<str>> fmt::Display for Host<S> {
152186
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
153187
match *self {

url/src/lib.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ use core::fmt::Write;
183183
use core::ops::{Range, RangeFrom, RangeTo};
184184
use core::{cmp, fmt, hash, mem};
185185
use percent_encoding::utf8_percent_encode;
186+
use std::borrow::Cow;
186187
#[cfg(feature = "std")]
187188
#[cfg(any(
188189
unix,
@@ -2032,9 +2033,9 @@ impl Url {
20322033
}
20332034
}
20342035
if SchemeType::from(self.scheme()).is_special() {
2035-
self.set_host_internal(Host::parse(host_substr)?, None);
2036+
self.set_host_internal(Host::parse_cow(host_substr.into())?, None);
20362037
} else {
2037-
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
2038+
self.set_host_internal(Host::parse_opaque_cow(host_substr.into())?, None);
20382039
}
20392040
} else if self.has_host() {
20402041
if scheme_type.is_special() && !scheme_type.is_file() {
@@ -2070,7 +2071,7 @@ impl Url {
20702071
}
20712072

20722073
/// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
2073-
fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
2074+
fn set_host_internal(&mut self, host: Host<Cow<'_, str>>, opt_new_port: Option<Option<u16>>) {
20742075
let old_suffix_pos = if opt_new_port.is_some() {
20752076
self.path_start
20762077
} else {
@@ -2987,7 +2988,7 @@ fn path_to_file_url_segments_windows(
29872988
serialization.push(':');
29882989
}
29892990
Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2990-
let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2991+
let host = Host::parse_cow(server.to_str().ok_or(())?.into()).map_err(|_| ())?;
29912992
write!(serialization, "{}", host).unwrap();
29922993
host_end = to_u32(serialization.len()).unwrap();
29932994
host_internal = host.into();

url/src/parser.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
// except according to those terms.
88

99
use alloc::string::String;
10-
use alloc::string::ToString;
1110
use core::fmt::{self, Formatter, Write};
1211
use core::str;
12+
use std::borrow::Cow;
1313

1414
use crate::host::{Host, HostInternal};
1515
use crate::Url;
@@ -979,7 +979,7 @@ impl<'a> Parser<'a> {
979979
pub fn parse_host(
980980
mut input: Input<'_>,
981981
scheme_type: SchemeType,
982-
) -> ParseResult<(Host<String>, Input<'_>)> {
982+
) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
983983
if scheme_type.is_file() {
984984
return Parser::get_file_host(input);
985985
}
@@ -1010,34 +1010,34 @@ impl<'a> Parser<'a> {
10101010
}
10111011
bytes += c.len_utf8();
10121012
}
1013-
let replaced: String;
10141013
let host_str;
10151014
{
10161015
let host_input = input.by_ref().take(non_ignored_chars);
10171016
if has_ignored_chars {
1018-
replaced = host_input.collect();
1019-
host_str = &*replaced
1017+
host_str = Cow::Owned(host_input.collect());
10201018
} else {
10211019
for _ in host_input {}
1022-
host_str = &input_str[..bytes]
1020+
host_str = Cow::Borrowed(&input_str[..bytes]);
10231021
}
10241022
}
10251023
if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
10261024
return Err(ParseError::EmptyHost);
10271025
}
10281026
if !scheme_type.is_special() {
1029-
let host = Host::parse_opaque(host_str)?;
1027+
let host = Host::parse_opaque_cow(host_str)?;
10301028
return Ok((host, input));
10311029
}
1032-
let host = Host::parse(host_str)?;
1030+
let host = Host::parse_cow(host_str)?;
10331031
Ok((host, input))
10341032
}
10351033

1036-
fn get_file_host(input: Input<'_>) -> ParseResult<(Host<String>, Input<'_>)> {
1034+
/// Parse the host of a `file:` URL.
///
/// Splits the host text off `input` with [`Parser::file_host`], parses it as
/// a host, and replaces the domain `localhost` with the empty domain.
/// Returns the parsed host together with the input that follows it.
///
/// # Errors
///
/// Propagates errors from [`Parser::file_host`] and from host parsing.
fn get_file_host(input: Input<'_>) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
    let (_, host_str, remaining) = Parser::file_host(input)?;
    // Hand the `Cow` straight to `parse_cow` so a borrowed host string stays
    // borrowed; going through `Host::parse` would allocate a `String` only to
    // wrap it in `Cow::Owned` again.
    let host = match Host::parse_cow(host_str)? {
        Host::Domain(ref d) if d == "localhost" => Host::Domain(Cow::Borrowed("")),
        host => host,
    };
    Ok((host, remaining))
}
@@ -1052,7 +1052,7 @@ impl<'a> Parser<'a> {
10521052
has_host = false;
10531053
HostInternal::None
10541054
} else {
1055-
match Host::parse(&host_str)? {
1055+
match Host::parse_cow(host_str)? {
10561056
Host::Domain(ref d) if d == "localhost" => {
10571057
has_host = false;
10581058
HostInternal::None
@@ -1067,7 +1067,7 @@ impl<'a> Parser<'a> {
10671067
Ok((has_host, host, remaining))
10681068
}
10691069

1070-
pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> {
1070+
pub fn file_host(input: Input<'_>) -> ParseResult<(bool, Cow<'_, str>, Input<'_>)> {
10711071
// Undo the Input abstraction here to avoid allocating in the common case
10721072
// where the host part of the input does not contain any tab or newline
10731073
let input_str = input.chars.as_str();
@@ -1082,23 +1082,21 @@ impl<'a> Parser<'a> {
10821082
}
10831083
bytes += c.len_utf8();
10841084
}
1085-
let replaced: String;
10861085
let host_str;
10871086
let mut remaining = input.clone();
10881087
{
10891088
let host_input = remaining.by_ref().take(non_ignored_chars);
10901089
if has_ignored_chars {
1091-
replaced = host_input.collect();
1092-
host_str = &*replaced
1090+
host_str = Cow::Owned(host_input.collect());
10931091
} else {
10941092
for _ in host_input {}
1095-
host_str = &input_str[..bytes]
1093+
host_str = Cow::Borrowed(&input_str[..bytes]);
10961094
}
10971095
}
1098-
if is_windows_drive_letter(host_str) {
1099-
return Ok((false, "".to_string(), input));
1096+
if is_windows_drive_letter(&host_str) {
1097+
return Ok((false, "".into(), input));
11001098
}
1101-
Ok((true, host_str.to_string(), remaining))
1099+
Ok((true, host_str, remaining))
11021100
}
11031101

11041102
pub fn parse_port<P>(

url/src/quirks.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
161161
let scheme = url.scheme();
162162
let scheme_type = SchemeType::from(scheme);
163163
if scheme_type == SchemeType::File && new_host.is_empty() {
164-
url.set_host_internal(Host::Domain(String::new()), None);
164+
url.set_host_internal(Host::Domain("".into()), None);
165165
return Ok(());
166166
}
167167

@@ -208,7 +208,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
208208
let input = Input::new_no_trim(new_hostname);
209209
let scheme_type = SchemeType::from(url.scheme());
210210
if scheme_type == SchemeType::File && new_hostname.is_empty() {
211-
url.set_host_internal(Host::Domain(String::new()), None);
211+
url.set_host_internal(Host::Domain("".into()), None);
212212
return Ok(());
213213
}
214214

0 commit comments

Comments
 (0)