@@ -73,6 +73,12 @@ macro_rules! simple_enum_error {
73
73
}
74
74
}
75
75
76
+ macro_rules! ascii_tab_or_new_line_pattern {
77
+ ( ) => {
78
+ '\t' | '\n' | '\r'
79
+ } ;
80
+ }
81
+
76
82
#[ cfg( feature = "std" ) ]
77
83
impl std:: error:: Error for ParseError { }
78
84
@@ -207,7 +213,7 @@ impl<'i> Input<'i> {
207
213
if input. len ( ) < original_input. len ( ) {
208
214
vfn ( SyntaxViolation :: C0SpaceIgnored )
209
215
}
210
- if input. chars ( ) . any ( |c| matches ! ( c , '\t' | '\n' | '\r' ) ) {
216
+ if input. chars ( ) . any ( ascii_tab_or_new_line ) {
211
217
vfn ( SyntaxViolation :: TabOrNewlineIgnored )
212
218
}
213
219
}
@@ -225,7 +231,7 @@ impl<'i> Input<'i> {
225
231
if input. len ( ) < original_input. len ( ) {
226
232
vfn ( SyntaxViolation :: C0SpaceIgnored )
227
233
}
228
- if input. chars ( ) . any ( |c| matches ! ( c , '\t' | '\n' | '\r' ) ) {
234
+ if input. chars ( ) . any ( ascii_tab_or_new_line ) {
229
235
vfn ( SyntaxViolation :: TabOrNewlineIgnored )
230
236
}
231
237
}
@@ -281,7 +287,7 @@ impl<'i> Input<'i> {
281
287
let utf8 = self . chars . as_str ( ) ;
282
288
match self . chars . next ( ) {
283
289
Some ( c) => {
284
- if !matches ! ( c , '\t' | '\n' | '\r' ) {
290
+ if !ascii_tab_or_new_line ( c ) {
285
291
return Some ( ( c, & utf8[ ..c. len_utf8 ( ) ] ) ) ;
286
292
}
287
293
}
@@ -321,9 +327,7 @@ impl<F: FnMut(char) -> bool> Pattern for F {
321
327
impl Iterator for Input < ' _ > {
322
328
type Item = char ;
323
329
fn next ( & mut self ) -> Option < char > {
324
- self . chars
325
- . by_ref ( )
326
- . find ( |& c| !matches ! ( c, '\t' | '\n' | '\r' ) )
330
+ self . chars . by_ref ( ) . find ( |& c| !ascii_tab_or_new_line ( c) )
327
331
}
328
332
}
329
333
@@ -995,7 +999,7 @@ impl<'a> Parser<'a> {
995
999
':' if !inside_square_brackets => break ,
996
1000
'\\' if scheme_type. is_special ( ) => break ,
997
1001
'/' | '?' | '#' => break ,
998
- '\t' | '\n' | '\r' => {
1002
+ ascii_tab_or_new_line_pattern ! ( ) => {
999
1003
has_ignored_chars = true ;
1000
1004
}
1001
1005
'[' => {
@@ -1077,7 +1081,7 @@ impl<'a> Parser<'a> {
1077
1081
for c in input_str. chars ( ) {
1078
1082
match c {
1079
1083
'/' | '\\' | '?' | '#' => break ,
1080
- '\t' | '\n' | '\r' => has_ignored_chars = true ,
1084
+ ascii_tab_or_new_line_pattern ! ( ) => has_ignored_chars = true ,
1081
1085
_ => non_ignored_chars += 1 ,
1082
1086
}
1083
1087
bytes += c. len_utf8 ( ) ;
@@ -1473,37 +1477,81 @@ impl<'a> Parser<'a> {
1473
1477
& mut self ,
1474
1478
scheme_type : SchemeType ,
1475
1479
scheme_end : u32 ,
1476
- mut input : Input < ' i > ,
1480
+ input : Input < ' i > ,
1477
1481
) -> Option < Input < ' i > > {
1478
- let len = input. chars . as_str ( ) . len ( ) ;
1479
- let mut query = String :: with_capacity ( len) ; // FIXME: use a streaming decoder instead
1480
- let mut remaining = None ;
1481
- while let Some ( c) = input. next ( ) {
1482
- if c == '#' && self . context == Context :: UrlParser {
1483
- remaining = Some ( input) ;
1484
- break ;
1485
- } else {
1486
- self . check_url_code_point ( c, & input) ;
1487
- query. push ( c) ;
1482
+ struct QueryPartIter < ' i , ' p > {
1483
+ is_url_parser : bool ,
1484
+ input : Input < ' i > ,
1485
+ violation_fn : Option < & ' p dyn Fn ( SyntaxViolation ) > ,
1486
+ }
1487
+
1488
+ impl < ' i > Iterator for QueryPartIter < ' i , ' _ > {
1489
+ type Item = ( & ' i str , bool ) ;
1490
+
1491
+ fn next ( & mut self ) -> Option < Self :: Item > {
1492
+ let start = self . input . chars . as_str ( ) ;
1493
+ // bypass self.input.next() in order to get string slices
1494
+ // which are faster to operate on
1495
+ while let Some ( c) = self . input . chars . next ( ) {
1496
+ match c {
1497
+ ascii_tab_or_new_line_pattern ! ( ) => {
1498
+ return Some ( (
1499
+ & start[ ..start. len ( ) - self . input . chars . as_str ( ) . len ( ) - 1 ] ,
1500
+ false ,
1501
+ ) ) ;
1502
+ }
1503
+ '#' if self . is_url_parser => {
1504
+ return Some ( (
1505
+ & start[ ..start. len ( ) - self . input . chars . as_str ( ) . len ( ) - 1 ] ,
1506
+ true ,
1507
+ ) ) ;
1508
+ }
1509
+ c => {
1510
+ if let Some ( vfn) = & self . violation_fn {
1511
+ check_url_code_point ( vfn, c, & self . input ) ;
1512
+ }
1513
+ }
1514
+ }
1515
+ }
1516
+ if start. is_empty ( ) {
1517
+ None
1518
+ } else {
1519
+ Some ( ( start, false ) )
1520
+ }
1488
1521
}
1489
1522
}
1490
1523
1491
- let encoding = match & self . serialization [ ..scheme_end as usize ] {
1492
- "http" | "https" | "file" | "ftp" => self . query_encoding_override ,
1493
- _ => None ,
1494
- } ;
1495
- let query_bytes = if let Some ( o) = encoding {
1496
- o ( & query)
1497
- } else {
1498
- query. as_bytes ( ) . into ( )
1524
+ let mut part_iter = QueryPartIter {
1525
+ is_url_parser : self . context == Context :: UrlParser ,
1526
+ input,
1527
+ violation_fn : self . violation_fn ,
1499
1528
} ;
1500
1529
let set = if scheme_type. is_special ( ) {
1501
1530
SPECIAL_QUERY
1502
1531
} else {
1503
1532
QUERY
1504
1533
} ;
1505
- self . serialization . extend ( percent_encode ( & query_bytes, set) ) ;
1506
- remaining
1534
+ let query_encoding_override = self . query_encoding_override . filter ( |_| {
1535
+ matches ! (
1536
+ & self . serialization[ ..scheme_end as usize ] ,
1537
+ "http" | "https" | "file" | "ftp"
1538
+ )
1539
+ } ) ;
1540
+
1541
+ while let Some ( ( part, is_finished) ) = part_iter. next ( ) {
1542
+ match query_encoding_override {
1543
+ // slightly faster to be repetitive and not convert text to Cow
1544
+ Some ( o) => self . serialization . extend ( percent_encode ( & o ( part) , set) ) ,
1545
+ None => self
1546
+ . serialization
1547
+ . extend ( percent_encode ( part. as_bytes ( ) , set) ) ,
1548
+ }
1549
+ if is_finished {
1550
+ return Some ( part_iter. input ) ;
1551
+ }
1552
+ }
1553
+
1554
+ None
1507
1555
}
1508
1556
1509
1557
fn fragment_only ( mut self , base_url : & Url , mut input : Input < ' _ > ) -> ParseResult < Url > {
@@ -1526,31 +1574,75 @@ impl<'a> Parser<'a> {
1526
1574
} )
1527
1575
}
1528
1576
1529
- pub fn parse_fragment ( & mut self , mut input : Input < ' _ > ) {
1530
- while let Some ( ( c, utf8_c) ) = input. next_utf8 ( ) {
1531
- if c == '\0' {
1532
- self . log_violation ( SyntaxViolation :: NullInFragment )
1533
- } else {
1534
- self . check_url_code_point ( c, & input) ;
1577
+ pub fn parse_fragment ( & mut self , input : Input < ' _ > ) {
1578
+ struct FragmentPartIter < ' i , ' p > {
1579
+ input : Input < ' i > ,
1580
+ violation_fn : Option < & ' p dyn Fn ( SyntaxViolation ) > ,
1581
+ }
1582
+
1583
+ impl < ' i > Iterator for FragmentPartIter < ' i , ' _ > {
1584
+ type Item = & ' i str ;
1585
+
1586
+ fn next ( & mut self ) -> Option < Self :: Item > {
1587
+ let start = self . input . chars . as_str ( ) ;
1588
+ // bypass self.input.next() in order to get string slices
1589
+ // which are faster to operate on
1590
+ while let Some ( c) = self . input . chars . next ( ) {
1591
+ match c {
1592
+ ascii_tab_or_new_line_pattern ! ( ) => {
1593
+ return Some (
1594
+ & start[ ..start. len ( ) - self . input . chars . as_str ( ) . len ( ) - 1 ] ,
1595
+ ) ;
1596
+ }
1597
+ '\0' => {
1598
+ if let Some ( vfn) = & self . violation_fn {
1599
+ vfn ( SyntaxViolation :: NullInFragment ) ;
1600
+ }
1601
+ }
1602
+ c => {
1603
+ if let Some ( vfn) = & self . violation_fn {
1604
+ check_url_code_point ( vfn, c, & self . input ) ;
1605
+ }
1606
+ }
1607
+ }
1608
+ }
1609
+ if start. is_empty ( ) {
1610
+ None
1611
+ } else {
1612
+ Some ( start)
1613
+ }
1535
1614
}
1615
+ }
1616
+
1617
+ let part_iter = FragmentPartIter {
1618
+ input,
1619
+ violation_fn : self . violation_fn ,
1620
+ } ;
1621
+
1622
+ for part in part_iter {
1536
1623
self . serialization
1537
- . extend ( utf8_percent_encode ( utf8_c , FRAGMENT ) ) ;
1624
+ . extend ( utf8_percent_encode ( part , FRAGMENT ) ) ;
1538
1625
}
1539
1626
}
1540
1627
1628
+ #[ inline]
1541
1629
fn check_url_code_point ( & self , c : char , input : & Input < ' _ > ) {
1542
1630
if let Some ( vfn) = self . violation_fn {
1543
- if c == '%' {
1544
- let mut input = input. clone ( ) ;
1545
- if !matches ! ( ( input. next( ) , input. next( ) ) , ( Some ( a) , Some ( b) )
1631
+ check_url_code_point ( vfn, c, input)
1632
+ }
1633
+ }
1634
+ }
1635
+
1636
+ fn check_url_code_point ( vfn : & dyn Fn ( SyntaxViolation ) , c : char , input : & Input < ' _ > ) {
1637
+ if c == '%' {
1638
+ let mut input = input. clone ( ) ;
1639
+ if !matches ! ( ( input. next( ) , input. next( ) ) , ( Some ( a) , Some ( b) )
1546
1640
if a. is_ascii_hexdigit( ) && b. is_ascii_hexdigit( ) )
1547
- {
1548
- vfn ( SyntaxViolation :: PercentDecode )
1549
- }
1550
- } else if !is_url_code_point ( c) {
1551
- vfn ( SyntaxViolation :: NonUrlCodePoint )
1552
- }
1641
+ {
1642
+ vfn ( SyntaxViolation :: PercentDecode )
1553
1643
}
1644
+ } else if !is_url_code_point ( c) {
1645
+ vfn ( SyntaxViolation :: NonUrlCodePoint )
1554
1646
}
1555
1647
}
1556
1648
@@ -1589,7 +1681,7 @@ fn c0_control_or_space(ch: char) -> bool {
1589
1681
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
1590
1682
#[ inline]
1591
1683
fn ascii_tab_or_new_line ( ch : char ) -> bool {
1592
- matches ! ( ch, '\t' | '\r' | '\n' )
1684
+ matches ! ( ch, ascii_tab_or_new_line_pattern! ( ) )
1593
1685
}
1594
1686
1595
1687
/// https://url.spec.whatwg.org/#ascii-alpha
0 commit comments