Skip to content

Commit e75b0d9

Browse files
Add new regular expressions for Chunked Encoding
This also moves some regular expressions for QUOTED_PAIR/QUOTED_STRING into this module from utilities so that they may be reused.
1 parent 22c0394 commit e75b0d9

File tree

2 files changed

+29
-26
lines changed

2 files changed

+29
-26
lines changed

src/waitress/rfc7230.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
import re
77

8+
# Regex character class matching a single hexadecimal digit, in either case.
HEXDIG = "[0-9a-fA-F]"
9+
# Regex character class matching a single decimal digit.
DIGIT = "[0-9]"
10+
811
WS = "[ \t]"
912
OWS = WS + "{0,}?"
1013
RWS = WS + "{1,}?"
@@ -25,6 +28,12 @@
2528
# ; visible (printing) characters
2629
VCHAR = r"\x21-\x7e"
2730

31+
# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
32+
# qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
# The ranges deliberately skip DQUOTE (%x22) and backslash (%x5C); inside a
# quoted-string those two can only appear as part of a quoted-pair.
QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]"
33+
34+
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
# The escaped character is a capturing group, so substituting r"\1" for a
# match drops the backslash and keeps the character itself.
QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])"
35+
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"'
36+
2837
# header-field = field-name ":" OWS field-value OWS
2938
# field-name = token
3039
# field-value = *( field-content / obs-fold )
@@ -43,8 +52,24 @@
4352
# Which allows the field value here to just see if there is even a value in the first place
4453
FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"
4554

46-
HEADER_FIELD = re.compile(
55+
# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
56+
# chunk-ext-name = token
57+
# chunk-ext-val = token / quoted-string
58+
59+
# Same pattern as TOKEN, named after the chunk-ext-name grammar rule.
CHUNK_EXT_NAME = TOKEN
60+
# chunk-ext-val = token / quoted-string
# The alternation is wrapped in its own non-capturing group so this fragment
# can be concatenated with other pattern pieces (anchors, prefixes) without
# the "|" splitting the surrounding expression.
CHUNK_EXT_VAL = "(?:(?:" + TOKEN + ")|(?:" + QUOTED_STRING + "))"
61+
# Zero or more ";name" or ";name=value" chunk extensions.
# The named groups sit inside the repetition, so after a successful match
# they hold only the last extension that was seen.
CHUNK_EXT = "".join(
    (
        "(?:;",
        "(?P<extension>",
        CHUNK_EXT_NAME,
        ")",
        "(?:=(?P<value>",
        CHUNK_EXT_VAL,
        "))?",
        ")*",
    )
)
64+
65+
# Pre-compiled regular expressions for use elsewhere
66+
# Matches a byte string consisting solely of hexadecimal digits.
# Anchored with \Z instead of "$": "$" also matches just before a trailing
# newline, which would let b"1a\n" pass as a valid hex number.
ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + r"+\Z").encode("latin-1"))
67+
# Matches a byte string consisting solely of decimal digits.
# Anchored with \Z instead of "$": "$" also matches just before a trailing
# newline, which would let b"42\n" pass as a valid number.
ONLY_DIGIT_RE = re.compile(("^" + DIGIT + r"+\Z").encode("latin-1"))
68+
# One complete header line as bytes: a token name, ":", then an optional
# value with optional whitespace on either side. The name and value are
# exposed as the named groups "name" and "value".
# Anchored with \Z instead of "$" so that a line ending in a bare newline is
# rejected rather than silently accepted.
HEADER_FIELD_RE = re.compile(
    (
        "^(?P<name>"
        + TOKEN
        + "):"
        + OWS
        + "(?P<value>"
        + FIELD_VALUE
        + ")"
        + OWS
        + r"\Z"
    ).encode("latin-1")
)
73+
# Compiled as a str pattern (not encoded to bytes like the patterns above);
# utilities.undquote uses it to strip the backslash from each quoted-pair.
QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
74+
# Compiled as a str pattern (not encoded to bytes like the patterns above);
# utilities.undquote uses it to validate a value that claims to be quoted.
QUOTED_STRING_RE = re.compile(QUOTED_STRING)
75+
# Matches a complete chunk-ext byte string (possibly empty).
# Anchored with \Z instead of "$": because CHUNK_EXT can match nothing,
# "^...$" would accept a lone b"\n"; \Z only matches at the true end.
CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + r"\Z").encode("latin-1"))

src/waitress/utilities.py

+3-25
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import stat
2323
import time
2424

25-
from .rfc7230 import OBS_TEXT, VCHAR
25+
from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE
2626

2727
logger = logging.getLogger("waitress")
2828
queue_logger = logging.getLogger("waitress.queue")
@@ -216,40 +216,18 @@ def parse_http_date(d):
216216
return retval
217217

218218

219-
# RFC 5234 Appendix B.1 "Core Rules":
220-
# VCHAR = %x21-7E
221-
# ; visible (printing) characters
222-
vchar_re = VCHAR
223-
224-
# RFC 7230 Section 3.2.6 "Field Value Components":
225-
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
226-
# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
227-
# obs-text = %x80-FF
228-
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
229-
obs_text_re = OBS_TEXT
230-
231-
# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
232-
qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"
233-
234-
quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
235-
quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'
236-
237-
quoted_string = re.compile(quoted_string_re)
238-
quoted_pair = re.compile(quoted_pair_re)
239-
240-
241219
def undquote(value):
242220
if value.startswith('"') and value.endswith('"'):
243221
# So it claims to be DQUOTE'ed, let's validate that
244-
matches = quoted_string.match(value)
222+
matches = QUOTED_STRING_RE.match(value)
245223

246224
if matches and matches.end() == len(value):
247225
# Remove the DQUOTE's from the value
248226
value = value[1:-1]
249227

250228
# Remove all backslashes that are followed by a valid vchar or
251229
# obs-text
252-
value = quoted_pair.sub(r"\1", value)
230+
value = QUOTED_PAIR_RE.sub(r"\1", value)
253231

254232
return value
255233
elif not value.startswith('"') and not value.endswith('"'):

0 commit comments

Comments
 (0)