Skip to content

Commit 85b0df4

Browse files
authored
Fix cookie unquoting regression (#11173)
1 parent 311ee1f commit 85b0df4

File tree

4 files changed

+279
-10
lines changed

4 files changed

+279
-10
lines changed

CHANGES/11173.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed cookie unquoting to properly handle octal escape sequences in cookie values (e.g., ``\012`` for newline) by vendoring the correct ``_unquote`` implementation from Python's ``http.cookies`` module -- by :user:`bdraco`.

aiohttp/_cookie_helpers.py

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,20 +108,49 @@ def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
108108
return mrsl_val
109109

110110

111-
def _unquote(text: str) -> str:
111+
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
112+
113+
114+
def _unquote_replace(m: re.Match[str]) -> str:
115+
"""
116+
Replace function for _unquote_sub regex substitution.
117+
118+
Handles escaped characters in cookie values:
119+
- Octal sequences are converted to their character representation
120+
- Other escaped characters are unescaped by removing the backslash
121+
"""
122+
if m[1]:
123+
return chr(int(m[1], 8))
124+
return m[2]
125+
126+
127+
def _unquote(value: str) -> str:
112128
"""
113129
Unquote a cookie value.
114130
115131
Vendored from http.cookies._unquote to ensure compatibility.
132+
133+
Note: The original implementation checked for None, but we've removed
134+
that check since all callers already ensure the value is not None.
116135
"""
117-
# If there are no quotes, return as-is
118-
if len(text) < 2 or text[0] != '"' or text[-1] != '"':
119-
return text
120-
# Remove quotes and handle escaped characters
121-
text = text[1:-1]
122-
# Replace escaped quotes and backslashes
123-
text = text.replace('\\"', '"').replace("\\\\", "\\")
124-
return text
136+
# If there aren't any doublequotes,
137+
# then there can't be any special characters. See RFC 2109.
138+
if len(value) < 2:
139+
return value
140+
if value[0] != '"' or value[-1] != '"':
141+
return value
142+
143+
# We have to assume that we must decode this string.
144+
# Down to work.
145+
146+
# Remove the surrounding double quotes
147+
value = value[1:-1]
148+
149+
# Check for special sequences. Examples:
150+
# \012 --> \n
151+
# \" --> "
152+
#
153+
return _unquote_sub(_unquote_replace, value)
125154

126155

127156
def parse_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:

docs/spelling_wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ uvloop
363363
uWSGI
364364
vcvarsall
365365
vendored
366+
vendoring
366367
waituntil
367368
wakeup
368369
wakeups

tests/test_cookie_helpers.py

Lines changed: 239 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
"""Tests for internal cookie helper functions."""
22

3-
from http.cookies import CookieError, Morsel, SimpleCookie
3+
from http.cookies import (
4+
CookieError,
5+
Morsel,
6+
SimpleCookie,
7+
_unquote as simplecookie_unquote,
8+
)
49

510
import pytest
611

712
from aiohttp import _cookie_helpers as helpers
813
from aiohttp._cookie_helpers import (
14+
_unquote,
915
parse_cookie_headers,
1016
preserve_morsel_with_coded_value,
1117
)
@@ -1029,3 +1035,235 @@ def test_parse_cookie_headers_date_formats_with_attributes() -> None:
10291035
assert result[1][1]["expires"] == "Wednesday, 09-Jun-30 10:18:14 GMT"
10301036
assert result[1][1]["domain"] == ".example.com"
10311037
assert result[1][1]["samesite"] == "Strict"
1038+
1039+
1040+
@pytest.mark.parametrize(
1041+
("input_str", "expected"),
1042+
[
1043+
# Unquoted strings should remain unchanged
1044+
("simple", "simple"),
1045+
("with spaces", "with spaces"),
1046+
("", ""),
1047+
('"', '"'), # String too short to be quoted
1048+
('some"text', 'some"text'), # Quotes not at beginning/end
1049+
('text"with"quotes', 'text"with"quotes'),
1050+
],
1051+
)
1052+
def test_unquote_basic(input_str: str, expected: str) -> None:
1053+
"""Test basic _unquote functionality."""
1054+
assert _unquote(input_str) == expected
1055+
1056+
1057+
@pytest.mark.parametrize(
1058+
("input_str", "expected"),
1059+
[
1060+
# Basic quoted strings
1061+
('"quoted"', "quoted"),
1062+
('"with spaces"', "with spaces"),
1063+
('""', ""), # Empty quoted string
1064+
# Quoted string with special characters
1065+
('"hello, world!"', "hello, world!"),
1066+
('"path=/test"', "path=/test"),
1067+
],
1068+
)
1069+
def test_unquote_quoted_strings(input_str: str, expected: str) -> None:
1070+
"""Test _unquote with quoted strings."""
1071+
assert _unquote(input_str) == expected
1072+
1073+
1074+
@pytest.mark.parametrize(
1075+
("input_str", "expected"),
1076+
[
1077+
# Escaped quotes should be unescaped
1078+
(r'"say \"hello\""', 'say "hello"'),
1079+
(r'"nested \"quotes\" here"', 'nested "quotes" here'),
1080+
# Multiple escaped quotes
1081+
(r'"\"start\" middle \"end\""', '"start" middle "end"'),
1082+
],
1083+
)
1084+
def test_unquote_escaped_quotes(input_str: str, expected: str) -> None:
1085+
"""Test _unquote with escaped quotes."""
1086+
assert _unquote(input_str) == expected
1087+
1088+
1089+
@pytest.mark.parametrize(
1090+
("input_str", "expected"),
1091+
[
1092+
# Escaped backslashes used as path separators
1093+
(r'"path\\to\\file"', "path\\to\\file"),
1094+
# Backslash before quote
1095+
(r'"end with slash\\"', "end with slash\\"),
1096+
# Mixed escaped characters
1097+
(r'"path\\to\\\"file\""', 'path\\to\\"file"'),
1098+
],
1099+
)
1100+
def test_unquote_escaped_backslashes(input_str: str, expected: str) -> None:
1101+
"""Test _unquote with escaped backslashes."""
1102+
assert _unquote(input_str) == expected
1103+
1104+
1105+
@pytest.mark.parametrize(
1106+
("input_str", "expected"),
1107+
[
1108+
# Common octal sequences
1109+
(r'"\012"', "\n"), # newline
1110+
(r'"\011"', "\t"), # tab
1111+
(r'"\015"', "\r"), # carriage return
1112+
(r'"\040"', " "), # space
1113+
# Octal sequences in context
1114+
(r'"line1\012line2"', "line1\nline2"),
1115+
(r'"tab\011separated"', "tab\tseparated"),
1116+
# Multiple octal sequences
1117+
(r'"\012\011\015"', "\n\t\r"),
1118+
# Mixed octal and regular text
1119+
(r'"hello\040world\041"', "hello world!"),
1120+
],
1121+
)
1122+
def test_unquote_octal_sequences(input_str: str, expected: str) -> None:
1123+
"""Test _unquote with octal escape sequences."""
1124+
assert _unquote(input_str) == expected
1125+
1126+
1127+
@pytest.mark.parametrize(
1128+
("input_str", "expected"),
1129+
[
1130+
# Test boundary values
1131+
(r'"\000"', "\x00"), # null character
1132+
(r'"\001"', "\x01"),
1133+
(r'"\177"', "\x7f"), # DEL character
1134+
(r'"\200"', "\x80"), # Extended ASCII
1135+
(r'"\377"', "\xff"), # Max octal value
1136+
# Invalid octal sequences (not 3 digits or > 377) are treated as regular escapes: only the backslash is removed
1137+
(r'"\400"', "400"), # 400 octal = 256 decimal, too large
1138+
(r'"\777"', "777"), # 777 octal = 511 decimal, too large
1139+
],
1140+
)
1141+
def test_unquote_octal_full_range(input_str: str, expected: str) -> None:
1142+
"""Test _unquote at the octal range boundaries, including out-of-range sequences."""
1143+
assert _unquote(input_str) == expected
1144+
1145+
1146+
@pytest.mark.parametrize(
1147+
("input_str", "expected"),
1148+
[
1149+
# Mix of quotes, backslashes, and octal
1150+
(r'"say \"hello\"\012new line"', 'say "hello"\nnew line'),
1151+
(r'"path\\to\\file\011\011data"', "path\\to\\file\t\tdata"),
1152+
# Complex mixed example
1153+
(r'"\042quoted\042 and \134backslash\134"', '"quoted" and \\backslash\\'),
1154+
# Escaped characters that aren't special
1155+
(r'"\a\b\c"', "abc"), # \a, \b, \c -> a, b, c
1156+
],
1157+
)
1158+
def test_unquote_mixed_escapes(input_str: str, expected: str) -> None:
1159+
"""Test _unquote with mixed escape sequences."""
1160+
assert _unquote(input_str) == expected
1161+
1162+
1163+
@pytest.mark.parametrize(
1164+
("input_str", "expected"),
1165+
[
1166+
# String that starts with quote but doesn't end with one
1167+
('"not closed', '"not closed'),
1168+
# String that ends with quote but doesn't start with one
1169+
('not opened"', 'not opened"'),
1170+
# Multiple quotes
1171+
('"""', '"'),
1172+
('""""', '""'),
1173+
# Backslash at the end without anything to escape
1174+
(r'"ends with\"', "ends with\\"),
1175+
# Trailing backslash with no character left to escape is kept as-is
1176+
(r'"test\"', "test\\"),
1177+
# Just escaped characters
1178+
(r'"\"\"\""', '"""'),
1179+
],
1180+
)
1181+
def test_unquote_edge_cases(input_str: str, expected: str) -> None:
1182+
"""Test _unquote edge cases."""
1183+
assert _unquote(input_str) == expected
1184+
1185+
1186+
@pytest.mark.parametrize(
1187+
("input_str", "expected"),
1188+
[
1189+
# JSON-like data
1190+
(r'"{\"user\":\"john\",\"id\":123}"', '{"user":"john","id":123}'),
1191+
# URL-encoded then quoted
1192+
('"hello%20world"', "hello%20world"),
1193+
# Path with backslashes (Windows-style)
1194+
(r'"C:\\Users\\John\\Documents"', "C:\\Users\\John\\Documents"),
1195+
# Complex session data
1196+
(
1197+
r'"session_data=\"user123\";expires=2024"',
1198+
'session_data="user123";expires=2024',
1199+
),
1200+
],
1201+
)
1202+
def test_unquote_real_world_examples(input_str: str, expected: str) -> None:
1203+
"""Test _unquote with real-world cookie value examples."""
1204+
assert _unquote(input_str) == expected
1205+
1206+
1207+
@pytest.mark.parametrize(
1208+
"test_value",
1209+
[
1210+
'""',
1211+
'"simple"',
1212+
r'"with \"quotes\""',
1213+
r'"with \\backslash\\"',
1214+
r'"\012newline"',
1215+
r'"complex\042quote\134slash\012"',
1216+
'"not-quoted',
1217+
'also-not-quoted"',
1218+
r'"mixed\011\042\134test"',
1219+
],
1220+
)
1221+
def test_unquote_compatibility_with_simplecookie(test_value: str) -> None:
1222+
"""Test that _unquote behaves like SimpleCookie's unquoting."""
1223+
assert _unquote(test_value) == simplecookie_unquote(test_value), (
1224+
f"Mismatch for {test_value!r}: "
1225+
f"our={_unquote(test_value)!r}, "
1226+
f"SimpleCookie={simplecookie_unquote(test_value)!r}"
1227+
)
1228+
1229+
1230+
@pytest.mark.parametrize(
1231+
("header", "expected_name", "expected_value", "expected_coded"),
1232+
[
1233+
# Test cookie values with octal escape sequences
1234+
(r'name="\012newline\012"', "name", "\nnewline\n", r'"\012newline\012"'),
1235+
(
1236+
r'tab="\011separated\011values"',
1237+
"tab",
1238+
"\tseparated\tvalues",
1239+
r'"\011separated\011values"',
1240+
),
1241+
(
1242+
r'mixed="hello\040world\041"',
1243+
"mixed",
1244+
"hello world!",
1245+
r'"hello\040world\041"',
1246+
),
1247+
(
1248+
r'complex="\042quoted\042 text with \012 newline"',
1249+
"complex",
1250+
'"quoted" text with \n newline',
1251+
r'"\042quoted\042 text with \012 newline"',
1252+
),
1253+
],
1254+
)
1255+
def test_parse_cookie_headers_uses_unquote_with_octal(
1256+
header: str, expected_name: str, expected_value: str, expected_coded: str
1257+
) -> None:
1258+
"""Test that parse_cookie_headers correctly unquotes values with octal sequences and preserves coded_value."""
1259+
result = parse_cookie_headers([header])
1260+
1261+
assert len(result) == 1
1262+
name, morsel = result[0]
1263+
1264+
# Check that octal sequences were properly decoded in the value
1265+
assert name == expected_name
1266+
assert morsel.value == expected_value
1267+
1268+
# Check that coded_value preserves the original quoted string
1269+
assert morsel.coded_value == expected_coded

0 commit comments

Comments
 (0)