Fix cookie unquoting regression (#11173)

bdraco · web-flow · commit 85b0df43bf99 · 2025-06-09T21:49:13.000-05:00
diff --git a/CHANGES/11173.bugfix.rst b/CHANGES/11173.bugfix.rst
@@ -0,0 +1 @@
+Fixed cookie unquoting to properly handle octal escape sequences in cookie values (e.g., ``\012`` for newline) by vendoring the correct ``_unquote`` implementation from Python's ``http.cookies`` module -- by :user:`bdraco`.
diff --git a/aiohttp/_cookie_helpers.py b/aiohttp/_cookie_helpers.py
@@ -108,20 +108,49 @@ def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
     return mrsl_val
 
 
-def _unquote(text: str) -> str:
+_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
+
+
+def _unquote_replace(m: re.Match[str]) -> str:
+    """
+    Replace function for _unquote_sub regex substitution.
+
+    Handles escaped characters in cookie values:
+    - Octal sequences are converted to their character representation
+    - Other escaped characters are unescaped by removing the backslash
+    """
+    if m[1]:
+        return chr(int(m[1], 8))
+    return m[2]
+
+
+def _unquote(value: str) -> str:
     """
     Unquote a cookie value.
 
     Vendored from http.cookies._unquote to ensure compatibility.
+
+    Note: The original implementation checked for None, but we've removed
+    that check since all callers already ensure the value is not None.
     """
-    # If there are no quotes, return as-is
-    if len(text) < 2 or text[0] != '"' or text[-1] != '"':
-        return text
-    # Remove quotes and handle escaped characters
-    text = text[1:-1]
-    # Replace escaped quotes and backslashes
-    text = text.replace('\\"', '"').replace("\\\\", "\\")
-    return text
+    # If there aren't any doublequotes,
+    # then there can't be any special characters.  See RFC 2109.
+    if len(value) < 2:
+        return value
+    if value[0] != '"' or value[-1] != '"':
+        return value
+
+    # We have to assume that we must decode this string.
+    # Down to work.
+
+    # Remove the "s
+    value = value[1:-1]
+
+    # Check for special sequences.  Examples:
+    #    \012 --> \n
+    #    \"   --> "
+    #
+    return _unquote_sub(_unquote_replace, value)
 
 
 def parse_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
@@ -363,6 +363,7 @@ uvloop
 uWSGI
 vcvarsall
 vendored
+vendoring
 waituntil
 wakeup
 wakeups
diff --git a/tests/test_cookie_helpers.py b/tests/test_cookie_helpers.py
@@ -1,11 +1,17 @@
 """Tests for internal cookie helper functions."""
 
-from http.cookies import CookieError, Morsel, SimpleCookie
+from http.cookies import (
+    CookieError,
+    Morsel,
+    SimpleCookie,
+    _unquote as simplecookie_unquote,
+)
 
 import pytest
 
 from aiohttp import _cookie_helpers as helpers
 from aiohttp._cookie_helpers import (
+    _unquote,
     parse_cookie_headers,
     preserve_morsel_with_coded_value,
 )
@@ -1029,3 +1035,235 @@ def test_parse_cookie_headers_date_formats_with_attributes() -> None:
     assert result[1][1]["expires"] == "Wednesday, 09-Jun-30 10:18:14 GMT"
     assert result[1][1]["domain"] == ".example.com"
     assert result[1][1]["samesite"] == "Strict"
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # Unquoted strings should remain unchanged
+        ("simple", "simple"),
+        ("with spaces", "with spaces"),
+        ("", ""),
+        ('"', '"'),  # String too short to be quoted
+        ('some"text', 'some"text'),  # Quotes not at beginning/end
+        ('text"with"quotes', 'text"with"quotes'),
+    ],
+)
+def test_unquote_basic(input_str: str, expected: str) -> None:
+    """Test basic _unquote functionality."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # Basic quoted strings
+        ('"quoted"', "quoted"),
+        ('"with spaces"', "with spaces"),
+        ('""', ""),  # Empty quoted string
+        # Quoted string with special characters
+        ('"hello, world!"', "hello, world!"),
+        ('"path=/test"', "path=/test"),
+    ],
+)
+def test_unquote_quoted_strings(input_str: str, expected: str) -> None:
+    """Test _unquote with quoted strings."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # Escaped quotes should be unescaped
+        (r'"say \"hello\""', 'say "hello"'),
+        (r'"nested \"quotes\" here"', 'nested "quotes" here'),
+        # Multiple escaped quotes
+        (r'"\"start\" middle \"end\""', '"start" middle "end"'),
+    ],
+)
+def test_unquote_escaped_quotes(input_str: str, expected: str) -> None:
+    """Test _unquote with escaped quotes."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # Escaped backslashes inside a path
+        (r'"path\\to\\file"', "path\\to\\file"),
+        # Backslash before quote
+        (r'"end with slash\\"', "end with slash\\"),
+        # Mixed escaped characters
+        (r'"path\\to\\\"file\""', 'path\\to\\"file"'),
+    ],
+)
+def test_unquote_escaped_backslashes(input_str: str, expected: str) -> None:
+    """Test _unquote with escaped backslashes."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # Common octal sequences
+        (r'"\012"', "\n"),  # newline
+        (r'"\011"', "\t"),  # tab
+        (r'"\015"', "\r"),  # carriage return
+        (r'"\040"', " "),  # space
+        # Octal sequences in context
+        (r'"line1\012line2"', "line1\nline2"),
+        (r'"tab\011separated"', "tab\tseparated"),
+        # Multiple octal sequences
+        (r'"\012\011\015"', "\n\t\r"),
+        # Mixed octal and regular text
+        (r'"hello\040world\041"', "hello world!"),
+    ],
+)
+def test_unquote_octal_sequences(input_str: str, expected: str) -> None:
+    """Test _unquote with octal escape sequences."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # Test boundary values
+        (r'"\000"', "\x00"),  # null character
+        (r'"\001"', "\x01"),
+        (r'"\177"', "\x7f"),  # DEL character
+        (r'"\200"', "\x80"),  # Extended ASCII
+        (r'"\377"', "\xff"),  # Max octal value
+        # Invalid octal sequences (not 3 digits or > 377) are treated as regular escapes
+        (r'"\400"', "400"),  # 400 octal = 256 decimal, too large
+        (r'"\777"', "777"),  # 777 octal = 511 decimal, too large
+    ],
+)
+def test_unquote_octal_full_range(input_str: str, expected: str) -> None:
+    """Test _unquote at octal range boundaries, including out-of-range values."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # Mix of quotes, backslashes, and octal
+        (r'"say \"hello\"\012new line"', 'say "hello"\nnew line'),
+        (r'"path\\to\\file\011\011data"', "path\\to\\file\t\tdata"),
+        # Complex mixed example
+        (r'"\042quoted\042 and \134backslash\134"', '"quoted" and \\backslash\\'),
+        # Escaped characters that aren't special
+        (r'"\a\b\c"', "abc"),  # \a, \b, \c -> a, b, c
+    ],
+)
+def test_unquote_mixed_escapes(input_str: str, expected: str) -> None:
+    """Test _unquote with mixed escape sequences."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # String that starts with quote but doesn't end with one
+        ('"not closed', '"not closed'),
+        # String that ends with quote but doesn't start with one
+        ('not opened"', 'not opened"'),
+        # Multiple quotes
+        ('"""', '"'),
+        ('""""', '""'),
+        # Backslash at the end without anything to escape
+        (r'"ends with\"', "ends with\\"),
+        # Trailing backslash: the last quote closes the value, so nothing is escaped
+        (r'"test\"', "test\\"),
+        # Just escaped characters
+        (r'"\"\"\""', '"""'),
+    ],
+)
+def test_unquote_edge_cases(input_str: str, expected: str) -> None:
+    """Test _unquote edge cases."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    ("input_str", "expected"),
+    [
+        # JSON-like data
+        (r'"{\"user\":\"john\",\"id\":123}"', '{"user":"john","id":123}'),
+        # URL-encoded then quoted
+        ('"hello%20world"', "hello%20world"),
+        # Path with backslashes (Windows-style)
+        (r'"C:\\Users\\John\\Documents"', "C:\\Users\\John\\Documents"),
+        # Complex session data
+        (
+            r'"session_data=\"user123\";expires=2024"',
+            'session_data="user123";expires=2024',
+        ),
+    ],
+)
+def test_unquote_real_world_examples(input_str: str, expected: str) -> None:
+    """Test _unquote with real-world cookie value examples."""
+    assert _unquote(input_str) == expected
+
+
+@pytest.mark.parametrize(
+    "test_value",
+    [
+        '""',
+        '"simple"',
+        r'"with \"quotes\""',
+        r'"with \\backslash\\"',
+        r'"\012newline"',
+        r'"complex\042quote\134slash\012"',
+        '"not-quoted',
+        'also-not-quoted"',
+        r'"mixed\011\042\134test"',
+    ],
+)
+def test_unquote_compatibility_with_simplecookie(test_value: str) -> None:
+    """Test that _unquote behaves like SimpleCookie's unquoting."""
+    assert _unquote(test_value) == simplecookie_unquote(test_value), (
+        f"Mismatch for {test_value!r}: "
+        f"our={_unquote(test_value)!r}, "
+        f"SimpleCookie={simplecookie_unquote(test_value)!r}"
+    )
+
+
+@pytest.mark.parametrize(
+    ("header", "expected_name", "expected_value", "expected_coded"),
+    [
+        # Test cookie values with octal escape sequences
+        (r'name="\012newline\012"', "name", "\nnewline\n", r'"\012newline\012"'),
+        (
+            r'tab="\011separated\011values"',
+            "tab",
+            "\tseparated\tvalues",
+            r'"\011separated\011values"',
+        ),
+        (
+            r'mixed="hello\040world\041"',
+            "mixed",
+            "hello world!",
+            r'"hello\040world\041"',
+        ),
+        (
+            r'complex="\042quoted\042 text with \012 newline"',
+            "complex",
+            '"quoted" text with \n newline',
+            r'"\042quoted\042 text with \012 newline"',
+        ),
+    ],
+)
+def test_parse_cookie_headers_uses_unquote_with_octal(
+    header: str, expected_name: str, expected_value: str, expected_coded: str
+) -> None:
+    """Test that parse_cookie_headers correctly unquotes values with octal sequences and preserves coded_value."""
+    result = parse_cookie_headers([header])
+
+    assert len(result) == 1
+    name, morsel = result[0]
+
+    # Check that octal sequences were properly decoded in the value
+    assert name == expected_name
+    assert morsel.value == expected_value
+
+    # Check that coded_value preserves the original quoted string
+    assert morsel.coded_value == expected_coded

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Fixed cookie unquoting to properly handle octal escape sequences in cookie values (e.g., ``\012`` for newline) by vendoring the correct ``_unquote`` implementation from Python's ``http.cookies`` module -- by :user:`bdraco`.