Skip to content

Commit 217295b

Browse files
committed
http1connection: Make content-length parsing more strict
Content-length and chunk size parsing now strictly matches the RFCs. We previously used the python int() function which accepted leading plus signs and internal underscores, which are not allowed by the HTTP RFCs (it also accepts minus signs, but these are less problematic in this context since they'd result in errors elsewhere) It is important to fix this because when combined with certain proxies, the lax parsing could result in a request smuggling vulnerability (if both Tornado and the proxy accepted an invalid content-length but interpreted it differently). This is known to occur with old versions of haproxy, although the current version of haproxy is unaffected.
1 parent e3aa6c5 commit 217295b

File tree

2 files changed

+115
-17
lines changed

2 files changed

+115
-17
lines changed

tornado/http1connection.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ def write_headers(
442442
):
443443
self._expected_content_remaining = 0
444444
elif "Content-Length" in headers:
445-
self._expected_content_remaining = int(headers["Content-Length"])
445+
self._expected_content_remaining = parse_int(headers["Content-Length"])
446446
else:
447447
self._expected_content_remaining = None
448448
# TODO: headers are supposed to be of type str, but we still have some
@@ -618,7 +618,7 @@ def _read_body(
618618
headers["Content-Length"] = pieces[0]
619619

620620
try:
621-
content_length = int(headers["Content-Length"]) # type: Optional[int]
621+
content_length: Optional[int] = parse_int(headers["Content-Length"])
622622
except ValueError:
623623
# Handles non-integer Content-Length value.
624624
raise httputil.HTTPInputError(
@@ -668,7 +668,10 @@ async def _read_chunked_body(self, delegate: httputil.HTTPMessageDelegate) -> No
668668
total_size = 0
669669
while True:
670670
chunk_len_str = await self.stream.read_until(b"\r\n", max_bytes=64)
671-
chunk_len = int(chunk_len_str.strip(), 16)
671+
try:
672+
chunk_len = parse_hex_int(native_str(chunk_len_str[:-2]))
673+
except ValueError:
674+
raise httputil.HTTPInputError("invalid chunk size")
672675
if chunk_len == 0:
673676
crlf = await self.stream.read_bytes(2)
674677
if crlf != b"\r\n":
@@ -842,3 +845,21 @@ async def _server_request_loop(
842845
await asyncio.sleep(0)
843846
finally:
844847
delegate.on_close(self)
848+
849+
850+
DIGITS = re.compile(r"[0-9]+")
851+
HEXDIGITS = re.compile(r"[0-9a-fA-F]+")
852+
853+
854+
def parse_int(s: str) -> int:
855+
"""Parse a non-negative integer from a string."""
856+
if DIGITS.fullmatch(s) is None:
857+
raise ValueError("not an integer: %r" % s)
858+
return int(s)
859+
860+
861+
def parse_hex_int(s: str) -> int:
862+
"""Parse a non-negative hexadecimal integer from a string."""
863+
if HEXDIGITS.fullmatch(s) is None:
864+
raise ValueError("not a hexadecimal integer: %r" % s)
865+
return int(s, 16)

tornado/test/httpserver_test.py

+91-14
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
)
1919
from tornado.iostream import IOStream
2020
from tornado.locks import Event
21-
from tornado.log import gen_log
21+
from tornado.log import gen_log, app_log
2222
from tornado.netutil import ssl_options_to_context
2323
from tornado.simple_httpclient import SimpleAsyncHTTPClient
2424
from tornado.testing import (
@@ -41,6 +41,7 @@
4141
import ssl
4242
import sys
4343
import tempfile
44+
import textwrap
4445
import unittest
4546
import urllib.parse
4647
from io import BytesIO
@@ -118,7 +119,7 @@ class SSLTestMixin(object):
118119
def get_ssl_options(self):
119120
return dict(
120121
ssl_version=self.get_ssl_version(),
121-
**AsyncHTTPSTestCase.default_ssl_options()
122+
**AsyncHTTPSTestCase.default_ssl_options(),
122123
)
123124

124125
def get_ssl_version(self):
@@ -558,23 +559,59 @@ def test_chunked_request_uppercase(self):
558559
)
559560
self.assertEqual(json_decode(response), {"foo": ["bar"]})
560561

561-
@gen_test
562-
def test_invalid_content_length(self):
563-
with ExpectLog(
564-
gen_log, ".*Only integer Content-Length is allowed", level=logging.INFO
565-
):
566-
self.stream.write(
567-
b"""\
562+
def test_chunked_request_body_invalid_size(self):
563+
# Only hex digits are allowed in chunk sizes. Python's int() function
564+
# also accepts underscores, so make sure we reject them here.
565+
self.stream.write(
566+
b"""\
568567
POST /echo HTTP/1.1
569-
Content-Length: foo
568+
Transfer-Encoding: chunked
570569
571-
bar
570+
1_a
571+
1234567890abcdef1234567890
572+
0
572573
573574
""".replace(
574-
b"\n", b"\r\n"
575-
)
575+
b"\n", b"\r\n"
576576
)
577-
yield self.stream.read_until_close()
577+
)
578+
start_line, headers, response = self.io_loop.run_sync(
579+
lambda: read_stream_body(self.stream)
580+
)
581+
self.assertEqual(400, start_line.code)
582+
583+
@gen_test
584+
def test_invalid_content_length(self):
585+
# HTTP only allows decimal digits in content-length. Make sure we don't
586+
# accept anything else, with special attention to things accepted by the
587+
# python int() function (leading plus signs and internal underscores).
588+
test_cases = [
589+
("alphabetic", "foo"),
590+
("leading plus", "+10"),
591+
("internal underscore", "1_0"),
592+
]
593+
for name, value in test_cases:
594+
with self.subTest(name=name), closing(IOStream(socket.socket())) as stream:
595+
with ExpectLog(
596+
gen_log,
597+
".*Only integer Content-Length is allowed",
598+
level=logging.INFO,
599+
):
600+
yield stream.connect(("127.0.0.1", self.get_http_port()))
601+
stream.write(
602+
utf8(
603+
textwrap.dedent(
604+
f"""\
605+
POST /echo HTTP/1.1
606+
Content-Length: {value}
607+
Connection: close
608+
609+
1234567890
610+
"""
611+
).replace("\n", "\r\n")
612+
)
613+
)
614+
yield stream.read_until_close()
578615

579616

580617
class XHeaderTest(HandlerBaseTestCase):
@@ -1123,6 +1160,46 @@ def body_producer(write):
11231160
)
11241161

11251162

1163+
class InvalidOutputContentLengthTest(AsyncHTTPTestCase):
1164+
class MessageDelegate(HTTPMessageDelegate):
1165+
def __init__(self, connection):
1166+
self.connection = connection
1167+
1168+
def headers_received(self, start_line, headers):
1169+
content_lengths = {
1170+
"normal": "10",
1171+
"alphabetic": "foo",
1172+
"leading plus": "+10",
1173+
"underscore": "1_0",
1174+
}
1175+
self.connection.write_headers(
1176+
ResponseStartLine("HTTP/1.1", 200, "OK"),
1177+
HTTPHeaders({"Content-Length": content_lengths[headers["x-test"]]}),
1178+
)
1179+
self.connection.write(b"1234567890")
1180+
self.connection.finish()
1181+
1182+
def get_app(self):
1183+
class App(HTTPServerConnectionDelegate):
1184+
def start_request(self, server_conn, request_conn):
1185+
return InvalidOutputContentLengthTest.MessageDelegate(request_conn)
1186+
1187+
return App()
1188+
1189+
def test_invalid_output_content_length(self):
1190+
with self.subTest("normal"):
1191+
response = self.fetch("/", method="GET", headers={"x-test": "normal"})
1192+
response.rethrow()
1193+
self.assertEqual(response.body, b"1234567890")
1194+
for test in ["alphabetic", "leading plus", "underscore"]:
1195+
with self.subTest(test):
1196+
# This log matching could be tighter but I think I'm already
1197+
# over-testing here.
1198+
with ExpectLog(app_log, "Uncaught exception"):
1199+
with self.assertRaises(HTTPError):
1200+
self.fetch("/", method="GET", headers={"x-test": test})
1201+
1202+
11261203
class MaxHeaderSizeTest(AsyncHTTPTestCase):
11271204
def get_app(self):
11281205
return Application([("/", HelloWorldRequestHandler)])

0 commit comments

Comments
 (0)