Skip to content

Commit 2d4a3fe

Browse files
committed
Improve performance of sub-authority splitting in URL
1 parent 2698537 commit 2d4a3fe

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

src/urllib3/util/url.py

+5 -3
Original file line number | Diff line number | Diff line change
@@ -63,12 +63,12 @@
6363
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
6464
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
6565

66-
SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
66+
_HOST_PORT_PAT = ("^(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
6767
REG_NAME_PAT,
6868
IPV4_PAT,
6969
IPV6_ADDRZ_PAT,
7070
)
71-
SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL)
71+
_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
7272

7373
UNRESERVED_CHARS = set(
7474
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
@@ -365,7 +365,9 @@ def parse_url(url):
365365
scheme = scheme.lower()
366366

367367
if authority:
368-
auth, host, port = SUBAUTHORITY_RE.match(authority).groups()
368+
auth, _, host_port = authority.rpartition("@")
369+
auth = auth or None
370+
host, port = _HOST_PORT_RE.match(host_port).groups()
369371
if auth and normalize_uri:
370372
auth = _encode_invalid_chars(auth, USERINFO_CHARS)
371373
if port == "":

test/test_util.py

+10
Original file line number | Diff line number | Diff line change
@@ -438,6 +438,16 @@ def test_netloc(self, url, expected_netloc):
438438
fragment="hash",
439439
),
440440
),
441+
# Tons of '@' causing backtracking
442+
("https://" + ("@" * 10000) + "[", False),
443+
(
444+
"https://user:" + ("@" * 10000) + "example.com",
445+
Url(
446+
scheme="https",
447+
auth="user:" + ("%40" * 9999),
448+
host="example.com",
449+
),
450+
),
441451
]
442452

443453
@pytest.mark.parametrize("url, expected_url", url_vulnerabilities)

0 commit comments

Comments
 (0)