-
-
Notifications
You must be signed in to change notification settings - Fork 31.8k
gh-105704: Disallow IPv6 URLs with invalid prefix/suffix #111261
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1402,16 +1402,34 @@ def test_issue14072(self): | |
self.assertEqual(p2.path, '+31641044153') | ||
|
||
def test_invalid_bracketed_hosts(self): | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') | ||
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') | ||
cases = [ | ||
'Scheme://user@[192.0.2.146]/Path?Query', | ||
'Scheme://user@[important.com:8000]/Path?Query', | ||
'Scheme://user@[v123r.IP]/Path?Query', | ||
'Scheme://user@[v12ae]/Path?Query', | ||
'Scheme://user@[v.IP]/Path?Query', | ||
'Scheme://user@[v123.]/Path?Query', | ||
'Scheme://user@[v]/Path?Query', | ||
'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query', | ||
'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query', | ||
'Scheme://user@]v6a.ip[/Path', | ||
'Scheme://user@[v6a.ip/path?query', | ||
'Scheme://[email protected]]/path?query', | ||
'Scheme://user@prefix.[v6a.ip]/path?query', | ||
'Scheme://user@[v6a.ip].suffix/path?query', | ||
] | ||
|
||
for case in cases: | ||
with self.subTest(case=case): | ||
with self.assertRaises(ValueError): | ||
urllib.parse.urlsplit(case).hostname | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously urlsplit raised the ValueError. This PR as is changes that and only raises it upon hostname or port attribute access. We need to continue raising the ValueError at urlsplit time. Existing code must be assumed to expect that split time validation behavior rather than at attribute access time. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, @gpshead. I updated the code to do the check at urlsplit time. |
||
with self.assertRaises(ValueError): | ||
urllib.parse.urlparse(case).hostname | ||
bytes_case = case.encode('utf8') | ||
with self.assertRaises(ValueError): | ||
urllib.parse.urlsplit(bytes_case).hostname | ||
with self.assertRaises(ValueError): | ||
urllib.parse.urlparse(bytes_case).hostname | ||
|
||
def test_splitting_bracketed_hosts(self): | ||
p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -206,10 +206,15 @@ def _userinfo(self): | |
def _hostinfo(self): | ||
netloc = self.netloc | ||
_, _, hostinfo = netloc.rpartition('@') | ||
_, have_open_br, bracketed = hostinfo.partition('[') | ||
bracket_prefix, have_open_br, bracketed = hostinfo.partition('[') | ||
if have_open_br: | ||
if bracket_prefix: | ||
raise ValueError('Invalid IPv6 URL') | ||
hostname, _, port = bracketed.partition(']') | ||
_, _, port = port.partition(':') | ||
_check_bracketed_host(hostname) | ||
bracket_suffix, _, port = port.partition(':') | ||
if bracket_suffix: | ||
raise ValueError('Invalid IPv6 URL') | ||
else: | ||
hostname, _, port = hostinfo.partition(':') | ||
if not port: | ||
|
@@ -236,10 +241,15 @@ def _userinfo(self): | |
def _hostinfo(self): | ||
netloc = self.netloc | ||
_, _, hostinfo = netloc.rpartition(b'@') | ||
_, have_open_br, bracketed = hostinfo.partition(b'[') | ||
bracket_prefix, have_open_br, bracketed = hostinfo.partition(b'[') | ||
if have_open_br: | ||
if bracket_prefix: | ||
raise ValueError('Invalid IPv6 URL') | ||
hostname, _, port = bracketed.partition(b']') | ||
_, _, port = port.partition(b':') | ||
_check_bracketed_host(hostname.decode(_implicit_encoding, _implicit_errors)) | ||
bracket_suffix, _, port = port.partition(b':') | ||
if bracket_suffix: | ||
raise ValueError('Invalid IPv6 URL') | ||
else: | ||
hostname, _, port = hostinfo.partition(b':') | ||
if not port: | ||
|
@@ -504,9 +514,6 @@ def _urlsplit(url, scheme=None, allow_fragments=True): | |
if (('[' in netloc and ']' not in netloc) or | ||
(']' in netloc and '[' not in netloc)): | ||
raise ValueError("Invalid IPv6 URL") | ||
if '[' in netloc and ']' in netloc: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be kept and updated to error appropriately as callers of urlsplit and urlparse expect the exception at parsing time. Not later on when accessing attributes. |
||
bracketed_host = netloc.partition('[')[2].partition(']')[0] | ||
_check_bracketed_host(bracketed_host) | ||
if allow_fragments and '#' in url: | ||
url, fragment = url.split('#', 1) | ||
if '?' in url: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This refactoring, personally makes it hard to review. Could you please share the description, which existing test cases behavior are changing now (if any)? If you don't get to it, I will try to find out too. (But this can be considered for future refactors of test code).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure - here are the differences in the tests:
If I need to split the refactoring into a separate PR, I can do that.