Skip to content

Commit a94803d

Browse files
barneygale authored and ebonnal committed
pythonGH-127236: pathname2url(): generate RFC 1738 URL for absolute POSIX path (python#127194)
When handed an absolute Windows path such as `C:\foo` or `//server/share`, the `urllib.request.pathname2url()` function returns a URL with an authority section, such as `///C:/foo` or `//server/share` (or before pythonGH-126205, `////server/share`). Only the `file:` prefix is omitted. But when handed an absolute POSIX path such as `/etc/hosts`, or a Windows path of the same form (rooted but lacking a drive), the function returns a URL without an authority section, such as `/etc/hosts`. This patch corrects the discrepancy by adding a `//` prefix before drive-less, rooted paths when generating URLs.
1 parent 49a5468 commit a94803d

File tree

5 files changed

+33
-20
lines changed

5 files changed

+33
-20
lines changed

Doc/library/urllib.request.rst

+6-4
Original file line number | Diff line number | Diff line change
@@ -159,12 +159,14 @@ The :mod:`urllib.request` module defines the following functions:
159159
'file:///C:/Program%20Files'
160160

161161
.. versionchanged:: 3.14
162-
Windows drive letters are no longer converted to uppercase.
162+
Paths beginning with a slash are converted to URLs with authority
163+
sections. For example, the path ``/etc/hosts`` is converted to
164+
the URL ``///etc/hosts``.
163165

164166
.. versionchanged:: 3.14
165-
On Windows, ``:`` characters not following a drive letter are quoted. In
166-
previous versions, :exc:`OSError` was raised if a colon character was
167-
found in any position other than the second character.
167+
Windows drive letters are no longer converted to uppercase, and ``:``
168+
characters not following a drive letter no longer cause an
169+
:exc:`OSError` exception to be raised on Windows.
168170

169171

170172
.. function:: url2pathname(url)

Lib/nturl2path.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -55,13 +55,17 @@ def pathname2url(p):
5555
p = p[4:]
5656
if p[:4].upper() == 'UNC/':
5757
p = '//' + p[4:]
58-
drive, tail = ntpath.splitdrive(p)
59-
if drive[1:] == ':':
60-
# DOS drive specified. Add three slashes to the start, producing
61-
# an authority section with a zero-length authority, and a path
62-
# section starting with a single slash.
63-
drive = f'///{drive}'
58+
drive, root, tail = ntpath.splitroot(p)
59+
if drive:
60+
if drive[1:] == ':':
61+
# DOS drive specified. Add three slashes to the start, producing
62+
# an authority section with a zero-length authority, and a path
63+
# section starting with a single slash.
64+
drive = f'///{drive}'
65+
drive = urllib.parse.quote(drive, safe='/:')
66+
elif root:
67+
# Add explicitly empty authority to path beginning with one slash.
68+
root = f'//{root}'
6469

65-
drive = urllib.parse.quote(drive, safe='/:')
6670
tail = urllib.parse.quote(tail)
67-
return drive + tail
71+
return drive + root + tail

Lib/test/test_urllib.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -1434,7 +1434,7 @@ def test_pathname2url_win(self):
14341434
self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
14351435
self.assertEqual(fn('foo:bar'), 'foo%3Abar')
14361436
# No drive letter
1437-
self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
1437+
self.assertEqual(fn("\\folder\\test\\"), '///folder/test/')
14381438
self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
14391439
self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/')
14401440
self.assertEqual(fn('\\\\some\\share\\'), '//some/share/')
@@ -1447,7 +1447,7 @@ def test_pathname2url_win(self):
14471447
self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir')
14481448
# Round-tripping
14491449
urls = ['///C:',
1450-
'/folder/test/',
1450+
'///folder/test/',
14511451
'///C:/foo/bar/spam.foo']
14521452
for url in urls:
14531453
self.assertEqual(fn(urllib.request.url2pathname(url)), url)
@@ -1456,12 +1456,12 @@ def test_pathname2url_win(self):
14561456
'test specific to POSIX pathnames')
14571457
def test_pathname2url_posix(self):
14581458
fn = urllib.request.pathname2url
1459-
self.assertEqual(fn('/'), '/')
1460-
self.assertEqual(fn('/a/b.c'), '/a/b.c')
1459+
self.assertEqual(fn('/'), '///')
1460+
self.assertEqual(fn('/a/b.c'), '///a/b.c')
14611461
self.assertEqual(fn('//a/b.c'), '////a/b.c')
14621462
self.assertEqual(fn('///a/b.c'), '/////a/b.c')
14631463
self.assertEqual(fn('////a/b.c'), '//////a/b.c')
1464-
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
1464+
self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c')
14651465

14661466
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
14671467
def test_pathname2url_nonascii(self):

Lib/urllib/request.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -1667,9 +1667,11 @@ def url2pathname(pathname):
16671667
def pathname2url(pathname):
16681668
"""OS-specific conversion from a file system path to a relative URL
16691669
of the 'file' scheme; not recommended for general use."""
1670-
if pathname[:2] == '//':
1671-
# Add explicitly empty authority to avoid interpreting the path
1672-
# as authority.
1670+
if pathname[:1] == '/':
1671+
# Add explicitly empty authority to absolute path. If the path
1672+
# starts with exactly one slash then this change is mostly
1673+
# cosmetic, but if it begins with two or more slashes then this
1674+
# avoids interpreting the path as a URL authority.
16731675
pathname = '//' + pathname
16741676
encoding = sys.getfilesystemencoding()
16751677
errors = sys.getfilesystemencodeerrors()
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
:func:`urllib.request.pathname2url` now adds an empty authority when
2+
generating a URL for a path that begins with exactly one slash. For example,
3+
the path ``/etc/hosts`` is converted to the scheme-less URL ``///etc/hosts``.
4+
As a result of this change, URLs without authorities are only generated for
5+
relative paths.

0 commit comments

Comments
 (0)