Skip to content

Commit 746a0c5

Browse files
[3.13] GH-85168: Use filesystem encoding when converting to/from file URIs (GH-126852) (#127039)
GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852). Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the filesystem encoding and error handler when quoting and unquoting file URIs, rather than forcing use of UTF-8. No changes are needed in the `nturl2path` module because Windows always uses UTF-8 as its filesystem encoding, per PEP 529. (cherry picked from commit c9b399f) Co-authored-by: Barney Gale <[email protected]>
1 parent 40b9b50 commit 746a0c5

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

Lib/test/test_urllib.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -709,10 +709,6 @@ def tearDown(self):
709709

710710
def constructLocalFileUrl(self, filePath):
711711
filePath = os.path.abspath(filePath)
712-
try:
713-
filePath.encode("utf-8")
714-
except UnicodeEncodeError:
715-
raise unittest.SkipTest("filePath is not encodable to utf8")
716712
return "file://%s" % urllib.request.pathname2url(filePath)
717713

718714
def createNewTempFile(self, data=b""):
@@ -1562,6 +1558,13 @@ def test_pathname2url_posix(self):
15621558
self.assertEqual(fn('/a/b.c'), '/a/b.c')
15631559
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
15641560

1561+
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
1562+
def test_pathname2url_nonascii(self):
1563+
encoding = sys.getfilesystemencoding()
1564+
errors = sys.getfilesystemencodeerrors()
1565+
url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
1566+
self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
1567+
15651568
@unittest.skipUnless(sys.platform == 'win32',
15661569
'test specific to Windows pathnames.')
15671570
def test_url2pathname_win(self):
@@ -1612,6 +1615,15 @@ def test_url2pathname_posix(self):
16121615
self.assertEqual(fn('////foo/bar'), '//foo/bar')
16131616
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
16141617

1618+
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
1619+
def test_url2pathname_nonascii(self):
1620+
encoding = sys.getfilesystemencoding()
1621+
errors = sys.getfilesystemencodeerrors()
1622+
url = os_helper.FS_NONASCII
1623+
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
1624+
url = urllib.parse.quote(url, encoding=encoding, errors=errors)
1625+
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
1626+
16151627
class Utility_Tests(unittest.TestCase):
16161628
"""Testcase to test the various utility functions in the urllib."""
16171629

Lib/test/test_urllib2.py

-4
Original file line numberDiff line numberDiff line change
@@ -717,10 +717,6 @@ def test_processors(self):
717717

718718

719719
def sanepathname2url(path):
720-
try:
721-
path.encode("utf-8")
722-
except UnicodeEncodeError:
723-
raise unittest.SkipTest("path is not encodable to utf8")
724720
urlpath = urllib.request.pathname2url(path)
725721
if os.name == "nt" and urlpath.startswith("///"):
726722
urlpath = urlpath[2:]

Lib/urllib/request.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1660,12 +1660,16 @@ def url2pathname(pathname):
16601660
# URL has an empty authority section, so the path begins on the
16611661
# third character.
16621662
pathname = pathname[2:]
1663-
return unquote(pathname)
1663+
encoding = sys.getfilesystemencoding()
1664+
errors = sys.getfilesystemencodeerrors()
1665+
return unquote(pathname, encoding=encoding, errors=errors)
16641666

16651667
def pathname2url(pathname):
16661668
"""OS-specific conversion from a file system path to a relative URL
16671669
of the 'file' scheme; not recommended for general use."""
1668-
return quote(pathname)
1670+
encoding = sys.getfilesystemencoding()
1671+
errors = sys.getfilesystemencodeerrors()
1672+
return quote(pathname, encoding=encoding, errors=errors)
16691673

16701674

16711675
ftpcache = {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix issue where :func:`urllib.request.url2pathname` and
2+
:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
3+
unquoting file URIs. They now use the :term:`filesystem encoding and error
4+
handler`.

0 commit comments

Comments
 (0)