Skip to content

Commit c9b399f

Browse files
authored
GH-85168: Use filesystem encoding when converting to/from file URIs (#126852)
Adjust `urllib.request.url2pathname()` and `urllib.request.pathname2url()` to use the filesystem encoding and error handler when quoting and unquoting file URIs, rather than always forcing the use of UTF-8. No changes are needed in the `nturl2path` module because the filesystem encoding on Windows is always UTF-8, per PEP 529.
1 parent 2cdfb41 commit c9b399f

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

Lib/test/test_urllib.py

+16 -4
Original file line number | Diff line number | Diff line change
@@ -609,10 +609,6 @@ def tearDown(self):
609609

610610
def constructLocalFileUrl(self, filePath):
611611
filePath = os.path.abspath(filePath)
612-
try:
613-
filePath.encode("utf-8")
614-
except UnicodeEncodeError:
615-
raise unittest.SkipTest("filePath is not encodable to utf8")
616612
return "file://%s" % urllib.request.pathname2url(filePath)
617613

618614
def createNewTempFile(self, data=b""):
@@ -1462,6 +1458,13 @@ def test_pathname2url_posix(self):
14621458
self.assertEqual(fn('/a/b.c'), '/a/b.c')
14631459
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
14641460

1461+
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
1462+
def test_pathname2url_nonascii(self):
1463+
encoding = sys.getfilesystemencoding()
1464+
errors = sys.getfilesystemencodeerrors()
1465+
url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
1466+
self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
1467+
14651468
@unittest.skipUnless(sys.platform == 'win32',
14661469
'test specific to Windows pathnames.')
14671470
def test_url2pathname_win(self):
@@ -1512,6 +1515,15 @@ def test_url2pathname_posix(self):
15121515
self.assertEqual(fn('////foo/bar'), '//foo/bar')
15131516
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
15141517

1518+
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
1519+
def test_url2pathname_nonascii(self):
1520+
encoding = sys.getfilesystemencoding()
1521+
errors = sys.getfilesystemencodeerrors()
1522+
url = os_helper.FS_NONASCII
1523+
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
1524+
url = urllib.parse.quote(url, encoding=encoding, errors=errors)
1525+
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
1526+
15151527
class Utility_Tests(unittest.TestCase):
15161528
"""Testcase to test the various utility functions in the urllib."""
15171529

Lib/test/test_urllib2.py

-4
Original file line number | Diff line number | Diff line change
@@ -718,10 +718,6 @@ def test_processors(self):
718718

719719

720720
def sanepathname2url(path):
721-
try:
722-
path.encode("utf-8")
723-
except UnicodeEncodeError:
724-
raise unittest.SkipTest("path is not encodable to utf8")
725721
urlpath = urllib.request.pathname2url(path)
726722
if os.name == "nt" and urlpath.startswith("///"):
727723
urlpath = urlpath[2:]

Lib/urllib/request.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -1657,12 +1657,16 @@ def url2pathname(pathname):
16571657
# URL has an empty authority section, so the path begins on the
16581658
# third character.
16591659
pathname = pathname[2:]
1660-
return unquote(pathname)
1660+
encoding = sys.getfilesystemencoding()
1661+
errors = sys.getfilesystemencodeerrors()
1662+
return unquote(pathname, encoding=encoding, errors=errors)
16611663

16621664
def pathname2url(pathname):
16631665
"""OS-specific conversion from a file system path to a relative URL
16641666
of the 'file' scheme; not recommended for general use."""
1665-
return quote(pathname)
1667+
encoding = sys.getfilesystemencoding()
1668+
errors = sys.getfilesystemencodeerrors()
1669+
return quote(pathname, encoding=encoding, errors=errors)
16661670

16671671

16681672
# Utility functions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
Fix issue where :func:`urllib.request.url2pathname` and
2+
:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
3+
unquoting file URIs. They now use the :term:`filesystem encoding and error
4+
handler`.

0 commit comments

Comments
 (0)