Skip to content

Commit 7b01a97

Browse files
committed
Fix fsencode and fsdecode backports
- Mirrors the new Python 3.7 implementation
- Taken from `vistir` (my other library) -> discussion over at sarugaku/vistir#54
- Fixes PiDelport#13
- Fixes PiDelport#6 (I think?)

Signed-off-by: Dan Ryan <[email protected]>
1 parent d580449 commit 7b01a97

File tree

1 file changed

+70
-100
lines changed

1 file changed

+70
-100
lines changed

src/backports/os.py

+70-100
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,35 @@
88
"""
99
from __future__ import unicode_literals
1010

11+
from os import name as os_name
1112
import sys
1213

1314
# XXX backport: the text type is ``unicode`` on Python 2 and ``str`` on Python 3
if sys.version_info < (3,):
    _str = unicode  # noqa: F821 (name only exists on Python 2)
else:
    _str = str

# Base type shared by every string flavour: ``basestring`` on Python 2,
# plain ``str`` on Python 3.
if sys.version_info[0] == 2:
    string_types = basestring  # noqa: F821 (name only exists on Python 2)
else:
    string_types = str
1517

1618
# XXX backport: Use backported surrogateescape for Python 2
# TODO backport: Find a way to do this without pulling in the entire future package?
#
# This block settles three module-level values used by fsencode()/fsdecode():
#   _fs_encoding       codec name used for both directions
#   _fs_encode_errors  error handler passed to ``str.encode``
#   _fs_decode_errors  error handler passed to ``bytes.decode``
if sys.version_info < (3,):
    from future.utils.surrogateescape import register_surrogateescape
    register_surrogateescape()
    # Python 2: fixed UTF-8 codec with "backslashreplace" for encoding and
    # "replace" for decoding.
    _fs_encode_errors = "backslashreplace"
    _fs_decode_errors = "replace"
    _fs_encoding = "utf-8"
else:
    # Used as-is on Windows and as the last-resort fallback elsewhere.
    _fs_encoding = "utf-8"
    if os_name == "nt":
        # Windows always uses the fixed strategy below, never the
        # interpreter-reported error handler.
        _fs_error_fn = None
        alt_strategy = "surrogatepass"
    else:
        if sys.version_info >= (3, 3):
            # Pick the first encoding the interpreter actually reports.  The
            # ``if enc`` filter is what makes the second candidate and the
            # "utf-8" default reachable; without it the first candidate was
            # always taken, even if it were empty/None.
            _fs_encoding = next(
                (
                    enc
                    for enc in (sys.getfilesystemencoding(), sys.getdefaultencoding())
                    if enc
                ),
                _fs_encoding
            )
        alt_strategy = "surrogateescape"
        # ``sys.getfilesystemencodeerrors`` is not present on every Python 3
        # release; ``None`` here means "fall back to alt_strategy".
        _fs_error_fn = getattr(sys, "getfilesystemencodeerrors", None)
    _fs_encode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
    _fs_decode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
2140

2241

2342
# XXX backport: This invalid_utf8_indexes() helper is shamelessly copied from
@@ -92,103 +111,54 @@ def _chunks(b, indexes):
92111
yield b[i:]
93112

94113

95-
# NOTE(review): every line of this definition carries a ``-`` marker in the
# diff, i.e. the commit removes it in favour of the module-level
# fsencode()/fsdecode() functions defined further down.
def _fscodec():
    """
    Build the ``fsencode`` / ``fsdecode`` pair for the current interpreter.

    The filesystem encoding and error handler are resolved once, together with
    two flags that say whether Python 2 specific workarounds are required; the
    two returned closures capture those values.

    :returns: A ``(fsencode, fsdecode)`` tuple of functions
    """
    encoding = sys.getfilesystemencoding()
    if encoding == 'mbcs':
        errors = 'strict'
    else:
        errors = 'surrogateescape'

    # XXX backport: Do we need to hack around Python 2's UTF-8 codec?
    import codecs  # Use codecs.lookup() for name normalisation.
    _HACK_AROUND_PY2_UTF8 = (sys.version_info < (3,) and
                             codecs.lookup(encoding) == codecs.lookup('utf-8'))
    # Do we need to hack around Python 2's ASCII codec error handler behaviour?
    _HACK_AROUND_PY2_ASCII = (sys.version_info < (3,) and
                              codecs.lookup(encoding) == codecs.lookup('ascii'))

    # XXX backport: chr(octet) became bytes([octet])
    _byte = chr if sys.version_info < (3,) else lambda i: bytes([i])

    def fsencode(filename):
        """
        Encode filename to the filesystem encoding with 'surrogateescape' error
        handler, return bytes unchanged. On Windows, use 'strict' error handler if
        the file system encoding is 'mbcs' (which is the default encoding).

        Raises TypeError if filename is neither bytes nor text.
        """
        if isinstance(filename, bytes):
            return filename
        elif isinstance(filename, _str):
            if _HACK_AROUND_PY2_UTF8 or _HACK_AROUND_PY2_ASCII:
                # XXX backport: Unlike Python 3, Python 2's UTF-8 codec does not
                # consider surrogate codepoints invalid, so the surrogateescape
                # error handler never gets invoked to encode them back into high
                # bytes.
                #
                # This code hacks around that by manually encoding the surrogate
                # codepoints to high bytes, without relying on surrogateescape.
                #
                # As a *separate* issue to the above, Python2's ASCII codec has
                # a different problem: it correctly invokes the surrogateescape
                # error handler, but then seems to do additional strict
                # validation (?) on the interim surrogate-decoded Unicode buffer
                # returned by surrogateescape, and then fails with a
                # UnicodeEncodeError anyway.
                #
                # The fix for that happens to be the same (manual encoding),
                # even though the two causes are quite different.
                #
                # Per character: code points U+DC00..U+DCFF become the single
                # byte (ord - 0xDC00); everything else is encoded normally.
                return b''.join(
                    (_byte(ord(c) - 0xDC00) if 0xDC00 <= ord(c) <= 0xDCFF else
                     c.encode(encoding))
                    for c in filename)
            else:
                return filename.encode(encoding, errors)
        else:
            # XXX backport: unicode instead of str for Python 2
            raise TypeError("expect bytes or {_str}, not {}".format(type(filename).__name__,
                                                                    _str=_str.__name__, ))

    def fsdecode(filename):
        """
        Decode filename from the filesystem encoding with 'surrogateescape' error
        handler, return str unchanged. On Windows, use 'strict' error handler if
        the file system encoding is 'mbcs' (which is the default encoding).

        Raises TypeError if filename is neither bytes nor text.
        """
        if isinstance(filename, _str):
            return filename
        elif isinstance(filename, bytes):
            if _HACK_AROUND_PY2_UTF8:
                # XXX backport: See the remarks in fsencode() above.
                #
                # This case is slightly trickier: Python 2 will invoke the
                # surrogateescape error handler for most bad high byte
                # sequences, *except* for full UTF-8 sequences that happen to
                # decode to surrogate codepoints.
                #
                # For decoding, it's not trivial to sidestep the UTF-8 codec
                # only for surrogates like fsencode() does, but as a hack we can
                # split the input into separate chunks around each invalid byte,
                # decode the chunks separately, and join the results.
                #
                # This prevents Python 2's UTF-8 codec from seeing the encoded
                # surrogate sequences as valid, which lets surrogateescape take
                # over and escape the individual bytes.
                #
                # TODO: Improve this.
                #
                from array import array
                indexes = _invalid_utf8_indexes(array(str('B'), filename))
                return ''.join(chunk.decode(encoding, errors)
                               for chunk in _chunks(filename, indexes))
            else:
                return filename.decode(encoding, errors)
        else:
            # XXX backport: unicode instead of str for Python 2
            raise TypeError("expect bytes or {_str}, not {}".format(type(filename).__name__,
                                                                    _str=_str.__name__, ))

    return fsencode, fsdecode


# Publish the two closures at module level, then drop the factory itself.
fsencode, fsdecode = _fscodec()
del _fscodec
114+
def _get_path(path):
    """
    Fetch the string value from a path-like object

    Strings and bytes are returned unchanged.  Any other object is asked for
    its path through the ``__fspath__`` method found on its *type*.

    :param path: A string, bytes, or an object whose type defines ``__fspath__``
    :returns: The string/bytes path, or **None** if there is no string value
        (the type has no ``__fspath__``, or it returned a non-string)
    """

    if isinstance(path, (string_types, bytes)):
        return path
    # Only the attribute lookup is guarded: an AttributeError raised *inside*
    # a user's __fspath__ is a genuine bug in that method and must propagate
    # rather than being reported as "not a path".
    try:
        fspath = type(path).__fspath__
    except AttributeError:
        return None
    path_repr = fspath(path)
    if isinstance(path_repr, (string_types, bytes)):
        return path_repr
    return None
131+
132+
133+
def fsencode(path):
    """
    Return the bytes form of a filesystem path

    Text is encoded with the module's filesystem encoding and encode error
    handler; a path that is not text is handed back untouched.

    :param Union[str, bytes] path: A string-like path
    :returns: A bytes-encoded filesystem path representation
    :raises TypeError: If no string value can be obtained from ``path``
    """
    resolved = _get_path(path)
    if resolved is None:
        raise TypeError("expected a valid path to encode")
    if not isinstance(resolved, _str):
        # Already binary: nothing to encode.
        return resolved
    return resolved.encode(_fs_encoding, _fs_encode_errors)
147+
148+
149+
def fsdecode(path):
    """
    Return the text form of a filesystem path

    Binary paths are decoded with the module's filesystem encoding and decode
    error handler; a path that is not binary is handed back untouched.

    :param path: The filesystem path to decode from bytes or string
    :return: An appropriately decoded path
    :rtype: str
    :raises TypeError: If no string value can be obtained from ``path``
    """
    resolved = _get_path(path)
    if resolved is None:
        raise TypeError("expected a valid path to decode")
    # ``bytes`` is an alias of ``str`` on Python 2, so this single check
    # covers the binary type of either major version.
    if not isinstance(resolved, bytes):
        return resolved
    return resolved.decode(_fs_encoding, _fs_decode_errors)

0 commit comments

Comments
 (0)