Skip to content

gh-117349: Speedup os.path #117610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 14 additions & 20 deletions Lib/ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@
"samefile", "sameopenfile", "samestat", "commonpath", "isjunction",
"isdevdrive"]

def _get_bothseps(path):
if isinstance(path, bytes):
return b'\\/'
else:
return '\\/'

# Normalize the case of a pathname and map slashes to backslashes.
# Other normalizations (such as optimizing '../' away) are not done
# (this is done by normpath).
Expand Down Expand Up @@ -108,8 +102,6 @@ def join(path, *paths):
seps = '\\/'
colon_seps = ':\\/'
try:
if not paths:
path[:0] + sep #23780: Ensure compatible data type even if p is null.
result_drive, result_root, result_path = splitroot(path)
for p in map(os.fspath, paths):
p_drive, p_root, p_path = splitroot(p)
Expand Down Expand Up @@ -232,7 +224,7 @@ def split(p):
Return tuple (head, tail) where tail is everything after the final slash.
Either part may be empty."""
p = os.fspath(p)
seps = _get_bothseps(p)
seps = b'\\/' if isinstance(p, bytes) else '\\/'
d, r, p = splitroot(p)
# set i to index beyond p's last slash
i = len(p)
Expand Down Expand Up @@ -303,7 +295,7 @@ def ismount(path):
"""Test whether a path is a mount point (a drive root, the root of a
share, or a mounted volume)"""
path = os.fspath(path)
seps = _get_bothseps(path)
seps = b'\\/' if isinstance(path, bytes) else '\\/'
path = abspath(path)
drive, root, rest = splitroot(path)
if drive and drive[0] in seps:
Expand Down Expand Up @@ -368,13 +360,15 @@ def expanduser(path):
If user or $HOME is unknown, do nothing."""
path = os.fspath(path)
if isinstance(path, bytes):
seps = b'\\/'
tilde = b'~'
else:
seps = '\\/'
tilde = '~'
if not path.startswith(tilde):
return path
i, n = 1, len(path)
while i < n and path[i] not in _get_bothseps(path):
while i < n and path[i] not in seps:
i += 1

if 'USERPROFILE' in os.environ:
Expand Down Expand Up @@ -567,10 +561,8 @@ def normpath(path):
i += 1
else:
i += 1
# If the path is now empty, substitute '.'
if not prefix and not comps:
comps.append(curdir)
return prefix + sep.join(comps)
path = prefix + sep.join(comps)
return path or curdir

else:
def normpath(path):
Expand Down Expand Up @@ -636,8 +628,9 @@ def _readlink_deep(path):
allowed_winerror = 1, 2, 3, 5, 21, 32, 50, 67, 87, 4390, 4392, 4393

seen = set()
while normcase(path) not in seen:
seen.add(normcase(path))
normp = normcase(path)
while normp not in seen:
seen.add(normp)
try:
old_path = path
path = _nt_readlink(path)
Expand All @@ -651,6 +644,7 @@ def _readlink_deep(path):
path = old_path
break
path = normpath(join(dirname(old_path), path))
normp = normcase(path)
except OSError as ex:
if ex.winerror in allowed_winerror:
break
Expand Down Expand Up @@ -720,7 +714,7 @@ def realpath(path, *, strict=False):
prefix = b'\\\\?\\'
unc_prefix = b'\\\\?\\UNC\\'
new_unc_prefix = b'\\\\'
cwd = os.getcwdb()
getcwd = os.getcwdb
# bpo-38081: Special case for realpath(b'nul')
devnull = b'nul'
if normcase(path) == devnull:
Expand All @@ -729,14 +723,14 @@ def realpath(path, *, strict=False):
prefix = '\\\\?\\'
unc_prefix = '\\\\?\\UNC\\'
new_unc_prefix = '\\\\'
cwd = os.getcwd()
getcwd = os.getcwd
# bpo-38081: Special case for realpath('nul')
devnull = 'nul'
if normcase(path) == devnull:
return '\\\\.\\NUL'
had_prefix = path.startswith(prefix)
if not had_prefix and not isabs(path):
path = join(cwd, path)
path = join(getcwd(), path)
try:
path = _getfinalpathname(path)
initial_winerror = 0
Expand Down
60 changes: 27 additions & 33 deletions Lib/posixpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,6 @@
"commonpath", "isjunction","isdevdrive"]


def _get_sep(path):
if isinstance(path, bytes):
return b'/'
else:
return '/'

# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed
Expand All @@ -60,7 +54,7 @@ def normcase(s):
def isabs(s):
    """Test whether a path is absolute.

    *s* may be a str, a bytes object, or any object accepted by
    os.fspath().  Returns True when the path begins with a slash.
    """
    s = os.fspath(s)
    # Choose the separator in the same type as the path so that
    # startswith() never mixes str and bytes.
    sep = b'/' if isinstance(s, bytes) else '/'
    return s.startswith(sep)


Expand All @@ -74,15 +68,13 @@ def join(a, *p):
will be discarded. An empty last part will result in a path that
ends with a separator."""
a = os.fspath(a)
sep = _get_sep(a)
sep = b'/' if isinstance(a, bytes) else '/'
path = a
try:
if not p:
path[:0] + sep #23780: Ensure compatible data type even if p is null.
for b in map(os.fspath, p):
if b.startswith(sep):
if b.startswith(sep) or not path: # startswith ensures no mixing
path = b
elif not path or path.endswith(sep):
elif path.endswith(sep):
path += b
else:
path += sep + b
Expand All @@ -101,7 +93,7 @@ def split(p):
"""Split a pathname. Returns tuple "(head, tail)" where "tail" is
everything after the final slash. Either part may be empty."""
p = os.fspath(p)
sep = _get_sep(p)
sep = b'/' if isinstance(p, bytes) else '/'
i = p.rfind(sep) + 1
head, tail = p[:i], p[i:]
if head and head != sep*len(head):
Expand Down Expand Up @@ -161,15 +153,15 @@ def splitroot(p):
else:
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
return empty, p[:2], p[2:]
return empty, sep + sep, p[2:]


# Return the tail (basename) part of a path, same as split(path)[1].

def basename(p):
    """Return the final component of a pathname.

    *p* may be a str, a bytes object, or any object accepted by
    os.fspath().  The result is everything after the last slash, which
    is empty when the path ends with a slash.
    """
    p = os.fspath(p)
    # Choose the separator in the same type as the path so that rfind()
    # never mixes str and bytes.
    sep = b'/' if isinstance(p, bytes) else '/'
    # rfind() returns -1 when there is no slash, so i becomes 0 and the
    # whole path is returned.
    i = p.rfind(sep) + 1
    return p[i:]

Expand All @@ -179,7 +171,7 @@ def basename(p):
def dirname(p):
"""Returns the directory component of a pathname"""
p = os.fspath(p)
sep = _get_sep(p)
sep = b'/' if isinstance(p, bytes) else '/'
i = p.rfind(sep) + 1
head = p[:i]
if head and head != sep*len(head):
Expand All @@ -192,6 +184,7 @@ def dirname(p):

def ismount(path):
"""Test whether a path is a mount point"""
path = os.fspath(path)
try:
s1 = os.lstat(path)
except (OSError, ValueError):
Expand All @@ -202,7 +195,6 @@ def ismount(path):
if stat.S_ISLNK(s1.st_mode):
return False

path = os.fspath(path)
if isinstance(path, bytes):
parent = join(path, b'..')
else:
Expand Down Expand Up @@ -231,12 +223,13 @@ def expanduser(path):
do nothing."""
path = os.fspath(path)
if isinstance(path, bytes):
sep = b'/'
tilde = b'~'
else:
sep = '/'
tilde = '~'
if not path.startswith(tilde):
return path
sep = _get_sep(path)
i = path.find(sep, 1)
if i < 0:
i = len(path)
Expand Down Expand Up @@ -276,11 +269,8 @@ def expanduser(path):
return path
if isinstance(path, bytes):
userhome = os.fsencode(userhome)
root = b'/'
else:
root = '/'
userhome = userhome.rstrip(root)
return (userhome + path[i:]) or root
userhome = userhome.rstrip(sep)
return (userhome + path[i:]) or sep


# Expand paths containing shell variable substitutions.
Expand Down Expand Up @@ -358,7 +348,7 @@ def normpath(path):
sep = '/'
dot = '.'
dotdot = '..'
if not path:
if not path or path == dot:
return dot
_, initial_slashes, path = splitroot(path)
comps = path.split(sep)
Expand Down Expand Up @@ -388,11 +378,17 @@ def abspath(path):
"""Return an absolute path."""
path = os.fspath(path)
if isinstance(path, bytes):
if not path.startswith(b'/'):
path = join(os.getcwdb(), path)
sep = b'/'
curdir = b'.'
getcwd = os.getcwdb
else:
if not path.startswith('/'):
path = join(os.getcwd(), path)
sep = '/'
curdir = '.'
getcwd = os.getcwd
if not path.startswith(sep):
if not path or path == curdir:
return getcwd()
path = join(getcwd(), path)
return normpath(path)


Expand Down Expand Up @@ -558,10 +554,8 @@ def commonpath(paths):
try:
split_paths = [path.split(sep) for path in paths]

try:
isabs, = set(p[:1] == sep for p in paths)
except ValueError:
raise ValueError("Can't mix absolute and relative paths") from None
if len({p.startswith(sep) for p in paths}) != 1:
raise ValueError("Can't mix absolute and relative paths")

split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
s1 = min(split_paths)
Expand All @@ -572,7 +566,7 @@ def commonpath(paths):
common = s1[:i]
break

prefix = sep if isabs else sep[:0]
prefix = sep if paths[0].startswith(sep) else sep[:0]
return prefix + sep.join(common)
except (TypeError, AttributeError):
genericpath._check_arg_types('commonpath', *paths)
Expand Down
2 changes: 2 additions & 0 deletions Lib/test/test_posixpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def test_join(self):
self.assertEqual(fn(b"/foo", b"bar", b"baz"), b"/foo/bar/baz")
self.assertEqual(fn(b"/foo/", b"bar/", b"baz/"), b"/foo/bar/baz/")

self.assertEqual(fn("a", ""), "a/")
self.assertEqual(fn("a", "", ""), "a/")
self.assertEqual(fn("a", "b"), "a/b")
self.assertEqual(fn("a", "b/"), "a/b/")
self.assertEqual(fn("a/", "b"), "a/b")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up some functions in :mod:`os.path`.