Skip to content

Commit 38e6991

Browse files
barneygale and tomasr8
authored and committed
pythonGH-126363: Speed up pattern parsing in pathlib.Path.glob() (python#126364)
The implementation of `Path.glob()` does rather a hacky thing: it calls `self.with_segments()` to convert the given pattern to a `Path` object, and then peeks at the private `_raw_path` attribute to see if pathlib removed a trailing slash from the pattern.

In this patch, we make `glob()` use a new `_parse_pattern()` classmethod that splits the pattern into parts while preserving information about any trailing slash. This skips the cost of creating a `Path` object, and avoids some path anchor normalization, which makes `Path.glob()` slightly faster. But mostly it's about making the code less naughty.

Co-authored-by: Tomas R. <[email protected]>
1 parent d363da4 commit 38e6991

File tree

2 files changed

+29
-14
lines changed

2 files changed

+29
-14
lines changed

Lib/pathlib/_local.py

+27 -14
Original file line number | Diff line number | Diff line change
@@ -274,6 +274,31 @@ def _parse_path(cls, path):
274274
root = sep
275275
return drv, root, [x for x in rel.split(sep) if x and x != '.']
276276

277+
@classmethod
278+
def _parse_pattern(cls, pattern):
279+
"""Parse a glob pattern to a list of parts. This is much like
280+
_parse_path, except:
281+
282+
- Rather than normalizing and returning the drive and root, we raise
283+
NotImplementedError if either are present.
284+
- If the path has no real parts, we raise ValueError.
285+
- If the path ends in a slash, then a final empty part is added.
286+
"""
287+
drv, root, rel = cls.parser.splitroot(pattern)
288+
if root or drv:
289+
raise NotImplementedError("Non-relative patterns are unsupported")
290+
sep = cls.parser.sep
291+
altsep = cls.parser.altsep
292+
if altsep:
293+
rel = rel.replace(altsep, sep)
294+
parts = [x for x in rel.split(sep) if x and x != '.']
295+
if not parts:
296+
raise ValueError(f"Unacceptable pattern: {str(pattern)!r}")
297+
elif rel.endswith(sep):
298+
# GH-65238: preserve trailing slash in glob patterns.
299+
parts.append('')
300+
return parts
301+
277302
@property
278303
def _raw_path(self):
279304
"""The joined but unnormalized path."""
@@ -641,17 +666,7 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
641666
kind, including directories) matching the given relative pattern.
642667
"""
643668
sys.audit("pathlib.Path.glob", self, pattern)
644-
if not isinstance(pattern, PurePath):
645-
pattern = self.with_segments(pattern)
646-
if pattern.anchor:
647-
raise NotImplementedError("Non-relative patterns are unsupported")
648-
parts = pattern._tail.copy()
649-
if not parts:
650-
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
651-
raw = pattern._raw_path
652-
if raw[-1] in (self.parser.sep, self.parser.altsep):
653-
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
654-
parts.append('')
669+
parts = self._parse_pattern(pattern)
655670
select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
656671
root = str(self)
657672
paths = select(root)
@@ -672,9 +687,7 @@ def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
672687
this subtree.
673688
"""
674689
sys.audit("pathlib.Path.rglob", self, pattern)
675-
if not isinstance(pattern, PurePath):
676-
pattern = self.with_segments(pattern)
677-
pattern = '**' / pattern
690+
pattern = self.parser.join('**', pattern)
678691
return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
679692

680693
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Speed up pattern parsing in :meth:`pathlib.Path.glob` by skipping creation
2+
of a :class:`pathlib.Path` object for the pattern.

0 commit comments

Comments
 (0)