Skip to content

Commit 6360916

Browse files
committed
Merge pull request #290 from jaraco and anntzer.
2 parents fa58934 + 5ef4f52 commit 6360916

File tree

4 files changed

+155
-35
lines changed

4 files changed

+155
-35
lines changed

CHANGES.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
v3.8.0
2+
======
3+
4+
* #290: Add mtime-based caching for ``FastPath`` and its
5+
lookups, dramatically increasing performance for repeated
6+
distribution lookups.
7+
18
v3.7.3
29
======
310

importlib_metadata/__init__.py

Lines changed: 56 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import functools
1212
import itertools
1313
import posixpath
14+
import contextlib
1415
import collections
1516

1617
from ._compat import (
@@ -20,6 +21,7 @@
2021
Protocol,
2122
)
2223

24+
from ._functools import method_cache
2325
from ._itertools import unique_everseen
2426

2527
from configparser import ConfigParser
@@ -615,9 +617,12 @@ class FastPath:
615617
children.
616618
"""
617619

620+
@functools.lru_cache()  # type: ignore
def __new__(cls, root):
    """
    Allocate the instance for ``root``.

    The ``lru_cache`` wrapper memoizes on ``(cls, root)``, so constructing
    with an argument that is still in the cache hands back the object
    allocated earlier instead of a new one.
    """
    instance = super().__new__(cls)
    return instance
623+
618624
def __init__(self, root):
619625
self.root = str(root)
620-
self.base = os.path.basename(self.root).lower()
621626

622627
def joinpath(self, child):
623628
return pathlib.Path(self.root, child)
@@ -637,11 +642,50 @@ def zip_children(self):
637642
return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names)
638643

639644
def search(self, name):
640-
return (
641-
self.joinpath(child)
642-
for child in self.children()
643-
if name.matches(child, self.base)
645+
return self.lookup(self.mtime).search(name)
646+
647+
@property
def mtime(self):
    """
    Modification time of ``self.root`` as reported by ``os.stat``.

    If the stat call fails with ``OSError``, the cache held by
    ``self.lookup`` is cleared and ``None`` is returned.
    """
    try:
        stamp = os.stat(self.root).st_mtime
    except OSError:
        # The root cannot be inspected, so drop whatever was cached for it.
        self.lookup.cache_clear()
        return None
    return stamp
652+
653+
@method_cache
def lookup(self, mtime):
    """
    Build the ``Lookup`` index for this path.

    ``mtime`` is not used in the body; it only takes part in the cache key
    supplied by ``method_cache``, so a different value yields a freshly
    built index.
    """
    index = Lookup(self)
    return index
656+
657+
658+
class Lookup:
    """
    Index of the metadata entries found directly under one path.

    ``infos`` maps a normalized project name to the ``*.dist-info`` /
    ``*.egg-info`` children, and ``eggs`` maps a legacy-normalized name to
    an ``EGG-INFO`` child when the path itself is a ``.egg``.
    """

    def __init__(self, path: "FastPath"):
        """
        Scan ``path.children()`` once and bucket every metadata entry.

        ``path`` must provide ``root``, ``children()`` and ``joinpath()``.
        """
        base = os.path.basename(path.root).lower()
        base_is_egg = base.endswith(".egg")
        self.infos = collections.defaultdict(list)
        self.eggs = collections.defaultdict(list)

        for child in path.children():
            low = child.lower()
            if low.endswith((".dist-info", ".egg-info")):
                # rpartition is faster than splitext and suitable for this purpose.
                name = low.rpartition(".")[0].partition("-")[0]
                normalized = Prepared.normalize(name)
                self.infos[normalized].append(path.joinpath(child))
            elif base_is_egg and low == "egg-info":
                # Legacy layout: the project name comes from the egg's own
                # file name rather than from the child entry.
                name = base.rpartition(".")[0].partition("-")[0]
                legacy_normalized = Prepared.legacy_normalize(name)
                self.eggs[legacy_normalized].append(path.joinpath(child))

    def search(self, prepared):
        """
        Return an iterator over the indexed paths matching ``prepared``.

        A falsy ``prepared`` selects every indexed entry; otherwise only the
        entries stored under ``prepared.normalized`` and
        ``prepared.legacy_normalized`` are produced.
        """
        # Use .get() rather than indexing: indexing a defaultdict inserts an
        # empty list for every miss, which grows this cached index and can
        # break a still-unconsumed "all entries" iterator with
        # "dictionary changed size during iteration".
        infos = (
            self.infos.get(prepared.normalized, ())
            if prepared
            else itertools.chain.from_iterable(self.infos.values())
        )
        eggs = (
            self.eggs.get(prepared.legacy_normalized, ())
            if prepared
            else itertools.chain.from_iterable(self.eggs.values())
        )
        return itertools.chain(infos, eggs)
645689

646690

647691
class Prepared:
@@ -650,22 +694,14 @@ class Prepared:
650694
"""
651695

652696
normalized = None
653-
suffixes = 'dist-info', 'egg-info'
654-
exact_matches = [''][:0]
655-
egg_prefix = ''
656-
versionless_egg_name = ''
697+
legacy_normalized = None
657698

658699
def __init__(self, name):
659700
self.name = name
660701
if name is None:
661702
return
662703
self.normalized = self.normalize(name)
663-
self.exact_matches = [
664-
self.normalized + '.' + suffix for suffix in self.suffixes
665-
]
666-
legacy_normalized = self.legacy_normalize(self.name)
667-
self.egg_prefix = legacy_normalized + '-'
668-
self.versionless_egg_name = legacy_normalized + '.egg'
704+
self.legacy_normalized = self.legacy_normalize(name)
669705

670706
@staticmethod
671707
def normalize(name):
@@ -682,26 +718,8 @@ def legacy_normalize(name):
682718
"""
683719
return name.lower().replace('-', '_')
684720

685-
def matches(self, cand, base):
686-
low = cand.lower()
687-
# rpartition is faster than splitext and suitable for this purpose.
688-
pre, _, ext = low.rpartition('.')
689-
name, _, rest = pre.partition('-')
690-
return (
691-
low in self.exact_matches
692-
or ext in self.suffixes
693-
and (not self.normalized or name.replace('.', '_') == self.normalized)
694-
# legacy case:
695-
or self.is_egg(base)
696-
and low == 'egg-info'
697-
)
698-
699-
def is_egg(self, base):
700-
return (
701-
base == self.versionless_egg_name
702-
or base.startswith(self.egg_prefix)
703-
and base.endswith('.egg')
704-
)
721+
def __bool__(self):
    """Evaluate as true only when ``self.name`` is itself truthy."""
    if self.name:
        return True
    return False
705723

706724

707725
@install
@@ -732,6 +750,9 @@ def _search_paths(cls, name, paths):
732750
path.search(prepared) for path in map(FastPath, paths)
733751
)
734752

753+
@classmethod
def invalidate_caches(cls):
    """
    Discard every ``FastPath`` instance memoized by ``FastPath.__new__``.

    Declared as a classmethod to match its ``cls`` parameter, so it can be
    invoked on the class as well as on an instance. The standard
    ``importlib.invalidate_caches()`` calls this hook on finders registered
    in ``sys.meta_path``.
    """
    FastPath.__new__.cache_clear()
755+
735756

736757
class PathDistribution(Distribution):
737758
def __init__(self, path):

importlib_metadata/_functools.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import types
2+
import functools
3+
4+
5+
# from jaraco.functools 3.3
6+
def method_cache(method, cache_wrapper=None):
    """
    Wrap lru_cache to support storing the cache data in the object instances.

    Abstracts the common paradigm where the method explicitly saves an
    underscore-prefixed protected property on first call and returns that
    subsequently.

    >>> class MyClass:
    ...     calls = 0
    ...
    ...     @method_cache
    ...     def method(self, value):
    ...         self.calls += 1
    ...         return value

    >>> a = MyClass()
    >>> a.method(3)
    3
    >>> for x in range(75):
    ...     res = a.method(x)
    >>> a.calls
    75

    Note that the apparent behavior will be exactly like that of lru_cache
    except that the cache is stored on each instance, so values in one
    instance will not flush values from another, and when an instance is
    deleted, so are the cached values for that instance.

    >>> b = MyClass()
    >>> for x in range(35):
    ...     res = b.method(x)
    >>> b.calls
    35
    >>> a.method(0)
    0
    >>> a.calls
    75

    Note that if method had been decorated with ``functools.lru_cache()``,
    a.calls would have been 76 (due to the cached value of 0 having been
    flushed by the 'b' instance).

    Clear the cache with ``.cache_clear()``

    >>> a.method.cache_clear()

    Same for a method that hasn't yet been called.

    >>> c = MyClass()
    >>> c.method.cache_clear()

    Another cache wrapper may be supplied:

    >>> cache = functools.lru_cache(maxsize=2)
    >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache)
    >>> a = MyClass()
    >>> a.method2()
    3

    The decorated attribute keeps the name and docstring of the original
    method:

    >>> MyClass.method.__name__
    'method'

    Caution - do not subsequently wrap the method with another decorator, such
    as ``@property``, which changes the semantics of the function.

    See also
    http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
    for another implementation and additional justification.

    :param method: the plain function to be cached per instance.
    :param cache_wrapper: a callable that wraps a bound method in a cache;
        defaults to a fresh ``functools.lru_cache()``.
    :return: a function to be used as the method on the class.
    """
    cache_wrapper = cache_wrapper or functools.lru_cache()

    # functools.wraps carries over __name__, __doc__ and friends so the
    # decorated method still introspects as the original.
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        # it's the first call, replace the method with a cached, bound method
        bound_method = types.MethodType(method, self)
        cached_method = cache_wrapper(bound_method)
        # The instance attribute shadows this wrapper on later lookups.
        setattr(self, method.__name__, cached_method)
        return cached_method(*args, **kwargs)

    # Support cache clear even before cache has been created.
    wrapper.cache_clear = lambda: None

    return wrapper

tests/test_api.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import textwrap
33
import unittest
44
import warnings
5+
import importlib
56

67
from . import fixtures
78
from importlib_metadata import (
@@ -275,3 +276,9 @@ def test_distribution_at_str(self):
275276
dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info'
276277
dist = Distribution.at(str(dist_info_path))
277278
assert dist.version == '1.0.0'
279+
280+
281+
class InvalidateCache(unittest.TestCase):
    """Smoke test that exercises the cache-invalidation hook."""

    def test_invalidate_cache(self):
        # There is nothing observable to assert on; the call is made only
        # so that the invalidation code path runs under test.
        importlib.invalidate_caches()

0 commit comments

Comments
 (0)