Skip to content

Commit 1dd08e0

Browse files
committed
Lower noop wheel install overhead.
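Previously, installing a wheel that was already installed incurred the cost of hashing the installed wheel chroot every time. For a warm cache, this wasted work was egregious for large distributions like PyTorch, where hashing gigabytes of files takes seconds. The chroot fingerprint is now calculated once, when the installed wheel is saved, and stored in its layout file; legacy chroots that lack a stored fingerprint still have it calculated just in time. Work towards pex-tool#2312.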
1 parent 4eb5c9a commit 1dd08e0

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

pex/jobs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ def iter_map_parallel(
732732
# least two slots to ensure we process input items in parallel.
733733
pool_size = max(2, min(len(input_items) // min_average_load, _sanitize_max_jobs(max_jobs)))
734734

735-
perform_install = functools.partial(_apply_function, function)
735+
apply_function = functools.partial(_apply_function, function)
736736

737737
slots = defaultdict(list) # type: DefaultDict[int, List[float]]
738738
with TRACER.timed(
@@ -741,7 +741,7 @@ def iter_map_parallel(
741741
)
742742
):
743743
with _mp_pool(size=pool_size) as pool:
744-
for pid, result, elapsed_secs in pool.imap_unordered(perform_install, input_items):
744+
for pid, result, elapsed_secs in pool.imap_unordered(apply_function, input_items):
745745
TRACER.log(
746746
"[{pid}] {verbed} {result} in {elapsed_secs:.2f}s".format(
747747
pid=pid,

pex/pep_376.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pex.common import is_pyc_dir, is_pyc_file, safe_mkdir, safe_open
1818
from pex.interpreter import PythonInterpreter
1919
from pex.typing import TYPE_CHECKING, cast
20+
from pex.util import CacheHelper
2021
from pex.venv.virtualenv import Virtualenv
2122

2223
if TYPE_CHECKING:
@@ -169,10 +170,24 @@ def save(
169170
record_relpath, # type: Text
170171
):
171172
# type: (...) -> InstalledWheel
172-
layout = {"stash_dir": stash_dir, "record_relpath": record_relpath}
173+
174+
# We currently need the installed wheel chroot hash for PEX-INFO / boot purposes. It is
175+
# expensive to calculate, so we do it here once when saving the installed wheel.
176+
fingerprint = CacheHelper.dir_hash(prefix_dir, hasher=hashlib.sha256)
177+
178+
layout = {
179+
"stash_dir": stash_dir,
180+
"record_relpath": record_relpath,
181+
"fingerprint": fingerprint,
182+
}
173183
with open(cls.layout_file(prefix_dir), "w") as fp:
174184
json.dump(layout, fp, sort_keys=True)
175-
return cls(prefix_dir=prefix_dir, stash_dir=stash_dir, record_relpath=record_relpath)
185+
return cls(
186+
prefix_dir=prefix_dir,
187+
stash_dir=stash_dir,
188+
record_relpath=record_relpath,
189+
fingerprint=fingerprint,
190+
)
176191

177192
@classmethod
178193
def load(cls, prefix_dir):
@@ -201,15 +216,20 @@ def load(cls, prefix_dir):
201216
layout_file=layout_file, value=layout
202217
)
203218
)
219+
220+
fingerprint = layout.get("fingerprint")
221+
204222
return cls(
205223
prefix_dir=prefix_dir,
206224
stash_dir=cast(str, stash_dir),
207225
record_relpath=cast(str, record_relpath),
226+
fingerprint=cast("Optional[str]", fingerprint),
208227
)
209228

210229
prefix_dir = attr.ib() # type: str
211230
stash_dir = attr.ib() # type: str
212231
record_relpath = attr.ib() # type: Text
232+
fingerprint = attr.ib() # type: Optional[str]
213233

214234
def stashed_path(self, *components):
215235
# type: (*str) -> str

pex/resolver.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from pex.jobs import Raise, SpawnedJob, execute_parallel, iter_map_parallel
2424
from pex.network_configuration import NetworkConfiguration
2525
from pex.orderedset import OrderedSet
26+
from pex.pep_376 import InstalledWheel, LoadError
2627
from pex.pep_425 import CompatibilityTags
2728
from pex.pep_427 import InstallableType, WheelError, install_wheel_chroot
2829
from pex.pep_503 import ProjectName
@@ -485,7 +486,16 @@ def finalize_install(self, install_requests):
485486
# pex: * - paths that do not exist or will be imported via zipimport
486487
# pex.pex 2.0.2
487488
#
488-
wheel_dir_hash = fingerprint_path(self.install_chroot)
489+
cached_fingerprint = None # type: Optional[str]
490+
try:
491+
installed_wheel = InstalledWheel.load(self.install_chroot)
492+
except LoadError:
493+
# We support legacy chroots below by calculating the chroot fingerprint just in time.
494+
pass
495+
else:
496+
cached_fingerprint = installed_wheel.fingerprint
497+
498+
wheel_dir_hash = cached_fingerprint or fingerprint_path(self.install_chroot)
489499
runtime_key_dir = os.path.join(self._installation_root, wheel_dir_hash)
490500
with atomic_directory(runtime_key_dir) as atomic_dir:
491501
if not atomic_dir.is_finalized():

0 commit comments

Comments
 (0)