Skip to content

Commit 1be2143

Browse files
authored
Lower noop wheel install overhead. (#2315)
Previously, installing a wheel that was already installed incurred the cost of hashing the installed wheel chroot every time. For a warm cache this wasted work was egregious for large distributions like PyTorch, where hashing gigabytes of files takes seconds. Work towards #2312.
1 parent 4eb5c9a commit 1be2143

File tree

10 files changed

+42
-12
lines changed

10 files changed

+42
-12
lines changed

pex/jobs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ def iter_map_parallel(
732732
# least two slots to ensure we process input items in parallel.
733733
pool_size = max(2, min(len(input_items) // min_average_load, _sanitize_max_jobs(max_jobs)))
734734

735-
perform_install = functools.partial(_apply_function, function)
735+
apply_function = functools.partial(_apply_function, function)
736736

737737
slots = defaultdict(list) # type: DefaultDict[int, List[float]]
738738
with TRACER.timed(
@@ -741,7 +741,7 @@ def iter_map_parallel(
741741
)
742742
):
743743
with _mp_pool(size=pool_size) as pool:
744-
for pid, result, elapsed_secs in pool.imap_unordered(perform_install, input_items):
744+
for pid, result, elapsed_secs in pool.imap_unordered(apply_function, input_items):
745745
TRACER.log(
746746
"[{pid}] {verbed} {result} in {elapsed_secs:.2f}s".format(
747747
pid=pid,

pex/pep_376.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pex.common import is_pyc_dir, is_pyc_file, safe_mkdir, safe_open
1818
from pex.interpreter import PythonInterpreter
1919
from pex.typing import TYPE_CHECKING, cast
20+
from pex.util import CacheHelper
2021
from pex.venv.virtualenv import Virtualenv
2122

2223
if TYPE_CHECKING:
@@ -169,10 +170,24 @@ def save(
169170
record_relpath, # type: Text
170171
):
171172
# type: (...) -> InstalledWheel
172-
layout = {"stash_dir": stash_dir, "record_relpath": record_relpath}
173+
174+
# We currently need the installed wheel chroot hash for PEX-INFO / boot purposes. It is
175+
# expensive to calculate, so we do it here just once, when saving the installed wheel.
176+
fingerprint = CacheHelper.dir_hash(prefix_dir, hasher=hashlib.sha256)
177+
178+
layout = {
179+
"stash_dir": stash_dir,
180+
"record_relpath": record_relpath,
181+
"fingerprint": fingerprint,
182+
}
173183
with open(cls.layout_file(prefix_dir), "w") as fp:
174184
json.dump(layout, fp, sort_keys=True)
175-
return cls(prefix_dir=prefix_dir, stash_dir=stash_dir, record_relpath=record_relpath)
185+
return cls(
186+
prefix_dir=prefix_dir,
187+
stash_dir=stash_dir,
188+
record_relpath=record_relpath,
189+
fingerprint=fingerprint,
190+
)
176191

177192
@classmethod
178193
def load(cls, prefix_dir):
@@ -201,15 +216,20 @@ def load(cls, prefix_dir):
201216
layout_file=layout_file, value=layout
202217
)
203218
)
219+
220+
fingerprint = layout.get("fingerprint")
221+
204222
return cls(
205223
prefix_dir=prefix_dir,
206224
stash_dir=cast(str, stash_dir),
207225
record_relpath=cast(str, record_relpath),
226+
fingerprint=cast("Optional[str]", fingerprint),
208227
)
209228

210229
prefix_dir = attr.ib() # type: str
211230
stash_dir = attr.ib() # type: str
212231
record_relpath = attr.ib() # type: Text
232+
fingerprint = attr.ib() # type: Optional[str]
213233

214234
def stashed_path(self, *components):
215235
# type: (*str) -> str

pex/resolver.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from pex.jobs import Raise, SpawnedJob, execute_parallel, iter_map_parallel
2424
from pex.network_configuration import NetworkConfiguration
2525
from pex.orderedset import OrderedSet
26+
from pex.pep_376 import InstalledWheel, LoadError
2627
from pex.pep_425 import CompatibilityTags
2728
from pex.pep_427 import InstallableType, WheelError, install_wheel_chroot
2829
from pex.pep_503 import ProjectName
@@ -485,7 +486,16 @@ def finalize_install(self, install_requests):
485486
# pex: * - paths that do not exist or will be imported via zipimport
486487
# pex.pex 2.0.2
487488
#
488-
wheel_dir_hash = fingerprint_path(self.install_chroot)
489+
cached_fingerprint = None # type: Optional[str]
490+
try:
491+
installed_wheel = InstalledWheel.load(self.install_chroot)
492+
except LoadError:
493+
# We support legacy chroots below by calculating the chroot fingerprint just in time.
494+
pass
495+
else:
496+
cached_fingerprint = installed_wheel.fingerprint
497+
498+
wheel_dir_hash = cached_fingerprint or fingerprint_path(self.install_chroot)
489499
runtime_key_dir = os.path.join(self._installation_root, wheel_dir_hash)
490500
with atomic_directory(runtime_key_dir) as atomic_dir:
491501
if not atomic_dir.is_finalized():
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"record_relpath": "attrs-21.5.0.dev0.dist-info/RECORD", "stash_dir": ".prefix"}
1+
{"fingerprint": "13e0015aa58e8f470b17936f42a5d5f874a20194156c371d2a4c695dc8c16d9e", "record_relpath": "attrs-21.5.0.dev0.dist-info/RECORD", "stash_dir": ".prefix"}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"record_relpath": "packaging-20.9.dist-info/RECORD", "stash_dir": ".prefix"}
1+
{"fingerprint": "5a3fc5dcd563b4a4474944cd0d73ee4a51fb53132af553abfb203dc1cdf8f7c3", "record_relpath": "packaging-20.9.dist-info/RECORD", "stash_dir": ".prefix"}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"record_relpath": "packaging-21.3.dist-info/RECORD", "stash_dir": ".prefix"}
1+
{"fingerprint": "2a93da49a5fc8217a0f710ff0ca3cfc56fefa35eddcf6a64786d452bfa284525", "record_relpath": "packaging-21.3.dist-info/RECORD", "stash_dir": ".prefix"}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"record_relpath": "packaging-23.1.dist-info/RECORD", "stash_dir": ".prefix"}
1+
{"fingerprint": "a98c4a74d7b6a62763df7ad330ac4c7a0779323fc36e961aeb8f20865e21a191", "record_relpath": "packaging-23.1.dist-info/RECORD", "stash_dir": ".prefix"}

pex/vendor/_vendored/pip/.layout.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"record_relpath": "pip-20.3.4.dist-info/RECORD", "stash_dir": ".prefix"}
1+
{"fingerprint": "120267325b80f5c4b4adac019eb6617ab3319395c043d2871eedf70dd6ae2954", "record_relpath": "pip-20.3.4.dist-info/RECORD", "stash_dir": ".prefix"}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"record_relpath": "setuptools-44.0.0+3acb925dd708430aeaf197ea53ac8a752f7c1863.dist-info/RECORD", "stash_dir": ".prefix"}
1+
{"fingerprint": "ebe3717ba6bad87ca328cbf3d3eb3f5475105bccb51dc09a69d37eff6b2e5210", "record_relpath": "setuptools-44.0.0+3acb925dd708430aeaf197ea53ac8a752f7c1863.dist-info/RECORD", "stash_dir": ".prefix"}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"record_relpath": "toml-0.10.2.dist-info/RECORD", "stash_dir": ".prefix"}
1+
{"fingerprint": "3d44cdc5911c31b190a0225202daafbb22f7f74e6761fb086742dadb5dff5384", "record_relpath": "toml-0.10.2.dist-info/RECORD", "stash_dir": ".prefix"}

0 commit comments

Comments
 (0)