Commit a38f62a

only download metadata we need (instead of all metadata)
1 parent 17252fe commit a38f62a

File tree

2 files changed: +78 -92 lines


src/poetry/repositories/http_repository.py

+69 -84
@@ -3,7 +3,6 @@
 import functools
 import hashlib
 
-from collections import defaultdict
 from contextlib import contextmanager
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -16,7 +15,6 @@
 
 from poetry.core.constraints.version import parse_constraint
 from poetry.core.packages.dependency import Dependency
-from poetry.core.packages.utils.link import Link
 from poetry.core.utils.helpers import temporary_directory
 from poetry.core.version.markers import parse_marker
 
@@ -37,6 +35,7 @@
 
 if TYPE_CHECKING:
     from packaging.utils import NormalizedName
+    from poetry.core.packages.utils.link import Link
 
     from poetry.repositories.link_sources.base import LinkSource
     from poetry.utils.authenticator import RepositoryCertificateConfig
@@ -109,10 +108,9 @@ def _cached_or_downloaded_file(
             )
            yield filepath
 
-    def _get_info_from_wheel(self, url: str) -> PackageInfo:
+    def _get_info_from_wheel(self, link: Link) -> PackageInfo:
         from poetry.inspection.info import PackageInfo
 
-        link = Link(url)
         netloc = link.netloc
 
         # If "lazy-wheel" is enabled and the domain supports range requests
@@ -146,37 +144,73 @@ def _get_info_from_wheel(self, url: str) -> PackageInfo:
                level="debug",
            )
            self._supports_range_requests[netloc] = True
-            return self._get_info_from_wheel(link.url)
+            return self._get_info_from_wheel(link)
 
-    def _get_info_from_sdist(self, url: str) -> PackageInfo:
+    def _get_info_from_sdist(self, link: Link) -> PackageInfo:
         from poetry.inspection.info import PackageInfo
 
-        with self._cached_or_downloaded_file(Link(url)) as filepath:
+        with self._cached_or_downloaded_file(link) as filepath:
             return PackageInfo.from_sdist(filepath)
 
-    @staticmethod
-    def _get_info_from_metadata(
-        url: str, metadata: dict[str, pkginfo.Distribution]
-    ) -> PackageInfo | None:
-        if url in metadata:
-            dist = metadata[url]
-            return PackageInfo(
-                name=dist.name,
-                version=dist.version,
-                summary=dist.summary,
-                requires_dist=list(dist.requires_dist),
-                requires_python=dist.requires_python,
-            )
+    def _get_info_from_metadata(self, link: Link) -> PackageInfo | None:
+        if link.has_metadata:
+            try:
+                assert link.metadata_url is not None
+                response = self.session.get(link.metadata_url)
+                distribution = pkginfo.Distribution()
+                if link.metadata_hash_name is not None:
+                    metadata_hash = getattr(hashlib, link.metadata_hash_name)(
+                        response.text.encode()
+                    ).hexdigest()
+
+                    if metadata_hash != link.metadata_hash:
+                        self._log(
+                            f"Metadata file hash ({metadata_hash}) does not match"
+                            f" expected hash ({link.metadata_hash})."
+                            f" Metadata file for {link.filename} will be ignored.",
+                            level="warning",
+                        )
+                        return None
+
+                distribution.parse(response.content)
+                return PackageInfo(
+                    name=distribution.name,
+                    version=distribution.version,
+                    summary=distribution.summary,
+                    requires_dist=list(distribution.requires_dist),
+                    requires_python=distribution.requires_python,
+                )
+
+            except requests.HTTPError:
+                self._log(
+                    f"Failed to retrieve metadata at {link.metadata_url}",
+                    level="warning",
+                )
+
         return None
 
-    def _get_info_from_urls(
+    def _get_info_from_links(
         self,
-        urls: dict[str, list[str]],
-        metadata: dict[str, pkginfo.Distribution] | None = None,
+        links: list[Link],
+        *,
+        ignore_yanked: bool = True,
     ) -> PackageInfo:
-        metadata = metadata or {}
+        # Sort links by distribution type
+        wheels: list[Link] = []
+        sdists: list[Link] = []
+        for link in links:
+            if link.yanked and ignore_yanked:
+                # drop yanked files unless the entire release is yanked
+                continue
+            if link.is_wheel:
+                wheels.append(link)
+            elif link.filename.endswith(
+                (".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
+            ):
+                sdists.append(link)
+
         # Prefer to read data from wheels: this is faster and more reliable
-        if wheels := urls.get("bdist_wheel"):
+        if wheels:
             # We ought just to be able to look at any of the available wheels to read
             # metadata, they all should give the same answer.
             #
@@ -191,8 +225,7 @@ def _get_info_from_urls(
             universal_python3_wheel = None
             platform_specific_wheels = []
             for wheel in wheels:
-                link = Link(wheel)
-                m = wheel_file_re.match(link.filename)
+                m = wheel_file_re.match(wheel.filename)
                 if not m:
                     continue
 
@@ -213,17 +246,17 @@ def _get_info_from_urls(
 
             if universal_wheel is not None:
                 return self._get_info_from_metadata(
-                    universal_wheel, metadata
+                    universal_wheel
                 ) or self._get_info_from_wheel(universal_wheel)
 
             info = None
             if universal_python2_wheel and universal_python3_wheel:
                 info = self._get_info_from_metadata(
-                    universal_python2_wheel, metadata
+                    universal_python2_wheel
                ) or self._get_info_from_wheel(universal_python2_wheel)
 
                 py3_info = self._get_info_from_metadata(
-                    universal_python3_wheel, metadata
+                    universal_python3_wheel
                ) or self._get_info_from_wheel(universal_python3_wheel)
 
                 if info.requires_python or py3_info.requires_python:
@@ -275,71 +308,23 @@ def _get_info_from_urls(
            # Prefer non platform specific wheels
            if universal_python3_wheel:
                return self._get_info_from_metadata(
-                    universal_python3_wheel, metadata
+                    universal_python3_wheel
                ) or self._get_info_from_wheel(universal_python3_wheel)
 
            if universal_python2_wheel:
                return self._get_info_from_metadata(
-                    universal_python2_wheel, metadata
+                    universal_python2_wheel
                ) or self._get_info_from_wheel(universal_python2_wheel)
 
            if platform_specific_wheels:
                first_wheel = platform_specific_wheels[0]
                return self._get_info_from_metadata(
-                    first_wheel, metadata
+                    first_wheel
                ) or self._get_info_from_wheel(first_wheel)
 
-        return self._get_info_from_metadata(
-            urls["sdist"][0], metadata
-        ) or self._get_info_from_sdist(urls["sdist"][0])
-
-    def _get_info_from_links(
-        self,
-        links: list[Link],
-        *,
-        ignore_yanked: bool = True,
-    ) -> PackageInfo:
-        urls = defaultdict(list)
-        metadata: dict[str, pkginfo.Distribution] = {}
-        for link in links:
-            if link.yanked and ignore_yanked:
-                # drop yanked files unless the entire release is yanked
-                continue
-            if link.has_metadata:
-                try:
-                    assert link.metadata_url is not None
-                    response = self.session.get(link.metadata_url)
-                    distribution = pkginfo.Distribution()
-                    if link.metadata_hash_name is not None:
-                        metadata_hash = getattr(hashlib, link.metadata_hash_name)(
-                            response.text.encode()
-                        ).hexdigest()
-
-                        if metadata_hash != link.metadata_hash:
-                            self._log(
-                                f"Metadata file hash ({metadata_hash}) does not match"
-                                f" expected hash ({link.metadata_hash})."
-                                f" Metadata file for {link.filename} will be ignored.",
-                                level="warning",
-                            )
-                            continue
-
-                    distribution.parse(response.content)
-                    metadata[link.url] = distribution
-                except requests.HTTPError:
-                    self._log(
-                        f"Failed to retrieve metadata at {link.metadata_url}",
-                        level="warning",
-                    )
-
-            if link.is_wheel:
-                urls["bdist_wheel"].append(link.url)
-            elif link.filename.endswith(
-                (".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
-            ):
-                urls["sdist"].append(link.url)
-
-        return self._get_info_from_urls(urls, metadata)
+        return self._get_info_from_metadata(sdists[0]) or self._get_info_from_sdist(
+            sdists[0]
+        )
 
     def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:
         if not links:
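In short, metadata handling moves out of _get_info_from_links (which used to download and parse a metadata file for every link in the release) and into _get_info_from_metadata, which fetches the per-file metadata (PEP 658) only for the link that is actually selected. Below is a minimal, self-contained sketch of that per-link lookup, assuming a requests-style session and a link object exposing the attributes used in the diff (has_metadata, metadata_url, metadata_hash_name, metadata_hash); the wrapper function name is illustrative, not Poetry's API:

from __future__ import annotations

import hashlib

import pkginfo
import requests


def metadata_for_link(session: requests.Session, link) -> pkginfo.Distribution | None:
    # Sketch only: mirrors the control flow of _get_info_from_metadata above.
    if not getattr(link, "has_metadata", False):
        return None  # the index did not publish a separate metadata file for this link

    try:
        response = session.get(link.metadata_url)
        response.raise_for_status()  # surface HTTP errors as requests.HTTPError

        if link.metadata_hash_name is not None:
            # Resolve the hash algorithm by name (e.g. "sha256") and verify the payload.
            digest = getattr(hashlib, link.metadata_hash_name)(
                response.text.encode()
            ).hexdigest()
            if digest != link.metadata_hash:
                return None  # mismatched hash: ignore the metadata file

        distribution = pkginfo.Distribution()
        distribution.parse(response.content)  # parse the raw METADATA bytes
        return distribution
    except requests.HTTPError:
        return None  # metadata endpoint unavailable: caller falls back to a full download

Returning None here corresponds to the fall-back path in the diff: the caller then downloads the wheel or sdist itself via _get_info_from_wheel or _get_info_from_sdist.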

tests/repositories/test_http_repository.py

+9 -8
@@ -10,6 +10,7 @@
 import pytest
 
 from packaging.metadata import parse_email
+from poetry.core.packages.utils.link import Link
 
 from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
 from poetry.repositories.http_repository import HTTPRepository
@@ -61,7 +62,7 @@ def test_get_info_from_wheel(
     if lazy_wheel and supports_range_requests is not None:
         repo._supports_range_requests[domain] = supports_range_requests
 
-    info = repo._get_info_from_wheel(url)
+    info = repo._get_info_from_wheel(Link(url))
     assert info.name == "poetry-core"
     assert info.version == "1.5.0"
     assert info.requires_dist == [
@@ -110,41 +111,41 @@ def test_get_info_from_wheel_state_sequence(mocker: MockerFixture) -> None:
 
     filename = "poetry_core-1.5.0-py3-none-any.whl"
     domain = "foo.com"
-    url = f"https://{domain}/{filename}"
+    link = Link(f"https://{domain}/{filename}")
     repo = MockRepository()
 
     # 1. range request and download
     mock_metadata_from_wheel_url.side_effect = HTTPRangeRequestUnsupported
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 1
     assert mock_download.call_count == 1
 
     # 2. only download
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 1
     assert mock_download.call_count == 2
 
     # 3. range request and download
     mock_metadata_from_wheel_url.side_effect = None
     mock_download.side_effect = HTTPRangeRequestSupported
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 2
     assert mock_download.call_count == 3
 
     # 4. only range request
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 3
     assert mock_download.call_count == 3
 
     # 5. range request and download
     mock_metadata_from_wheel_url.side_effect = HTTPRangeRequestUnsupported
     mock_download.side_effect = None
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 4
     assert mock_download.call_count == 4
 
     # 6. only range request
     mock_metadata_from_wheel_url.side_effect = None
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 5
     assert mock_download.call_count == 4
