
Commit c286e84

only download metadata we need (instead of all metadata)
1 parent d3d201f commit c286e84
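
What changes in practice: previously, _get_info_from_links downloaded a PEP 658 metadata file for every non-yanked link of a release before deciding which wheel or sdist to use; now the preferred link is chosen first and _get_info_from_metadata fetches metadata only for that one link. Below is a minimal sketch of that reordering; fetch_metadata and select_preferred are hypothetical stand-ins for the real HTTP and selection logic, not Poetry APIs.

def fetch_metadata(link: str) -> dict:
    # Stand-in for an HTTP GET of the link's PEP 658 metadata file.
    print(f"downloading metadata for {link}")
    return {"name": link}


def select_preferred(links: list[str]) -> str:
    # Stand-in for Poetry's wheel/sdist preference logic.
    return links[0]


def info_before(links: list[str]) -> dict:
    # Old flow: metadata is downloaded for every candidate link up front.
    metadata = {link: fetch_metadata(link) for link in links}
    return metadata[select_preferred(links)]


def info_after(links: list[str]) -> dict:
    # New flow: pick the preferred link first, then download only its metadata.
    return fetch_metadata(select_preferred(links))


links = ["pkg-1.0-py3-none-any.whl", "pkg-1.0.tar.gz"]
info_before(links)  # two metadata downloads
info_after(links)   # one metadata download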

2 files changed (+78, -92 lines)

src/poetry/repositories/http_repository.py (+69, -84)
@@ -3,7 +3,6 @@
 import functools
 import hashlib
 
-from collections import defaultdict
 from contextlib import contextmanager
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -16,7 +15,6 @@
 
 from poetry.core.constraints.version import parse_constraint
 from poetry.core.packages.dependency import Dependency
-from poetry.core.packages.utils.link import Link
 from poetry.core.utils.helpers import temporary_directory
 from poetry.core.version.markers import parse_marker
 
@@ -38,6 +36,7 @@
 
 if TYPE_CHECKING:
     from packaging.utils import NormalizedName
+    from poetry.core.packages.utils.link import Link
 
     from poetry.repositories.link_sources.base import LinkSource
     from poetry.utils.authenticator import RepositoryCertificateConfig
@@ -110,10 +109,9 @@ def _cached_or_downloaded_file(
             )
             yield filepath
 
-    def _get_info_from_wheel(self, url: str) -> PackageInfo:
+    def _get_info_from_wheel(self, link: Link) -> PackageInfo:
         from poetry.inspection.info import PackageInfo
 
-        link = Link(url)
         netloc = link.netloc
 
         # If "lazy-wheel" is enabled and the domain supports range requests
@@ -149,37 +147,73 @@ def _get_info_from_wheel(self, url: str) -> PackageInfo:
                 level="debug",
             )
             self._supports_range_requests[netloc] = True
-            return self._get_info_from_wheel(link.url)
+            return self._get_info_from_wheel(link)
 
-    def _get_info_from_sdist(self, url: str) -> PackageInfo:
+    def _get_info_from_sdist(self, link: Link) -> PackageInfo:
         from poetry.inspection.info import PackageInfo
 
-        with self._cached_or_downloaded_file(Link(url)) as filepath:
+        with self._cached_or_downloaded_file(link) as filepath:
             return PackageInfo.from_sdist(filepath)
 
-    @staticmethod
-    def _get_info_from_metadata(
-        url: str, metadata: dict[str, pkginfo.Distribution]
-    ) -> PackageInfo | None:
-        if url in metadata:
-            dist = metadata[url]
-            return PackageInfo(
-                name=dist.name,
-                version=dist.version,
-                summary=dist.summary,
-                requires_dist=list(dist.requires_dist),
-                requires_python=dist.requires_python,
-            )
+    def _get_info_from_metadata(self, link: Link) -> PackageInfo | None:
+        if link.has_metadata:
+            try:
+                assert link.metadata_url is not None
+                response = self.session.get(link.metadata_url)
+                distribution = pkginfo.Distribution()
+                if link.metadata_hash_name is not None:
+                    metadata_hash = getattr(hashlib, link.metadata_hash_name)(
+                        response.text.encode()
+                    ).hexdigest()
+
+                    if metadata_hash != link.metadata_hash:
+                        self._log(
+                            f"Metadata file hash ({metadata_hash}) does not match"
+                            f" expected hash ({link.metadata_hash})."
+                            f" Metadata file for {link.filename} will be ignored.",
+                            level="warning",
+                        )
+                        return None
+
+                distribution.parse(response.content)
+                return PackageInfo(
+                    name=distribution.name,
+                    version=distribution.version,
+                    summary=distribution.summary,
+                    requires_dist=list(distribution.requires_dist),
+                    requires_python=distribution.requires_python,
+                )
+
+            except requests.HTTPError:
+                self._log(
+                    f"Failed to retrieve metadata at {link.metadata_url}",
+                    level="warning",
+                )
+
         return None
 
-    def _get_info_from_urls(
+    def _get_info_from_links(
         self,
-        urls: dict[str, list[str]],
-        metadata: dict[str, pkginfo.Distribution] | None = None,
+        links: list[Link],
+        *,
+        ignore_yanked: bool = True,
     ) -> PackageInfo:
-        metadata = metadata or {}
+        # Sort links by distribution type
+        wheels: list[Link] = []
+        sdists: list[Link] = []
+        for link in links:
+            if link.yanked and ignore_yanked:
+                # drop yanked files unless the entire release is yanked
+                continue
+            if link.is_wheel:
+                wheels.append(link)
+            elif link.filename.endswith(
+                (".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
+            ):
+                sdists.append(link)
+
         # Prefer to read data from wheels: this is faster and more reliable
-        if wheels := urls.get("bdist_wheel"):
+        if wheels:
             # We ought just to be able to look at any of the available wheels to read
             # metadata, they all should give the same answer.
             #
@@ -194,8 +228,7 @@ def _get_info_from_urls(
             universal_python3_wheel = None
             platform_specific_wheels = []
             for wheel in wheels:
-                link = Link(wheel)
-                m = wheel_file_re.match(link.filename)
+                m = wheel_file_re.match(wheel.filename)
                 if not m:
                     continue
 
@@ -216,17 +249,17 @@ def _get_info_from_urls(
 
             if universal_wheel is not None:
                 return self._get_info_from_metadata(
-                    universal_wheel, metadata
+                    universal_wheel
                 ) or self._get_info_from_wheel(universal_wheel)
 
             info = None
             if universal_python2_wheel and universal_python3_wheel:
                 info = self._get_info_from_metadata(
-                    universal_python2_wheel, metadata
+                    universal_python2_wheel
                 ) or self._get_info_from_wheel(universal_python2_wheel)
 
                 py3_info = self._get_info_from_metadata(
-                    universal_python3_wheel, metadata
+                    universal_python3_wheel
                 ) or self._get_info_from_wheel(universal_python3_wheel)
 
                 if info.requires_python or py3_info.requires_python:
@@ -278,71 +311,23 @@ def _get_info_from_urls(
             # Prefer non platform specific wheels
             if universal_python3_wheel:
                 return self._get_info_from_metadata(
-                    universal_python3_wheel, metadata
+                    universal_python3_wheel
                 ) or self._get_info_from_wheel(universal_python3_wheel)
 
             if universal_python2_wheel:
                 return self._get_info_from_metadata(
-                    universal_python2_wheel, metadata
+                    universal_python2_wheel
                 ) or self._get_info_from_wheel(universal_python2_wheel)
 
             if platform_specific_wheels:
                 first_wheel = platform_specific_wheels[0]
                 return self._get_info_from_metadata(
-                    first_wheel, metadata
+                    first_wheel
                 ) or self._get_info_from_wheel(first_wheel)
 
-        return self._get_info_from_metadata(
-            urls["sdist"][0], metadata
-        ) or self._get_info_from_sdist(urls["sdist"][0])
-
-    def _get_info_from_links(
-        self,
-        links: list[Link],
-        *,
-        ignore_yanked: bool = True,
-    ) -> PackageInfo:
-        urls = defaultdict(list)
-        metadata: dict[str, pkginfo.Distribution] = {}
-        for link in links:
-            if link.yanked and ignore_yanked:
-                # drop yanked files unless the entire release is yanked
-                continue
-            if link.has_metadata:
-                try:
-                    assert link.metadata_url is not None
-                    response = self.session.get(link.metadata_url)
-                    distribution = pkginfo.Distribution()
-                    if link.metadata_hash_name is not None:
-                        metadata_hash = getattr(hashlib, link.metadata_hash_name)(
-                            response.text.encode()
-                        ).hexdigest()
-
-                        if metadata_hash != link.metadata_hash:
-                            self._log(
-                                f"Metadata file hash ({metadata_hash}) does not match"
-                                f" expected hash ({link.metadata_hash})."
-                                f" Metadata file for {link.filename} will be ignored.",
-                                level="warning",
-                            )
-                            continue
-
-                    distribution.parse(response.content)
-                    metadata[link.url] = distribution
-                except requests.HTTPError:
-                    self._log(
-                        f"Failed to retrieve metadata at {link.metadata_url}",
-                        level="warning",
-                    )
-
-            if link.is_wheel:
-                urls["bdist_wheel"].append(link.url)
-            elif link.filename.endswith(
-                (".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
-            ):
-                urls["sdist"].append(link.url)
-
-        return self._get_info_from_urls(urls, metadata)
+        return self._get_info_from_metadata(sdists[0]) or self._get_info_from_sdist(
+            sdists[0]
+        )
 
     def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:
         if not links:
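
For context, the fetch-and-verify step that the new _get_info_from_metadata performs can be read in isolation as follows. This is a standalone sketch, not a Poetry function: fetch_and_verify_metadata, the example URL, and the hash values are placeholders; it only mirrors how the method above uses requests and hashlib to check a PEP 658 metadata file before trusting it.

import hashlib

import requests


def fetch_and_verify_metadata(
    metadata_url: str, hash_name: str | None, expected_hash: str | None
) -> bytes | None:
    # Download the metadata file and, if the index advertised a hash,
    # verify it before trusting the content.
    response = requests.get(metadata_url)
    response.raise_for_status()
    if hash_name is not None:
        digest = getattr(hashlib, hash_name)(response.text.encode()).hexdigest()
        if digest != expected_hash:
            return None  # hash mismatch: ignore the metadata file
    return response.content


# Placeholder values, for illustration only:
# fetch_and_verify_metadata(
#     "https://example.com/simple/demo/demo-1.0-py3-none-any.whl.metadata",
#     "sha256",
#     "<expected hex digest>",
# )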

tests/repositories/test_http_repository.py (+9, -8)
@@ -10,6 +10,7 @@
 import pytest
 
 from packaging.metadata import parse_email
+from poetry.core.packages.utils.link import Link
 
 from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
 from poetry.repositories.http_repository import HTTPRepository
@@ -61,7 +62,7 @@ def test_get_info_from_wheel(
     if lazy_wheel and supports_range_requests is not None:
         repo._supports_range_requests[domain] = supports_range_requests
 
-    info = repo._get_info_from_wheel(url)
+    info = repo._get_info_from_wheel(Link(url))
     assert info.name == "poetry-core"
     assert info.version == "1.5.0"
     assert info.requires_dist == [
@@ -110,45 +111,45 @@ def test_get_info_from_wheel_state_sequence(mocker: MockerFixture) -> None:
 
     filename = "poetry_core-1.5.0-py3-none-any.whl"
     domain = "foo.com"
-    url = f"https://{domain}/{filename}"
+    link = Link(f"https://{domain}/{filename}")
     repo = MockRepository()
 
     # 1. range request and download
     mock_metadata_from_wheel_url.side_effect = HTTPRangeRequestUnsupported
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 1
     assert mock_download.call_count == 1
     assert mock_download.call_args[1]["raise_accepts_ranges"] is False
 
     # 2. only download
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 1
     assert mock_download.call_count == 2
     assert mock_download.call_args[1]["raise_accepts_ranges"] is True
 
     # 3. download and range request
     mock_metadata_from_wheel_url.side_effect = None
     mock_download.side_effect = HTTPRangeRequestSupported
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
    assert mock_metadata_from_wheel_url.call_count == 2
     assert mock_download.call_count == 3
     assert mock_download.call_args[1]["raise_accepts_ranges"] is True
 
     # 4. only range request
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 3
     assert mock_download.call_count == 3
 
     # 5. range request and download
     mock_metadata_from_wheel_url.side_effect = HTTPRangeRequestUnsupported
     mock_download.side_effect = None
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 4
     assert mock_download.call_count == 4
     assert mock_download.call_args[1]["raise_accepts_ranges"] is False
 
     # 6. only range request
     mock_metadata_from_wheel_url.side_effect = None
-    repo._get_info_from_wheel(url)
+    repo._get_info_from_wheel(link)
     assert mock_metadata_from_wheel_url.call_count == 5
     assert mock_download.call_count == 4
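
The test changes follow directly from the signature change: _get_info_from_wheel now takes a Link rather than a URL string, so the tests wrap the URL once. As a small illustration of the Link attributes the new repository code relies on (the URL below is a placeholder; only attributes that appear in the diff are used):

from poetry.core.packages.utils.link import Link

link = Link("https://example.com/packages/demo-1.0-py3-none-any.whl")

print(link.filename)      # "demo-1.0-py3-none-any.whl"
print(link.is_wheel)      # True -> sorted into the "wheels" bucket
print(link.has_metadata)  # False here; True only when the index advertises
                          # a PEP 658 metadata file for this link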

Comments (0)