Skip to content

Commit b78a1d2

Browse files
committed
Avoid batch prefetching for un-optimized registries
1 parent d9cd282 commit b78a1d2

File tree

24 files changed

+227
-90
lines changed

24 files changed

+227
-90
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/bench/benches/uv.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ mod resolver {
8383

8484
use anyhow::Result;
8585

86-
use distribution_types::IndexLocations;
86+
use distribution_types::{IndexCapabilities, IndexLocations};
8787
use install_wheel_rs::linker::LinkMode;
8888
use pep440_rs::Version;
8989
use pep508_rs::{MarkerEnvironment, MarkerEnvironmentBuilder};
@@ -152,6 +152,7 @@ mod resolver {
152152
);
153153
let flat_index = FlatIndex::default();
154154
let git = GitResolver::default();
155+
let capabilities = IndexCapabilities::default();
155156
let hashes = HashStrategy::None;
156157
let in_flight = InFlight::default();
157158
let index = InMemoryIndex::default();
@@ -179,6 +180,7 @@ mod resolver {
179180
&flat_index,
180181
&index,
181182
&git,
183+
&capabilities,
182184
&in_flight,
183185
IndexStrategy::default(),
184186
&config_settings,

crates/distribution-types/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ fs-err = { workspace = true }
2828
itertools = { workspace = true }
2929
jiff = { workspace = true }
3030
rkyv = { workspace = true }
31+
rustc-hash = { workspace = true }
3132
schemars = { workspace = true, optional = true }
3233
serde = { workspace = true, features = ["derive"] }
3334
serde_json = { workspace = true }

crates/distribution-types/src/index_url.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use itertools::Either;
2+
use rustc_hash::FxHashSet;
23
use std::borrow::Cow;
34
use std::fmt::{Display, Formatter};
45
use std::ops::Deref;
56
use std::path::Path;
67
use std::str::FromStr;
7-
use std::sync::LazyLock;
8+
use std::sync::{Arc, LazyLock, RwLock};
89
use thiserror::Error;
910
use url::{ParseError, Url};
1011

@@ -485,3 +486,27 @@ impl From<IndexLocations> for IndexUrls {
485486
}
486487
}
487488
}
489+
490+
/// A map of [`IndexUrl`]s to their capabilities.
491+
///
492+
/// For now, we only support a single capability (range requests), and we only store an index if
493+
/// it _doesn't_ support range requests. The benefit is that the map is almost always empty, so
494+
/// validating capabilities is extremely cheap.
495+
#[derive(Debug, Default, Clone)]
496+
pub struct IndexCapabilities(Arc<RwLock<FxHashSet<IndexUrl>>>);
497+
498+
impl IndexCapabilities {
499+
/// Returns `true` if the given [`IndexUrl`] supports range requests.
500+
pub fn supports_range_requests(&self, index_url: &IndexUrl) -> bool {
501+
!self.0.read().unwrap().contains(index_url)
502+
}
503+
504+
/// Mark an [`IndexUrl`] as not supporting range requests.
505+
pub fn set_supports_range_requests(&self, index_url: IndexUrl, supports: bool) {
506+
if supports {
507+
self.0.write().unwrap().remove(&index_url);
508+
} else {
509+
self.0.write().unwrap().insert(index_url);
510+
}
511+
}
512+
}

crates/distribution-types/src/prioritized_distribution.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -468,15 +468,14 @@ impl<'a> CompatibleDist<'a> {
468468
}
469469
}
470470

471-
/// Returns whether the distribution is a source distribution.
472-
///
473-
/// Avoid building source distributions we don't need.
474-
pub fn prefetchable(&self) -> bool {
475-
match *self {
476-
CompatibleDist::SourceDist { .. } => false,
477-
CompatibleDist::InstalledDist(_)
478-
| CompatibleDist::CompatibleWheel { .. }
479-
| CompatibleDist::IncompatibleWheel { .. } => true,
471+
/// Returns a [`RegistryBuiltWheel`] if the distribution includes a compatible or incompatible
472+
/// wheel.
473+
pub fn wheel(&self) -> Option<&RegistryBuiltWheel> {
474+
match self {
475+
CompatibleDist::InstalledDist(_) => None,
476+
CompatibleDist::SourceDist { .. } => None,
477+
CompatibleDist::CompatibleWheel { wheel, .. } => Some(wheel),
478+
CompatibleDist::IncompatibleWheel { wheel, .. } => Some(wheel),
480479
}
481480
}
482481
}

crates/uv-client/src/registry_client.rs

Lines changed: 108 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use std::collections::BTreeMap;
22
use std::fmt::Debug;
3-
use std::io;
43
use std::path::PathBuf;
54
use std::str::FromStr;
65

@@ -16,7 +15,9 @@ use tracing::{info_span, instrument, trace, warn, Instrument};
1615
use url::Url;
1716

1817
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
19-
use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name};
18+
use distribution_types::{
19+
BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name, RegistryBuiltDist,
20+
};
2021
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
2122
use pep440_rs::Version;
2223
use pep508_rs::MarkerEnvironment;
@@ -147,7 +148,7 @@ impl<'a> RegistryClientBuilder<'a> {
147148
}
148149

149150
impl<'a> TryFrom<BaseClientBuilder<'a>> for RegistryClientBuilder<'a> {
150-
type Error = io::Error;
151+
type Error = std::io::Error;
151152

152153
fn try_from(value: BaseClientBuilder<'a>) -> Result<Self, Self::Error> {
153154
Ok(Self {
@@ -197,6 +198,22 @@ impl RegistryClient {
197198
self.timeout
198199
}
199200

201+
/// Whether a [`File`] supports metadata prefetching.
202+
pub fn should_prefetch(
203+
&self,
204+
wheel: &RegistryBuiltDist,
205+
capabilities: &IndexCapabilities,
206+
) -> bool {
207+
let wheel = wheel.best_wheel();
208+
if wheel.file.dist_info_metadata {
209+
return true;
210+
}
211+
if capabilities.supports_range_requests(&wheel.index) {
212+
return true;
213+
}
214+
false
215+
}
216+
200217
/// Fetch a package from the `PyPI` simple API.
201218
///
202219
/// "simple" here refers to [PEP 503 – Simple Repository API](https://peps.python.org/pep-0503/)
@@ -395,14 +412,19 @@ impl RegistryClient {
395412
OwnedArchive::from_unarchived(&metadata)
396413
}
397414

415+
// STOPSHIP: Pass in capabilities.
398416
/// Fetch the metadata for a remote wheel file.
399417
///
400418
/// For a remote wheel, we try the following ways to fetch the metadata:
401419
/// 1. From a [PEP 658](https://peps.python.org/pep-0658/) data-dist-info-metadata url
402420
/// 2. From a remote wheel by partial zip reading
403421
/// 3. From a (temp) download of a remote wheel (this is a fallback, the webserver should support range requests)
404422
#[instrument(skip_all, fields(% built_dist))]
405-
pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata23, Error> {
423+
pub async fn wheel_metadata(
424+
&self,
425+
built_dist: &BuiltDist,
426+
capabilities: &IndexCapabilities,
427+
) -> Result<Metadata23, Error> {
406428
let metadata = match &built_dist {
407429
BuiltDist::Registry(wheels) => {
408430
#[derive(Debug, Clone)]
@@ -451,7 +473,7 @@ impl RegistryClient {
451473
.await?
452474
}
453475
WheelLocation::Url(url) => {
454-
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
476+
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities)
455477
.await?
456478
}
457479
}
@@ -460,7 +482,9 @@ impl RegistryClient {
460482
self.wheel_metadata_no_pep658(
461483
&wheel.filename,
462484
&wheel.url,
485+
None,
463486
WheelCache::Url(&wheel.url),
487+
capabilities,
464488
)
465489
.await?
466490
}
@@ -489,6 +513,7 @@ impl RegistryClient {
489513
index: &IndexUrl,
490514
file: &File,
491515
url: &Url,
516+
capabilities: &IndexCapabilities,
492517
) -> Result<Metadata23, Error> {
493518
// If the metadata file is available at its own url (PEP 658), download it from there.
494519
let filename = WheelFilename::from_str(&file.filename).map_err(ErrorKind::WheelFilename)?;
@@ -536,8 +561,14 @@ impl RegistryClient {
536561
// If we lack PEP 658 support, try using HTTP range requests to read only the
537562
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
538563
// into the cache and read from there
539-
self.wheel_metadata_no_pep658(&filename, url, WheelCache::Index(index))
540-
.await
564+
self.wheel_metadata_no_pep658(
565+
&filename,
566+
url,
567+
Some(index),
568+
WheelCache::Index(index),
569+
capabilities,
570+
)
571+
.await
541572
}
542573
}
543574

@@ -546,7 +577,9 @@ impl RegistryClient {
546577
&self,
547578
filename: &'data WheelFilename,
548579
url: &'data Url,
580+
index: Option<&'data IndexUrl>,
549581
cache_shard: WheelCache<'data>,
582+
capabilities: &'data IndexCapabilities,
550583
) -> Result<Metadata23, Error> {
551584
let cache_entry = self.cache.entry(
552585
CacheBucket::Wheels,
@@ -562,72 +595,80 @@ impl RegistryClient {
562595
Connectivity::Offline => CacheControl::AllowStale,
563596
};
564597

565-
let req = self
566-
.uncached_client(url)
567-
.head(url.clone())
568-
.header(
569-
"accept-encoding",
570-
http::HeaderValue::from_static("identity"),
571-
)
572-
.build()
573-
.map_err(ErrorKind::from)?;
598+
// Attempt to fetch via a range request.
599+
if index.map_or(true, |index| capabilities.supports_range_requests(index)) {
600+
let req = self
601+
.uncached_client(url)
602+
.head(url.clone())
603+
.header(
604+
"accept-encoding",
605+
http::HeaderValue::from_static("identity"),
606+
)
607+
.build()
608+
.map_err(ErrorKind::from)?;
574609

575-
// Copy authorization headers from the HEAD request to subsequent requests
576-
let mut headers = HeaderMap::default();
577-
if let Some(authorization) = req.headers().get("authorization") {
578-
headers.append("authorization", authorization.clone());
579-
}
610+
// Copy authorization headers from the HEAD request to subsequent requests
611+
let mut headers = HeaderMap::default();
612+
if let Some(authorization) = req.headers().get("authorization") {
613+
headers.append("authorization", authorization.clone());
614+
}
580615

581-
// This response callback is special, we actually make a number of subsequent requests to
582-
// fetch the file from the remote zip.
583-
let read_metadata_range_request = |response: Response| {
584-
async {
585-
let mut reader = AsyncHttpRangeReader::from_head_response(
586-
self.uncached_client(url).clone(),
587-
response,
588-
url.clone(),
589-
headers,
616+
// This response callback is special, we actually make a number of subsequent requests to
617+
// fetch the file from the remote zip.
618+
let read_metadata_range_request = |response: Response| {
619+
async {
620+
let mut reader = AsyncHttpRangeReader::from_head_response(
621+
self.uncached_client(url).clone(),
622+
response,
623+
url.clone(),
624+
headers,
625+
)
626+
.await
627+
.map_err(ErrorKind::AsyncHttpRangeReader)?;
628+
trace!("Getting metadata for {filename} by range request");
629+
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
630+
let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| {
631+
Error::from(ErrorKind::MetadataParseError(
632+
filename.clone(),
633+
url.to_string(),
634+
Box::new(err),
635+
))
636+
})?;
637+
Ok::<Metadata23, CachedClientError<Error>>(metadata)
638+
}
639+
.boxed_local()
640+
.instrument(info_span!("read_metadata_range_request", wheel = %filename))
641+
};
642+
643+
let result = self
644+
.cached_client()
645+
.get_serde(
646+
req,
647+
&cache_entry,
648+
cache_control,
649+
read_metadata_range_request,
590650
)
591651
.await
592-
.map_err(ErrorKind::AsyncHttpRangeReader)?;
593-
trace!("Getting metadata for {filename} by range request");
594-
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
595-
let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| {
596-
Error::from(ErrorKind::MetadataParseError(
597-
filename.clone(),
598-
url.to_string(),
599-
Box::new(err),
600-
))
601-
})?;
602-
Ok::<Metadata23, CachedClientError<Error>>(metadata)
603-
}
604-
.boxed_local()
605-
.instrument(info_span!("read_metadata_range_request", wheel = %filename))
606-
};
652+
.map_err(crate::Error::from);
607653

608-
let result = self
609-
.cached_client()
610-
.get_serde(
611-
req,
612-
&cache_entry,
613-
cache_control,
614-
read_metadata_range_request,
615-
)
616-
.await
617-
.map_err(crate::Error::from);
618-
619-
match result {
620-
Ok(metadata) => return Ok(metadata),
621-
Err(err) => {
622-
if err.is_http_range_requests_unsupported() {
623-
// The range request version failed. Fall back to streaming the file to search
624-
// for the METADATA file.
625-
warn!("Range requests not supported for {filename}; streaming wheel");
626-
} else {
627-
return Err(err);
654+
match result {
655+
Ok(metadata) => return Ok(metadata),
656+
Err(err) => {
657+
if err.is_http_range_requests_unsupported() {
658+
// The range request version failed. Fall back to streaming the file to search
659+
// for the METADATA file.
660+
warn!("Range requests not supported for {filename}; streaming wheel");
661+
662+
// Mark the index as not supporting range requests.
663+
if let Some(index) = index {
664+
capabilities.set_supports_range_requests(index.clone(), false);
665+
}
666+
} else {
667+
return Err(err);
668+
}
628669
}
629-
}
630-
};
670+
};
671+
}
631672

632673
// Create a request to stream the file.
633674
let req = self

crates/uv-client/tests/remote_metadata.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use anyhow::Result;
44
use url::Url;
55

66
use distribution_filename::WheelFilename;
7-
use distribution_types::{BuiltDist, DirectUrlBuiltDist};
7+
use distribution_types::{BuiltDist, DirectUrlBuiltDist, IndexCapabilities};
88
use pep508_rs::VerbatimUrl;
99
use uv_cache::Cache;
1010
use uv_client::RegistryClientBuilder;
@@ -24,7 +24,8 @@ async fn remote_metadata_with_and_without_cache() -> Result<()> {
2424
location: Url::parse(url).unwrap(),
2525
url: VerbatimUrl::from_str(url).unwrap(),
2626
});
27-
let metadata = client.wheel_metadata(&dist).await.unwrap();
27+
let capabilities = IndexCapabilities::default();
28+
let metadata = client.wheel_metadata(&dist, &capabilities).await.unwrap();
2829
assert_eq!(metadata.version.to_string(), "4.66.1");
2930
}
3031

0 commit comments

Comments
 (0)