Skip to content

Commit 9a7262c

Browse files
Avoid batch prefetching for un-optimized registries (#7226)
## Summary

We now track the discovered `IndexCapabilities` for each `IndexUrl`. If we learn that an index doesn't support range requests, we avoid doing any batch prefetching. Closes #7221.
1 parent 970bd1a commit 9a7262c

File tree

24 files changed

+203
-96
lines changed

24 files changed

+203
-96
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/bench/benches/uv.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ mod resolver {
8383

8484
use anyhow::Result;
8585

86-
use distribution_types::IndexLocations;
86+
use distribution_types::{IndexCapabilities, IndexLocations};
8787
use install_wheel_rs::linker::LinkMode;
8888
use pep440_rs::Version;
8989
use pep508_rs::{MarkerEnvironment, MarkerEnvironmentBuilder};
@@ -152,6 +152,7 @@ mod resolver {
152152
);
153153
let flat_index = FlatIndex::default();
154154
let git = GitResolver::default();
155+
let capabilities = IndexCapabilities::default();
155156
let hashes = HashStrategy::None;
156157
let in_flight = InFlight::default();
157158
let index = InMemoryIndex::default();
@@ -179,6 +180,7 @@ mod resolver {
179180
&flat_index,
180181
&index,
181182
&git,
183+
&capabilities,
182184
&in_flight,
183185
IndexStrategy::default(),
184186
&config_settings,

crates/distribution-types/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ fs-err = { workspace = true }
2828
itertools = { workspace = true }
2929
jiff = { workspace = true }
3030
rkyv = { workspace = true }
31+
rustc-hash = { workspace = true }
3132
schemars = { workspace = true, optional = true }
3233
serde = { workspace = true, features = ["derive"] }
3334
serde_json = { workspace = true }

crates/distribution-types/src/index_url.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use itertools::Either;
2+
use rustc_hash::FxHashSet;
23
use std::borrow::Cow;
34
use std::fmt::{Display, Formatter};
45
use std::ops::Deref;
56
use std::path::Path;
67
use std::str::FromStr;
7-
use std::sync::LazyLock;
8+
use std::sync::{Arc, LazyLock, RwLock};
89
use thiserror::Error;
910
use url::{ParseError, Url};
1011

@@ -485,3 +486,27 @@ impl From<IndexLocations> for IndexUrls {
485486
}
486487
}
487488
}
489+
490+
/// A record of the capabilities discovered for each [`IndexUrl`].
491+
///
492+
/// For now, we only support a single capability (range requests), and we only store an index if
493+
/// it _doesn't_ support range requests. The benefit is that the set is almost always empty, so
494+
/// validating capabilities is extremely cheap.
495+
#[derive(Debug, Default, Clone)]
496+
pub struct IndexCapabilities(Arc<RwLock<FxHashSet<IndexUrl>>>);
497+
498+
impl IndexCapabilities {
499+
/// Returns `true` if the given [`IndexUrl`] supports range requests.
500+
pub fn supports_range_requests(&self, index_url: &IndexUrl) -> bool {
501+
!self.0.read().unwrap().contains(index_url)
502+
}
503+
504+
/// Record whether an [`IndexUrl`] supports range requests.
505+
pub fn set_supports_range_requests(&self, index_url: IndexUrl, supports: bool) {
506+
if supports {
507+
self.0.write().unwrap().remove(&index_url);
508+
} else {
509+
self.0.write().unwrap().insert(index_url);
510+
}
511+
}
512+
}

crates/distribution-types/src/prioritized_distribution.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -468,15 +468,14 @@ impl<'a> CompatibleDist<'a> {
468468
}
469469
}
470470

471-
/// Returns whether the distribution is a source distribution.
472-
///
473-
/// Avoid building source distributions we don't need.
474-
pub fn prefetchable(&self) -> bool {
475-
match *self {
476-
CompatibleDist::SourceDist { .. } => false,
477-
CompatibleDist::InstalledDist(_)
478-
| CompatibleDist::CompatibleWheel { .. }
479-
| CompatibleDist::IncompatibleWheel { .. } => true,
471+
/// Returns a [`RegistryBuiltWheel`] if the distribution includes a compatible or incompatible
472+
/// wheel.
473+
pub fn wheel(&self) -> Option<&RegistryBuiltWheel> {
474+
match self {
475+
CompatibleDist::InstalledDist(_) => None,
476+
CompatibleDist::SourceDist { .. } => None,
477+
CompatibleDist::CompatibleWheel { wheel, .. } => Some(wheel),
478+
CompatibleDist::IncompatibleWheel { wheel, .. } => Some(wheel),
480479
}
481480
}
482481
}

crates/uv-client/src/registry_client.rs

Lines changed: 91 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use std::collections::BTreeMap;
22
use std::fmt::Debug;
3-
use std::io;
43
use std::path::PathBuf;
54
use std::str::FromStr;
65

@@ -16,7 +15,9 @@ use tracing::{info_span, instrument, trace, warn, Instrument};
1615
use url::Url;
1716

1817
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
19-
use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name};
18+
use distribution_types::{
19+
BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name,
20+
};
2021
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
2122
use pep440_rs::Version;
2223
use pep508_rs::MarkerEnvironment;
@@ -147,7 +148,7 @@ impl<'a> RegistryClientBuilder<'a> {
147148
}
148149

149150
impl<'a> TryFrom<BaseClientBuilder<'a>> for RegistryClientBuilder<'a> {
150-
type Error = io::Error;
151+
type Error = std::io::Error;
151152

152153
fn try_from(value: BaseClientBuilder<'a>) -> Result<Self, Self::Error> {
153154
Ok(Self {
@@ -402,7 +403,11 @@ impl RegistryClient {
402403
/// 2. From a remote wheel by partial zip reading
403404
/// 3. From a (temp) download of a remote wheel (this is a fallback; the webserver should support range requests)
404405
#[instrument(skip_all, fields(% built_dist))]
405-
pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata23, Error> {
406+
pub async fn wheel_metadata(
407+
&self,
408+
built_dist: &BuiltDist,
409+
capabilities: &IndexCapabilities,
410+
) -> Result<Metadata23, Error> {
406411
let metadata = match &built_dist {
407412
BuiltDist::Registry(wheels) => {
408413
#[derive(Debug, Clone)]
@@ -451,7 +456,7 @@ impl RegistryClient {
451456
.await?
452457
}
453458
WheelLocation::Url(url) => {
454-
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
459+
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities)
455460
.await?
456461
}
457462
}
@@ -460,7 +465,9 @@ impl RegistryClient {
460465
self.wheel_metadata_no_pep658(
461466
&wheel.filename,
462467
&wheel.url,
468+
None,
463469
WheelCache::Url(&wheel.url),
470+
capabilities,
464471
)
465472
.await?
466473
}
@@ -489,6 +496,7 @@ impl RegistryClient {
489496
index: &IndexUrl,
490497
file: &File,
491498
url: &Url,
499+
capabilities: &IndexCapabilities,
492500
) -> Result<Metadata23, Error> {
493501
// If the metadata file is available at its own url (PEP 658), download it from there.
494502
let filename = WheelFilename::from_str(&file.filename).map_err(ErrorKind::WheelFilename)?;
@@ -536,8 +544,14 @@ impl RegistryClient {
536544
// If we lack PEP 658 support, try using HTTP range requests to read only the
537545
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
538546
// into the cache and read from there
539-
self.wheel_metadata_no_pep658(&filename, url, WheelCache::Index(index))
540-
.await
547+
self.wheel_metadata_no_pep658(
548+
&filename,
549+
url,
550+
Some(index),
551+
WheelCache::Index(index),
552+
capabilities,
553+
)
554+
.await
541555
}
542556
}
543557

@@ -546,7 +560,9 @@ impl RegistryClient {
546560
&self,
547561
filename: &'data WheelFilename,
548562
url: &'data Url,
563+
index: Option<&'data IndexUrl>,
549564
cache_shard: WheelCache<'data>,
565+
capabilities: &'data IndexCapabilities,
550566
) -> Result<Metadata23, Error> {
551567
let cache_entry = self.cache.entry(
552568
CacheBucket::Wheels,
@@ -562,72 +578,80 @@ impl RegistryClient {
562578
Connectivity::Offline => CacheControl::AllowStale,
563579
};
564580

565-
let req = self
566-
.uncached_client(url)
567-
.head(url.clone())
568-
.header(
569-
"accept-encoding",
570-
http::HeaderValue::from_static("identity"),
571-
)
572-
.build()
573-
.map_err(ErrorKind::from)?;
581+
// Attempt to fetch via a range request.
582+
if index.map_or(true, |index| capabilities.supports_range_requests(index)) {
583+
let req = self
584+
.uncached_client(url)
585+
.head(url.clone())
586+
.header(
587+
"accept-encoding",
588+
http::HeaderValue::from_static("identity"),
589+
)
590+
.build()
591+
.map_err(ErrorKind::from)?;
574592

575-
// Copy authorization headers from the HEAD request to subsequent requests
576-
let mut headers = HeaderMap::default();
577-
if let Some(authorization) = req.headers().get("authorization") {
578-
headers.append("authorization", authorization.clone());
579-
}
593+
// Copy authorization headers from the HEAD request to subsequent requests
594+
let mut headers = HeaderMap::default();
595+
if let Some(authorization) = req.headers().get("authorization") {
596+
headers.append("authorization", authorization.clone());
597+
}
580598

581-
// This response callback is special, we actually make a number of subsequent requests to
582-
// fetch the file from the remote zip.
583-
let read_metadata_range_request = |response: Response| {
584-
async {
585-
let mut reader = AsyncHttpRangeReader::from_head_response(
586-
self.uncached_client(url).clone(),
587-
response,
588-
url.clone(),
589-
headers,
599+
// This response callback is special: we actually make a number of subsequent requests to
600+
// fetch the file from the remote zip.
601+
let read_metadata_range_request = |response: Response| {
602+
async {
603+
let mut reader = AsyncHttpRangeReader::from_head_response(
604+
self.uncached_client(url).clone(),
605+
response,
606+
url.clone(),
607+
headers,
608+
)
609+
.await
610+
.map_err(ErrorKind::AsyncHttpRangeReader)?;
611+
trace!("Getting metadata for {filename} by range request");
612+
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
613+
let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| {
614+
Error::from(ErrorKind::MetadataParseError(
615+
filename.clone(),
616+
url.to_string(),
617+
Box::new(err),
618+
))
619+
})?;
620+
Ok::<Metadata23, CachedClientError<Error>>(metadata)
621+
}
622+
.boxed_local()
623+
.instrument(info_span!("read_metadata_range_request", wheel = %filename))
624+
};
625+
626+
let result = self
627+
.cached_client()
628+
.get_serde(
629+
req,
630+
&cache_entry,
631+
cache_control,
632+
read_metadata_range_request,
590633
)
591634
.await
592-
.map_err(ErrorKind::AsyncHttpRangeReader)?;
593-
trace!("Getting metadata for {filename} by range request");
594-
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
595-
let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| {
596-
Error::from(ErrorKind::MetadataParseError(
597-
filename.clone(),
598-
url.to_string(),
599-
Box::new(err),
600-
))
601-
})?;
602-
Ok::<Metadata23, CachedClientError<Error>>(metadata)
603-
}
604-
.boxed_local()
605-
.instrument(info_span!("read_metadata_range_request", wheel = %filename))
606-
};
635+
.map_err(crate::Error::from);
607636

608-
let result = self
609-
.cached_client()
610-
.get_serde(
611-
req,
612-
&cache_entry,
613-
cache_control,
614-
read_metadata_range_request,
615-
)
616-
.await
617-
.map_err(crate::Error::from);
618-
619-
match result {
620-
Ok(metadata) => return Ok(metadata),
621-
Err(err) => {
622-
if err.is_http_range_requests_unsupported() {
623-
// The range request version failed. Fall back to streaming the file to search
624-
// for the METADATA file.
625-
warn!("Range requests not supported for {filename}; streaming wheel");
626-
} else {
627-
return Err(err);
637+
match result {
638+
Ok(metadata) => return Ok(metadata),
639+
Err(err) => {
640+
if err.is_http_range_requests_unsupported() {
641+
// The range request version failed. Fall back to streaming the file to search
642+
// for the METADATA file.
643+
warn!("Range requests not supported for {filename}; streaming wheel");
644+
645+
// Mark the index as not supporting range requests.
646+
if let Some(index) = index {
647+
capabilities.set_supports_range_requests(index.clone(), false);
648+
}
649+
} else {
650+
return Err(err);
651+
}
628652
}
629-
}
630-
};
653+
};
654+
}
631655

632656
// Create a request to stream the file.
633657
let req = self

crates/uv-client/tests/remote_metadata.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use anyhow::Result;
44
use url::Url;
55

66
use distribution_filename::WheelFilename;
7-
use distribution_types::{BuiltDist, DirectUrlBuiltDist};
7+
use distribution_types::{BuiltDist, DirectUrlBuiltDist, IndexCapabilities};
88
use pep508_rs::VerbatimUrl;
99
use uv_cache::Cache;
1010
use uv_client::RegistryClientBuilder;
@@ -24,7 +24,8 @@ async fn remote_metadata_with_and_without_cache() -> Result<()> {
2424
location: Url::parse(url).unwrap(),
2525
url: VerbatimUrl::from_str(url).unwrap(),
2626
});
27-
let metadata = client.wheel_metadata(&dist).await.unwrap();
27+
let capabilities = IndexCapabilities::default();
28+
let metadata = client.wheel_metadata(&dist, &capabilities).await.unwrap();
2829
assert_eq!(metadata.version.to_string(), "4.66.1");
2930
}
3031

crates/uv-dev/src/wheel_metadata.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use anyhow::{bail, Result};
55
use clap::Parser;
66

77
use distribution_filename::WheelFilename;
8-
use distribution_types::{BuiltDist, DirectUrlBuiltDist, RemoteSource};
8+
use distribution_types::{BuiltDist, DirectUrlBuiltDist, IndexCapabilities, RemoteSource};
99
use pep508_rs::VerbatimUrl;
1010
use pypi_types::ParsedUrl;
1111
use uv_cache::{Cache, CacheArgs};
@@ -21,6 +21,7 @@ pub(crate) struct WheelMetadataArgs {
2121
pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> Result<()> {
2222
let cache = Cache::try_from(args.cache_args)?.init()?;
2323
let client = RegistryClientBuilder::new(cache).build();
24+
let capabilities = IndexCapabilities::default();
2425

2526
let filename = WheelFilename::from_str(&args.url.filename()?)?;
2627

@@ -29,11 +30,14 @@ pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> Result<()> {
2930
};
3031

3132
let metadata = client
32-
.wheel_metadata(&BuiltDist::DirectUrl(DirectUrlBuiltDist {
33-
filename,
34-
location: archive.url,
35-
url: args.url,
36-
}))
33+
.wheel_metadata(
34+
&BuiltDist::DirectUrl(DirectUrlBuiltDist {
35+
filename,
36+
location: archive.url,
37+
url: args.url,
38+
}),
39+
&capabilities,
40+
)
3741
.await?;
3842
println!("{metadata:?}");
3943
Ok(())

0 commit comments

Comments
 (0)