-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Make uv’s first-index strategy more secure by default by failing early on authentication failure #12805
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make uv’s first-index strategy more secure by default by failing early on authentication failure #12805
Changes from 6 commits
866ed93
ad5c323
05b5f2d
5f2badb
c87194b
992961a
5871444
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
use std::fmt::Display; | ||
use std::fmt::Formatter; | ||
use std::hash::BuildHasherDefault; | ||
use std::sync::Arc; | ||
|
@@ -14,11 +15,26 @@ use crate::Realm; | |
|
||
type FxOnceMap<K, V> = OnceMap<K, V, BuildHasherDefault<FxHasher>>; | ||
|
||
#[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
pub(crate) enum FetchUrl { | ||
Index(Url), | ||
Realm(Realm), | ||
} | ||
|
||
impl Display for FetchUrl { | ||
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { | ||
match self { | ||
FetchUrl::Index(index) => Display::fmt(index, f), | ||
FetchUrl::Realm(realm) => Display::fmt(realm, f), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
} | ||
} | ||
} | ||
|
||
pub struct CredentialsCache { | ||
/// A cache per realm and username | ||
realms: RwLock<FxHashMap<(Realm, Username), Arc<Credentials>>>, | ||
/// A cache tracking the result of realm or index URL fetches from external services | ||
pub(crate) fetches: FxOnceMap<(String, Username), Option<Arc<Credentials>>>, | ||
pub(crate) fetches: FxOnceMap<(FetchUrl, Username), Option<Arc<Credentials>>>, | ||
/// A cache per URL, uses a trie for efficient prefix queries. | ||
urls: RwLock<UrlTrie>, | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,6 +60,23 @@ pub struct Index { | |
pub auth_policy: AuthPolicy, | ||
} | ||
|
||
impl Index { | ||
pub fn is_prefix_for(&self, url: &Url) -> bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Rustdoc please with an example of when this returns There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because of the timing of the release, can we defer this to a PR afterwards? I just moved this logic from a function to a method in this PR. |
||
if self.root_url.scheme() != url.scheme() | ||
|| self.root_url.host_str() != url.host_str() | ||
|| self.root_url.port_or_known_default() != url.port_or_known_default() | ||
{ | ||
return false; | ||
} | ||
|
||
url.path().starts_with(self.root_url.path()) | ||
} | ||
} | ||
|
||
// TODO(john): Multiple methods in this struct need to iterate over | ||
jtfmumm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// all the indexes in the set. There are probably not many URLs to | ||
// iterate through, but we could use a trie instead of a HashSet here | ||
// for more efficient search. | ||
#[derive(Debug, Default, Clone, Eq, PartialEq)] | ||
pub struct Indexes(FxHashSet<Index>); | ||
|
||
|
@@ -79,36 +96,17 @@ impl Indexes { | |
|
||
/// Get the index URL prefix for a URL if one exists. | ||
pub fn index_url_for(&self, url: &Url) -> Option<&Url> { | ||
// TODO(john): There are probably not many URLs to iterate through, | ||
// but we could use a trie instead of a HashSet here for more | ||
// efficient search. | ||
self.0 | ||
.iter() | ||
.find(|index| is_url_prefix(&index.root_url, url)) | ||
.map(|index| &index.url) | ||
self.find_prefix_index(url).map(|index| &index.url) | ||
} | ||
|
||
/// Get the [`AuthPolicy`] for a URL. | ||
pub fn policy_for(&self, url: &Url) -> AuthPolicy { | ||
// TODO(john): There are probably not many URLs to iterate through, | ||
// but we could use a trie instead of a HashMap here for more | ||
// efficient search. | ||
for index in &self.0 { | ||
if is_url_prefix(&index.root_url, url) { | ||
return index.auth_policy; | ||
} | ||
} | ||
AuthPolicy::Auto | ||
pub fn auth_policy_for(&self, url: &Url) -> AuthPolicy { | ||
self.find_prefix_index(url) | ||
.map(|index| index.auth_policy) | ||
.unwrap_or(AuthPolicy::Auto) | ||
} | ||
} | ||
|
||
fn is_url_prefix(base: &Url, url: &Url) -> bool { | ||
if base.scheme() != url.scheme() | ||
|| base.host_str() != url.host_str() | ||
|| base.port_or_known_default() != url.port_or_known_default() | ||
{ | ||
return false; | ||
fn find_prefix_index(&self, url: &Url) -> Option<&Index> { | ||
self.0.iter().find(|&index| index.is_prefix_for(url)) | ||
} | ||
|
||
url.path().starts_with(base.path()) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,13 +7,13 @@ use std::time::Duration; | |
|
||
use async_http_range_reader::AsyncHttpRangeReader; | ||
use futures::{FutureExt, StreamExt, TryStreamExt}; | ||
use http::HeaderMap; | ||
use http::{HeaderMap, StatusCode}; | ||
use itertools::Either; | ||
use reqwest::{Proxy, Response, StatusCode}; | ||
use reqwest::{Proxy, Response}; | ||
use reqwest_middleware::ClientWithMiddleware; | ||
use rustc_hash::FxHashMap; | ||
use tokio::sync::{Mutex, Semaphore}; | ||
use tracing::{info_span, instrument, trace, warn, Instrument}; | ||
use tracing::{debug, info_span, instrument, trace, warn, Instrument}; | ||
use url::Url; | ||
|
||
use uv_auth::Indexes; | ||
|
@@ -23,7 +23,7 @@ use uv_configuration::{IndexStrategy, TrustedHost}; | |
use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; | ||
use uv_distribution_types::{ | ||
BuiltDist, File, FileLocation, IndexCapabilities, IndexFormat, IndexLocations, | ||
IndexMetadataRef, IndexUrl, IndexUrls, Name, | ||
IndexMetadataRef, IndexStatusCodeDecision, IndexStatusCodeStrategy, IndexUrl, IndexUrls, Name, | ||
}; | ||
use uv_metadata::{read_metadata_async_seek, read_metadata_async_stream}; | ||
use uv_normalize::PackageName; | ||
|
@@ -331,12 +331,29 @@ impl RegistryClient { | |
let _permit = download_concurrency.acquire().await; | ||
match index.format { | ||
IndexFormat::Simple => { | ||
if let Some(metadata) = self | ||
.simple_single_index(package_name, index.url, capabilities) | ||
let status_code_strategy = | ||
self.index_urls.status_code_strategy_for(index.url); | ||
match self | ||
.simple_single_index( | ||
package_name, | ||
index.url, | ||
capabilities, | ||
&status_code_strategy, | ||
) | ||
.await? | ||
{ | ||
results.push((index.url, MetadataFormat::Simple(metadata))); | ||
break; | ||
SimpleMetadataSearchOutcome::Found(metadata) => { | ||
results.push((index.url, MetadataFormat::Simple(metadata))); | ||
break; | ||
} | ||
// Package not found, so we will continue on to the next index (if there is one) | ||
SimpleMetadataSearchOutcome::NotFound => {} | ||
// The search failed because of an HTTP status code that we don't ignore for | ||
// this index. We end our search here. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should mention the assumption that the status code failure will be tracked by |
||
SimpleMetadataSearchOutcome::StatusCodeFailure(status_code) => { | ||
debug!("Indexes search failed because of status code failure: {status_code}"); | ||
break; | ||
} | ||
} | ||
} | ||
IndexFormat::Flat => { | ||
|
@@ -357,9 +374,21 @@ impl RegistryClient { | |
let _permit = download_concurrency.acquire().await; | ||
match index.format { | ||
IndexFormat::Simple => { | ||
let metadata = self | ||
.simple_single_index(package_name, index.url, capabilities) | ||
.await?; | ||
// For unsafe matches, ignore authentication failures. | ||
let status_code_strategy = | ||
IndexStatusCodeStrategy::ignore_authentication_error_codes(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added a comment. For unsafe index strategies, we ignore authentication failures and just find whatever matches we can across the indexes. |
||
let metadata = match self | ||
.simple_single_index( | ||
package_name, | ||
index.url, | ||
capabilities, | ||
&status_code_strategy, | ||
) | ||
.await? | ||
{ | ||
SimpleMetadataSearchOutcome::Found(metadata) => Some(metadata), | ||
_ => None, | ||
}; | ||
Ok((index.url, metadata.map(MetadataFormat::Simple))) | ||
} | ||
IndexFormat::Flat => { | ||
|
@@ -438,14 +467,13 @@ impl RegistryClient { | |
/// | ||
/// The index can either be a PEP 503-compatible remote repository, or a local directory laid | ||
/// out in the same format. | ||
/// | ||
/// Returns `Ok(None)` if the package is not found in the index. | ||
async fn simple_single_index( | ||
&self, | ||
package_name: &PackageName, | ||
index: &IndexUrl, | ||
capabilities: &IndexCapabilities, | ||
) -> Result<Option<OwnedArchive<SimpleMetadata>>, Error> { | ||
status_code_strategy: &IndexStatusCodeStrategy, | ||
) -> Result<SimpleMetadataSearchOutcome, Error> { | ||
// Format the URL for PyPI. | ||
let mut url = index.url().clone(); | ||
url.path_segments_mut() | ||
|
@@ -487,27 +515,31 @@ impl RegistryClient { | |
}; | ||
|
||
match result { | ||
Ok(metadata) => Ok(Some(metadata)), | ||
Ok(metadata) => Ok(SimpleMetadataSearchOutcome::Found(metadata)), | ||
Err(err) => match err.into_kind() { | ||
// The package could not be found in the remote index. | ||
ErrorKind::WrappedReqwestError(url, err) => match err.status() { | ||
Some(StatusCode::NOT_FOUND) => Ok(None), | ||
Some(StatusCode::UNAUTHORIZED) => { | ||
capabilities.set_unauthorized(index.clone()); | ||
Ok(None) | ||
} | ||
Some(StatusCode::FORBIDDEN) => { | ||
capabilities.set_forbidden(index.clone()); | ||
Ok(None) | ||
ErrorKind::WrappedReqwestError(url, err) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we now lose this error? Does it ever get surfaced to the user? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It no longer gets surfaced to the user if it's an error with a response status code. That's because the new interface gives the user the ability to ignore any status code they want. But now I've updated it so that if the strategy check fails on anything other than a 401 or 403, we just return the original error. This still provides the flexibility but preserves the old error information. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've also added a regression test for this case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm still struggling a bit. Why do we break when (e.g.) we hit a 401, but 401 isn't included in the list of ignored errors? Why is it not something like this? diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs
index 8bdf2c065..3cb6b4978 100644
--- a/crates/uv-client/src/registry_client.rs
+++ b/crates/uv-client/src/registry_client.rs
@@ -348,12 +348,6 @@ impl RegistryClient {
}
// Package not found, so we will continue on to the next index (if there is one)
SimpleMetadataSearchOutcome::NotFound => {}
- // The search failed because of an HTTP status code that we don't ignore for
- // this index. We end our search here.
- SimpleMetadataSearchOutcome::StatusCodeFailure(status_code) => {
- debug!("Indexes search failed because of status code failure: {status_code}");
- break;
- }
}
}
IndexFormat::Flat => {
@@ -522,17 +516,15 @@ impl RegistryClient {
let Some(status_code) = err.status() else {
return Err(ErrorKind::WrappedReqwestError(url, err).into());
};
- let decision =
- status_code_strategy.handle_status_code(status_code, index, capabilities);
- if let IndexStatusCodeDecision::Fail(status_code) = decision {
- if !matches!(
- status_code,
- StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN
- ) {
- return Err(ErrorKind::WrappedReqwestError(url, err).into());
+ match status_code_strategy.handle_status_code(status_code, index, capabilities)
+ {
+ IndexStatusCodeDecision::Ignore => {
+ Ok(SimpleMetadataSearchOutcome::NotFound)
+ }
+ IndexStatusCodeDecision::Fail(..) => {
+ Err(ErrorKind::WrappedReqwestError(url, err).into())
}
}
- Ok(SimpleMetadataSearchOutcome::from(decision))
}
// The package is unavailable due to a lack of connectivity.
@@ -998,18 +990,6 @@ pub(crate) enum SimpleMetadataSearchOutcome {
Found(OwnedArchive<SimpleMetadata>),
/// Simple metadata was not found
NotFound,
- /// A status code failure was encountered when searching for
- /// simple metadata and our strategy did not ignore it
- StatusCodeFailure(StatusCode),
-}
-
-impl From<IndexStatusCodeDecision> for SimpleMetadataSearchOutcome {
- fn from(item: IndexStatusCodeDecision) -> Self {
- match item {
- IndexStatusCodeDecision::Ignore => Self::NotFound,
- IndexStatusCodeDecision::Fail(status_code) => Self::StatusCodeFailure(status_code),
- }
- }
}
/// A map from [`IndexUrl`] to [`FlatIndexEntry`] entries found at the given URL, indexed by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because if we do it that way, we lose the richer error messages from the resolver. I actually originally did it the way you suggest, but @zanieb pointed out that there were regressions in the amount of information the user was getting. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is sort of like the original behavior of returning |
||
let Some(status_code) = err.status() else { | ||
return Err(ErrorKind::WrappedReqwestError(url, err).into()); | ||
}; | ||
let decision = | ||
status_code_strategy.handle_status_code(status_code, index, capabilities); | ||
if let IndexStatusCodeDecision::Fail(status_code) = decision { | ||
if !matches!( | ||
status_code, | ||
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should mention that these specific codes are tracked via |
||
) { | ||
return Err(ErrorKind::WrappedReqwestError(url, err).into()); | ||
} | ||
} | ||
_ => Err(ErrorKind::WrappedReqwestError(url, err).into()), | ||
}, | ||
Ok(SimpleMetadataSearchOutcome::from(decision)) | ||
} | ||
|
||
// The package is unavailable due to a lack of connectivity. | ||
ErrorKind::Offline(_) => Ok(None), | ||
ErrorKind::Offline(_) => Ok(SimpleMetadataSearchOutcome::NotFound), | ||
|
||
// The package could not be found in the local index. | ||
ErrorKind::FileNotFound(_) => Ok(None), | ||
ErrorKind::FileNotFound(_) => Ok(SimpleMetadataSearchOutcome::NotFound), | ||
|
||
err => Err(err.into()), | ||
}, | ||
|
@@ -960,6 +992,26 @@ impl RegistryClient { | |
} | ||
} | ||
|
||
#[derive(Debug)] | ||
pub(crate) enum SimpleMetadataSearchOutcome { | ||
/// Simple metadata was found | ||
Found(OwnedArchive<SimpleMetadata>), | ||
jtfmumm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// Simple metadata was not found | ||
NotFound, | ||
/// A status code failure was encountered when searching for | ||
/// simple metadata and our strategy did not ignore it | ||
StatusCodeFailure(StatusCode), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I would find it clearer if we actually just made individual enum variants for Unauthorized and Forbidden, rather than allowing an arbitrary status code here. That way the expectations are encoded in the type system. We should make the invalid state (e.g., |
||
} | ||
|
||
impl From<IndexStatusCodeDecision> for SimpleMetadataSearchOutcome { | ||
fn from(item: IndexStatusCodeDecision) -> Self { | ||
match item { | ||
IndexStatusCodeDecision::Ignore => Self::NotFound, | ||
IndexStatusCodeDecision::Fail(status_code) => Self::StatusCodeFailure(status_code), | ||
} | ||
} | ||
} | ||
|
||
/// A map from [`IndexUrl`] to [`FlatIndexEntry`] entries found at the given URL, indexed by | ||
/// [`PackageName`]. | ||
#[derive(Default, Debug, Clone)] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rustdoc please for the variants.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added