Skip to content

Commit 1222db1

Browse files
committed
Make uv’s first-index strategy more secure by default by failing early on authentication failure (#12805)
uv’s default index strategy was designed with dependency confusion attacks in mind. [According to the docs](https://docs.astral.sh/uv/configuration/indexes/#searching-across-multiple-indexes), “if a package exists on an internal index, it should always be installed from the internal index, and never from PyPI”. Unfortunately, this is not true in the case where authentication fails on that internal index. In that case, uv will simply try the next index (even on the `first-index` strategy). This means that uv is not secure by default in this common scenario. This PR causes uv to stop searching for a package if it encounters an authentication failure at an index. It is possible to opt out of this behavior for an index with a new `pyproject.toml` option `ignore-error-codes`. For example: ``` [[tool.uv.index]] name = "my-index" url = "<index-url>" ignore-error-codes = [401, 403] ``` This will also enable users to handle idiosyncratic registries in a more fine-grained way. For example, PyTorch registries return a 403 when a package is not found. In this PR, we special-case PyTorch registries to ignore 403s, but users can use `ignore-error-codes` to handle similar behaviors if they encounter them on internal registries. Depends on #12651 Closes #9429 Closes #12362
1 parent 841c430 commit 1222db1

File tree

16 files changed

+783
-98
lines changed

16 files changed

+783
-98
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/uv-auth/src/cache.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::fmt::Display;
12
use std::fmt::Formatter;
23
use std::hash::BuildHasherDefault;
34
use std::sync::Arc;
@@ -14,11 +15,28 @@ use crate::Realm;
1415

1516
type FxOnceMap<K, V> = OnceMap<K, V, BuildHasherDefault<FxHasher>>;
1617

18+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
19+
pub(crate) enum FetchUrl {
20+
/// A full index URL
21+
Index(Url),
22+
/// A realm URL
23+
Realm(Realm),
24+
}
25+
26+
impl Display for FetchUrl {
27+
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
28+
match self {
29+
Self::Index(index) => Display::fmt(index, f),
30+
Self::Realm(realm) => Display::fmt(realm, f),
31+
}
32+
}
33+
}
34+
1735
pub struct CredentialsCache {
1836
/// A cache per realm and username
1937
realms: RwLock<FxHashMap<(Realm, Username), Arc<Credentials>>>,
2038
/// A cache tracking the result of realm or index URL fetches from external services
21-
pub(crate) fetches: FxOnceMap<(String, Username), Option<Arc<Credentials>>>,
39+
pub(crate) fetches: FxOnceMap<(FetchUrl, Username), Option<Arc<Credentials>>>,
2240
/// A cache per URL, uses a trie for efficient prefix queries.
2341
urls: RwLock<UrlTrie>,
2442
}

crates/uv-auth/src/index.rs

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,23 @@ pub struct Index {
6060
pub auth_policy: AuthPolicy,
6161
}
6262

63+
impl Index {
64+
pub fn is_prefix_for(&self, url: &Url) -> bool {
65+
if self.root_url.scheme() != url.scheme()
66+
|| self.root_url.host_str() != url.host_str()
67+
|| self.root_url.port_or_known_default() != url.port_or_known_default()
68+
{
69+
return false;
70+
}
71+
72+
url.path().starts_with(self.root_url.path())
73+
}
74+
}
75+
76+
// TODO(john): Multiple methods in this struct need to iterate over
77+
// all the indexes in the set. There are probably not many URLs to
78+
// iterate through, but we could use a trie instead of a HashSet here
79+
// for more efficient search.
6380
#[derive(Debug, Default, Clone, Eq, PartialEq)]
6481
pub struct Indexes(FxHashSet<Index>);
6582

@@ -79,36 +96,17 @@ impl Indexes {
7996

8097
/// Get the index URL prefix for a URL if one exists.
8198
pub fn index_url_for(&self, url: &Url) -> Option<&Url> {
82-
// TODO(john): There are probably not many URLs to iterate through,
83-
// but we could use a trie instead of a HashSet here for more
84-
// efficient search.
85-
self.0
86-
.iter()
87-
.find(|index| is_url_prefix(&index.root_url, url))
88-
.map(|index| &index.url)
99+
self.find_prefix_index(url).map(|index| &index.url)
89100
}
90101

91102
/// Get the [`AuthPolicy`] for a URL.
92-
pub fn policy_for(&self, url: &Url) -> AuthPolicy {
93-
// TODO(john): There are probably not many URLs to iterate through,
94-
// but we could use a trie instead of a HashMap here for more
95-
// efficient search.
96-
for index in &self.0 {
97-
if is_url_prefix(&index.root_url, url) {
98-
return index.auth_policy;
99-
}
100-
}
101-
AuthPolicy::Auto
103+
pub fn auth_policy_for(&self, url: &Url) -> AuthPolicy {
104+
self.find_prefix_index(url)
105+
.map(|index| index.auth_policy)
106+
.unwrap_or(AuthPolicy::Auto)
102107
}
103-
}
104108

105-
fn is_url_prefix(base: &Url, url: &Url) -> bool {
106-
if base.scheme() != url.scheme()
107-
|| base.host_str() != url.host_str()
108-
|| base.port_or_known_default() != url.port_or_known_default()
109-
{
110-
return false;
109+
fn find_prefix_index(&self, url: &Url) -> Option<&Index> {
110+
self.0.iter().find(|&index| index.is_prefix_for(url))
111111
}
112-
113-
url.path().starts_with(base.path())
114112
}

crates/uv-auth/src/middleware.rs

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use http::{Extensions, StatusCode};
44
use url::Url;
55

66
use crate::{
7+
cache::FetchUrl,
78
credentials::{Credentials, Username},
89
index::{AuthPolicy, Indexes},
910
realm::Realm,
@@ -182,7 +183,7 @@ impl Middleware for AuthMiddleware {
182183
// to the headers so for display purposes we restore some information
183184
let url = tracing_url(&request, request_credentials.as_ref());
184185
let maybe_index_url = self.indexes.index_url_for(request.url());
185-
let auth_policy = self.indexes.policy_for(request.url());
186+
let auth_policy = self.indexes.auth_policy_for(request.url());
186187
trace!("Handling request for {url} with authentication policy {auth_policy}");
187188

188189
let credentials: Option<Arc<Credentials>> = if matches!(auth_policy, AuthPolicy::Never) {
@@ -384,7 +385,7 @@ impl AuthMiddleware {
384385
extensions: &mut Extensions,
385386
next: Next<'_>,
386387
url: &str,
387-
maybe_index_url: Option<&Url>,
388+
index_url: Option<&Url>,
388389
auth_policy: AuthPolicy,
389390
) -> reqwest_middleware::Result<Response> {
390391
let credentials = Arc::new(credentials);
@@ -402,7 +403,7 @@ impl AuthMiddleware {
402403
// There's just a username, try to find a password.
403404
// If we have an index URL, check the cache for that URL. Otherwise,
404405
// check for the realm.
405-
let maybe_cached_credentials = if let Some(index_url) = maybe_index_url {
406+
let maybe_cached_credentials = if let Some(index_url) = index_url {
406407
self.cache()
407408
.get_url(index_url, credentials.as_username().as_ref())
408409
} else {
@@ -426,17 +427,12 @@ impl AuthMiddleware {
426427
// Do not insert already-cached credentials
427428
None
428429
} else if let Some(credentials) = self
429-
.fetch_credentials(
430-
Some(&credentials),
431-
request.url(),
432-
maybe_index_url,
433-
auth_policy,
434-
)
430+
.fetch_credentials(Some(&credentials), request.url(), index_url, auth_policy)
435431
.await
436432
{
437433
request = credentials.authenticate(request);
438434
Some(credentials)
439-
} else if maybe_index_url.is_some() {
435+
} else if index_url.is_some() {
440436
// If this is a known index, we fall back to checking for the realm.
441437
self.cache()
442438
.get_realm(Realm::from(request.url()), credentials.to_username())
@@ -468,9 +464,9 @@ impl AuthMiddleware {
468464
// Fetches can be expensive, so we will only run them _once_ per realm or index URL and username combination
469465
// All other requests for the same realm or index URL will wait until the first one completes
470466
let key = if let Some(index_url) = maybe_index_url {
471-
(index_url.to_string(), username)
467+
(FetchUrl::Index(index_url.clone()), username)
472468
} else {
473-
(Realm::from(url).to_string(), username)
469+
(FetchUrl::Realm(Realm::from(url)), username)
474470
};
475471
if !self.cache().fetches.register(key.clone()) {
476472
let credentials = self
@@ -520,7 +516,7 @@ impl AuthMiddleware {
520516
debug!("Checking keyring for credentials for index URL {}@{}", username, index_url);
521517
keyring.fetch(index_url, Some(username)).await
522518
} else {
523-
debug!("Checking keyring for credentials for full URL {}@{}", username, *url);
519+
debug!("Checking keyring for credentials for full URL {}@{}", username, url);
524520
keyring.fetch(url, Some(username)).await
525521
}
526522
} else if matches!(auth_policy, AuthPolicy::Always) {
@@ -530,10 +526,7 @@ impl AuthMiddleware {
530526
);
531527
keyring.fetch(index_url, None).await
532528
} else {
533-
debug!(
534-
"Checking keyring for credentials for full URL {url} without username due to `authenticate = always`"
535-
);
536-
keyring.fetch(url, None).await
529+
None
537530
}
538531
} else {
539532
debug!("Skipping keyring fetch for {url} without username; use `authenticate = always` to force");

crates/uv-client/src/registry_client.rs

Lines changed: 80 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ use std::time::Duration;
77

88
use async_http_range_reader::AsyncHttpRangeReader;
99
use futures::{FutureExt, StreamExt, TryStreamExt};
10-
use http::HeaderMap;
10+
use http::{HeaderMap, StatusCode};
1111
use itertools::Either;
12-
use reqwest::{Proxy, Response, StatusCode};
12+
use reqwest::{Proxy, Response};
1313
use rustc_hash::FxHashMap;
1414
use tokio::sync::{Mutex, Semaphore};
15-
use tracing::{info_span, instrument, trace, warn, Instrument};
15+
use tracing::{debug, info_span, instrument, trace, warn, Instrument};
1616
use url::Url;
1717

1818
use uv_auth::Indexes;
@@ -22,7 +22,7 @@ use uv_configuration::{IndexStrategy, TrustedHost};
2222
use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
2323
use uv_distribution_types::{
2424
BuiltDist, File, FileLocation, IndexCapabilities, IndexFormat, IndexLocations,
25-
IndexMetadataRef, IndexUrl, IndexUrls, Name,
25+
IndexMetadataRef, IndexStatusCodeDecision, IndexStatusCodeStrategy, IndexUrl, IndexUrls, Name,
2626
};
2727
use uv_metadata::{read_metadata_async_seek, read_metadata_async_stream};
2828
use uv_normalize::PackageName;
@@ -332,12 +332,29 @@ impl RegistryClient {
332332
let _permit = download_concurrency.acquire().await;
333333
match index.format {
334334
IndexFormat::Simple => {
335-
if let Some(metadata) = self
336-
.simple_single_index(package_name, index.url, capabilities)
335+
let status_code_strategy =
336+
self.index_urls.status_code_strategy_for(index.url);
337+
match self
338+
.simple_single_index(
339+
package_name,
340+
index.url,
341+
capabilities,
342+
&status_code_strategy,
343+
)
337344
.await?
338345
{
339-
results.push((index.url, MetadataFormat::Simple(metadata)));
340-
break;
346+
SimpleMetadataSearchOutcome::Found(metadata) => {
347+
results.push((index.url, MetadataFormat::Simple(metadata)));
348+
break;
349+
}
350+
// Package not found, so we will continue on to the next index (if there is one)
351+
SimpleMetadataSearchOutcome::NotFound => {}
352+
// The search failed because of an HTTP status code that we don't ignore for
353+
// this index. We end our search here.
354+
SimpleMetadataSearchOutcome::StatusCodeFailure(status_code) => {
355+
debug!("Indexes search failed because of status code failure: {status_code}");
356+
break;
357+
}
341358
}
342359
}
343360
IndexFormat::Flat => {
@@ -358,9 +375,21 @@ impl RegistryClient {
358375
let _permit = download_concurrency.acquire().await;
359376
match index.format {
360377
IndexFormat::Simple => {
361-
let metadata = self
362-
.simple_single_index(package_name, index.url, capabilities)
363-
.await?;
378+
// For unsafe matches, ignore authentication failures.
379+
let status_code_strategy =
380+
IndexStatusCodeStrategy::ignore_authentication_error_codes();
381+
let metadata = match self
382+
.simple_single_index(
383+
package_name,
384+
index.url,
385+
capabilities,
386+
&status_code_strategy,
387+
)
388+
.await?
389+
{
390+
SimpleMetadataSearchOutcome::Found(metadata) => Some(metadata),
391+
_ => None,
392+
};
364393
Ok((index.url, metadata.map(MetadataFormat::Simple)))
365394
}
366395
IndexFormat::Flat => {
@@ -439,14 +468,13 @@ impl RegistryClient {
439468
///
440469
/// The index can either be a PEP 503-compatible remote repository, or a local directory laid
441470
/// out in the same format.
442-
///
443-
/// Returns `Ok(None)` if the package is not found in the index.
444471
async fn simple_single_index(
445472
&self,
446473
package_name: &PackageName,
447474
index: &IndexUrl,
448475
capabilities: &IndexCapabilities,
449-
) -> Result<Option<OwnedArchive<SimpleMetadata>>, Error> {
476+
status_code_strategy: &IndexStatusCodeStrategy,
477+
) -> Result<SimpleMetadataSearchOutcome, Error> {
450478
// Format the URL for PyPI.
451479
let mut url = index.url().clone();
452480
url.path_segments_mut()
@@ -488,27 +516,31 @@ impl RegistryClient {
488516
};
489517

490518
match result {
491-
Ok(metadata) => Ok(Some(metadata)),
519+
Ok(metadata) => Ok(SimpleMetadataSearchOutcome::Found(metadata)),
492520
Err(err) => match err.into_kind() {
493521
// The package could not be found in the remote index.
494-
ErrorKind::WrappedReqwestError(url, err) => match err.status() {
495-
Some(StatusCode::NOT_FOUND) => Ok(None),
496-
Some(StatusCode::UNAUTHORIZED) => {
497-
capabilities.set_unauthorized(index.clone());
498-
Ok(None)
499-
}
500-
Some(StatusCode::FORBIDDEN) => {
501-
capabilities.set_forbidden(index.clone());
502-
Ok(None)
522+
ErrorKind::WrappedReqwestError(url, err) => {
523+
let Some(status_code) = err.status() else {
524+
return Err(ErrorKind::WrappedReqwestError(url, err).into());
525+
};
526+
let decision =
527+
status_code_strategy.handle_status_code(status_code, index, capabilities);
528+
if let IndexStatusCodeDecision::Fail(status_code) = decision {
529+
if !matches!(
530+
status_code,
531+
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN
532+
) {
533+
return Err(ErrorKind::WrappedReqwestError(url, err).into());
534+
}
503535
}
504-
_ => Err(ErrorKind::WrappedReqwestError(url, err).into()),
505-
},
536+
Ok(SimpleMetadataSearchOutcome::from(decision))
537+
}
506538

507539
// The package is unavailable due to a lack of connectivity.
508-
ErrorKind::Offline(_) => Ok(None),
540+
ErrorKind::Offline(_) => Ok(SimpleMetadataSearchOutcome::NotFound),
509541

510542
// The package could not be found in the local index.
511-
ErrorKind::FileNotFound(_) => Ok(None),
543+
ErrorKind::FileNotFound(_) => Ok(SimpleMetadataSearchOutcome::NotFound),
512544

513545
err => Err(err.into()),
514546
},
@@ -961,6 +993,26 @@ impl RegistryClient {
961993
}
962994
}
963995

996+
#[derive(Debug)]
997+
pub(crate) enum SimpleMetadataSearchOutcome {
998+
/// Simple metadata was found
999+
Found(OwnedArchive<SimpleMetadata>),
1000+
/// Simple metadata was not found
1001+
NotFound,
1002+
/// A status code failure was encountered when searching for
1003+
/// simple metadata and our strategy did not ignore it
1004+
StatusCodeFailure(StatusCode),
1005+
}
1006+
1007+
impl From<IndexStatusCodeDecision> for SimpleMetadataSearchOutcome {
1008+
fn from(item: IndexStatusCodeDecision) -> Self {
1009+
match item {
1010+
IndexStatusCodeDecision::Ignore => Self::NotFound,
1011+
IndexStatusCodeDecision::Fail(status_code) => Self::StatusCodeFailure(status_code),
1012+
}
1013+
}
1014+
}
1015+
9641016
/// A map from [`IndexUrl`] to [`FlatIndexEntry`] entries found at the given URL, indexed by
9651017
/// [`PackageName`].
9661018
#[derive(Default, Debug, Clone)]

crates/uv-distribution-types/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ uv-small-str = { workspace = true }
3232
arcstr = { workspace = true }
3333
bitflags = { workspace = true }
3434
fs-err = { workspace = true }
35+
http = { workspace = true }
3536
itertools = { workspace = true }
3637
jiff = { workspace = true }
3738
owo-colors = { workspace = true }
@@ -47,7 +48,5 @@ tracing = { workspace = true }
4748
url = { workspace = true }
4849
version-ranges = { workspace = true }
4950

50-
5151
[dev-dependencies]
5252
toml = { workspace = true }
53-

0 commit comments

Comments
 (0)