Skip to content

Commit da30cc4

Browse files
authored
Include environment variables in interpreter info caching (#11601)
We want to use `sys.path` for package discovery (#2500, #9849). For that, we need to know the correct value of `sys.path`. `sys.path` is a runtime-changeable value that is influenced by many different sources: environment variables, CLI arguments, `.pth` files with scripting, `sys.path.append()` at runtime, a distributor patching Python, etc. We cannot capture them all accurately, especially since it's possible to change `sys.path` mid-execution. Instead, we make a best-effort attempt at matching the user's expectation. The assumption is that package installation generally happens in venv site-packages, system/user site-packages (including PyPy shipping packages with the standard library), and `PYTHONPATH`. Specifically, we reuse `PYTHONPATH` as a dedicated way for users to tell uv to include specific directories in package discovery. A common way to influence `sys.path` without using venvs is setting `PYTHONPATH`. To support this, we capture `PYTHONPATH` as part of the cache invalidation, i.e., we refresh the interpreter metadata if it changed. For completeness, we also capture other environment variables documented as influencing `sys.path` or other fields in the interpreter info. This PR does not include reading registry values for `sys.path` additions on Windows, as documented in https://docs.python.org/3.11/using/windows.html#finding-modules. Notably, it also does not include parsing of Python CLI arguments; we only consider their environment variable versions for package installation and listing. We could try parsing CLI flags in `uv run python`, but we'd still miss them when Python is launched indirectly through a script, and it's more consistent to only consider uv's own arguments and environment variables, similar to uv's behavior in other places.
1 parent f394f72 commit da30cc4

File tree

4 files changed

+142
-43
lines changed

4 files changed

+142
-43
lines changed

crates/uv-cache/src/by_timestamp.rs

-8
This file was deleted.

crates/uv-cache/src/lib.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ use uv_fs::{cachedir, directories, LockedFile};
1818
use uv_normalize::PackageName;
1919
use uv_pypi_types::ResolutionMetadata;
2020

21-
pub use crate::by_timestamp::CachedByTimestamp;
2221
#[cfg(feature = "clap")]
2322
pub use crate::cli::CacheArgs;
2423
use crate::removal::Remover;
@@ -27,7 +26,6 @@ pub use crate::wheel::WheelCache;
2726
use crate::wheel::WheelCacheKind;
2827

2928
mod archive;
30-
mod by_timestamp;
3129
#[cfg(feature = "clap")]
3230
mod cli;
3331
mod removal;
@@ -1034,7 +1032,7 @@ impl CacheBucket {
10341032
Self::SourceDistributions => "sdists-v8",
10351033
Self::FlatIndex => "flat-index-v2",
10361034
Self::Git => "git-v0",
1037-
Self::Interpreter => "interpreter-v4",
1035+
Self::Interpreter => "interpreter-v5",
10381036
// Note that when bumping this, you'll also need to bump it
10391037
// in `crates/uv/tests/it/cache_clean.rs`.
10401038
Self::Simple => "simple-v15",

crates/uv-python/src/interpreter.rs

+123-32
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use std::borrow::Cow;
22
use std::env::consts::ARCH;
3+
use std::ffi::OsString;
34
use std::fmt::{Display, Formatter};
4-
use std::io;
55
use std::path::{Path, PathBuf};
66
use std::process::{Command, ExitStatus};
77
use std::sync::OnceLock;
8+
use std::{env, io};
89

910
use configparser::ini::Ini;
1011
use fs_err as fs;
@@ -14,7 +15,7 @@ use serde::{Deserialize, Serialize};
1415
use thiserror::Error;
1516
use tracing::{debug, trace, warn};
1617

17-
use uv_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness};
18+
use uv_cache::{Cache, CacheBucket, CacheEntry, Freshness};
1819
use uv_cache_info::Timestamp;
1920
use uv_cache_key::cache_digest;
2021
use uv_fs::{write_atomic_sync, PythonExt, Simplified};
@@ -24,6 +25,7 @@ use uv_pep508::{MarkerEnvironment, StringVersion};
2425
use uv_platform_tags::Platform;
2526
use uv_platform_tags::{Tags, TagsError};
2627
use uv_pypi_types::{ResolverMarkerEnvironment, Scheme};
28+
use uv_static::EnvVars;
2729

2830
use crate::implementation::LenientImplementationName;
2931
use crate::platform::{Arch, Libc, Os};
@@ -713,6 +715,42 @@ pub enum InterpreterInfoError {
713715
},
714716
}
715717

718+
/// Environment variables that can change the values of [`InterpreterInfo`].
719+
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
720+
struct PythonEnvVars {
721+
/// `PYTHONHOME` overrides `sys.prefix`.
722+
pythonhome: Option<OsString>,
723+
/// `PYTHONPATH` adds to `sys.path`.
724+
pythonpath: Option<OsString>,
725+
/// `PYTHONSAFEPATH` influences `sys.path`.
726+
pythonsafepath: Option<OsString>,
727+
/// `PYTHONPLATLIBDIR` influences `sys.path`.
728+
pythonplatlibdir: Option<OsString>,
729+
/// `PYTHONNOUSERSITE` influences `sys.path`.
730+
pythonnousersite: Option<OsString>,
731+
/// `PYTHONUSERBASE` influences `sys.path`.
732+
pythonuserbase: Option<OsString>,
733+
/// `APPDATA` influences `sys.path` through the user site packages (windows).
734+
appdata: Option<OsString>,
735+
/// `HOME` influences `sys.path` through the user site packages (unix).
736+
home: Option<OsString>,
737+
}
738+
739+
impl PythonEnvVars {
740+
fn from_env() -> Self {
741+
Self {
742+
pythonhome: env::var_os(EnvVars::PYTHONHOME),
743+
pythonpath: env::var_os(EnvVars::PYTHONPATH),
744+
pythonsafepath: env::var_os(EnvVars::PYTHONSAFEPATH),
745+
pythonplatlibdir: env::var_os(EnvVars::PYTHONPLATLIBDIR),
746+
pythonnousersite: env::var_os(EnvVars::PYTHONNOUSERSITE),
747+
pythonuserbase: env::var_os(EnvVars::PYTHONUSERBASE),
748+
appdata: env::var_os(EnvVars::APPDATA),
749+
home: env::var_os(EnvVars::HOME),
750+
}
751+
}
752+
}
753+
716754
#[derive(Debug, Deserialize, Serialize, Clone)]
717755
struct InterpreterInfo {
718756
platform: Platform,
@@ -732,6 +770,18 @@ struct InterpreterInfo {
732770
gil_disabled: bool,
733771
}
734772

773+
#[derive(Debug, Deserialize, Serialize, Clone)]
774+
struct CachedInterpreterInfo {
775+
/// Information about a Python interpreter at a path.
776+
data: InterpreterInfo,
777+
/// The last modified timestamp of the Python interpreter path.
778+
///
779+
/// It is ctime on unix.
780+
timestamp: Timestamp,
781+
/// Environment variables that can influence the other keys used for cache invalidation.
782+
env_vars: PythonEnvVars,
783+
}
784+
735785
impl InterpreterInfo {
736786
/// Return the resolved [`InterpreterInfo`] for the given Python executable.
737787
pub(crate) fn query(interpreter: &Path, cache: &Cache) -> Result<Self, Error> {
@@ -869,36 +919,10 @@ impl InterpreterInfo {
869919
})?;
870920

871921
// Read from the cache.
872-
if cache
873-
.freshness(&cache_entry, None)
874-
.is_ok_and(Freshness::is_fresh)
922+
if let Some(value) =
923+
Self::read_and_validate_cache(executable, cache, &cache_entry, modified)
875924
{
876-
if let Ok(data) = fs::read(cache_entry.path()) {
877-
match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
878-
Ok(cached) => {
879-
if cached.timestamp == modified {
880-
trace!(
881-
"Cached interpreter info for Python {}, skipping probing: {}",
882-
cached.data.markers.python_full_version(),
883-
executable.user_display()
884-
);
885-
return Ok(cached.data);
886-
}
887-
888-
trace!(
889-
"Ignoring stale interpreter markers for: {}",
890-
executable.user_display()
891-
);
892-
}
893-
Err(err) => {
894-
warn!(
895-
"Broken interpreter cache entry at {}, removing: {err}",
896-
cache_entry.path().user_display()
897-
);
898-
let _ = fs_err::remove_file(cache_entry.path());
899-
}
900-
}
901-
}
925+
return Ok(value);
902926
}
903927

904928
// Otherwise, run the Python script.
@@ -914,15 +938,82 @@ impl InterpreterInfo {
914938
fs::create_dir_all(cache_entry.dir())?;
915939
write_atomic_sync(
916940
cache_entry.path(),
917-
rmp_serde::to_vec(&CachedByTimestamp {
941+
rmp_serde::to_vec(&CachedInterpreterInfo {
918942
timestamp: modified,
919943
data: info.clone(),
944+
env_vars: PythonEnvVars::from_env(),
920945
})?,
921946
)?;
922947
}
923948

924949
Ok(info)
925950
}
951+
952+
/// If a cache entry for the Python interpreter exists and it's fresh, return it.
953+
fn read_and_validate_cache(
954+
executable: &Path,
955+
cache: &Cache,
956+
cache_entry: &CacheEntry,
957+
modified: Timestamp,
958+
) -> Option<InterpreterInfo> {
959+
if !cache
960+
.freshness(cache_entry, None)
961+
.is_ok_and(Freshness::is_fresh)
962+
{
963+
return None;
964+
}
965+
966+
let data = match fs::read(cache_entry.path()) {
967+
Ok(data) => data,
968+
Err(err) if err.kind() == io::ErrorKind::NotFound => {
969+
return None;
970+
}
971+
Err(err) => {
972+
warn!(
973+
"Broken interpreter cache entry at {}, removing: {err}",
974+
cache_entry.path().user_display()
975+
);
976+
let _ = fs_err::remove_file(cache_entry.path());
977+
return None;
978+
}
979+
};
980+
981+
let cached = match rmp_serde::from_slice::<CachedInterpreterInfo>(&data) {
982+
Ok(cached) => cached,
983+
Err(err) => {
984+
warn!(
985+
"Broken interpreter cache entry at {}, removing: {err}",
986+
cache_entry.path().user_display()
987+
);
988+
let _ = fs_err::remove_file(cache_entry.path());
989+
return None;
990+
}
991+
};
992+
993+
if cached.timestamp != modified {
994+
trace!(
995+
"Ignoring stale cached interpreter info for: `{}`",
996+
executable.user_display()
997+
);
998+
return None;
999+
}
1000+
1001+
if cached.env_vars != PythonEnvVars::from_env() {
1002+
trace!(
1003+
"Ignoring cached interpreter info due to changed environment variables for: `{}`",
1004+
executable.user_display()
1005+
);
1006+
return None;
1007+
}
1008+
1009+
trace!(
1010+
"Cached interpreter info for Python {}, skipping probing: `{}`",
1011+
cached.data.markers.python_full_version(),
1012+
executable.user_display()
1013+
);
1014+
1015+
Some(cached.data)
1016+
}
9261017
}
9271018

9281019
/// Find the Python executable that should be considered the "base" for a virtual environment.

crates/uv-static/src/env_vars.rs

+18
Original file line numberDiff line numberDiff line change
@@ -649,4 +649,22 @@ impl EnvVars {
649649
///
650650
/// This is a quasi-standard variable, described e.g. in `ncurses(3x)`.
651651
pub const COLUMNS: &'static str = "COLUMNS";
652+
653+
/// Overrides `sys.prefix`.
654+
pub const PYTHONHOME: &'static str = "PYTHONHOME";
655+
656+
/// Don't prepend a potentially unsafe path to `sys.path`.
657+
pub const PYTHONSAFEPATH: &'static str = "PYTHONSAFEPATH";
658+
659+
/// Overrides `sys.platlibdir`.
660+
pub const PYTHONPLATLIBDIR: &'static str = "PYTHONPLATLIBDIR";
661+
662+
/// Don't add the user site packages to `sys.path`.
663+
pub const PYTHONNOUSERSITE: &'static str = "PYTHONNOUSERSITE";
664+
665+
/// Overrides `site.USER_BASE`.
666+
pub const PYTHONUSERBASE: &'static str = "PYTHONUSERBASE";
667+
668+
/// The base path for user site packages on Windows.
669+
pub const APPDATA: &'static str = "APPDATA";
652670
}

0 commit comments

Comments
 (0)