Skip to content

Commit edf875e

Browse files
authored
add conflict markers to the lock file (#9370)
This PR adds a notion of "conflict markers" to the lock file as an attempt to address #9289. The idea is to encode a new kind of boolean expression indicating how to choose dependencies based on which extras are activated. As an example of what conflict markers look like, consider one of the cases brought up in #9289, where `anyio` had unconditional dependencies on two different versions of `idna`. Now, those are gated by markers, like this: ```toml [[package]] name = "anyio" version = "4.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-project-foo'" }, { name = "idna", version = "3.6", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-project-bar' or extra != 'extra-7-project-foo'" }, { name = "sniffio" }, ] ``` The odd extra values like `extra-7-project-foo` are an encoding of not just the conflicting extra (`foo`) but also the package it's declared for (`project`). We need both bits of information because different packages may have the same extra name, even if they are completely unrelated. The `extra-` part is a prefix to distinguish it from groups (which, in this case, would be encoded as `group-7-project-foo` if `foo` were a dependency group). And the `7` part indicates the length of the package name which makes it possible to parse out the package and extra name from this encoding. (We don't actually utilize that property, but it seems like good sense to do it in case we do need to extra information from these markers.) While this preserves PEP 508 compatibility at a surface level, it does require utilizing this encoding scheme in order to evaluate them when they're present (which only occurs when conflicting extras/groups are declared). My sense is that the most complex part of this change is not just adding conflict markers, but their simplification. I tried to address this in the code comments and commit messages. Reviewers should look at this commit-by-commit. Fixes #9289, Fixes #9546, Fixes #9640, Fixes #9622, Fixes #9498, Fixes #9701, Fixes #9734
1 parent 6fb0d79 commit edf875e

18 files changed

+5161
-126
lines changed

crates/uv-pypi-types/src/conflicts.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ impl Conflicts {
2424
}
2525

2626
/// Returns an iterator over all sets of conflicting sets.
27-
pub fn iter(&self) -> impl Iterator<Item = &'_ ConflictSet> + '_ {
27+
pub fn iter(&self) -> impl Iterator<Item = &'_ ConflictSet> + Clone + '_ {
2828
self.0.iter()
2929
}
3030

@@ -75,7 +75,7 @@ impl ConflictSet {
7575
}
7676

7777
/// Returns an iterator over all conflicting items.
78-
pub fn iter(&self) -> impl Iterator<Item = &'_ ConflictItem> + '_ {
78+
pub fn iter(&self) -> impl Iterator<Item = &'_ ConflictItem> + Clone + '_ {
7979
self.0.iter()
8080
}
8181

crates/uv-resolver/src/graph_ops.rs

+187-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
use petgraph::graph::NodeIndex;
1+
use petgraph::graph::{EdgeIndex, NodeIndex};
22
use petgraph::visit::EdgeRef;
33
use petgraph::{Direction, Graph};
4-
use rustc_hash::{FxBuildHasher, FxHashMap};
4+
use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet};
55
use std::collections::hash_map::Entry;
66

7+
use uv_normalize::{ExtraName, GroupName, PackageName};
8+
use uv_pypi_types::{ConflictItem, Conflicts};
9+
10+
use crate::resolution::ResolutionGraphNode;
711
use crate::universal_marker::UniversalMarker;
812

913
/// Determine the markers under which a package is reachable in the dependency tree.
@@ -79,3 +83,184 @@ pub(crate) fn marker_reachability<T>(
7983

8084
reachability
8185
}
86+
87+
/// Traverse the given dependency graph and propagate activated markers.
88+
///
89+
/// For example, given an edge like `foo[x1] -> bar`, then it is known that
90+
/// `x1` is activated. This in turn can be used to simplify any downstream
91+
/// conflict markers with `extra == "x1"` in them (by replacing `extra == "x1"`
92+
/// with `true`).
93+
pub(crate) fn simplify_conflict_markers(
94+
conflicts: &Conflicts,
95+
graph: &mut Graph<ResolutionGraphNode, UniversalMarker>,
96+
) {
97+
/// An inference about whether a conflicting item is always included or
98+
/// excluded.
99+
///
100+
/// We collect these for each node in the graph after determining which
101+
/// extras/groups are activated for each node. Once we know what's
102+
/// activated, we can infer what must also be *inactivated* based on what's
103+
/// conflicting with it. So for example, if we have a conflict marker like
104+
/// `extra == 'foo' and extra != 'bar'`, and `foo` and `bar` have been
105+
/// declared as conflicting, and we are in a part of the graph where we
106+
/// know `foo` must be activated, then it follows that `extra != 'bar'`
107+
/// must always be true. Because if it were false, it would imply both
108+
/// `foo` and `bar` were activated simultaneously, which uv guarantees
109+
/// won't happen.
110+
///
111+
/// We then use these inferences to simplify the conflict markers.
112+
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
113+
struct Inference {
114+
item: ConflictItem,
115+
included: bool,
116+
}
117+
118+
// Do nothing if there are no declared conflicts. Without any declared
119+
// conflicts, we know we have no conflict markers and thus nothing to
120+
// simplify by determining which extras are activated at different points
121+
// in the dependency graph.
122+
if conflicts.is_empty() {
123+
return;
124+
}
125+
126+
// The set of activated extras and groups for each node. The ROOT nodes
127+
// don't have any extras/groups activated.
128+
let mut activated: FxHashMap<NodeIndex, Vec<FxHashSet<ConflictItem>>> = FxHashMap::default();
129+
130+
// Collect the root nodes.
131+
//
132+
// Besides the actual virtual root node, virtual dev dependencies packages are also root
133+
// nodes since the edges don't cover dev dependencies.
134+
let mut queue: Vec<_> = graph
135+
.node_indices()
136+
.filter(|node_index| {
137+
graph
138+
.edges_directed(*node_index, Direction::Incoming)
139+
.next()
140+
.is_none()
141+
})
142+
.collect();
143+
144+
let mut seen: FxHashSet<NodeIndex> = FxHashSet::default();
145+
while let Some(parent_index) = queue.pop() {
146+
if let Some((package, extra)) = graph[parent_index].package_extra_names() {
147+
for set in activated
148+
.entry(parent_index)
149+
.or_insert_with(|| vec![FxHashSet::default()])
150+
{
151+
set.insert(ConflictItem::from((package.clone(), extra.clone())));
152+
}
153+
}
154+
if let Some((package, group)) = graph[parent_index].package_group_names() {
155+
for set in activated
156+
.entry(parent_index)
157+
.or_insert_with(|| vec![FxHashSet::default()])
158+
{
159+
set.insert(ConflictItem::from((package.clone(), group.clone())));
160+
}
161+
}
162+
let sets = activated.get(&parent_index).cloned().unwrap_or_default();
163+
for child_edge in graph.edges_directed(parent_index, Direction::Outgoing) {
164+
let mut change = false;
165+
for set in sets.clone() {
166+
let existing = activated.entry(child_edge.target()).or_default();
167+
// This is doing a linear scan for testing membership, which
168+
// is non-ideal. But it's not actually clear that there's a
169+
// strictly better alternative without a real workload being
170+
// slow because of this. Namely, we are checking whether the
171+
// _set_ being inserted is equivalent to an existing set. So
172+
// instead of, say, `Vec<FxHashSet<ConflictItem>>`, we could
173+
// have `BTreeSet<BTreeSet<ConflictItem>>`. But this in turn
174+
// makes mutating the elements in each set (done above) more
175+
// difficult and likely require more allocations.
176+
//
177+
// So if this does result in a perf slowdown on some real
178+
// work-load, I think the first step would be to re-examine
179+
// whether we're doing more work than we need to be doing. If
180+
// we aren't, then we might want a more purpose-built data
181+
// structure for this.
182+
if !existing.contains(&set) {
183+
existing.push(set);
184+
change = true;
185+
}
186+
}
187+
if seen.insert(child_edge.target()) || change {
188+
queue.push(child_edge.target());
189+
}
190+
}
191+
}
192+
193+
let mut inferences: FxHashMap<NodeIndex, Vec<FxHashSet<Inference>>> = FxHashMap::default();
194+
for (node_id, sets) in activated {
195+
let mut new_sets = vec![];
196+
for set in sets {
197+
let mut new_set = FxHashSet::default();
198+
for item in set {
199+
for conflict_set in conflicts.iter() {
200+
if !conflict_set.contains(item.package(), item.as_ref().conflict()) {
201+
continue;
202+
}
203+
for conflict_item in conflict_set.iter() {
204+
if conflict_item == &item {
205+
continue;
206+
}
207+
new_set.insert(Inference {
208+
item: conflict_item.clone(),
209+
included: false,
210+
});
211+
}
212+
}
213+
new_set.insert(Inference {
214+
item,
215+
included: true,
216+
});
217+
}
218+
new_sets.push(new_set);
219+
}
220+
inferences.insert(node_id, new_sets);
221+
}
222+
223+
for edge_index in (0..graph.edge_count()).map(EdgeIndex::new) {
224+
let (from_index, _) = graph.edge_endpoints(edge_index).unwrap();
225+
let Some(inference_sets) = inferences.get(&from_index) else {
226+
continue;
227+
};
228+
// If not all possible paths (represented by our inferences)
229+
// satisfy the conflict marker on this edge, then we can't make any
230+
// simplifications. Namely, because it follows that out inferences
231+
// aren't always true. Some of them may sometimes be false.
232+
let all_paths_satisfied = inference_sets.iter().all(|set| {
233+
let extras = set
234+
.iter()
235+
.filter_map(|inf| {
236+
if !inf.included {
237+
return None;
238+
}
239+
Some((inf.item.package().clone(), inf.item.extra()?.clone()))
240+
})
241+
.collect::<Vec<(PackageName, ExtraName)>>();
242+
let groups = set
243+
.iter()
244+
.filter_map(|inf| {
245+
if !inf.included {
246+
return None;
247+
}
248+
Some((inf.item.package().clone(), inf.item.group()?.clone()))
249+
})
250+
.collect::<Vec<(PackageName, GroupName)>>();
251+
graph[edge_index].conflict().evaluate(&extras, &groups)
252+
});
253+
if !all_paths_satisfied {
254+
continue;
255+
}
256+
for set in inference_sets {
257+
for inf in set {
258+
if inf.included {
259+
graph[edge_index].assume_conflict_item(&inf.item);
260+
} else {
261+
graph[edge_index].assume_not_conflict_item(&inf.item);
262+
}
263+
}
264+
}
265+
}
266+
}

crates/uv-resolver/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ pub use resolver::{
2222
PackageVersionsResult, Reporter as ResolverReporter, Resolver, ResolverEnvironment,
2323
ResolverProvider, VersionsResponse, WheelMetadataResult,
2424
};
25-
pub use universal_marker::UniversalMarker;
25+
pub use universal_marker::{ConflictMarker, UniversalMarker};
2626
pub use version_map::VersionMap;
2727
pub use yanks::AllowedYanks;
2828

crates/uv-resolver/src/lock/mod.rs

+15-15
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pub use crate::lock::target::InstallTarget;
1919
pub use crate::lock::tree::TreeDisplay;
2020
use crate::requires_python::SimplifiedMarkerTree;
2121
use crate::resolution::{AnnotatedDist, ResolutionGraphNode};
22-
use crate::universal_marker::UniversalMarker;
22+
use crate::universal_marker::{ConflictMarker, UniversalMarker};
2323
use crate::{
2424
ExcludeNewer, InMemoryIndex, MetadataResponse, PrereleaseMode, RequiresPython, ResolutionMode,
2525
ResolverOutput,
@@ -63,21 +63,21 @@ static LINUX_MARKERS: LazyLock<UniversalMarker> = LazyLock::new(|| {
6363
"platform_system == 'Linux' and os_name == 'posix' and sys_platform == 'linux'",
6464
)
6565
.unwrap();
66-
UniversalMarker::new(pep508, MarkerTree::TRUE)
66+
UniversalMarker::new(pep508, ConflictMarker::TRUE)
6767
});
6868
static WINDOWS_MARKERS: LazyLock<UniversalMarker> = LazyLock::new(|| {
6969
let pep508 = MarkerTree::from_str(
7070
"platform_system == 'Windows' and os_name == 'nt' and sys_platform == 'win32'",
7171
)
7272
.unwrap();
73-
UniversalMarker::new(pep508, MarkerTree::TRUE)
73+
UniversalMarker::new(pep508, ConflictMarker::TRUE)
7474
});
7575
static MAC_MARKERS: LazyLock<UniversalMarker> = LazyLock::new(|| {
7676
let pep508 = MarkerTree::from_str(
7777
"platform_system == 'Darwin' and os_name == 'posix' and sys_platform == 'darwin'",
7878
)
7979
.unwrap();
80-
UniversalMarker::new(pep508, MarkerTree::TRUE)
80+
UniversalMarker::new(pep508, ConflictMarker::TRUE)
8181
});
8282

8383
#[derive(Clone, Debug, serde::Deserialize)]
@@ -149,7 +149,7 @@ impl Lock {
149149
resolution
150150
.fork_markers
151151
.iter()
152-
.filter(|fork_markers| !fork_markers.is_disjoint(&dist.marker))
152+
.filter(|fork_markers| !fork_markers.is_disjoint(dist.marker))
153153
.copied()
154154
.collect()
155155
} else {
@@ -296,16 +296,16 @@ impl Lock {
296296
tag.starts_with(linux_tag) || tag == "linux_armv6l" || tag == "linux_armv7l"
297297
})
298298
}) {
299-
!graph.graph[node_index].marker().is_disjoint(&LINUX_MARKERS)
299+
!graph.graph[node_index].marker().is_disjoint(*LINUX_MARKERS)
300300
} else if platform_tags
301301
.iter()
302302
.all(|tag| windows_tags.contains(&&**tag))
303303
{
304304
!graph.graph[node_index]
305305
.marker()
306-
.is_disjoint(&WINDOWS_MARKERS)
306+
.is_disjoint(*WINDOWS_MARKERS)
307307
} else if platform_tags.iter().all(|tag| tag.starts_with("macosx_")) {
308-
!graph.graph[node_index].marker().is_disjoint(&MAC_MARKERS)
308+
!graph.graph[node_index].marker().is_disjoint(*MAC_MARKERS)
309309
} else {
310310
true
311311
}
@@ -860,7 +860,7 @@ impl Lock {
860860
|| dist
861861
.fork_markers
862862
.iter()
863-
.any(|marker| marker.evaluate(marker_env, &[]))
863+
.any(|marker| marker.evaluate_no_extras(marker_env))
864864
{
865865
if found_dist.is_some() {
866866
return Err(format!("found multiple packages matching `{name}`"));
@@ -1449,7 +1449,9 @@ impl TryFrom<LockWire> for Lock {
14491449
.map(|simplified_marker| simplified_marker.into_marker(&wire.requires_python))
14501450
// TODO(ag): Consider whether this should also deserialize a conflict marker.
14511451
// We currently aren't serializing. Dropping it completely is likely to be wrong.
1452-
.map(|complexified_marker| UniversalMarker::new(complexified_marker, MarkerTree::TRUE))
1452+
.map(|complexified_marker| {
1453+
UniversalMarker::new(complexified_marker, ConflictMarker::TRUE)
1454+
})
14531455
.collect();
14541456
let lock = Lock::new(
14551457
wire.version,
@@ -2251,7 +2253,7 @@ impl PackageWire {
22512253
// TODO(ag): Consider whether this should also deserialize a conflict marker.
22522254
// We currently aren't serializing. Dropping it completely is likely to be wrong.
22532255
.map(|complexified_marker| {
2254-
UniversalMarker::new(complexified_marker, MarkerTree::TRUE)
2256+
UniversalMarker::new(complexified_marker, ConflictMarker::TRUE)
22552257
})
22562258
.collect(),
22572259
dependencies: unwire_deps(self.dependencies)?,
@@ -3541,7 +3543,7 @@ impl Dependency {
35413543
complexified_marker: UniversalMarker,
35423544
) -> Dependency {
35433545
let simplified_marker =
3544-
SimplifiedMarkerTree::new(requires_python, complexified_marker.pep508());
3546+
SimplifiedMarkerTree::new(requires_python, complexified_marker.combined());
35453547
Dependency {
35463548
package_id,
35473549
extra,
@@ -3621,7 +3623,6 @@ struct DependencyWire {
36213623
extra: BTreeSet<ExtraName>,
36223624
#[serde(default)]
36233625
marker: SimplifiedMarkerTree,
3624-
// FIXME: Add support for representing conflict markers.
36253626
}
36263627

36273628
impl DependencyWire {
@@ -3635,8 +3636,7 @@ impl DependencyWire {
36353636
package_id: self.package_id.unwire(unambiguous_package_ids)?,
36363637
extra: self.extra,
36373638
simplified_marker: self.marker,
3638-
// FIXME: Support reading conflict markers.
3639-
complexified_marker: UniversalMarker::new(complexified_marker, MarkerTree::TRUE),
3639+
complexified_marker: UniversalMarker::from_combined(complexified_marker),
36403640
})
36413641
}
36423642
}

crates/uv-resolver/src/lock/requirements_txt.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use uv_pypi_types::{ParsedArchiveUrl, ParsedGitUrl};
2020

2121
use crate::graph_ops::marker_reachability;
2222
use crate::lock::{Package, PackageId, Source};
23-
use crate::universal_marker::UniversalMarker;
23+
use crate::universal_marker::{ConflictMarker, UniversalMarker};
2424
use crate::{InstallTarget, LockError};
2525

2626
/// An export of a [`Lock`] that renders in `requirements.txt` format.
@@ -119,7 +119,7 @@ impl<'lock> RequirementsTxtExport<'lock> {
119119
// `marker_reachability` wants and it (probably) isn't
120120
// worth inventing a new abstraction so that it can accept
121121
// graphs with either `MarkerTree` or `UniversalMarker`.
122-
MarkerTree::TRUE,
122+
ConflictMarker::TRUE,
123123
),
124124
);
125125

@@ -172,7 +172,7 @@ impl<'lock> RequirementsTxtExport<'lock> {
172172
dep.simplified_marker.as_simplified_marker_tree(),
173173
// See note above for other `UniversalMarker::new` for
174174
// why this is OK.
175-
MarkerTree::TRUE,
175+
ConflictMarker::TRUE,
176176
),
177177
);
178178

crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__missing_dependency_source_unambiguous.snap

+1-4
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,7 @@ Ok(
135135
simplified_marker: SimplifiedMarkerTree(
136136
true,
137137
),
138-
complexified_marker: UniversalMarker {
139-
pep508_marker: python_full_version >= '3.12',
140-
conflict_marker: true,
141-
},
138+
complexified_marker: python_full_version >= '3.12',
142139
},
143140
],
144141
optional_dependencies: {},

crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__missing_dependency_source_version_unambiguous.snap

+1-4
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,7 @@ Ok(
135135
simplified_marker: SimplifiedMarkerTree(
136136
true,
137137
),
138-
complexified_marker: UniversalMarker {
139-
pep508_marker: python_full_version >= '3.12',
140-
conflict_marker: true,
141-
},
138+
complexified_marker: python_full_version >= '3.12',
142139
},
143140
],
144141
optional_dependencies: {},

0 commit comments

Comments
 (0)