|
1 |
| -use petgraph::graph::NodeIndex; |
| 1 | +use petgraph::graph::{EdgeIndex, NodeIndex}; |
2 | 2 | use petgraph::visit::EdgeRef;
|
3 | 3 | use petgraph::{Direction, Graph};
|
4 |
| -use rustc_hash::{FxBuildHasher, FxHashMap}; |
| 4 | +use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet}; |
5 | 5 | use std::collections::hash_map::Entry;
|
6 | 6 |
|
| 7 | +use uv_normalize::{ExtraName, GroupName, PackageName}; |
| 8 | +use uv_pypi_types::{ConflictItem, Conflicts}; |
| 9 | + |
| 10 | +use crate::resolution::ResolutionGraphNode; |
7 | 11 | use crate::universal_marker::UniversalMarker;
|
8 | 12 |
|
9 | 13 | /// Determine the markers under which a package is reachable in the dependency tree.
|
@@ -79,3 +83,184 @@ pub(crate) fn marker_reachability<T>(
|
79 | 83 |
|
80 | 84 | reachability
|
81 | 85 | }
|
| 86 | + |
| 87 | +/// Traverse the given dependency graph and propagate activated markers. |
| 88 | +/// |
| 89 | +/// For example, given an edge like `foo[x1] -> bar`, then it is known that |
| 90 | +/// `x1` is activated. This in turn can be used to simplify any downstream |
| 91 | +/// conflict markers with `extra == "x1"` in them (by replacing `extra == "x1"` |
| 92 | +/// with `true`). |
| 93 | +pub(crate) fn simplify_conflict_markers( |
| 94 | + conflicts: &Conflicts, |
| 95 | + graph: &mut Graph<ResolutionGraphNode, UniversalMarker>, |
| 96 | +) { |
| 97 | + /// An inference about whether a conflicting item is always included or |
| 98 | + /// excluded. |
| 99 | + /// |
| 100 | + /// We collect these for each node in the graph after determining which |
| 101 | + /// extras/groups are activated for each node. Once we know what's |
| 102 | + /// activated, we can infer what must also be *inactivated* based on what's |
| 103 | + /// conflicting with it. So for example, if we have a conflict marker like |
| 104 | + /// `extra == 'foo' and extra != 'bar'`, and `foo` and `bar` have been |
| 105 | + /// declared as conflicting, and we are in a part of the graph where we |
| 106 | + /// know `foo` must be activated, then it follows that `extra != 'bar'` |
| 107 | + /// must always be true. Because if it were false, it would imply both |
| 108 | + /// `foo` and `bar` were activated simultaneously, which uv guarantees |
| 109 | + /// won't happen. |
| 110 | + /// |
| 111 | + /// We then use these inferences to simplify the conflict markers. |
| 112 | + #[derive(Clone, Debug, Eq, Hash, PartialEq)] |
| 113 | + struct Inference { |
| 114 | + item: ConflictItem, |
| 115 | + included: bool, |
| 116 | + } |
| 117 | + |
| 118 | + // Do nothing if there are no declared conflicts. Without any declared |
| 119 | + // conflicts, we know we have no conflict markers and thus nothing to |
| 120 | + // simplify by determining which extras are activated at different points |
| 121 | + // in the dependency graph. |
| 122 | + if conflicts.is_empty() { |
| 123 | + return; |
| 124 | + } |
| 125 | + |
| 126 | + // The set of activated extras and groups for each node. The ROOT nodes |
| 127 | + // don't have any extras/groups activated. |
| 128 | + let mut activated: FxHashMap<NodeIndex, Vec<FxHashSet<ConflictItem>>> = FxHashMap::default(); |
| 129 | + |
| 130 | + // Collect the root nodes. |
| 131 | + // |
| 132 | + // Besides the actual virtual root node, virtual dev dependencies packages are also root |
| 133 | + // nodes since the edges don't cover dev dependencies. |
| 134 | + let mut queue: Vec<_> = graph |
| 135 | + .node_indices() |
| 136 | + .filter(|node_index| { |
| 137 | + graph |
| 138 | + .edges_directed(*node_index, Direction::Incoming) |
| 139 | + .next() |
| 140 | + .is_none() |
| 141 | + }) |
| 142 | + .collect(); |
| 143 | + |
| 144 | + let mut seen: FxHashSet<NodeIndex> = FxHashSet::default(); |
| 145 | + while let Some(parent_index) = queue.pop() { |
| 146 | + if let Some((package, extra)) = graph[parent_index].package_extra_names() { |
| 147 | + for set in activated |
| 148 | + .entry(parent_index) |
| 149 | + .or_insert_with(|| vec![FxHashSet::default()]) |
| 150 | + { |
| 151 | + set.insert(ConflictItem::from((package.clone(), extra.clone()))); |
| 152 | + } |
| 153 | + } |
| 154 | + if let Some((package, group)) = graph[parent_index].package_group_names() { |
| 155 | + for set in activated |
| 156 | + .entry(parent_index) |
| 157 | + .or_insert_with(|| vec![FxHashSet::default()]) |
| 158 | + { |
| 159 | + set.insert(ConflictItem::from((package.clone(), group.clone()))); |
| 160 | + } |
| 161 | + } |
| 162 | + let sets = activated.get(&parent_index).cloned().unwrap_or_default(); |
| 163 | + for child_edge in graph.edges_directed(parent_index, Direction::Outgoing) { |
| 164 | + let mut change = false; |
| 165 | + for set in sets.clone() { |
| 166 | + let existing = activated.entry(child_edge.target()).or_default(); |
| 167 | + // This is doing a linear scan for testing membership, which |
| 168 | + // is non-ideal. But it's not actually clear that there's a |
| 169 | + // strictly better alternative without a real workload being |
| 170 | + // slow because of this. Namely, we are checking whether the |
| 171 | + // _set_ being inserted is equivalent to an existing set. So |
| 172 | + // instead of, say, `Vec<FxHashSet<ConflictItem>>`, we could |
| 173 | + // have `BTreeSet<BTreeSet<ConflictItem>>`. But this in turn |
| 174 | + // makes mutating the elements in each set (done above) more |
| 175 | + // difficult and likely require more allocations. |
| 176 | + // |
| 177 | + // So if this does result in a perf slowdown on some real |
| 178 | + // work-load, I think the first step would be to re-examine |
| 179 | + // whether we're doing more work than we need to be doing. If |
| 180 | + // we aren't, then we might want a more purpose-built data |
| 181 | + // structure for this. |
| 182 | + if !existing.contains(&set) { |
| 183 | + existing.push(set); |
| 184 | + change = true; |
| 185 | + } |
| 186 | + } |
| 187 | + if seen.insert(child_edge.target()) || change { |
| 188 | + queue.push(child_edge.target()); |
| 189 | + } |
| 190 | + } |
| 191 | + } |
| 192 | + |
| 193 | + let mut inferences: FxHashMap<NodeIndex, Vec<FxHashSet<Inference>>> = FxHashMap::default(); |
| 194 | + for (node_id, sets) in activated { |
| 195 | + let mut new_sets = vec![]; |
| 196 | + for set in sets { |
| 197 | + let mut new_set = FxHashSet::default(); |
| 198 | + for item in set { |
| 199 | + for conflict_set in conflicts.iter() { |
| 200 | + if !conflict_set.contains(item.package(), item.as_ref().conflict()) { |
| 201 | + continue; |
| 202 | + } |
| 203 | + for conflict_item in conflict_set.iter() { |
| 204 | + if conflict_item == &item { |
| 205 | + continue; |
| 206 | + } |
| 207 | + new_set.insert(Inference { |
| 208 | + item: conflict_item.clone(), |
| 209 | + included: false, |
| 210 | + }); |
| 211 | + } |
| 212 | + } |
| 213 | + new_set.insert(Inference { |
| 214 | + item, |
| 215 | + included: true, |
| 216 | + }); |
| 217 | + } |
| 218 | + new_sets.push(new_set); |
| 219 | + } |
| 220 | + inferences.insert(node_id, new_sets); |
| 221 | + } |
| 222 | + |
| 223 | + for edge_index in (0..graph.edge_count()).map(EdgeIndex::new) { |
| 224 | + let (from_index, _) = graph.edge_endpoints(edge_index).unwrap(); |
| 225 | + let Some(inference_sets) = inferences.get(&from_index) else { |
| 226 | + continue; |
| 227 | + }; |
| 228 | + // If not all possible paths (represented by our inferences) |
| 229 | + // satisfy the conflict marker on this edge, then we can't make any |
| 230 | + // simplifications. Namely, because it follows that out inferences |
| 231 | + // aren't always true. Some of them may sometimes be false. |
| 232 | + let all_paths_satisfied = inference_sets.iter().all(|set| { |
| 233 | + let extras = set |
| 234 | + .iter() |
| 235 | + .filter_map(|inf| { |
| 236 | + if !inf.included { |
| 237 | + return None; |
| 238 | + } |
| 239 | + Some((inf.item.package().clone(), inf.item.extra()?.clone())) |
| 240 | + }) |
| 241 | + .collect::<Vec<(PackageName, ExtraName)>>(); |
| 242 | + let groups = set |
| 243 | + .iter() |
| 244 | + .filter_map(|inf| { |
| 245 | + if !inf.included { |
| 246 | + return None; |
| 247 | + } |
| 248 | + Some((inf.item.package().clone(), inf.item.group()?.clone())) |
| 249 | + }) |
| 250 | + .collect::<Vec<(PackageName, GroupName)>>(); |
| 251 | + graph[edge_index].conflict().evaluate(&extras, &groups) |
| 252 | + }); |
| 253 | + if !all_paths_satisfied { |
| 254 | + continue; |
| 255 | + } |
| 256 | + for set in inference_sets { |
| 257 | + for inf in set { |
| 258 | + if inf.included { |
| 259 | + graph[edge_index].assume_conflict_item(&inf.item); |
| 260 | + } else { |
| 261 | + graph[edge_index].assume_not_conflict_item(&inf.item); |
| 262 | + } |
| 263 | + } |
| 264 | + } |
| 265 | + } |
| 266 | +} |
0 commit comments