Commit 1e414c7
fix(topology): use topology when decreasing replicas
When removing volume replicas, take the topology into account and prefer to remove replicas which don't line up with the volume topology. This can happen when a volume's topology has been modified.

Signed-off-by: Tiago Castro <[email protected]>
1 parent 4104acf commit 1e414c7

File tree

5 files changed (+113 -22 lines)

control-plane/agents/src/bin/core/controller/scheduling/mod.rs (+15)

@@ -163,6 +163,21 @@ impl ChildSorters {
         match Self::sort_by_health(a, b) {
             Ordering::Equal => match Self::sort_by_child(a, b) {
                 Ordering::Equal => {
+                    // remove mismatched topology replicas first
+                    if let (Some(a), Some(b)) = (a.valid_node_topology(), b.valid_node_topology()) {
+                        match a.cmp(b) {
+                            Ordering::Equal => {}
+                            // todo: what if the pool and node topology are at odds with each other?
+                            _else => return _else,
+                        }
+                    }
+                    if let (Some(a), Some(b)) = (a.valid_pool_topology(), b.valid_pool_topology()) {
+                        match a.cmp(b) {
+                            Ordering::Equal => {}
+                            _else => return _else,
+                        }
+                    }
+
                     let childa_is_local = !a.spec().share.shared();
                     let childb_is_local = !b.spec().share.shared();
                     if childa_is_local == childb_is_local {
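For context (not part of the commit): the hunk above compares the new `Option<bool>` topology-validity flags with `Ord::cmp`. Rust's derived ordering gives `None < Some(false) < Some(true)`, so a replica flagged `Some(false)` (out of sync with the volume topology) compares as `Less` than one flagged `Some(true)`; assuming the front of the sorted list holds the preferred removal candidates, as the inline comment "remove mismatched topology replicas first" suggests, that is exactly the replica removed first. A minimal standalone sketch of that ordering:

use std::cmp::Ordering;

fn main() {
    // Derived total order on Option<bool>: None < Some(false) < Some(true).
    let out_of_sync: Option<bool> = Some(false); // replica no longer matches the volume topology
    let in_sync: Option<bool> = Some(true);      // replica still matches the volume topology

    // The out-of-sync replica compares as Less, i.e. it sorts first and is the
    // preferred candidate when the replica count is decreased.
    assert_eq!(out_of_sync.cmp(&in_sync), Ordering::Less);

    // Sorting a mixed list keeps unknown/invalid flags ahead of valid ones.
    let mut flags = vec![Some(true), None, Some(false)];
    flags.sort();
    assert_eq!(flags, vec![None, Some(false), Some(true)]);
}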

control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs (+26)

@@ -176,6 +176,8 @@ pub(crate) struct ReplicaItem {
     /// a Affinity Group and the already created volumes have replicas
     /// on this pool.
     ag_replicas_on_pool: Option<u64>,
+    valid_pool_topology: Option<bool>,
+    valid_node_topology: Option<bool>,
 }
 
 impl ReplicaItem {
@@ -197,8 +199,32 @@ impl ReplicaItem {
             child_spec,
             child_info,
             ag_replicas_on_pool,
+            valid_pool_topology: None,
+            valid_node_topology: None,
         }
     }
+    /// Set the validity of the replica's node topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_node_topology(mut self, valid_node_topology: Option<bool>) -> ReplicaItem {
+        self.valid_node_topology = valid_node_topology;
+        self
+    }
+    /// Set the validity of the replica's pool topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_pool_topology(mut self, valid_pool_topology: Option<bool>) -> ReplicaItem {
+        self.valid_pool_topology = valid_pool_topology;
+        self
+    }
+    /// Get a reference to the node topology validity flag.
+    pub(crate) fn valid_node_topology(&self) -> &Option<bool> {
+        &self.valid_node_topology
+    }
+    /// Get a reference to the pool topology validity flag.
+    pub(crate) fn valid_pool_topology(&self) -> &Option<bool> {
+        &self.valid_pool_topology
+    }
     /// Get a reference to the replica spec.
     pub(crate) fn spec(&self) -> &ReplicaSpec {
         &self.replica_spec

control-plane/agents/src/bin/core/controller/scheduling/volume.rs (+18 -2)

@@ -5,13 +5,12 @@ use crate::controller::{
         affinity_group::{get_pool_ag_replica_count, get_restricted_nodes},
         pool::replica_rebuildable,
         resources::{ChildItem, PoolItem, PoolItemLister, ReplicaItem},
-        volume_policy::{SimplePolicy, ThickPolicy},
+        volume_policy::{node::NodeFilters, SimplePolicy, ThickPolicy},
         AddReplicaFilters, AddReplicaSorters, ChildSorters, ResourceData, ResourceFilter,
     },
     wrapper::PoolWrapper,
 };
 use agents::errors::SvcError;
-use std::{collections::HashMap, ops::Deref};
 use stor_port::types::v0::{
     store::{
         nexus::NexusSpec, nexus_persistence::NexusInfo, snapshots::replica::ReplicaSnapshot,
@@ -20,6 +19,8 @@ use stor_port::types::v0::{
     transport::{NodeId, PoolId, VolumeState},
 };
 
+use std::{collections::HashMap, ops::Deref};
+
 /// Move replica to another pool.
 #[derive(Default, Clone)]
 pub(crate) struct MoveReplica {
@@ -343,6 +344,21 @@ impl GetChildForRemovalContext {
                     }),
                     ag_rep_count,
                 )
+                .with_node_topology({
+                    Some(NodeFilters::topology_replica(
+                        &self.spec,
+                        &self.registry,
+                        replica_spec.pool_name(),
+                    ))
+                })
+                .with_pool_topology({
+                    use crate::controller::scheduling::volume_policy::pool::PoolBaseFilters;
+                    Some(PoolBaseFilters::topology_(
+                        &self.spec,
+                        &self.registry,
+                        replica_spec.pool_name(),
+                    ))
+                })
             })
             .collect::<Vec<_>>()
     }

control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs (+30 -8)

@@ -1,11 +1,18 @@
-use crate::controller::scheduling::{
-    nexus::GetSuitableNodesContext,
-    resources::{NodeItem, PoolItem},
-    volume::GetSuitablePoolsContext,
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        nexus::GetSuitableNodesContext,
+        resources::{NodeItem, PoolItem},
+        volume::GetSuitablePoolsContext,
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{NodeId, NodeTopology, PoolId},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::NodeTopology;
 
 /// Filter nodes used for replica creation.
 pub(crate) struct NodeFilters {}
@@ -72,9 +79,24 @@ impl NodeFilters {
     }
     /// Should only attempt to use nodes having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_replica(
+        volume: &VolumeSpec,
+        registry: &Registry,
+        pool_id: &PoolId,
+    ) -> bool {
+        let Ok(pool) = registry.specs().pool(pool_id) else {
+            return false;
+        };
+        Self::topology_(volume, registry, &pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, node_id: &NodeId) -> bool {
         let volume_node_topology_inclusion_labels: HashMap<String, String>;
         let volume_node_topology_exclusion_labels: HashMap<String, String>;
-        match &request.topology {
+        match &volume.topology {
             None => return true,
             Some(topology) => match &topology.node {
                 None => return true,
@@ -99,7 +121,7 @@ impl NodeFilters {
         };
 
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().node(&item.pool.node) {
+        match registry.specs().node(node_id) {
             Ok(spec) => {
                 qualifies_label_criteria(volume_node_topology_inclusion_labels, spec.labels())
                     && qualifies_label_criteria(
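As an aside (not from this diff): the body of `qualifies_label_criteria` is not shown here, so the sketch below is only a plausible stand-in for what inclusion/exclusion label matching against node labels typically means; the `labels_match` name, its signature, and the exact exclusion semantics are assumptions for illustration and the real helper in `volume_policy` may differ.

use std::collections::HashMap;

/// Hypothetical stand-in for a qualifies_label_criteria-style check (assumed semantics):
/// every inclusion label must be present on the node with the same value, and no
/// exclusion label may match.
fn labels_match(
    inclusion: &HashMap<String, String>,
    exclusion: &HashMap<String, String>,
    node_labels: &HashMap<String, String>,
) -> bool {
    let included = inclusion.iter().all(|(k, v)| node_labels.get(k) == Some(v));
    let excluded = exclusion.iter().any(|(k, v)| node_labels.get(k) == Some(v));
    included && !excluded
}

fn main() {
    let inclusion = HashMap::from([("zone".to_string(), "us-west-1a".to_string())]);
    let exclusion: HashMap<String, String> = HashMap::new();

    // A node labelled with a different zone no longer satisfies the volume's node
    // topology, so a replica placed there becomes the preferred removal candidate.
    let node_labels = HashMap::from([("zone".to_string(), "us-west-1b".to_string())]);
    assert!(!labels_match(&inclusion, &exclusion, &node_labels));
}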

control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs (+24 -12)

@@ -1,10 +1,17 @@
-use crate::controller::scheduling::{
-    resources::{ChildItem, PoolItem},
-    volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        resources::{ChildItem, PoolItem},
+        volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{PoolId, PoolStatus, PoolTopology},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::{PoolStatus, PoolTopology};
 
 /// Filter pools used for replica creation.
 pub(crate) struct PoolBaseFilters {}
@@ -77,27 +84,32 @@ impl PoolBaseFilters {
 
     /// Should only attempt to use pools having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.id)
+    }
+
+    /// Should only attempt to use pools having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, pool_id: &PoolId) -> bool {
         let volume_pool_topology_inclusion_labels: HashMap<String, String>;
-        match request.topology.clone() {
+        match &volume.topology {
             None => return true,
-            Some(topology) => match topology.pool {
+            Some(topology) => match &topology.pool {
                 None => return true,
-                Some(pool_topology) => match pool_topology {
+                Some(pool_topology) => match &pool_topology {
                     PoolTopology::Labelled(labelled_topology) => {
                         // The labels in Volume Pool Topology should match the pool labels if
                         // present, otherwise selection of any pool is allowed.
-                        if !labelled_topology.inclusion.is_empty() {
-                            volume_pool_topology_inclusion_labels = labelled_topology.inclusion
-                        } else {
+                        if labelled_topology.inclusion.is_empty() {
+                            // todo: missing exclusion check?
                             return true;
                         }
+                        volume_pool_topology_inclusion_labels = labelled_topology.inclusion.clone()
                     }
                },
            },
        };
 
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().pool(&item.pool.id) {
+        match registry.specs().pool(pool_id) {
             Ok(spec) => match spec.labels {
                 None => false,
                 Some(pool_labels) => {
