
Commit 4cf58a5

mayastor-bors and tiagolobocastro committed
chore(bors): merge pull request #906
906: fix(topology): use topology when decreasing replicas r=tiagolobocastro a=tiagolobocastro

When removing volume replicas, take the pool topology into account and prefer to remove
replicas which don't line up with the volume topology.
This can happen when a volume's topology has been modified.

Co-authored-by: Tiago Castro <[email protected]>
2 parents 35aa836 + 1e414c7 commit 4cf58a5
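A minimal, self-contained sketch of the idea described in the commit message (hypothetical names, not the control-plane API): when the replica count is reduced, candidates whose placement no longer satisfies the volume topology sort ahead of in-sync candidates, so they are picked for removal first.

```rust
use std::cmp::Ordering;

// Hypothetical removal candidate; the real type is ReplicaItem in the control plane.
struct Candidate {
    name: &'static str,
    // None: topology not evaluated, Some(false): out of sync, Some(true): in sync.
    topology_ok: Option<bool>,
}

fn removal_order(a: &Candidate, b: &Candidate) -> Ordering {
    // Option<bool> orders as None < Some(false) < Some(true), so replicas that
    // are out of sync with the volume topology come first in the sort order.
    a.topology_ok.cmp(&b.topology_ok)
}

fn main() {
    let mut candidates = vec![
        Candidate { name: "replica-on-matching-pool", topology_ok: Some(true) },
        Candidate { name: "replica-on-mismatched-pool", topology_ok: Some(false) },
    ];
    candidates.sort_by(removal_order);
    // Assuming the caller removes candidates from the front of the sorted list,
    // the mismatched replica is picked first.
    assert_eq!(candidates[0].name, "replica-on-mismatched-pool");
}
```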

File tree: 5 files changed, +113 −22 lines


control-plane/agents/src/bin/core/controller/scheduling/mod.rs (+15)

```diff
@@ -163,6 +163,21 @@ impl ChildSorters {
             match Self::sort_by_health(a, b) {
                 Ordering::Equal => match Self::sort_by_child(a, b) {
                     Ordering::Equal => {
+                        // remove mismatched topology replicas first
+                        if let (Some(a), Some(b)) = (a.valid_node_topology(), b.valid_node_topology()) {
+                            match a.cmp(b) {
+                                Ordering::Equal => {}
+                                // todo: what if the pool and node topology are at odds with each other?
+                                _else => return _else,
+                            }
+                        }
+                        if let (Some(a), Some(b)) = (a.valid_pool_topology(), b.valid_pool_topology()) {
+                            match a.cmp(b) {
+                                Ordering::Equal => {}
+                                _else => return _else,
+                            }
+                        }
+
                         let childa_is_local = !a.spec().share.shared();
                         let childb_is_local = !b.spec().share.shared();
                         if childa_is_local == childb_is_local {
```
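The new comparison branch calls `cmp` directly on the `Option<bool>` validity flags. That relies on the standard-library ordering of `Option` and `bool`, shown below for reference:

```rust
use std::cmp::Ordering;

fn main() {
    // Option<T: Ord> orders None before Some(_), and bool orders false before true,
    // so: None < Some(false) < Some(true).
    assert_eq!(None::<bool>.cmp(&Some(false)), Ordering::Less);
    assert_eq!(Some(false).cmp(&Some(true)), Ordering::Less);
    assert_eq!(Some(true).cmp(&Some(true)), Ordering::Equal);
}
```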

control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs (+26)

```diff
@@ -176,6 +176,8 @@ pub(crate) struct ReplicaItem {
     /// a Affinity Group and the already created volumes have replicas
     /// on this pool.
     ag_replicas_on_pool: Option<u64>,
+    valid_pool_topology: Option<bool>,
+    valid_node_topology: Option<bool>,
 }
 
 impl ReplicaItem {
@@ -197,8 +199,32 @@ impl ReplicaItem {
             child_spec,
             child_info,
             ag_replicas_on_pool,
+            valid_pool_topology: None,
+            valid_node_topology: None,
         }
     }
+    /// Set the validity of the replica's node topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_node_topology(mut self, valid_node_topology: Option<bool>) -> ReplicaItem {
+        self.valid_node_topology = valid_node_topology;
+        self
+    }
+    /// Set the validity of the replica's pool topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_pool_topology(mut self, valid_pool_topology: Option<bool>) -> ReplicaItem {
+        self.valid_pool_topology = valid_pool_topology;
+        self
+    }
+    /// Get a reference to the node topology validity flag.
+    pub(crate) fn valid_node_topology(&self) -> &Option<bool> {
+        &self.valid_node_topology
+    }
+    /// Get a reference to the pool topology validity flag.
+    pub(crate) fn valid_pool_topology(&self) -> &Option<bool> {
+        &self.valid_pool_topology
+    }
     /// Get a reference to the replica spec.
     pub(crate) fn spec(&self) -> &ReplicaSpec {
         &self.replica_spec
```
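The new `with_node_topology`/`with_pool_topology` methods follow the consuming-builder style that `ReplicaItem` already uses. A stripped-down sketch of the pattern (a simplified stand-in type, not the actual control-plane struct):

```rust
// Simplified stand-in for ReplicaItem, showing the consuming-builder setters.
#[derive(Debug, Default)]
struct Item {
    valid_node_topology: Option<bool>,
    valid_pool_topology: Option<bool>,
}

impl Item {
    // Each setter takes `self` by value and returns it, so calls can be chained.
    fn with_node_topology(mut self, valid: Option<bool>) -> Self {
        self.valid_node_topology = valid;
        self
    }
    fn with_pool_topology(mut self, valid: Option<bool>) -> Self {
        self.valid_pool_topology = valid;
        self
    }
}

fn main() {
    // Flags default to None (not evaluated) and are filled in by the scheduler.
    let item = Item::default()
        .with_node_topology(Some(true))
        .with_pool_topology(Some(false));
    println!("{item:?}");
}
```

Taking `self` by value keeps the chained construction in `volume.rs` (next file) free of intermediate mutable bindings.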

control-plane/agents/src/bin/core/controller/scheduling/volume.rs (+18 −2)

```diff
@@ -5,13 +5,12 @@ use crate::controller::{
         affinity_group::{get_pool_ag_replica_count, get_restricted_nodes},
         pool::replica_rebuildable,
         resources::{ChildItem, PoolItem, PoolItemLister, ReplicaItem},
-        volume_policy::{SimplePolicy, ThickPolicy},
+        volume_policy::{node::NodeFilters, SimplePolicy, ThickPolicy},
         AddReplicaFilters, AddReplicaSorters, ChildSorters, ResourceData, ResourceFilter,
     },
     wrapper::PoolWrapper,
 };
 use agents::errors::SvcError;
-use std::{collections::HashMap, ops::Deref};
 use stor_port::types::v0::{
     store::{
         nexus::NexusSpec, nexus_persistence::NexusInfo, snapshots::replica::ReplicaSnapshot,
@@ -20,6 +19,8 @@ use stor_port::types::v0::{
     transport::{NodeId, PoolId, VolumeState},
 };
 
+use std::{collections::HashMap, ops::Deref};
+
 /// Move replica to another pool.
 #[derive(Default, Clone)]
 pub(crate) struct MoveReplica {
@@ -343,6 +344,21 @@ impl GetChildForRemovalContext {
                 }),
                 ag_rep_count,
             )
+            .with_node_topology({
+                Some(NodeFilters::topology_replica(
+                    &self.spec,
+                    &self.registry,
+                    replica_spec.pool_name(),
+                ))
+            })
+            .with_pool_topology({
+                use crate::controller::scheduling::volume_policy::pool::PoolBaseFilters;
+                Some(PoolBaseFilters::topology_(
+                    &self.spec,
+                    &self.registry,
+                    replica_spec.pool_name(),
+                ))
+            })
         })
         .collect::<Vec<_>>()
 }
```

control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs (+30 −8)

```diff
@@ -1,11 +1,18 @@
-use crate::controller::scheduling::{
-    nexus::GetSuitableNodesContext,
-    resources::{NodeItem, PoolItem},
-    volume::GetSuitablePoolsContext,
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        nexus::GetSuitableNodesContext,
+        resources::{NodeItem, PoolItem},
+        volume::GetSuitablePoolsContext,
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{NodeId, NodeTopology, PoolId},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::NodeTopology;
 
 /// Filter nodes used for replica creation.
 pub(crate) struct NodeFilters {}
@@ -72,9 +79,24 @@ impl NodeFilters {
     }
     /// Should only attempt to use nodes having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_replica(
+        volume: &VolumeSpec,
+        registry: &Registry,
+        pool_id: &PoolId,
+    ) -> bool {
+        let Ok(pool) = registry.specs().pool(pool_id) else {
+            return false;
+        };
+        Self::topology_(volume, registry, &pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, node_id: &NodeId) -> bool {
         let volume_node_topology_inclusion_labels: HashMap<String, String>;
         let volume_node_topology_exclusion_labels: HashMap<String, String>;
-        match &request.topology {
+        match &volume.topology {
             None => return true,
             Some(topology) => match &topology.node {
                 None => return true,
@@ -99,7 +121,7 @@ impl NodeFilters {
         };
 
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().node(&item.pool.node) {
+        match registry.specs().node(node_id) {
             Ok(spec) => {
                 qualifies_label_criteria(volume_node_topology_inclusion_labels, spec.labels())
                     && qualifies_label_criteria(
```
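`topology_replica` uses a `let ... else` binding to bail out when the pool spec cannot be found, treating a replica on an unknown pool as not satisfying the topology. A minimal illustration of that control flow (the lookup is mocked; the real code calls `registry.specs().pool(pool_id)` and then re-checks the node topology):

```rust
// Mocked pool lookup; the real code queries the registry for the pool spec.
fn lookup_pool_node(pool_id: &str) -> Result<String, ()> {
    if pool_id == "pool-1" {
        Ok("node-1".to_string()) // the node hosting the pool
    } else {
        Err(())
    }
}

fn replica_topology_valid(pool_id: &str) -> bool {
    // If the pool is unknown, return early and treat the replica as invalid,
    // which makes it a preferred removal candidate.
    let Ok(node) = lookup_pool_node(pool_id) else {
        return false;
    };
    // The real code then checks the volume's node topology against this node.
    !node.is_empty()
}

fn main() {
    assert!(replica_topology_valid("pool-1"));
    assert!(!replica_topology_valid("missing-pool"));
}
```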

control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs (+24 −12)

```diff
@@ -1,10 +1,17 @@
-use crate::controller::scheduling::{
-    resources::{ChildItem, PoolItem},
-    volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        resources::{ChildItem, PoolItem},
+        volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{PoolId, PoolStatus, PoolTopology},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::{PoolStatus, PoolTopology};
 
 /// Filter pools used for replica creation.
 pub(crate) struct PoolBaseFilters {}
@@ -77,27 +84,32 @@ impl PoolBaseFilters {
 
     /// Should only attempt to use pools having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.id)
+    }
+
+    /// Should only attempt to use pools having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, pool_id: &PoolId) -> bool {
         let volume_pool_topology_inclusion_labels: HashMap<String, String>;
-        match request.topology.clone() {
+        match &volume.topology {
             None => return true,
-            Some(topology) => match topology.pool {
+            Some(topology) => match &topology.pool {
                 None => return true,
-                Some(pool_topology) => match pool_topology {
+                Some(pool_topology) => match &pool_topology {
                     PoolTopology::Labelled(labelled_topology) => {
                         // The labels in Volume Pool Topology should match the pool labels if
                         // present, otherwise selection of any pool is allowed.
-                        if !labelled_topology.inclusion.is_empty() {
-                            volume_pool_topology_inclusion_labels = labelled_topology.inclusion
-                        } else {
+                        if labelled_topology.inclusion.is_empty() {
+                            // todo: missing exclusion check?
                            return true;
                        }
+                        volume_pool_topology_inclusion_labels = labelled_topology.inclusion.clone()
                     }
                 },
             },
         };
 
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().pool(&item.pool.id) {
+        match registry.specs().pool(pool_id) {
             Ok(spec) => match spec.labels {
                 None => false,
                 Some(pool_labels) => {
```
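The inclusion check ultimately defers to `qualifies_label_criteria`. A plausible reading of the inclusion case, sketched with plain `HashMap`s (a hypothetical helper, not the actual implementation): every inclusion label on the volume's pool topology must be present on the pool with the same value. Exclusion labels are out of scope of this sketch; the diff's `todo` comment notes that side still needs attention.

```rust
use std::collections::HashMap;

// Hypothetical sketch of an inclusion-label check: every required key/value
// pair must appear in the pool's labels with a matching value.
fn includes_all(required: &HashMap<String, String>, pool_labels: &HashMap<String, String>) -> bool {
    required
        .iter()
        .all(|(k, v)| pool_labels.get(k).map_or(false, |pv| pv == v))
}

fn main() {
    let required = HashMap::from([("zone".to_string(), "a".to_string())]);
    let pool_in_zone_a = HashMap::from([("zone".to_string(), "a".to_string())]);
    let pool_in_zone_b = HashMap::from([("zone".to_string(), "b".to_string())]);
    assert!(includes_all(&required, &pool_in_zone_a));
    assert!(!includes_all(&required, &pool_in_zone_b));
}
```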
