Commit 02aa5aa

mayastor-bors and tiagolobocastro committed

chore(bors): merge pull request #892

892: fix(csi-node): handle devices for existing subsystems r=tiagolobocastro a=tiagolobocastro

When a device is broken but the volume is not unstaged properly, the subsystem ends up lingering in the system. If the device is staged again, the device is assigned a different namespace id. Change the regex which is used in the device parameter to account for this.
Also extend the existing csi-node BDD testing.

Co-authored-by: Tiago Castro <[email protected]>

2 parents 00190e9 + d831ac0 commit 02aa5aa
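The crux of the fix is the `DEVICE_REGEX` change in `nvmf.rs` below: the old pattern hard-coded namespace 1, so a device re-staged under a lingering subsystem, to which the kernel assigns a new namespace id, was never matched. A minimal sketch of the behavioural difference, assuming the `regex` crate (the device name used here is illustrative):

use regex::Regex; // assumes regex = "1" in Cargo.toml

fn main() {
    // Patterns copied from the diff below: old vs new DEVICE_REGEX.
    let old = Regex::new(r"nvme(\d{1,3})n1").unwrap();
    let new = Regex::new(r"nvme(\d{1,5})n(\d{1,5})").unwrap();

    // Hypothetical device name: the kernel bumps the namespace id to 2
    // when the volume is staged again under the lingering subsystem.
    let dev_name = "nvme0n2";

    assert!(old.captures(dev_name).is_none()); // old pattern: device not found
    let caps = new.captures(dev_name).unwrap(); // new pattern: matches
    assert_eq!(&caps[1], "0"); // controller instance number
    assert_eq!(&caps[2], "2"); // namespace id
    println!("instance {} namespace {}", &caps[1], &caps[2]);
}

The new pattern also widens the controller instance capture from `\d{1,3}` to `\d{1,5}` and captures the namespace id, so the sysfs queue path can be built per namespace rather than assuming `n1`.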

10 files changed: +258 -36 lines changed


control-plane/csi-driver/src/bin/node/dev.rs (+1 -1)

@@ -110,7 +110,7 @@ impl Device {
                 continue;
             }
 
-            if let Some(devname) = match_dev::match_nvmf_device(&device, &nvmf_key) {
+            if let Some(devname) = match_dev::match_nvmf_device_valid(&device, &nvmf_key)? {
                 let nqn = if std::env::var("MOAC").is_ok() {
                     format!("{}:nexus-{uuid}", nvme_target_nqn_prefix())
                 } else {

control-plane/csi-driver/src/bin/node/dev/nvmf.rs (+75 -21)

@@ -1,15 +1,14 @@
+use nvmeadm::{
+    error::NvmeError,
+    nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
+};
 use std::{
     collections::HashMap,
     convert::{From, TryFrom},
     path::Path,
     str::FromStr,
 };
 
-use nvmeadm::{
-    error::NvmeError,
-    nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
-};
-
 use csi_driver::PublishParams;
 use glob::glob;
 use nvmeadm::nvmf_subsystem::Subsystem;
@@ -29,7 +28,7 @@ use crate::{
 use super::{Attach, Detach, DeviceError, DeviceName};
 
 lazy_static::lazy_static! {
-    static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,3})n1").unwrap();
+    static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,5})n(\d{1,5})").unwrap();
 }
 
 pub(super) struct NvmfAttach {
@@ -43,6 +42,7 @@ pub(super) struct NvmfAttach {
     ctrl_loss_tmo: Option<u32>,
     keep_alive_tmo: Option<u32>,
     hostnqn: Option<String>,
+    warn_bad: std::sync::atomic::AtomicBool,
 }
 
 impl NvmfAttach {
@@ -70,6 +70,7 @@ impl NvmfAttach {
             ctrl_loss_tmo,
             keep_alive_tmo,
             hostnqn,
+            warn_bad: std::sync::atomic::AtomicBool::new(true),
         }
     }
 
@@ -80,13 +81,20 @@ impl NvmfAttach {
         enumerator.match_subsystem("block")?;
         enumerator.match_property("DEVTYPE", "disk")?;
 
+        let mut first_error = Ok(None);
         for device in enumerator.scan_devices()? {
-            if match_nvmf_device(&device, &key).is_some() {
-                return Ok(Some(device));
+            match match_device(&device, &key, &self.warn_bad) {
+                Ok(name) if name.is_some() => {
+                    return Ok(Some(device));
+                }
+                Err(error) if first_error.is_ok() => {
+                    first_error = Err(error);
+                }
+                _ => {}
             }
         }
 
-        Ok(None)
+        first_error
     }
 }
 
@@ -153,6 +161,12 @@ impl Attach for NvmfAttach {
         if let Some(keep_alive_tmo) = nvme_config.keep_alive_tmo() {
             self.keep_alive_tmo = Some(keep_alive_tmo);
         }
+        if self.io_tmo.is_none() {
+            if let Some(io_tmo) = publish_context.io_timeout() {
+                self.io_tmo = Some(*io_tmo);
+            }
+        }
+
         Ok(())
     }
 
@@ -221,18 +235,16 @@ impl Attach for NvmfAttach {
             .get_device()?
             .ok_or_else(|| DeviceError::new("NVMe device not found"))?;
         let dev_name = device.sysname().to_str().unwrap();
-        let major = DEVICE_REGEX
-            .captures(dev_name)
-            .ok_or_else(|| {
-                DeviceError::new(&format!(
-                    "NVMe device \"{}\" does not match \"{}\"",
-                    dev_name, *DEVICE_REGEX,
-                ))
-            })?
-            .get(1)
-            .unwrap()
-            .as_str();
-        let pattern = format!("/sys/class/block/nvme{major}c*n1/queue");
+        let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
+            DeviceError::new(&format!(
+                "NVMe device \"{}\" does not match \"{}\"",
+                dev_name, *DEVICE_REGEX,
+            ))
+        })?;
+        let major = captures.get(1).unwrap().as_str();
+        let nid = captures.get(2).unwrap().as_str();
+
+        let pattern = format!("/sys/class/block/nvme{major}c*n{nid}/queue");
         let glob = glob(&pattern).unwrap();
         let result = glob
             .into_iter()
@@ -302,6 +314,48 @@ impl Detach for NvmfDetach {
     }
 }
 
+/// Get the sysfs block device queue path for the given udev::Device.
+fn block_dev_q(device: &Device) -> Result<String, DeviceError> {
+    let dev_name = device.sysname().to_str().unwrap();
+    let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
+        DeviceError::new(&format!(
+            "NVMe device \"{}\" does not match \"{}\"",
+            dev_name, *DEVICE_REGEX,
+        ))
+    })?;
+    let major = captures.get(1).unwrap().as_str();
+    let nid = captures.get(2).unwrap().as_str();
+    Ok(format!("/sys/class/block/nvme{major}c*n{nid}/queue"))
+}
+
+/// Check if the given device is a valid NVMf device.
+/// # NOTE
+/// In older kernels when a device with an existing mount is lost, the nvmf controller
+/// is lost, but the block device remains, in a broken state.
+/// On newer kernels, the block device is also gone.
+pub(crate) fn match_device<'a>(
+    device: &'a Device,
+    key: &str,
+    warn_bad: &std::sync::atomic::AtomicBool,
+) -> Result<Option<&'a str>, DeviceError> {
+    let Some(devname) = match_nvmf_device(device, key) else {
+        return Ok(None);
+    };
+
+    let glob = glob(&block_dev_q(device)?).unwrap();
+    if !glob.into_iter().any(|glob_result| glob_result.is_ok()) {
+        if warn_bad.load(std::sync::atomic::Ordering::Relaxed) {
+            let name = device.sysname().to_string_lossy();
+            warn!("Block device {name} for volume {key} has no controller!");
+            // todo: shoot-down the stale mounts?
+            warn_bad.store(false, std::sync::atomic::Ordering::Relaxed);
+        }
+        return Ok(None);
+    }
+
+    Ok(Some(devname))
+}
+
 /// Check for the presence of nvme tcp kernel module.
 pub(crate) fn check_nvme_tcp_module() -> Result<(), std::io::Error> {
     let path = "/sys/module/nvme_tcp";
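The `match_device` function added above treats a udev match as valid only if a controller-backed queue path still exists in sysfs, which filters out the broken block devices that older kernels leave behind after the controller is lost. A hedged, standalone sketch of that check, assuming the `glob` crate as used by the driver (`has_live_controller` is an illustrative helper, not part of the codebase):

use glob::glob; // assumes glob = "0.3" in Cargo.toml

/// Illustrative helper: returns true if any controller-backed queue path
/// such as /sys/class/block/nvme0c0n2/queue exists for the given instance
/// and namespace id, i.e. the block device is not a stale leftover of a
/// lost nvmf controller.
fn has_live_controller(major: &str, nid: &str) -> bool {
    let pattern = format!("/sys/class/block/nvme{major}c*n{nid}/queue");
    glob(&pattern)
        .map(|mut paths| paths.any(|p| p.is_ok()))
        .unwrap_or(false)
}

fn main() {
    // On a host with a healthy nvme0n2 this prints true; in the broken,
    // lingering-subsystem case described in the commit it prints false.
    println!("{}", has_live_controller("0", "2"));
}

Note the `warn_bad` flag threaded through `NvmfAttach`: the warning fires once per attach attempt and is then suppressed, so a retry loop does not flood the log.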

control-plane/csi-driver/src/bin/node/match_dev.rs (+11 -0)

@@ -1,5 +1,6 @@
 //! Utility functions for matching a udev record against a known device type.
 
+use crate::error::DeviceError;
 use udev::Device;
 
 macro_rules! require {
@@ -72,3 +73,13 @@ pub(super) fn match_nvmf_device<'a>(device: &'a Device, key: &str) -> Option<&'a
 
     Some(devname)
 }
+
+/// Match the device, if it's a nvmf device, but only if it's a valid block device.
+/// See [`super::dev::nvmf::match_device`].
+pub(super) fn match_nvmf_device_valid<'a>(
+    device: &'a Device,
+    key: &str,
+) -> Result<Option<&'a str>, DeviceError> {
+    let cell = std::sync::atomic::AtomicBool::new(true);
+    super::dev::nvmf::match_device(device, key, &cell)
+}

shell.nix (+1 -1)

@@ -91,7 +91,7 @@ mkShell {
     [ ! -z "${io-engine}" ] && cowsay "${io-engine-moth}"
     [ ! -z "${io-engine}" ] && export IO_ENGINE_BIN="${io-engine-moth}"
     export PATH="$PATH:$(pwd)/target/debug"
-    export SUDO=$(which sudo || echo /run/wrappers/bin/sudo)
+    export SUDO=$(which sudo 2>/dev/null || echo /run/wrappers/bin/sudo)
 
     DOCKER_CONFIG=~/.docker/config.json
     if [ -f "$DOCKER_CONFIG" ]; then

tests/bdd/README.md (+18 -2)

@@ -2,61 +2,77 @@
 
 The BDD tests are written in Python and make use of the pytest-bdd library.
 
-The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax.
+The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are
+described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax.
 
-The feature files can be used to auto-generate the test file. For example running `pytest-bdd generate replicas.feature > test_volume_replicas.py`
+The feature files can be used to auto-generate the test file. For example
+running `pytest-bdd generate replicas.feature > test_volume_replicas.py`
 generates the `test_volume_replicas.py` test file from the `replicas.feature` file.
+When updating the feature file, you can also get some help updating the python code.
+Example: `pytest --generate-missing --feature node.feature test_node.py`
 
 **:warning: Note: Running pytest-bdd generate will overwrite any existing files with the same name**
 
 ## Running the Tests by entering the python virtual environment
+
 Before running any tests run the `setup.sh` script. This sets up the necessary environment to run the tests:
+
 ```bash
+# NOTE: you should be inside the nix-shell to begin
 source ./setup.sh
 ```
 
 To run all the tests:
+
 ```bash
 pytest .
 ```
 
 To run individual test files:
+
 ```bash
 pytest features/volume/create/test_feature.py
 ```
 
 To run an individual test within a test file use the `-k` option followed by the test name:
+
 ```bash
 pytest features/volume/create/test_feature.py -k test_sufficient_suitable_pools
 ```
 
 ## Running the Tests
+
 The script in `../../scripts/python/test.sh` can be used to run the tests without entering the venv.
 This script will implicitly enter and exit the venv during test execution.
 
 To run all the tests:
+
 ```bash
 ../../scripts/python/test.sh
 ```
 
 Arguments will be passed directly to pytest. Example running individual tests:
+
 ```bash
 ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools
 ```
 
 ## Debugging the Tests
+
 Typically, the test code cleans up after itself and so it's impossible to debug the test cluster.
 The environmental variable `CLEAN` can be set to `false` to skip tearing down the cluster when a test ends.
 It should be paired with the pytest option `-x` or `--exitfirst` to exit when the first test fails otherwise the other
 tests may end up tearing down the cluster anyway.
 
 Example:
+
 ```bash
 CLEAN=false ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x
 ```
 
 The script `../../scripts/python/test.sh` does a lot of repetitive work of regenerating the auto-generated code.
 This step can be skipped with the `FAST` environment variable to speed up the test cycle:
+
 ```bash
 FAST=true ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x
 ```

tests/bdd/common/deployer.py (+5 -0)

@@ -194,6 +194,11 @@ def node_name(id: int):
         assert id >= 0
         return f"io-engine-{id + 1}"
 
+    @staticmethod
+    def csi_node_name(id: int):
+        assert id >= 0
+        return f"csi-node-{id + 1}"
+
     @staticmethod
     def create_disks(len=1, size=100 * 1024 * 1024):
         host_tmp = workspace_tmp()

tests/bdd/common/docker.py (+6 -0)

@@ -67,6 +67,12 @@ def unpause_container(name):
         container = docker_client.containers.get(name)
         container.unpause()
 
+    @staticmethod
+    def execute(name, commands):
+        docker_client = docker.from_env()
+        container = docker_client.containers.get(name)
+        return container.exec_run(commands)
+
     # Restart a container with the given name.
     def restart_container(name):
         docker_client = docker.from_env()
tests/bdd/common/nvme.py (+18 -4)

@@ -307,21 +307,35 @@ def identify_namespace(device):
 
 def nvme_set_reconnect_delay_all(nqn, delay=10):
     for controller in nvme_find_controllers(nqn):
-        nvme_controller_set_reconnect_delay(controller, delay)
+        nvme_controller_set_param(controller, "reconnect_delay", delay)
 
 
 def nvme_set_reconnect_delay(uri, delay=10):
     controller = nvme_find_controller(uri)
-    nvme_controller_set_reconnect_delay(controller, delay)
+    nvme_controller_set_param(controller, "reconnect_delay", delay)
 
 
-def nvme_controller_set_reconnect_delay(controller, delay=10):
+def nvme_set_ctrl_loss_tmo(uri, ctrl_loss_tmo=10):
+    controller = nvme_find_controller(uri)
+    nvme_controller_set_param(controller, "ctrl_loss_tmo", ctrl_loss_tmo)
+
+
+def nvme_controller_set_param(controller, name: str, value: int):
     controller = controller.get("Controller")
-    command = f"echo {delay} | sudo tee -a /sys/class/nvme/{controller}/reconnect_delay"
+    command = f"echo {value} | sudo tee -a /sys/class/nvme/{controller}/{name}"
     print(command)
     subprocess.run(command, check=True, shell=True, capture_output=True)
 
 
 @retry(wait_fixed=100, stop_max_attempt_number=40)
 def wait_nvme_find_device(uri):
     return nvme_find_device(uri)
+
+
+@retry(wait_fixed=100, stop_max_attempt_number=40)
+def wait_nvme_gone_device(uri, nqn=None):
+    if nqn is None:
+        u = urlparse(uri)
+        nqn = u.path[1:]
+    devs = nvme_find_subsystem_devices(nqn)
+    assert len(devs) == 0

tests/bdd/features/csi/node/node.feature (+13 -0)

@@ -106,3 +106,16 @@ Feature: CSI node plugin
   Scenario: publishing a reader only block volume as rw
     Given a block volume staged as "MULTI_NODE_READER_ONLY"
     When publishing the block volume as "rw" should fail
+
+  Scenario: re-staging after controller loss timeout
+    Given a staged volume
+    When the kernel device is removed after a controller loss timeout
+    Then the volume should be unstageable
+    And the volume should be stageable again
+
+  Scenario: re-staging after controller loss timeout without unstaging
+    Given a staged volume
+    When the kernel device is removed after a controller loss timeout
+    Then the mounts become broken
+    But the volume should be stageable on a different path
+    And the nvme device should have a different controller and namespace
