Skip to content

fix(csi-node): handle devices for existing subsystems #892

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion control-plane/csi-driver/src/bin/node/dev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl Device {
continue;
}

if let Some(devname) = match_dev::match_nvmf_device(&device, &nvmf_key) {
if let Some(devname) = match_dev::match_nvmf_device_valid(&device, &nvmf_key)? {
let nqn = if std::env::var("MOAC").is_ok() {
format!("{}:nexus-{uuid}", nvme_target_nqn_prefix())
} else {
Expand Down
96 changes: 75 additions & 21 deletions control-plane/csi-driver/src/bin/node/dev/nvmf.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use nvmeadm::{
error::NvmeError,
nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
};
use std::{
collections::HashMap,
convert::{From, TryFrom},
path::Path,
str::FromStr,
};

use nvmeadm::{
error::NvmeError,
nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
};

use csi_driver::PublishParams;
use glob::glob;
use nvmeadm::nvmf_subsystem::Subsystem;
Expand All @@ -29,7 +28,7 @@ use crate::{
use super::{Attach, Detach, DeviceError, DeviceName};

lazy_static::lazy_static! {
static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,3})n1").unwrap();
static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,5})n(\d{1,5})").unwrap();
}

pub(super) struct NvmfAttach {
Expand All @@ -43,6 +42,7 @@ pub(super) struct NvmfAttach {
ctrl_loss_tmo: Option<u32>,
keep_alive_tmo: Option<u32>,
hostnqn: Option<String>,
warn_bad: std::sync::atomic::AtomicBool,
}

impl NvmfAttach {
Expand Down Expand Up @@ -70,6 +70,7 @@ impl NvmfAttach {
ctrl_loss_tmo,
keep_alive_tmo,
hostnqn,
warn_bad: std::sync::atomic::AtomicBool::new(true),
}
}

Expand All @@ -80,13 +81,20 @@ impl NvmfAttach {
enumerator.match_subsystem("block")?;
enumerator.match_property("DEVTYPE", "disk")?;

let mut first_error = Ok(None);
for device in enumerator.scan_devices()? {
if match_nvmf_device(&device, &key).is_some() {
return Ok(Some(device));
match match_device(&device, &key, &self.warn_bad) {
Ok(name) if name.is_some() => {
return Ok(Some(device));
}
Err(error) if first_error.is_ok() => {
first_error = Err(error);
}
_ => {}
}
}

Ok(None)
first_error
}
}

Expand Down Expand Up @@ -153,6 +161,12 @@ impl Attach for NvmfAttach {
if let Some(keep_alive_tmo) = nvme_config.keep_alive_tmo() {
self.keep_alive_tmo = Some(keep_alive_tmo);
}
if self.io_tmo.is_none() {
if let Some(io_tmo) = publish_context.io_timeout() {
self.io_tmo = Some(*io_tmo);
}
}

Ok(())
}

Expand Down Expand Up @@ -221,18 +235,16 @@ impl Attach for NvmfAttach {
.get_device()?
.ok_or_else(|| DeviceError::new("NVMe device not found"))?;
let dev_name = device.sysname().to_str().unwrap();
let major = DEVICE_REGEX
.captures(dev_name)
.ok_or_else(|| {
DeviceError::new(&format!(
"NVMe device \"{}\" does not match \"{}\"",
dev_name, *DEVICE_REGEX,
))
})?
.get(1)
.unwrap()
.as_str();
let pattern = format!("/sys/class/block/nvme{major}c*n1/queue");
let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
DeviceError::new(&format!(
"NVMe device \"{}\" does not match \"{}\"",
dev_name, *DEVICE_REGEX,
))
})?;
let major = captures.get(1).unwrap().as_str();
let nid = captures.get(2).unwrap().as_str();

let pattern = format!("/sys/class/block/nvme{major}c*n{nid}/queue");
let glob = glob(&pattern).unwrap();
let result = glob
.into_iter()
Expand Down Expand Up @@ -302,6 +314,48 @@ impl Detach for NvmfDetach {
}
}

/// Get the sysfs block device queue path for the given udev::Device.
fn block_dev_q(device: &Device) -> Result<String, DeviceError> {
let dev_name = device.sysname().to_str().unwrap();
let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
DeviceError::new(&format!(
"NVMe device \"{}\" does not match \"{}\"",
dev_name, *DEVICE_REGEX,
))
})?;
let major = captures.get(1).unwrap().as_str();
let nid = captures.get(2).unwrap().as_str();
Ok(format!("/sys/class/block/nvme{major}c*n{nid}/queue"))
}

/// Check if the given device is a valid NVMf device.
/// # NOTE
/// In older kernels when a device with an existing mount is lost, the nvmf controller
/// is lost, but the block device remains, in a broken state.
/// On newer kernels, the block device is also gone.
pub(crate) fn match_device<'a>(
device: &'a Device,
key: &str,
warn_bad: &std::sync::atomic::AtomicBool,
) -> Result<Option<&'a str>, DeviceError> {
let Some(devname) = match_nvmf_device(device, key) else {
return Ok(None);
};

let glob = glob(&block_dev_q(device)?).unwrap();
if !glob.into_iter().any(|glob_result| glob_result.is_ok()) {
if warn_bad.load(std::sync::atomic::Ordering::Relaxed) {
let name = device.sysname().to_string_lossy();
warn!("Block device {name} for volume {key} has no controller!");
// todo: shoot-down the stale mounts?
warn_bad.store(false, std::sync::atomic::Ordering::Relaxed);
}
return Ok(None);
}

Ok(Some(devname))
}

/// Check for the presence of nvme tcp kernel module.
pub(crate) fn check_nvme_tcp_module() -> Result<(), std::io::Error> {
let path = "/sys/module/nvme_tcp";
Expand Down
11 changes: 11 additions & 0 deletions control-plane/csi-driver/src/bin/node/match_dev.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Utility functions for matching a udev record against a known device type.

use crate::error::DeviceError;
use udev::Device;

macro_rules! require {
Expand Down Expand Up @@ -72,3 +73,13 @@ pub(super) fn match_nvmf_device<'a>(device: &'a Device, key: &str) -> Option<&'a

Some(devname)
}

/// Match the device, if it's a nvmef device, but only if it's a valid block device.
/// See [`super::dev::nvmf::match_device`].
pub(super) fn match_nvmf_device_valid<'a>(
device: &'a Device,
key: &str,
) -> Result<Option<&'a str>, DeviceError> {
let cell = std::sync::atomic::AtomicBool::new(true);
super::dev::nvmf::match_device(device, key, &cell)
}
2 changes: 1 addition & 1 deletion shell.nix
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ mkShell {
[ ! -z "${io-engine}" ] && cowsay "${io-engine-moth}"
[ ! -z "${io-engine}" ] && export IO_ENGINE_BIN="${io-engine-moth}"
export PATH="$PATH:$(pwd)/target/debug"
export SUDO=$(which sudo || echo /run/wrappers/bin/sudo)
export SUDO=$(which sudo 2>/dev/null || echo /run/wrappers/bin/sudo)

DOCKER_CONFIG=~/.docker/config.json
if [ -f "$DOCKER_CONFIG" ]; then
Expand Down
20 changes: 18 additions & 2 deletions tests/bdd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,61 +2,77 @@

The BDD tests are written in Python and make use of the pytest-bdd library.

The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax.
The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are
described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax.

The feature files can be used to auto-generate the test file. For example running `pytest-bdd generate replicas.feature > test_volume_replicas.py`
The feature files can be used to auto-generate the test file. For example
running `pytest-bdd generate replicas.feature > test_volume_replicas.py`
generates the `test_volume_replicas.py` test file from the `replicas.feature` file.
When updating the feature file, you can also get some helpe updating the python code.
Example: `pytest --generate-missing --feature node.feature test_node.py`

**:warning: Note: Running pytest-bdd generate will overwrite any existing files with the same name**

## Running the Tests by entering the python virtual environment

Before running any tests run the `setup.sh` script. This sets up the necessary environment to run the tests:

```bash
# NOTE: you should be inside the nix-shell to begin
source ./setup.sh
```

To run all the tests:

```bash
pytest .
```

To run individual test files:

```bash
pytest features/volume/create/test_feature.py
```

To run an individual test within a test file use the `-k` option followed by the test name:

```bash
pytest features/volume/create/test_feature.py -k test_sufficient_suitable_pools
```

## Running the Tests

The script in `../../scripts/python/test.sh` can be used to run the tests without entering the venv.
This script will implicitly enter and exit the venv during test execution.

To run all the tests:

```bash
../../scripts/python/test.sh
```

Arguments will be passed directly to pytest. Example running individual tests:

```bash
../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools
```

## Debugging the Tests

Typically, the test code cleans up after itself and so it's impossible to debug the test cluster.
The environmental variable `CLEAN` can be set to `false` to skip tearing down the cluster when a test ends.
It should be paired with the pytest option `-x` or `--exitfirst` to exit when the first test fails otherwise the other
tests may end up tearing down the cluster anyway.

Example:

```bash
CLEAN=false ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x
```

The script `../../scripts/python/test.sh` does a lot of repetitive work of regenerating the auto-generated code.
This step can be skipped with the `FAST` environment variable to speed up the test cycle:

```bash
FAST=true ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x
```
5 changes: 5 additions & 0 deletions tests/bdd/common/deployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ def node_name(id: int):
assert id >= 0
return f"io-engine-{id + 1}"

@staticmethod
def csi_node_name(id: int):
assert id >= 0
return f"csi-node-{id + 1}"

@staticmethod
def create_disks(len=1, size=100 * 1024 * 1024):
host_tmp = workspace_tmp()
Expand Down
6 changes: 6 additions & 0 deletions tests/bdd/common/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def unpause_container(name):
container = docker_client.containers.get(name)
container.unpause()

@staticmethod
def execute(name, commands):
docker_client = docker.from_env()
container = docker_client.containers.get(name)
return container.exec_run(commands)

# Restart a container with the given name.
def restart_container(name):
docker_client = docker.from_env()
Expand Down
22 changes: 18 additions & 4 deletions tests/bdd/common/nvme.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,21 +307,35 @@ def identify_namespace(device):

def nvme_set_reconnect_delay_all(nqn, delay=10):
for controller in nvme_find_controllers(nqn):
nvme_controller_set_reconnect_delay(controller, delay)
nvme_controller_set_param(controller, "reconnect_delay", delay)


def nvme_set_reconnect_delay(uri, delay=10):
controller = nvme_find_controller(uri)
nvme_controller_set_reconnect_delay(controller, delay)
nvme_controller_set_param(controller, "reconnect_delay", delay)


def nvme_controller_set_reconnect_delay(controller, delay=10):
def nvme_set_ctrl_loss_tmo(uri, ctrl_loss_tmo=10):
controller = nvme_find_controller(uri)
nvme_controller_set_param(controller, "ctrl_loss_tmo", ctrl_loss_tmo)


def nvme_controller_set_param(controller, name: str, value: int):
controller = controller.get("Controller")
command = f"echo {delay} | sudo tee -a /sys/class/nvme/{controller}/reconnect_delay"
command = f"echo {value} | sudo tee -a /sys/class/nvme/{controller}/{name}"
print(command)
subprocess.run(command, check=True, shell=True, capture_output=True)


@retry(wait_fixed=100, stop_max_attempt_number=40)
def wait_nvme_find_device(uri):
return nvme_find_device(uri)


@retry(wait_fixed=100, stop_max_attempt_number=40)
def wait_nvme_gone_device(uri, nqn=None):
if nqn is None:
u = urlparse(uri)
nqn = u.path[1:]
devs = nvme_find_subsystem_devices(nqn)
assert len(devs) == 0
13 changes: 13 additions & 0 deletions tests/bdd/features/csi/node/node.feature
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,16 @@ Feature: CSI node plugin
Scenario: publishing a reader only block volume as rw
Given a block volume staged as "MULTI_NODE_READER_ONLY"
When publishing the block volume as "rw" should fail

Scenario: re-staging after controller loss timeout
Given a staged volume
When the kernel device is removed after a controller loss timeout
Then the volume should be unstageable
And the volume should be stageable again

Scenario: re-staging after controller loss timeout without unstaging
Given a staged volume
When the kernel device is removed after a controller loss timeout
Then the mounts become broken
But the volume should be stageable on a different path
And the nvme device should have a different controller and namespace
Loading
Loading