diff --git a/control-plane/csi-driver/src/bin/node/dev.rs b/control-plane/csi-driver/src/bin/node/dev.rs
index db8e1172c..2872a69a5 100644
--- a/control-plane/csi-driver/src/bin/node/dev.rs
+++ b/control-plane/csi-driver/src/bin/node/dev.rs
@@ -110,7 +110,7 @@ impl Device {
                 continue;
             }
 
-            if let Some(devname) = match_dev::match_nvmf_device(&device, &nvmf_key) {
+            if let Some(devname) = match_dev::match_nvmf_device_valid(&device, &nvmf_key)? {
                 let nqn = if std::env::var("MOAC").is_ok() {
                     format!("{}:nexus-{uuid}", nvme_target_nqn_prefix())
                 } else {
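Reviewer note (not part of the patch): the call site above now propagates lookup failures with `?` instead of silently skipping them. The `get_device` loop in `nvmf.rs` below implements the matching policy: keep scanning past broken candidates, return the first valid match, and only surface the first recorded error when nothing matched. A minimal Python sketch of that policy, with illustrative names:

```python
# Sketch of the "first valid match, else first error" scan performed by
# NvmfAttach::get_device below (names here are illustrative only).
def find_first_valid(devices, is_valid):
    first_error = None
    for dev in devices:
        try:
            if is_valid(dev):
                return dev  # first healthy match wins
        except Exception as error:  # sketch only; the driver keeps a typed error
            if first_error is None:
                first_error = error  # remember only the first failure
    if first_error is not None:
        raise first_error  # nothing matched: report why the scan failed
    return None  # nothing matched and nothing failed
```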
diff --git a/control-plane/csi-driver/src/bin/node/dev/nvmf.rs b/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
index c9549f980..93ba6636e 100644
--- a/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
+++ b/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
@@ -1,3 +1,7 @@
+use nvmeadm::{
+    error::NvmeError,
+    nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
+};
 use std::{
     collections::HashMap,
     convert::{From, TryFrom},
@@ -5,11 +9,6 @@ use std::{
     str::FromStr,
 };
 
-use nvmeadm::{
-    error::NvmeError,
-    nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
-};
-
 use csi_driver::PublishParams;
 use glob::glob;
 use nvmeadm::nvmf_subsystem::Subsystem;
@@ -29,7 +28,7 @@ use crate::{
 use super::{Attach, Detach, DeviceError, DeviceName};
 
 lazy_static::lazy_static! {
-    static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,3})n1").unwrap();
+    static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,5})n(\d{1,5})").unwrap();
 }
 
 pub(super) struct NvmfAttach {
@@ -43,6 +42,7 @@ pub(super) struct NvmfAttach {
     ctrl_loss_tmo: Option<u32>,
     keep_alive_tmo: Option<u32>,
     hostnqn: Option<String>,
+    warn_bad: std::sync::atomic::AtomicBool,
 }
 
 impl NvmfAttach {
@@ -70,6 +70,7 @@ impl NvmfAttach {
             ctrl_loss_tmo,
             keep_alive_tmo,
             hostnqn,
+            warn_bad: std::sync::atomic::AtomicBool::new(true),
         }
     }
 
@@ -80,13 +81,20 @@ impl NvmfAttach {
         enumerator.match_subsystem("block")?;
         enumerator.match_property("DEVTYPE", "disk")?;
 
+        let mut first_error = Ok(None);
         for device in enumerator.scan_devices()? {
-            if match_nvmf_device(&device, &key).is_some() {
-                return Ok(Some(device));
+            match match_device(&device, &key, &self.warn_bad) {
+                Ok(name) if name.is_some() => {
+                    return Ok(Some(device));
+                }
+                Err(error) if first_error.is_ok() => {
+                    first_error = Err(error);
+                }
+                _ => {}
             }
         }
 
-        Ok(None)
+        first_error
     }
 }
 
@@ -153,6 +161,12 @@ impl Attach for NvmfAttach {
         if let Some(keep_alive_tmo) = nvme_config.keep_alive_tmo() {
             self.keep_alive_tmo = Some(keep_alive_tmo);
         }
+        if self.io_tmo.is_none() {
+            if let Some(io_tmo) = publish_context.io_timeout() {
+                self.io_tmo = Some(*io_tmo);
+            }
+        }
+
         Ok(())
     }
 
@@ -221,18 +235,16 @@
             .get_device()?
             .ok_or_else(|| DeviceError::new("NVMe device not found"))?;
         let dev_name = device.sysname().to_str().unwrap();
-        let major = DEVICE_REGEX
-            .captures(dev_name)
-            .ok_or_else(|| {
-                DeviceError::new(&format!(
-                    "NVMe device \"{}\" does not match \"{}\"",
-                    dev_name, *DEVICE_REGEX,
-                ))
-            })?
-            .get(1)
-            .unwrap()
-            .as_str();
-        let pattern = format!("/sys/class/block/nvme{major}c*n1/queue");
+        let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
+            DeviceError::new(&format!(
+                "NVMe device \"{}\" does not match \"{}\"",
+                dev_name, *DEVICE_REGEX,
+            ))
+        })?;
+        let major = captures.get(1).unwrap().as_str();
+        let nid = captures.get(2).unwrap().as_str();
+
+        let pattern = format!("/sys/class/block/nvme{major}c*n{nid}/queue");
         let glob = glob(&pattern).unwrap();
         let result = glob
             .into_iter()
@@ -302,6 +314,48 @@ impl Detach for NvmfDetach {
     }
 }
 
+/// Get the sysfs block device queue path for the given udev::Device.
+fn block_dev_q(device: &Device) -> Result<String, DeviceError> {
+    let dev_name = device.sysname().to_str().unwrap();
+    let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
+        DeviceError::new(&format!(
+            "NVMe device \"{}\" does not match \"{}\"",
+            dev_name, *DEVICE_REGEX,
+        ))
+    })?;
+    let major = captures.get(1).unwrap().as_str();
+    let nid = captures.get(2).unwrap().as_str();
+    Ok(format!("/sys/class/block/nvme{major}c*n{nid}/queue"))
+}
+
+/// Check if the given device is a valid NVMf device.
+/// # NOTE
+/// In older kernels when a device with an existing mount is lost, the nvmf controller
+/// is lost, but the block device remains, in a broken state.
+/// On newer kernels, the block device is also gone.
+pub(crate) fn match_device<'a>(
+    device: &'a Device,
+    key: &str,
+    warn_bad: &std::sync::atomic::AtomicBool,
+) -> Result<Option<&'a str>, DeviceError> {
+    let Some(devname) = match_nvmf_device(device, key) else {
+        return Ok(None);
+    };
+
+    let glob = glob(&block_dev_q(device)?).unwrap();
+    if !glob.into_iter().any(|glob_result| glob_result.is_ok()) {
+        if warn_bad.load(std::sync::atomic::Ordering::Relaxed) {
+            let name = device.sysname().to_string_lossy();
+            warn!("Block device {name} for volume {key} has no controller!");
+            // todo: shoot-down the stale mounts?
+            warn_bad.store(false, std::sync::atomic::Ordering::Relaxed);
+        }
+        return Ok(None);
+    }
+
+    Ok(Some(devname))
+}
+
 /// Check for the presence of nvme tcp kernel module.
 pub(crate) fn check_nvme_tcp_module() -> Result<(), std::io::Error> {
     let path = "/sys/module/nvme_tcp";
diff --git a/control-plane/csi-driver/src/bin/node/match_dev.rs b/control-plane/csi-driver/src/bin/node/match_dev.rs
index 4d42f7476..652159c2d 100644
--- a/control-plane/csi-driver/src/bin/node/match_dev.rs
+++ b/control-plane/csi-driver/src/bin/node/match_dev.rs
@@ -1,5 +1,6 @@
 //! Utility functions for matching a udev record against a known device type.
 
+use crate::error::DeviceError;
 use udev::Device;
 
 macro_rules! require {
@@ -72,3 +73,13 @@ pub(super) fn match_nvmf_device<'a>(device: &'a Device, key: &str) -> Option<&'a str> {
 
     Some(devname)
 }
+
+/// Match the device, if it's an nvmf device, but only if it's a valid block device.
+/// See [`super::dev::nvmf::match_device`].
+pub(super) fn match_nvmf_device_valid<'a>(
+    device: &'a Device,
+    key: &str,
+) -> Result<Option<&'a str>, DeviceError> {
+    let cell = std::sync::atomic::AtomicBool::new(true);
+    super::dev::nvmf::match_device(device, key, &cell)
+}
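Reviewer note (not part of the patch): the validity check added above is easy to miss. The widened `DEVICE_REGEX` now captures both the controller/subsystem instance and the namespace id (the old pattern only matched `nvmeXn1`), and a namespace `nvme<X>n<Y>` is only treated as healthy if at least one per-controller path `nvme<X>c<C>n<Y>` still exists in sysfs. A standalone Python sketch of the same idea, under the assumption that device names follow the kernel's `nvmeXnY` scheme:

```python
import glob
import re

def has_live_controller(dev_name: str) -> bool:
    """Return True if the NVMe namespace still has a controller path in sysfs."""
    # Same pattern as the Rust DEVICE_REGEX: capture 1 is the controller/
    # subsystem instance, capture 2 the namespace id.
    m = re.match(r"nvme(\d{1,5})n(\d{1,5})$", dev_name)
    if m is None:
        raise ValueError(f"{dev_name} is not an NVMe namespace device")
    major, nid = m.group(1), m.group(2)
    # On older kernels a lost controller leaves this glob empty while
    # /dev/<dev_name> lingers in a broken state; match_device() then reports
    # the device as absent instead of returning a stale match.
    return any(glob.glob(f"/sys/class/block/nvme{major}c*n{nid}/queue"))
```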
-z "${io-engine}" ] && export IO_ENGINE_BIN="${io-engine-moth}" export PATH="$PATH:$(pwd)/target/debug" - export SUDO=$(which sudo || echo /run/wrappers/bin/sudo) + export SUDO=$(which sudo 2>/dev/null || echo /run/wrappers/bin/sudo) DOCKER_CONFIG=~/.docker/config.json if [ -f "$DOCKER_CONFIG" ]; then diff --git a/tests/bdd/README.md b/tests/bdd/README.md index 838f9b976..24c6651e6 100644 --- a/tests/bdd/README.md +++ b/tests/bdd/README.md @@ -2,61 +2,77 @@ The BDD tests are written in Python and make use of the pytest-bdd library. -The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax. +The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are +described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax. -The feature files can be used to auto-generate the test file. For example running `pytest-bdd generate replicas.feature > test_volume_replicas.py` +The feature files can be used to auto-generate the test file. For example +running `pytest-bdd generate replicas.feature > test_volume_replicas.py` generates the `test_volume_replicas.py` test file from the `replicas.feature` file. +When updating the feature file, you can also get some helpe updating the python code. +Example: `pytest --generate-missing --feature node.feature test_node.py` **:warning: Note: Running pytest-bdd generate will overwrite any existing files with the same name** ## Running the Tests by entering the python virtual environment + Before running any tests run the `setup.sh` script. This sets up the necessary environment to run the tests: + ```bash +# NOTE: you should be inside the nix-shell to begin source ./setup.sh ``` To run all the tests: + ```bash pytest . ``` To run individual test files: + ```bash pytest features/volume/create/test_feature.py ``` To run an individual test within a test file use the `-k` option followed by the test name: + ```bash pytest features/volume/create/test_feature.py -k test_sufficient_suitable_pools ``` ## Running the Tests + The script in `../../scripts/python/test.sh` can be used to run the tests without entering the venv. This script will implicitly enter and exit the venv during test execution. To run all the tests: + ```bash ../../scripts/python/test.sh ``` Arguments will be passed directly to pytest. Example running individual tests: + ```bash ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools ``` ## Debugging the Tests + Typically, the test code cleans up after itself and so it's impossible to debug the test cluster. The environmental variable `CLEAN` can be set to `false` to skip tearing down the cluster when a test ends. It should be paired with the pytest option `-x` or `--exitfirst` to exit when the first test fails otherwise the other tests may end up tearing down the cluster anyway. Example: + ```bash CLEAN=false ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x ``` The script `../../scripts/python/test.sh` does a lot of repetitive work of regenerating the auto-generated code. 
 This step can be skipped with the `FAST` environment variable to speed up the test cycle:
+
 ```bash
 FAST=true ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x
 ```
diff --git a/tests/bdd/common/deployer.py b/tests/bdd/common/deployer.py
index 5b745f14c..e278dd9d2 100644
--- a/tests/bdd/common/deployer.py
+++ b/tests/bdd/common/deployer.py
@@ -194,6 +194,11 @@ def node_name(id: int):
         assert id >= 0
         return f"io-engine-{id + 1}"
 
+    @staticmethod
+    def csi_node_name(id: int):
+        assert id >= 0
+        return f"csi-node-{id + 1}"
+
     @staticmethod
     def create_disks(len=1, size=100 * 1024 * 1024):
         host_tmp = workspace_tmp()
diff --git a/tests/bdd/common/docker.py b/tests/bdd/common/docker.py
index 73377a4a8..beb566252 100644
--- a/tests/bdd/common/docker.py
+++ b/tests/bdd/common/docker.py
@@ -67,6 +67,12 @@ def unpause_container(name):
         container = docker_client.containers.get(name)
         container.unpause()
 
+    @staticmethod
+    def execute(name, commands):
+        docker_client = docker.from_env()
+        container = docker_client.containers.get(name)
+        return container.exec_run(commands)
+
     # Restart a container with the given name.
     def restart_container(name):
         docker_client = docker.from_env()
diff --git a/tests/bdd/common/nvme.py b/tests/bdd/common/nvme.py
index 65864091c..1e6a87f8c 100644
--- a/tests/bdd/common/nvme.py
+++ b/tests/bdd/common/nvme.py
@@ -307,17 +307,22 @@ def identify_namespace(device):
 
 def nvme_set_reconnect_delay_all(nqn, delay=10):
     for controller in nvme_find_controllers(nqn):
-        nvme_controller_set_reconnect_delay(controller, delay)
+        nvme_controller_set_param(controller, "reconnect_delay", delay)
 
 
 def nvme_set_reconnect_delay(uri, delay=10):
     controller = nvme_find_controller(uri)
-    nvme_controller_set_reconnect_delay(controller, delay)
+    nvme_controller_set_param(controller, "reconnect_delay", delay)
 
 
-def nvme_controller_set_reconnect_delay(controller, delay=10):
+def nvme_set_ctrl_loss_tmo(uri, ctrl_loss_tmo=10):
+    controller = nvme_find_controller(uri)
+    nvme_controller_set_param(controller, "ctrl_loss_tmo", ctrl_loss_tmo)
+
+
+def nvme_controller_set_param(controller, name: str, value: int):
     controller = controller.get("Controller")
-    command = f"echo {delay} | sudo tee -a /sys/class/nvme/{controller}/reconnect_delay"
+    command = f"echo {value} | sudo tee -a /sys/class/nvme/{controller}/{name}"
     print(command)
     subprocess.run(command, check=True, shell=True, capture_output=True)
 
@@ -325,3 +330,12 @@ def nvme_controller_set_reconnect_delay(controller, delay=10):
 @retry(wait_fixed=100, stop_max_attempt_number=40)
 def wait_nvme_find_device(uri):
     return nvme_find_device(uri)
+
+
+@retry(wait_fixed=100, stop_max_attempt_number=40)
+def wait_nvme_gone_device(uri, nqn=None):
+    if nqn is None:
+        u = urlparse(uri)
+        nqn = u.path[1:]
+    devs = nvme_find_subsystem_devices(nqn)
+    assert len(devs) == 0
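Reviewer note (not part of the patch): the new helpers above are what the BDD steps below lean on. A quick usage sketch, assuming `uri` is an nvmf device URI and `/mnt` is a hypothetical staging path:

```python
from common.deployer import Deployer
from common.docker import Docker
from common.nvme import (
    nvme_set_reconnect_delay,
    nvme_set_ctrl_loss_tmo,
    wait_nvme_gone_device,
)

def drop_device(uri: str):
    nvme_set_reconnect_delay(uri, 1)  # retry the connection quickly...
    nvme_set_ctrl_loss_tmo(uri, 1)    # ...but give the controller up after 1s
    # (the test then deletes the volume target via the REST API here)
    wait_nvme_gone_device(uri)        # retries until no subsystem devices remain

# Docker.execute runs a command inside a test container, e.g. on the CSI node:
result = Docker.execute(Deployer.csi_node_name(0), "findmnt /mnt -osource -rnf")
print(result.exit_code, result.output)
```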
diff --git a/tests/bdd/features/csi/node/node.feature b/tests/bdd/features/csi/node/node.feature
index 1fa97bf91..7aef8c7b1 100644
--- a/tests/bdd/features/csi/node/node.feature
+++ b/tests/bdd/features/csi/node/node.feature
@@ -106,3 +106,16 @@ Feature: CSI node plugin
   Scenario: publishing a reader only block volume as rw
     Given a block volume staged as "MULTI_NODE_READER_ONLY"
     When publishing the block volume as "rw" should fail
+
+  Scenario: re-staging after controller loss timeout
+    Given a staged volume
+    When the kernel device is removed after a controller loss timeout
+    Then the volume should be unstageable
+    And the volume should be stageable again
+
+  Scenario: re-staging after controller loss timeout without unstaging
+    Given a staged volume
+    When the kernel device is removed after a controller loss timeout
+    Then the mounts become broken
+    But the volume should be stageable on a different path
+    And the nvme device should have a different controller and namespace
\ No newline at end of file
diff --git a/tests/bdd/features/csi/node/test_node.py b/tests/bdd/features/csi/node/test_node.py
index c0146c246..b6f96108a 100644
--- a/tests/bdd/features/csi/node/test_node.py
+++ b/tests/bdd/features/csi/node/test_node.py
@@ -1,3 +1,7 @@
+import http
+import time
+from pathlib import Path
+
 import pytest
 from pytest_bdd import given, scenario, then, when, parsers
 
@@ -6,11 +10,13 @@
 import grpc
 import csi_pb2 as pb
 
+import openapi
 from common import disk_pool_label
 from common.apiclient import ApiClient
 from common.csi import CsiHandle
 from common.deployer import Deployer
+from common.docker import Docker
 from openapi.model.create_pool_body import CreatePoolBody
 from openapi.model.publish_volume_body import PublishVolumeBody
 from common.operations import Volume as VolumeOps
@@ -18,10 +24,18 @@
 from openapi.model.create_volume_body import CreateVolumeBody
 from openapi.model.volume_policy import VolumePolicy
 from openapi.model.volume_share_protocol import VolumeShareProtocol
+from common.nvme import (
+    nvme_find_device,
+    nvme_set_reconnect_delay,
+    nvme_set_ctrl_loss_tmo,
+    wait_nvme_gone_device,
+    nvme_find_controller,
+)
 
 POOL1_UUID = "ec176677-8202-4199-b461-2b68e53a055f"
 NODE1 = "io-engine-1"
 VOLUME_SIZE = 32 * 1024 * 1024
+FS_TYPE = "ext4"
 
 
 class Nexus:
@@ -274,12 +288,25 @@ def test_unstaging_a_single_writer_volume():
     """Unstaging a single writer volume."""
 
 
+@scenario("node.feature", "re-staging after controller loss timeout")
+def test_restaging_after_controller_loss_timeout():
+    """re-staging after controller loss timeout."""
+
+
+@scenario("node.feature", "re-staging after controller loss timeout without unstaging")
+def test_restaging_after_controller_loss_timeout_without_unstaging():
+    """re-staging after controller loss timeout without unstaging."""
+
+
 @pytest.fixture
 def published_nexuses(setup, volumes):
     published = {}
     yield published
     for uuid in published.keys():
-        ApiClient.volumes_api().del_volume_target(uuid)
+        try:
+            ApiClient.volumes_api().del_volume_target(uuid)
+        except openapi.ApiException as e:
+            assert e.status == http.HTTPStatus.PRECONDITION_FAILED
 
 
 @pytest.fixture
@@ -288,7 +315,10 @@ def publish(uuid, protocol):
         volume = ApiClient.volumes_api().put_volume_target(
             uuid,
             publish_volume_body=PublishVolumeBody(
-                {}, VolumeShareProtocol("nvmf"), node=NODE1, frontend_node=""
+                {},
+                VolumeShareProtocol("nvmf"),
+                node=NODE1,
+                frontend_node=Deployer.csi_node_name(0),
             ),
         )
         nexus = Nexus(uuid, protocol, volume.state["target"]["device_uri"])
@@ -427,7 +457,7 @@ def generic_staged_volume(get_published_nexus, stage_volume, staging_target_path
         nexus.uri,
         "MULTI_NODE_SINGLE_WRITER",
         staging_target_path,
-        "ext4",
+        FS_TYPE,
     )
     stage_volume(volume)
     return volume
@@ -467,7 +497,7 @@ def attempt_to_stage_volume_with_missing_staging_target_path(
                 mode=pb.VolumeCapability.AccessMode.Mode.MULTI_NODE_SINGLE_WRITER
             ),
             mount=pb.VolumeCapability.MountVolume(
-                fs_type="ext4", mount_flags=[]
+                fs_type=FS_TYPE, mount_flags=[]
            ),
        ),
        secrets={},
@@ -510,7 +540,7 @@ def attempt_to_stage_volume_with_missing_volume_id(
                 mode=pb.VolumeCapability.AccessMode.Mode.MULTI_NODE_SINGLE_WRITER
             ),
             mount=pb.VolumeCapability.MountVolume(
-                fs_type="ext4", mount_flags=[]
+                fs_type=FS_TYPE, mount_flags=[]
             ),
         ),
         secrets={},
@@ -533,7 +563,7 @@ def attempt_to_stage_volume_with_missing_access_mode(
             staging_target_path=staging_target_path,
             volume_capability=pb.VolumeCapability(
                 mount=pb.VolumeCapability.MountVolume(
-                    fs_type="ext4", mount_flags=[]
+                    fs_type=FS_TYPE, mount_flags=[]
                 )
             ),
             secrets={},
@@ -626,7 +656,7 @@ def attempt_to_stage_different_volume_with_same_staging_target_path(
                 nexus.uri,
                 volume.mode,
                 volume.staging_target_path,
-                "ext4",
+                FS_TYPE,
             )
         )
     assert error.value.code() == grpc.StatusCode.ALREADY_EXISTS
@@ -733,3 +763,76 @@ def publish_block_volume_as_read_or_write(
 @then(parsers.parse("the request should {disposition}"))
 def request_success_expected(disposition):
     return disposition == "succeed"
+
+
+@when("the kernel device is removed after a controller loss timeout")
+def _(generic_staged_volume):
+    """the kernel device is removed after a controller loss timeout."""
+    uri = generic_staged_volume.uri
+    nvme_set_reconnect_delay(uri, 1)
+    nvme_set_ctrl_loss_tmo(uri, 1)
+    ApiClient.volumes_api().del_volume_target(generic_staged_volume.uuid)
+    wait_nvme_gone_device(uri)
+
+
+@then("the volume should be stageable again")
+def _(publish_nexus, generic_staged_volume, stage_volume):
+    """the volume should be stageable again."""
+    publish_nexus(generic_staged_volume.uuid, generic_staged_volume.protocol)
+    stage_volume(generic_staged_volume)
+    uri = generic_staged_volume.uri
+    device = nvme_find_device(uri)
+    print(device)
+
+
+@then("the volume should be unstageable")
+def _(csi_instance, generic_staged_volume, staged_volumes):
+    """the volume should be unstageable."""
+    volume = generic_staged_volume
+    csi_instance.node.NodeUnstageVolume(
+        pb.NodeUnstageVolumeRequest(
+            volume_id=volume.uuid, staging_target_path=volume.staging_target_path
+        )
+    )
+    del staged_volumes[volume.uuid]
+
+
+@then("the mounts become broken", target_fixture="lost_device")
+def _(generic_staged_volume):
+    """the mounts become broken."""
+    result = Docker.execute(
+        Deployer.csi_node_name(0),
+        f"findmnt {generic_staged_volume.staging_target_path} -osource -rnf",
+    )
+    assert result.exit_code == 0, f"{result.output}"
+    device = str(result.output, "utf-8").removesuffix("\n")
+    # on older kernels, the controller is gone, but the device is still present (broken)
+    # assert not Path(device).exists(), "device must be gone"
+    wait_nvme_gone_device(generic_staged_volume.uri)
+    assert not Path(f"/sys/fs/{FS_TYPE}/{device}").exists(), "mount must be broken"
+    yield device
+    Docker.execute(
+        Deployer.csi_node_name(0),
+        f"umount --force {generic_staged_volume.staging_target_path}",
+    )
+
+
+@then("the volume should be stageable on a different path", target_fixture="new_device")
+def _(publish_nexus, generic_staged_volume, stage_volume):
+    """the volume should be stageable on a different path."""
+    volume = generic_staged_volume
+    volume.staging_target_path = f"{volume.staging_target_path}-2"
+    publish_nexus(volume.uuid, volume.protocol)
+    stage_volume(volume)
+    yield nvme_find_device(volume.uri)
+
+
+@then("the nvme device should have a different controller and namespace")
+def _(generic_staged_volume, lost_device, new_device):
+    """the nvme device should have a different controller and namespace."""
+    assert lost_device != new_device
+    print(f"{lost_device} => {new_device}")
+    ctrl = nvme_find_controller(generic_staged_volume.uri)
+    assert not new_device.startswith(ctrl.get("Controller")), "Different controller"
new_device.split("n")[1], "Same subsystem" + assert lost_device.split("n")[2] != new_device.split("n")[2], "Different ns"