From 6b4224799c09c976df326fb0637b9d63b78f4691 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Fri, 20 Dec 2024 16:16:14 +0100
Subject: [PATCH 01/17] Add basic implementation of the Connector interface and
 a StreamingAudioInputDevice Connector

---
 src/rai/rai/communication/__init__.py         |  23 ++++
 src/rai/rai/communication/base_connector.py   |  46 +++++++
 .../communication/sound_device_connector.py   | 129 ++++++++++++++++++
 .../test_sound_device_connector.py            | 100 ++++++++++++++
 4 files changed, 298 insertions(+)
 create mode 100644 src/rai/rai/communication/__init__.py
 create mode 100644 src/rai/rai/communication/base_connector.py
 create mode 100644 src/rai/rai/communication/sound_device_connector.py
 create mode 100644 tests/communication/test_sound_device_connector.py

diff --git a/src/rai/rai/communication/__init__.py b/src/rai/rai/communication/__init__.py
new file mode 100644
index 000000000..f22b87447
--- /dev/null
+++ b/src/rai/rai/communication/__init__.py
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base_connector import BaseConnector, BaseMessage
+from .sound_device_connector import SoundDeviceError, StreamingAudioInputDevice
+
+__all__ = [
+    "BaseMessage",
+    "BaseConnector",
+    "StreamingAudioInputDevice",
+    "SoundDeviceError",
+]
diff --git a/src/rai/rai/communication/base_connector.py b/src/rai/rai/communication/base_connector.py
new file mode 100644
index 000000000..6c332e9b6
--- /dev/null
+++ b/src/rai/rai/communication/base_connector.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2024 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from abc import ABC, abstractmethod
+from typing import Callable
+from uuid import uuid4
+
+class BaseMessage(ABC):
+    ...
+
+class BaseConnector(ABC):
+
+    def _generate_handle(self) -> str:
+        return str(uuid4())
+
+    @abstractmethod
+    def send_message(self, msg: BaseMessage, target: str) -> None:
+        ...
+
+    @abstractmethod
+    def receive_message(self, source: str) -> BaseMessage:
+        ...
+
+    @abstractmethod
+    def send_and_wait(self, target: str) -> BaseMessage:
+        ...
+    
+    @abstractmethod
+    def start_action(self, target: str, on_feedback: Callable, on_finish: Callable = lambda _: None)  -> str:
+        ...
+    
+    @abstractmethod
+    def terminate_action(self, action_handle: str):
+        ...
diff --git a/src/rai/rai/communication/sound_device_connector.py b/src/rai/rai/communication/sound_device_connector.py
new file mode 100644
index 000000000..239f39e44
--- /dev/null
+++ b/src/rai/rai/communication/sound_device_connector.py
@@ -0,0 +1,129 @@
+# Copyright (C) 2024 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Callable, Optional, TypedDict
+
+import numpy as np
+import sounddevice as sd
+from scipy.signal import resample
+from sounddevice import CallbackFlags
+
+from rai.communication.base_connector import BaseConnector, BaseMessage
+
+
+class SoundDeviceError(Exception):
+    def __init__(self, msg: str):
+        super().__init__(msg)
+
+
+class DeviceConfig(TypedDict):
+    kind: str
+    block_size: int
+    sampling_rate: int
+    target_smpling_rate: int
+    dtype: str
+    device_number: Optional[int]
+
+
+class ConfiguredDevice:
+    def __init__(self, config: DeviceConfig):
+        self.sample_rate = sd.query_devices(
+            device=config["device_number"], kind=config["kind"]
+        )[
+            "default_samplerate"
+        ]  # type: ignore
+        self.window_size_samples = int(
+            config["block_size"] * self.sample_rate / config["sampling_rate"]
+        )
+        self.target_samping_rate = int(config["target_smpling_rate"])
+        self.dtype = config["dtype"]
+
+
+class StreamingAudioInputDevice(BaseConnector):
+    def __init__(self):
+        self.streams = {}
+        sd.default.latency = ("low", "low")
+        self.configred_devices: dict[str, ConfiguredDevice] = {}
+
+    def configure_device(self, target: str, config: DeviceConfig):
+        if target.isdigit():
+            if config.get("device_number") is None:
+                config["device_number"] = int(target)
+            elif config["device_number"] != int(target):
+                raise SoundDeviceError(
+                    "device_number in config must be the same as target"
+                )
+            self.configred_devices[target] = ConfiguredDevice(config)
+        else:
+            raise SoundDeviceError("target must be a device number!")
+
+    def send_message(self, msg: BaseMessage, target: str) -> None:
+        raise SoundDeviceError(
+            "StreamingAudioInputDevice does not suport sending messages"
+        )
+
+    def receive_message(self, source: str) -> BaseMessage:
+        raise SoundDeviceError(
+            "StreamingAudioInputDevice does not suport receiving messages messages"
+        )
+
+    def send_and_wait(self, target: str) -> BaseMessage:
+        raise SoundDeviceError(
+            "StreamingAudioInputDevice does not suport sending messages"
+        )
+
+    def start_action(
+        self,
+        target: str,
+        on_feedback: Callable[[np.ndarray, dict[str, Any]], None],
+        on_finish: Callable = lambda _: None,
+    ) -> str:
+
+        target_device = self.configred_devices.get(target)
+        if target_device is None:
+            raise SoundDeviceError(f"Device {target} has not been configured")
+
+        def callback(indata: np.ndarray, frames: int, _, status: CallbackFlags):
+            indata = indata.flatten()
+            sample_time_length = len(indata) / target_device.target_samping_rate
+            if target_device.sample_rate != target_device.target_samping_rate:
+                indata = resample(indata, int(sample_time_length * target_device.target_samping_rate))  # type: ignore
+            flag_dict = {
+                "input_overflow": status.input_overflow,
+                "input_underflow": status.input_underflow,
+                "output_overflow": status.output_overflow,
+                "output_underflow": status.output_underflow,
+                "priming_output": status.priming_output,
+            }
+            on_feedback(indata, flag_dict)
+
+        handle = self._generate_handle()
+        try:
+            stream = sd.InputStream(
+                samplerate=target_device.sample_rate,
+                channels=1,
+                device=int(target),
+                dtype=target_device.dtype,
+                blocksize=target_device.window_size_samples,
+                callback=callback,
+                finished_callback=on_finish,
+            )
+        except AttributeError:
+            raise SoundDeviceError(f"Device {target} has not been correctly configured")
+        stream.start()
+        self.streams[handle] = stream
+        return handle
+
+    def terminate_action(self, action_handle: str):
+        self.streams[action_handle].stop()
diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
new file mode 100644
index 000000000..254bc315a
--- /dev/null
+++ b/tests/communication/test_sound_device_connector.py
@@ -0,0 +1,100 @@
+# Copyright (C) 2024 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import pytest
+
+from rai.communication import SoundDeviceError, StreamingAudioInputDevice
+
+
+@pytest.fixture
+def setup_mock_input_stream():
+    with mock.patch("sounddevice.InputStream") as mock_input_stream:
+        yield mock_input_stream
+
+
+@pytest.fixture
+def device_config():
+    return {
+        "kind": "input",
+        "block_size": 1024,
+        "sampling_rate": 44100,
+        "target_smpling_rate": 16000,
+        "dtype": "float32",
+    }
+
+
+def test_configure(
+    setup_mock_input_stream,
+    device_config,
+):
+    mock_input_stream = setup_mock_input_stream
+    mock_instance = mock.MagicMock()
+    mock_input_stream.return_value = mock_instance
+    audio_input_device = StreamingAudioInputDevice()
+    audio_input_device.configure_device("0", device_config)
+    assert audio_input_device.configred_devices["0"].sample_rate == 44100
+    assert audio_input_device.configred_devices["0"].window_size_samples == 1024
+    assert audio_input_device.configred_devices["0"].target_samping_rate == 16000
+    assert audio_input_device.configred_devices["0"].dtype == "float32"
+
+
+def test_start_action_failed_init(
+    setup_mock_input_stream,
+):
+    mock_input_stream = setup_mock_input_stream
+    mock_instance = mock.MagicMock()
+    mock_input_stream.return_value = mock_instance
+    audio_input_device = StreamingAudioInputDevice()
+
+    feedback_callback = mock.MagicMock()
+    finish_callback = mock.MagicMock()
+
+    recording_device = 0
+    with pytest.raises(SoundDeviceError, match="Device 0 has not been configured"):
+        stream_handle = audio_input_device.start_action(
+            str(recording_device), feedback_callback, finish_callback
+        )
+
+
+def test_start_action(
+    setup_mock_input_stream,
+    device_config,
+):
+    mock_input_stream = setup_mock_input_stream
+    mock_instance = mock.MagicMock()
+    mock_input_stream.return_value = mock_instance
+    audio_input_device = StreamingAudioInputDevice()
+
+    feedback_callback = mock.MagicMock()
+    finish_callback = mock.MagicMock()
+
+    recording_device = "0"
+    audio_input_device.configure_device(recording_device, device_config)
+
+    stream_handle = audio_input_device.start_action(
+        str(recording_device), feedback_callback, finish_callback
+    )
+
+    assert mock_input_stream.call_count == 1
+    init_args = mock_input_stream.call_args.kwargs
+    assert init_args["samplerate"] == 44100.0
+    assert init_args["channels"] == 1
+    assert init_args["device"] == int(recording_device)
+    assert init_args["dtype"] == "float32"
+    assert init_args["blocksize"] == 1024
+    assert init_args["finished_callback"] == finish_callback
+
+    assert audio_input_device.streams.get(stream_handle) is not None

From f1c2a6d5e45c5f3528611b504ec24b69c06ac42e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Fri, 20 Dec 2024 16:28:40 +0100
Subject: [PATCH 02/17] Pre-commit fixes

---
 src/rai/rai/communication/base_connector.py   | 23 +++++++++++--------
 .../test_sound_device_connector.py            |  2 +-
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/rai/rai/communication/base_connector.py b/src/rai/rai/communication/base_connector.py
index 6c332e9b6..fe01097fc 100644
--- a/src/rai/rai/communication/base_connector.py
+++ b/src/rai/rai/communication/base_connector.py
@@ -12,13 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import logging
 from abc import ABC, abstractmethod
 from typing import Callable
 from uuid import uuid4
 
+
 class BaseMessage(ABC):
-    ...
+    pass
+
 
 class BaseConnector(ABC):
 
@@ -27,20 +28,22 @@ def _generate_handle(self) -> str:
 
     @abstractmethod
     def send_message(self, msg: BaseMessage, target: str) -> None:
-        ...
+        pass
 
     @abstractmethod
     def receive_message(self, source: str) -> BaseMessage:
-        ...
+        pass
 
     @abstractmethod
     def send_and_wait(self, target: str) -> BaseMessage:
-        ...
-    
+        pass
+
     @abstractmethod
-    def start_action(self, target: str, on_feedback: Callable, on_finish: Callable = lambda _: None)  -> str:
-        ...
-    
+    def start_action(
+        self, target: str, on_feedback: Callable, on_finish: Callable = lambda _: None
+    ) -> str:
+        pass
+
     @abstractmethod
     def terminate_action(self, action_handle: str):
-        ...
+        pass
diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
index 254bc315a..657a6d61f 100644
--- a/tests/communication/test_sound_device_connector.py
+++ b/tests/communication/test_sound_device_connector.py
@@ -64,7 +64,7 @@ def test_start_action_failed_init(
 
     recording_device = 0
     with pytest.raises(SoundDeviceError, match="Device 0 has not been configured"):
-        stream_handle = audio_input_device.start_action(
+        _ = audio_input_device.start_action(
             str(recording_device), feedback_callback, finish_callback
         )
 

From d3b9ece64680742df1d2c2c75ac2e5865cba0b9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 10:37:30 +0100
Subject: [PATCH 03/17] Add virtual audio device to test procedure

---
 .github/workflows/poetry-test.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 96dfc8e45..7e5ffab60 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -38,6 +38,9 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
 
+      - name: Create virtual audio device
+        runs: jackd -d dummy &
+
       - name: Install python dependencies
         run: poetry install --with openset,nomad
 

From e2420ea52f46d60441569a434c5b5ed0972e5864 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 10:39:35 +0100
Subject: [PATCH 04/17] fix typo in test

---
 .github/workflows/poetry-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 7e5ffab60..49d710924 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -39,7 +39,7 @@ jobs:
         uses: snok/install-poetry@v1
 
       - name: Create virtual audio device
-        runs: jackd -d dummy &
+        run: jackd -d dummy &
 
       - name: Install python dependencies
         run: poetry install --with openset,nomad

From f435dbf405cc7cda3d5fb0b55a4a8a458de136ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 10:50:18 +0100
Subject: [PATCH 05/17] add dependency installation

---
 .github/workflows/poetry-test.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 49d710924..fce9dabad 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -39,7 +39,9 @@ jobs:
         uses: snok/install-poetry@v1
 
       - name: Create virtual audio device
-        run: jackd -d dummy &
+        run: |
+          DEBIAN_FRONTEND=noninteractive apt-get install jackd
+          jackd -d dummy &
 
       - name: Install python dependencies
         run: poetry install --with openset,nomad

From a758bbedf11580d8605b5b42a28b27cc5ea707f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 10:53:03 +0100
Subject: [PATCH 06/17] fix: change the way sd is mocked to a non-dependency
 one

---
 .github/workflows/poetry-test.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index fce9dabad..e84fec06c 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -39,9 +39,7 @@ jobs:
         uses: snok/install-poetry@v1
 
       - name: Create virtual audio device
-        run: |
-          DEBIAN_FRONTEND=noninteractive apt-get install jackd
-          jackd -d dummy &
+        run: modprobe snd-dummy
 
       - name: Install python dependencies
         run: poetry install --with openset,nomad

From 0c98d91ff34116d80619de02b4308c99bee64ea9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 10:56:06 +0100
Subject: [PATCH 07/17] fix: change the way sd is mocked to a non-dependency
 one

---
 .github/workflows/poetry-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index e84fec06c..9c3facaef 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -39,7 +39,7 @@ jobs:
         uses: snok/install-poetry@v1
 
       - name: Create virtual audio device
-        run: modprobe snd-dummy
+        run: /usr/sbin/modprobe snd-dummy
 
       - name: Install python dependencies
         run: poetry install --with openset,nomad

From 011f47f90e5a50632220c7ce5d07dddae05398c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 10:59:47 +0100
Subject: [PATCH 08/17] fix: change the way sd is mocked to a non-dependency
 one

---
 .github/workflows/poetry-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 9c3facaef..e3e472b9c 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -39,7 +39,7 @@ jobs:
         uses: snok/install-poetry@v1
 
       - name: Create virtual audio device
-        run: /usr/sbin/modprobe snd-dummy
+        run: /sbin/modprobe snd-dummy
 
       - name: Install python dependencies
         run: poetry install --with openset,nomad

From c234314a737c9d208895fd8546ee0f70ed9c87b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 11:11:34 +0100
Subject: [PATCH 09/17] fix: change the way sd is mocked to a non-dependency
 one

---
 .github/workflows/poetry-test.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index e3e472b9c..7177e8e89 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -39,7 +39,10 @@ jobs:
         uses: snok/install-poetry@v1
 
       - name: Create virtual audio device
-        run: /sbin/modprobe snd-dummy
+        run: |
+          apt-get update
+          DEBIAN_FRONTEND=noninteractive sudo apt-get install jackd
+          jackd -d dummy &
 
       - name: Install python dependencies
         run: poetry install --with openset,nomad

From 48e12061193de155de9c1d9d54cfcaae7c6eaeb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 11:12:59 +0100
Subject: [PATCH 10/17] fix: change the way sd is mocked to a non-dependency
 one

---
 .github/workflows/poetry-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 7177e8e89..470ebb4cf 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -41,7 +41,7 @@ jobs:
       - name: Create virtual audio device
         run: |
           apt-get update
-          DEBIAN_FRONTEND=noninteractive sudo apt-get install jackd
+          DEBIAN_FRONTEND=noninteractive sudo apt-get --yes install jackd
           jackd -d dummy &
 
       - name: Install python dependencies

From ed2cf4dd01fecdeeb7e656133e63ab07475355d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 13:03:13 +0100
Subject: [PATCH 11/17] feat: change tests to autoselect device_id, minor typos
 fixed

---
 .../communication/sound_device_connector.py   | 23 +++++++++++++++----
 .../test_sound_device_connector.py            | 22 +++++++++++++-----
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/rai/rai/communication/sound_device_connector.py b/src/rai/rai/communication/sound_device_connector.py
index 239f39e44..d43dd1f9c 100644
--- a/src/rai/rai/communication/sound_device_connector.py
+++ b/src/rai/rai/communication/sound_device_connector.py
@@ -30,23 +30,36 @@ def __init__(self, msg: str):
 class DeviceConfig(TypedDict):
     kind: str
     block_size: int
-    sampling_rate: int
+    consumer_sampling_rate: int
     target_smpling_rate: int
     dtype: str
     device_number: Optional[int]
 
 
 class ConfiguredDevice:
+    """
+    A class to store the configuration of an audio device
+
+    Attributes
+    ----------
+    sample_rate (int): Device sample rate
+    consumer_sampling_rate (int): The sampling rate of the consumer
+    window_size_samples (int): The size of the window in samples
+    target_sampling_rate (int): The target sampling rate
+    dtype (str): The data type of the audio samples
+    """
+
     def __init__(self, config: DeviceConfig):
         self.sample_rate = sd.query_devices(
             device=config["device_number"], kind=config["kind"]
         )[
             "default_samplerate"
         ]  # type: ignore
+        self.consumer_sampling_rate = config["consumer_sampling_rate"]
         self.window_size_samples = int(
-            config["block_size"] * self.sample_rate / config["sampling_rate"]
+            config["block_size"] * self.sample_rate / config["consumer_sampling_rate"]
         )
-        self.target_samping_rate = int(config["target_smpling_rate"])
+        self.target_sampling_rate = int(config["target_smpling_rate"])
         self.dtype = config["dtype"]
 
 
@@ -96,8 +109,8 @@ def start_action(
 
         def callback(indata: np.ndarray, frames: int, _, status: CallbackFlags):
             indata = indata.flatten()
-            sample_time_length = len(indata) / target_device.target_samping_rate
-            if target_device.sample_rate != target_device.target_samping_rate:
+            sample_time_length = len(indata) / target_device.target_sampling_rate
+            if target_device.sample_rate != target_device.target_sampling_rate:
                 indata = resample(indata, int(sample_time_length * target_device.target_samping_rate))  # type: ignore
             flag_dict = {
                 "input_overflow": status.input_overflow,
diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
index 657a6d61f..62e2b10a7 100644
--- a/tests/communication/test_sound_device_connector.py
+++ b/tests/communication/test_sound_device_connector.py
@@ -15,6 +15,7 @@
 from unittest import mock
 
 import pytest
+import sounddevice as sd
 
 from rai.communication import SoundDeviceError, StreamingAudioInputDevice
 
@@ -30,7 +31,7 @@ def device_config():
     return {
         "kind": "input",
         "block_size": 1024,
-        "sampling_rate": 44100,
+        "consumer_sampling_rate": 44100,
         "target_smpling_rate": 16000,
         "dtype": "float32",
     }
@@ -44,11 +45,20 @@ def test_configure(
     mock_instance = mock.MagicMock()
     mock_input_stream.return_value = mock_instance
     audio_input_device = StreamingAudioInputDevice()
-    audio_input_device.configure_device("0", device_config)
-    assert audio_input_device.configred_devices["0"].sample_rate == 44100
-    assert audio_input_device.configred_devices["0"].window_size_samples == 1024
-    assert audio_input_device.configred_devices["0"].target_samping_rate == 16000
-    assert audio_input_device.configred_devices["0"].dtype == "float32"
+    device = sd.query_devices(kind="input")
+    if type(device) is dict:
+        device_id = str(device["index"])
+    elif isinstance(device, list):
+        device_id = str(device[0]["index"])  # type: ignore
+    else:
+        assert False
+    audio_input_device.configure_device(device_id, device_config)
+    assert (
+        audio_input_device.configred_devices[device_id].consumer_sampling_rate == 44100
+    )
+    assert audio_input_device.configred_devices[device_id].window_size_samples == 1024
+    assert audio_input_device.configred_devices[device_id].target_sampling_rate == 16000
+    assert audio_input_device.configred_devices[device_id].dtype == "float32"
 
 
 def test_start_action_failed_init(

From 1a78d4a0968f722c6f7bf0e75b167570d343cf9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 13:23:12 +0100
Subject: [PATCH 12/17] fix: fix the sampling rate of the mocked device

---
 .github/workflows/poetry-test.yml                  | 2 +-
 tests/communication/test_sound_device_connector.py | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 470ebb4cf..9c89fc2d9 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -42,7 +42,7 @@ jobs:
         run: |
           apt-get update
           DEBIAN_FRONTEND=noninteractive sudo apt-get --yes install jackd
-          jackd -d dummy &
+          jackd -d dummy -r 44100 &
 
       - name: Install python dependencies
         run: poetry install --with openset,nomad
diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
index 62e2b10a7..4b1523d1f 100644
--- a/tests/communication/test_sound_device_connector.py
+++ b/tests/communication/test_sound_device_connector.py
@@ -100,11 +100,8 @@ def test_start_action(
 
     assert mock_input_stream.call_count == 1
     init_args = mock_input_stream.call_args.kwargs
-    assert init_args["samplerate"] == 44100.0
-    assert init_args["channels"] == 1
     assert init_args["device"] == int(recording_device)
-    assert init_args["dtype"] == "float32"
-    assert init_args["blocksize"] == 1024
+    assert init_args["callback"] == feedback_callback
     assert init_args["finished_callback"] == finish_callback
 
     assert audio_input_device.streams.get(stream_handle) is not None

From 7601b1fe647d4b2953022f0b1e9b310bc9513a50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 13:34:44 +0100
Subject: [PATCH 13/17] feat: add mark so device tests run only on CI with
 preconfigured mocked device

---
 .github/workflows/poetry-test.yml              |  2 +-
 pyproject.toml                                 |  3 ++-
 .../test_sound_device_connector.py             | 18 +++++++++++++-----
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 9c89fc2d9..3e8ed04c9 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -69,4 +69,4 @@ jobs:
         run: |
           source /opt/ros/${{ matrix.ros_distro }}/setup.bash
           source install/setup.bash
-          poetry run pytest
+          poetry run pytest -m "not billable"
diff --git a/pyproject.toml b/pyproject.toml
index 20e7eb32f..4457ea4cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,7 +85,8 @@ profile = "black"
 [tool.pytest.ini_options]
 markers = [
     "billable: marks test as billable  (deselect with '-m \"not billable\"')",
+    "ci_only: marks test as CI only (deselect with '-m \"not ci_only\"')",
 ]
-addopts = "-m 'not billable' --ignore=src"
+addopts = "-m 'not billable and not ci_only' --ignore=src"
 log_cli = true
 log_cli_level = "DEBUG"
diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
index 4b1523d1f..fa0ad95dd 100644
--- a/tests/communication/test_sound_device_connector.py
+++ b/tests/communication/test_sound_device_connector.py
@@ -37,6 +37,7 @@ def device_config():
     }
 
 
+@pytest.mark.ci_only
 def test_configure(
     setup_mock_input_stream,
     device_config,
@@ -61,6 +62,7 @@ def test_configure(
     assert audio_input_device.configred_devices[device_id].dtype == "float32"
 
 
+@pytest.mark.ci_only
 def test_start_action_failed_init(
     setup_mock_input_stream,
 ):
@@ -79,6 +81,7 @@ def test_start_action_failed_init(
         )
 
 
+@pytest.mark.ci_only
 def test_start_action(
     setup_mock_input_stream,
     device_config,
@@ -91,17 +94,22 @@ def test_start_action(
     feedback_callback = mock.MagicMock()
     finish_callback = mock.MagicMock()
 
-    recording_device = "0"
-    audio_input_device.configure_device(recording_device, device_config)
+    device = sd.query_devices(kind="input")
+    if type(device) is dict:
+        device_id = str(device["index"])
+    elif isinstance(device, list):
+        device_id = str(device[0]["index"])  # type: ignore
+    else:
+        assert False
+    audio_input_device.configure_device(device_id, device_config)
 
     stream_handle = audio_input_device.start_action(
-        str(recording_device), feedback_callback, finish_callback
+        device_id, feedback_callback, finish_callback
     )
 
     assert mock_input_stream.call_count == 1
     init_args = mock_input_stream.call_args.kwargs
-    assert init_args["device"] == int(recording_device)
-    assert init_args["callback"] == feedback_callback
+    assert init_args["device"] == int(device_id)
     assert init_args["finished_callback"] == finish_callback
 
     assert audio_input_device.streams.get(stream_handle) is not None

From 59351a4877c9c84e3e770cdceb1c8aa5a2ecbac8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 16:44:04 +0100
Subject: [PATCH 14/17] fix: rename configuration classes

---
 src/rai/rai/agents/__init__.py                    | 12 ++++++++++++
 .../rai/communication/sound_device_connector.py   | 15 +++++++--------
 .../communication/test_sound_device_connector.py  |  1 -
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/rai/rai/agents/__init__.py b/src/rai/rai/agents/__init__.py
index ef74fc891..b33279982 100644
--- a/src/rai/rai/agents/__init__.py
+++ b/src/rai/rai/agents/__init__.py
@@ -11,3 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .conversational_agent import create_conversational_agent
+from .state_based import create_state_based_agent
+from .tool_runner import ToolRunner
+from .voice_agent import VoiceRecognitionAgent
+
+__all__ = [
+    "VoiceRecognitionAgent",
+    "ToolRunner",
+    "create_conversational_agent",
+    "create_state_based_agent",
+]
diff --git a/src/rai/rai/communication/sound_device_connector.py b/src/rai/rai/communication/sound_device_connector.py
index d43dd1f9c..26fea5141 100644
--- a/src/rai/rai/communication/sound_device_connector.py
+++ b/src/rai/rai/communication/sound_device_connector.py
@@ -27,8 +27,7 @@ def __init__(self, msg: str):
         super().__init__(msg)
 
 
-class DeviceConfig(TypedDict):
-    kind: str
+class AudioInputDeviceConfig(TypedDict):
     block_size: int
     consumer_sampling_rate: int
     target_smpling_rate: int
@@ -36,7 +35,7 @@ class DeviceConfig(TypedDict):
     device_number: Optional[int]
 
 
-class ConfiguredDevice:
+class ConfiguredAudioInputDevice:
     """
     A class to store the configuration of an audio device
 
@@ -49,9 +48,9 @@ class ConfiguredDevice:
     dtype (str): The data type of the audio samples
     """
 
-    def __init__(self, config: DeviceConfig):
+    def __init__(self, config: AudioInputDeviceConfig):
         self.sample_rate = sd.query_devices(
-            device=config["device_number"], kind=config["kind"]
+            device=config["device_number"], kind="input"
         )[
             "default_samplerate"
         ]  # type: ignore
@@ -67,9 +66,9 @@ class StreamingAudioInputDevice(BaseConnector):
     def __init__(self):
         self.streams = {}
         sd.default.latency = ("low", "low")
-        self.configred_devices: dict[str, ConfiguredDevice] = {}
+        self.configred_devices: dict[str, ConfiguredAudioInputDevice] = {}
 
-    def configure_device(self, target: str, config: DeviceConfig):
+    def configure_device(self, target: str, config: AudioInputDeviceConfig):
         if target.isdigit():
             if config.get("device_number") is None:
                 config["device_number"] = int(target)
@@ -77,7 +76,7 @@ def configure_device(self, target: str, config: DeviceConfig):
                 raise SoundDeviceError(
                     "device_number in config must be the same as target"
                 )
-            self.configred_devices[target] = ConfiguredDevice(config)
+            self.configred_devices[target] = ConfiguredAudioInputDevice(config)
         else:
             raise SoundDeviceError("target must be a device number!")
 
diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
index fa0ad95dd..090e76a29 100644
--- a/tests/communication/test_sound_device_connector.py
+++ b/tests/communication/test_sound_device_connector.py
@@ -29,7 +29,6 @@ def setup_mock_input_stream():
 @pytest.fixture
 def device_config():
     return {
-        "kind": "input",
         "block_size": 1024,
         "consumer_sampling_rate": 44100,
         "target_smpling_rate": 16000,

From de41ae417889906758f27e971457f5a7b9444062 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 16:49:38 +0100
Subject: [PATCH 15/17] fix: typos and workflow

---
 .github/workflows/poetry-test.yml                   | 2 +-
 src/rai/rai/communication/sound_device_connector.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 3e8ed04c9..1f01701a9 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -41,7 +41,7 @@ jobs:
       - name: Create virtual audio device
         run: |
           apt-get update
-          DEBIAN_FRONTEND=noninteractive sudo apt-get --yes install jackd
+          DEBIAN_FRONTEND=noninteractive apt-get --yes install jackd
           jackd -d dummy -r 44100 &
 
       - name: Install python dependencies
diff --git a/src/rai/rai/communication/sound_device_connector.py b/src/rai/rai/communication/sound_device_connector.py
index 26fea5141..f88619c2b 100644
--- a/src/rai/rai/communication/sound_device_connector.py
+++ b/src/rai/rai/communication/sound_device_connector.py
@@ -30,7 +30,7 @@ def __init__(self, msg: str):
 class AudioInputDeviceConfig(TypedDict):
     block_size: int
     consumer_sampling_rate: int
-    target_smpling_rate: int
+    target_sampling_rate: int
     dtype: str
     device_number: Optional[int]
 
@@ -58,7 +58,7 @@ def __init__(self, config: AudioInputDeviceConfig):
         self.window_size_samples = int(
             config["block_size"] * self.sample_rate / config["consumer_sampling_rate"]
         )
-        self.target_sampling_rate = int(config["target_smpling_rate"])
+        self.target_sampling_rate = int(config["target_sampling_rate"])
         self.dtype = config["dtype"]
 
 
@@ -110,7 +110,7 @@ def callback(indata: np.ndarray, frames: int, _, status: CallbackFlags):
             indata = indata.flatten()
             sample_time_length = len(indata) / target_device.target_sampling_rate
             if target_device.sample_rate != target_device.target_sampling_rate:
-                indata = resample(indata, int(sample_time_length * target_device.target_samping_rate))  # type: ignore
+                indata = resample(indata, int(sample_time_length * target_device.target_sampling_rate))  # type: ignore
             flag_dict = {
                 "input_overflow": status.input_overflow,
                 "input_underflow": status.input_underflow,

From c3f6e79ca0dac46add61bcd1c64c49073267066b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Thu, 2 Jan 2025 16:57:19 +0100
Subject: [PATCH 16/17] fix: import error

---
 src/rai/rai/agents/__init__.py                     | 8 +++-----
 tests/communication/test_sound_device_connector.py | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/rai/rai/agents/__init__.py b/src/rai/rai/agents/__init__.py
index b33279982..dc101282b 100644
--- a/src/rai/rai/agents/__init__.py
+++ b/src/rai/rai/agents/__init__.py
@@ -12,13 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .conversational_agent import create_conversational_agent
-from .state_based import create_state_based_agent
-from .tool_runner import ToolRunner
-from .voice_agent import VoiceRecognitionAgent
+from rai.agents.conversational_agent import create_conversational_agent
+from rai.agents.state_based import create_state_based_agent
+from rai.agents.tool_runner import ToolRunner
 
 __all__ = [
-    "VoiceRecognitionAgent",
     "ToolRunner",
     "create_conversational_agent",
     "create_state_based_agent",
diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
index 090e76a29..f62115142 100644
--- a/tests/communication/test_sound_device_connector.py
+++ b/tests/communication/test_sound_device_connector.py
@@ -31,7 +31,7 @@ def device_config():
     return {
         "block_size": 1024,
         "consumer_sampling_rate": 44100,
-        "target_smpling_rate": 16000,
+        "target_sampling_rate": 16000,
         "dtype": "float32",
     }
 

From f99f923033401cee1694d76156bf4e5532cc24de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajetan=20Rachwa=C5=82?= <kajetan.rachwal@robotec.ai>
Date: Fri, 3 Jan 2025 11:40:35 +0100
Subject: [PATCH 17/17] fix: switch assert to raise for the edge cases

---
 tests/communication/test_sound_device_connector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/communication/test_sound_device_connector.py b/tests/communication/test_sound_device_connector.py
index f62115142..cb6d72101 100644
--- a/tests/communication/test_sound_device_connector.py
+++ b/tests/communication/test_sound_device_connector.py
@@ -51,7 +51,7 @@ def test_configure(
     elif isinstance(device, list):
         device_id = str(device[0]["index"])  # type: ignore
     else:
-        assert False
+        raise AssertionError("No input device found")
     audio_input_device.configure_device(device_id, device_config)
     assert (
         audio_input_device.configred_devices[device_id].consumer_sampling_rate == 44100
@@ -99,7 +99,7 @@ def test_start_action(
     elif isinstance(device, list):
         device_id = str(device[0]["index"])  # type: ignore
     else:
-        assert False
+        raise AssertionError("No input device found")
     audio_input_device.configure_device(device_id, device_config)
 
     stream_handle = audio_input_device.start_action(