Skip to content

Commit 2f59e38

Browse files
authored
Modularize tricky dependencies (#95)
* removed pyaudio from threaded transport
* modularized torch and torchaudio
* modularized local transport
* Working Dockerfile as well
* docker updates for fly.io
1 parent c210148 commit 2f59e38

File tree

8 files changed: +69 -37 lines changed

.dockerignore

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# flyctl launch added from .gitignore
2+
**/.vscode
3+
**/env
4+
**/__pycache__
5+
**/*~
6+
**/venv
7+
#*#
8+
9+
# Distribution / packaging
10+
**/.Python
11+
**/build
12+
**/develop-eggs
13+
**/dist
14+
**/downloads
15+
**/eggs
16+
**/.eggs
17+
**/lib
18+
**/lib64
19+
**/parts
20+
**/sdist
21+
**/var
22+
**/wheels
23+
**/share/python-wheels
24+
**/*.egg-info
25+
**/.installed.cfg
26+
**/*.egg
27+
**/MANIFEST
28+
**/.DS_Store
29+
**/.env
30+
fly.toml

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,4 @@ share/python-wheels/
2626
MANIFEST
2727
.DS_Store
2828
.env
29+
fly.toml

examples/server/Dockerfile → Dockerfile

+3-2
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ COPY *.py /app
77
COPY pyproject.toml /app
88

99
COPY src/ /app/src/
10+
COPY examples/ /app/examples/
1011

1112
WORKDIR /app
1213
RUN ls --recursive /app/
1314
RUN pip3 install --upgrade -r requirements.txt
1415
RUN python -m build .
1516
RUN pip3 install .
16-
17+
RUN pip3 install gunicorn
1718
# If running on Ubuntu, Azure TTS requires some extra config
1819
# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnetcli%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi
1920

@@ -36,4 +37,4 @@ WORKDIR /app
3637

3738
EXPOSE 8000
3839
# run
39-
CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]
40+
CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--chdir", "examples/server", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]

examples/server/daily-bot-manager.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
CORS(app)
1515

1616
APPS = {
17-
"chatbot": "examples/starter-apps/chatbot.py",
18-
"patient-intake": "examples/starter-apps/patient-intake.py",
19-
"storybot": "examples/starter-apps/storybot.py",
20-
"translator": "examples/starter-apps/translator.py"
17+
"chatbot": "../starter-apps/chatbot.py",
18+
"patient-intake": "../starter-apps/patient-intake.py",
19+
"storybot": "../starter-apps/storybot.py",
20+
"translator": "../starter-apps/translator.py"
2121
}
2222

2323
daily_api_key = os.getenv("DAILY_API_KEY")
@@ -157,7 +157,7 @@ def start(botname):
157157
else:
158158
return jsonify({"room_url": room_url, "token": token})
159159
except BaseException as e:
160-
return "There was a problem starting the bot: {e}", 500
160+
return f"There was a problem starting the bot: {e}", 500
161161

162162

163163
@app.route("/healthz")

examples/starter-apps/translator.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ async def main(room_url: str, token):
8484
mic_enabled=True,
8585
mic_sample_rate=16000,
8686
camera_enabled=False,
87-
vad_enabled=True,
8887
)
8988
tts = AzureTTSService(
9089
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
@@ -98,7 +97,7 @@ async def main(room_url: str, token):
9897
tp = TranslationProcessor("Spanish")
9998
lfra = LLMFullResponseAggregator()
10099
ts = TranslationSubtitles("spanish")
101-
pipeline = Pipeline([sa, tp, llm, lfra, ts])
100+
pipeline = Pipeline([sa, tp, llm, lfra, ts, tts])
102101

103102
transport.transcription_settings["extra"]["endpointing"] = True
104103
transport.transcription_settings["extra"]["punctuate"] = True

pyproject.toml

+2-3
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,14 @@ dependencies = [
2626
"daily-python",
2727
"fal",
2828
"faster_whisper",
29+
"flask",
30+
"flask_cors",
2931
"google-cloud-texttospeech",
3032
"numpy",
3133
"openai",
3234
"Pillow",
3335
"pyht",
3436
"python-dotenv",
35-
"torch",
36-
"torchaudio",
37-
"pyaudio",
3837
"typing-extensions",
3938
"websockets"
4039
]

src/dailyai/transports/local_transport.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
import asyncio
22
import numpy as np
33
import tkinter as tk
4-
import pyaudio
54

65
from dailyai.transports.threaded_transport import ThreadedTransport
76

87

98
class LocalTransport(ThreadedTransport):
109
def __init__(self, **kwargs):
1110
super().__init__(**kwargs)
11+
try:
12+
global pyaudio
13+
import pyaudio
14+
except ModuleNotFoundError as e:
15+
print(f"Exception: {e}")
16+
print("In order to use the local transport, you'll need to `pip install pyaudio`. On MacOS, you'll also need to `brew install portaudio`.")
17+
raise Exception(f"Missing module: {e}")
1218
self._sample_width = kwargs.get("sample_width") or 2
1319
self._n_channels = kwargs.get("n_channels") or 1
1420
self._tk_root = kwargs.get("tk_root") or None

src/dailyai/transports/threaded_transport.py

+20-24
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
import itertools
44
import logging
55
import numpy as np
6-
import pyaudio
7-
import torch
6+
87
import queue
98
import threading
109
import time
@@ -29,22 +28,6 @@
2928
from dailyai.services.ai_services import TTSService
3029
from dailyai.transports.abstract_transport import AbstractTransport
3130

32-
torch.set_num_threads(1)
33-
34-
model, utils = torch.hub.load(
35-
repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=False
36-
)
37-
38-
(get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks) = utils
39-
40-
# Taken from utils_vad.py
41-
42-
43-
def validate(model, inputs: torch.Tensor):
44-
with torch.no_grad():
45-
outs = model(inputs)
46-
return outs
47-
4831

4932
# Provided by Alexander Veysov
5033

@@ -58,12 +41,7 @@ def int2float(sound):
5841
return sound
5942

6043

61-
FORMAT = pyaudio.paInt16
62-
CHANNELS = 1
6344
SAMPLE_RATE = 16000
64-
CHUNK = int(SAMPLE_RATE / 10)
65-
66-
audio = pyaudio.PyAudio()
6745

6846

6947
class VADState(Enum):
@@ -90,6 +68,24 @@ def __init__(
9068
"Sorry, you can't use speaker_enabled and vad_enabled at the same time. Please set one to False."
9169
)
9270

71+
if self._vad_enabled:
72+
try:
73+
global torch, torchaudio
74+
import torch
75+
# We don't use torchaudio here, but we need to try importing it because
76+
# Silero uses it
77+
import torchaudio
78+
torch.set_num_threads(1)
79+
80+
(self.model, self.utils) = torch.hub.load(
81+
repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=False
82+
)
83+
84+
except ModuleNotFoundError as e:
85+
print(f"Exception: {e}")
86+
print("In order to use VAD, you'll need to install the `torch` and `torchaudio` modules.")
87+
raise Exception(f"Missing module(s): {e}")
88+
9389
self._vad_samples = 1536
9490
vad_frame_s = self._vad_samples / SAMPLE_RATE
9591
self._vad_start_frames = round(self._vad_start_s / vad_frame_s)
@@ -276,7 +272,7 @@ def _vad(self):
276272
audio_chunk = self.read_audio_frames(self._vad_samples)
277273
audio_int16 = np.frombuffer(audio_chunk, np.int16)
278274
audio_float32 = int2float(audio_int16)
279-
new_confidence = model(
275+
new_confidence = self.model(
280276
torch.from_numpy(audio_float32), 16000).item()
281277
speaking = new_confidence > 0.5
282278

0 commit comments

Comments
 (0)