Skip to content

Commit 1e0a9d7

Browse files
committed
Add previous_text context to ElevenLabsHttpTTSService
1 parent 4a23e13 commit 1e0a9d7

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1616
you to control aggregator settings. You can now pass these arguments when
1717
creating aggregator pairs with `create_context_aggregator()`.
1818

19+
- Added `previous_text` context support to `ElevenLabsHttpTTSService`, improving
20+
speech consistency across sentences within an LLM response.
21+
1922
- Added word/timestamp pairs to `ElevenLabsHttpTTSService`.
2023

2124
- It is now possible to disable `SoundfileMixer` when created. You can then use

src/pipecat/services/elevenlabs/tts.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@
1212
import aiohttp
1313
from loguru import logger
1414
from pydantic import BaseModel, model_validator
15-
from sentry_sdk import push_scope
1615

1716
from pipecat.frames.frames import (
1817
CancelFrame,
1918
EndFrame,
2019
ErrorFrame,
2120
Frame,
21+
LLMFullResponseEndFrame,
2222
StartFrame,
2323
StartInterruptionFrame,
2424
TTSAudioRawFrame,
@@ -509,6 +509,9 @@ def __init__(
509509
self._cumulative_time = 0
510510
self._started = False
511511

512+
# Store previous text for context within a turn
513+
self._previous_text = ""
514+
512515
def language_to_service_language(self, language: Language) -> Optional[str]:
513516
"""Convert pipecat Language to ElevenLabs language code."""
514517
return language_to_elevenlabs_language(language)
@@ -526,16 +529,23 @@ async def start(self, frame: StartFrame):
526529
self._output_format = output_format_from_sample_rate(self.sample_rate)
527530
self._cumulative_time = 0
528531
self._started = False
532+
self._previous_text = ""
529533

530534
async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
531535
await super().push_frame(frame, direction)
532536
if isinstance(frame, (StartInterruptionFrame, TTSStoppedFrame)):
533537
# Reset timing and previous-text context on interruption or stop
534538
self._started = False
535539
self._cumulative_time = 0
540+
self._previous_text = ""
541+
536542
if isinstance(frame, TTSStoppedFrame):
537543
await self.add_word_timestamps([("LLMFullResponseEndFrame", 0), ("Reset", 0)])
538544

545+
elif isinstance(frame, LLMFullResponseEndFrame):
546+
# End of turn - reset previous text
547+
self._previous_text = ""
548+
539549
def calculate_word_times(self, alignment_info: Mapping[str, Any]) -> List[Tuple[str, float]]:
540550
"""Calculate word timing from character alignment data.
541551
@@ -598,6 +608,10 @@ def calculate_word_times(self, alignment_info: Mapping[str, Any]) -> List[Tuple[
598608
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
599609
"""Generate speech from text using ElevenLabs streaming API with timestamps.
600610
611+
Makes a request to the ElevenLabs API to generate audio and timing data.
612+
Tracks the duration of each utterance to ensure correct sequencing.
613+
Includes previous text as context for better prosody continuity.
614+
601615
Args:
602616
text: Text to convert to speech
603617
@@ -614,6 +628,11 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
614628
"model_id": self._model_name,
615629
}
616630

631+
# Include previous text as context if available
632+
if self._previous_text:
633+
payload["previous_text"] = self._previous_text
634+
print(f"Previous text: {self._previous_text}")
635+
617636
if self._voice_settings:
618637
payload["voice_settings"] = self._voice_settings
619638

@@ -702,6 +721,13 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
702721
if utterance_duration > 0:
703722
self._cumulative_time += utterance_duration
704723

724+
# Append the current text to previous_text for context continuity
725+
# Only add a space if there's already text
726+
if self._previous_text:
727+
self._previous_text += " " + text
728+
else:
729+
self._previous_text = text
730+
705731
except Exception as e:
706732
logger.error(f"Error in run_tts: {e}")
707733
yield ErrorFrame(error=str(e))

0 commit comments

Comments
 (0)