Skip to content

ISSUE=11042: add tts model in siliconflow #11043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@
- meta-llama/Meta-Llama-3.1-8B-Instruct
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- deepseek-ai/DeepSeek-V2-Chat
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ supported_model_types:
- text-embedding
- rerank
- speech2text
- tts
configurate_methods:
- predefined-model
- customizable-model
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Predefined text-to-speech model entry for fishaudio/fish-speech-1.4.
model: fishaudio/fish-speech-1.4
model_type: tts
model_properties:
  # Voice used when the caller supplies no voice or an unknown one.
  default_voice: 'fishaudio/fish-speech-1.4:alex'
  # Selectable voices. In the display names, 男声 = male voice, 女声 = female voice.
  voices:
    - mode: "fishaudio/fish-speech-1.4:alex"
      name: "Alex(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:benjamin"
      name: "Benjamin(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:charles"
      name: "Charles(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:david"
      name: "David(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:anna"
      name: "Anna(女声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:bella"
      name: "Bella(女声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:claire"
      name: "Claire(女声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:diana"
      name: "Diana(女声)"
      language: [ "zh-Hans", "en-US" ]
  audio_type: 'mp3'
  # NOTE(review): the accompanying tts.py caps its thread pool at 3 workers and
  # does not visibly read this value -- confirm which limit is intended.
  max_workers: 5
  # stream: false
pricing:
  input: '0.015'
  output: '0'
  unit: '0.001'
  currency: RMB
105 changes: 105 additions & 0 deletions api/core/model_runtime/model_providers/siliconflow/tts/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import concurrent.futures
from typing import Any, Optional

from openai import OpenAI

from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.tts_model import TTSModel
from core.model_runtime.model_providers.openai._common import _CommonOpenAI


class SiliconFlowText2SpeechModel(_CommonOpenAI, TTSModel):
    """
    Model class for SiliconFlow text to speech model.

    Requests are sent through the OpenAI client, with the API base and key
    rewritten to the SiliconFlow endpoint by ``_add_custom_parameters``.
    """

    # Texts longer than this many characters are split and requested piecewise.
    _MAX_INPUT_LENGTH = 4096
    # Size of each audio chunk yielded to the caller.
    _STREAM_CHUNK_SIZE = 1024
    # Upper bound on the threads used to prepare the per-sentence requests.
    _MAX_WORKERS = 3

    def _invoke(
        self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, user: Optional[str] = None
    ) -> Any:
        """
        _invoke text2speech model

        :param model: model name
        :param tenant_id: user tenant id
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param voice: model timbre; replaced by the model default when empty or unsupported
        :param user: unique user id
        :return: generator yielding the mp3 audio in chunks
        """
        voice = self._resolve_voice(model, credentials, voice)
        return self._tts_invoke_streaming(model=model, credentials=credentials, content_text=content_text, voice=voice)

    def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
        """
        validate credentials text2speech model

        :param model: model name
        :param credentials: model credentials
        :param user: unique user id
        :raises CredentialsValidateFailedError: when the test request fails
        """
        try:
            stream = self._tts_invoke_streaming(
                model=model,
                credentials=credentials,
                content_text="Hello SiliconFlow!",
                voice=self._get_model_default_voice(model, credentials),
            )
            try:
                # _tts_invoke_streaming is a generator function: nothing runs until it is
                # iterated, so pull the first chunk to actually send the request.
                next(stream, None)
            finally:
                # Closing the generator unwinds its ``with`` blocks and releases the response.
                stream.close()
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex

    def _resolve_voice(self, model: str, credentials: dict, voice: Optional[str]) -> str:
        """
        Return ``voice`` if the model supports it, otherwise the model's default voice.

        :param model: model name
        :param credentials: model credentials
        :param voice: requested model timbre, may be empty
        :return: a voice identifier to send with the request
        """
        if voice:
            supported = {
                item.get("value") for item in self.get_tts_model_voices(model=model, credentials=credentials)
            }
            if voice in supported:
                return voice
        return self._get_model_default_voice(model, credentials)

    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> Any:
        """
        _tts_invoke_streaming text2speech model

        This is a generator function: no request is made until the result is iterated.

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param voice: model timbre
        :return: generator yielding the mp3 audio in chunks
        :raises InvokeBadRequestError: when any step of the request fails
        """
        try:
            # doc: https://docs.siliconflow.cn/capabilities/text-to-speech
            self._add_custom_parameters(credentials)
            client = OpenAI(**self._to_credential_kwargs(credentials))
            voice = self._resolve_voice(model, credentials, voice)
            create_speech = client.audio.speech.with_streaming_response.create

            if len(content_text) > self._MAX_INPUT_LENGTH:
                sentences = self._split_text_into_sentences(content_text, max_length=self._MAX_INPUT_LENGTH)
                # ThreadPoolExecutor rejects max_workers=0, so keep at least one worker.
                worker_count = max(1, min(self._MAX_WORKERS, len(sentences)))
                with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
                    futures = [
                        executor.submit(
                            create_speech,
                            model=model,
                            response_format="mp3",
                            input=sentence,
                            voice=voice,
                        )
                        for sentence in sentences
                    ]
                    # Consume in submission order so the audio stays in text order.
                    for future in futures:
                        with future.result() as response:
                            yield from response.iter_bytes(self._STREAM_CHUNK_SIZE)
            else:
                with create_speech(
                    model=model, voice=voice, response_format="mp3", input=content_text.strip()
                ) as response:
                    yield from response.iter_bytes(self._STREAM_CHUNK_SIZE)
        except Exception as ex:
            raise InvokeBadRequestError(str(ex)) from ex

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """
        Point the OpenAI-style credentials at SiliconFlow.

        Mutates ``credentials`` in place: sets ``openai_api_base`` to the SiliconFlow
        endpoint and copies ``api_key`` into ``openai_api_key``.

        :param credentials: model credentials, must contain ``api_key``
        """
        credentials["openai_api_base"] = "https://api.siliconflow.cn"
        credentials["openai_api_key"] = credentials["api_key"]