Skip to content

Commit bff61db

Browse files
committed
feat: detect and set source language for translation
1 parent 05fc049 commit bff61db

File tree

6 files changed

+30
-6
lines changed

6 files changed

+30
-6
lines changed

core/tasks.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -250,11 +250,13 @@ def translate_feed(
250250
total_tokens = 0
251251
translated_characters = 0
252252
need_cache_objs = {}
253+
source_language = "auto"
253254

254255
try:
255256
for entry in translated_feed.entries[:max_posts]:
256257
title = entry.get("title")
257-
258+
source_language = text_handler.detect_language(entry)
259+
258260
# Translate title
259261
if title and translate_engine and translate_title:
260262
cached = Translated_Content.is_translated(
@@ -263,7 +265,7 @@ def translate_feed(
263265
translated_text = ""
264266
if not cached:
265267
results = translate_engine.translate(
266-
title, target_language=target_language, text_type="title"
268+
title, target_language=target_language, source_language=source_language, text_type="title"
267269
)
268270
translated_text = results.get("text", title)
269271
total_tokens += results.get("tokens", 0)
@@ -315,7 +317,7 @@ def translate_feed(
315317
if content:
316318
translated_summary, tokens, characters, need_cache = (
317319
content_translate(
318-
content, target_language, translate_engine, quality
320+
content, target_language, translate_engine, quality, source_language=source_language
319321
)
320322
)
321323
total_tokens += tokens
@@ -396,6 +398,7 @@ def content_translate(
396398
target_language: str,
397399
engine: TranslatorEngine,
398400
quality: bool = False,
401+
source_language:str = "auto"
399402
):
400403
total_tokens = 0
401404
total_characters = 0
@@ -417,7 +420,7 @@ def content_translate(
417420

418421
if not cached:
419422
results = engine.translate(
420-
text, target_language=target_language, text_type="content"
423+
text, target_language=target_language, source_language=source_language, text_type="content"
421424
)
422425
total_tokens += results.get("tokens", 0)
423426
total_characters += len(text)

translator/models/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class TranslatorEngine(models.Model):
1212
valid = models.BooleanField(_("Valid"), null=True)
1313
is_ai = models.BooleanField(default=False, editable=False)
1414

15-
def translate(self, text: str, target_language: str) -> dict:
15+
def translate(self, text: str, target_language: str, source_language:str="auto", **kwargs) -> dict:
1616
raise NotImplementedError(
1717
"subclasses of TranslatorEngine must provide a translate() method"
1818
)
@@ -142,6 +142,7 @@ def translate(
142142
system_prompt: str = None,
143143
user_prompt: str = None,
144144
text_type: str = "title",
145+
**kwargs
145146
) -> dict:
146147
logging.info(">>> Translate [%s]: %s", target_language, text)
147148
client = self._init()

translator/models/claude.py

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def translate(
5555
system_prompt: str = None,
5656
user_prompt: str = None,
5757
text_type: str = "title",
58+
**kwargs
5859
) -> dict:
5960
logging.info(">>> Claude Translate [%s]:", target_language)
6061
client = self._init()

translator/models/free_translators.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def translate(self, text: str, target_language: str, source_language:str="auto",
3434
try:
3535
source_language = detect(text) if source_language == "auto" else source_language
3636
except:
37-
source_language == "auto"
37+
source_language = "auto"
3838
logging.warning("Cannot detect source language:%s", text)
3939

4040
results = et.translate(

translator/models/gemini.py

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def translate(
6161
system_prompt: str = None,
6262
user_prompt: str = None,
6363
text_type: str = "title",
64+
**kwargs
6465
) -> dict:
6566
logging.info(">>> Gemini Translate [%s]:", target_language)
6667

utils/text_handler.py

+18
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,24 @@
55
import tiktoken
66
import html2text
77
from translator.models import TranslatorEngine
8+
from langdetect import detect
9+
10+
def detect_language(entry):
    """Guess the source language of a feed entry from its title and body.

    The body text is the ``value`` of the first item in ``entry["content"]``
    when that list is present and non-empty; otherwise ``entry["summary"]``
    is used. Title and body are joined with a single space and handed to
    ``detect``.

    Args:
        entry: Mapping-like feed entry that supports ``.get()``.

    Returns:
        Whatever ``detect`` returns for the combined text, or ``"auto"`` when
        the entry carries no usable text or detection raises.
    """
    title = entry.get("title")
    original_content = entry.get("content")
    content = (
        original_content[0].get("value")
        if original_content
        else entry.get("summary")
    )

    # Either field may be missing (None) or empty. Concatenating None with a
    # string would raise TypeError before detection is even attempted, so only
    # the parts that actually hold text are joined.
    text = " ".join(part for part in (title, content) if part)
    if not text.strip():
        # Nothing to analyse; let the translation engine auto-detect instead.
        return "auto"

    source_language = "auto"
    try:
        source_language = detect(text)
    except Exception as e:
        # Detection is best-effort: fall back to "auto" rather than aborting
        # the caller's translation run.
        logging.warning("Cannot detect source language:%s,%s", e, text)

    return source_language
826

927

1028
def clean_content(content: str) -> str:

0 commit comments

Comments
 (0)