Skip to content

Commit 2f70567

Browse files
committed
fix: update keyword extraction to remove optional parameter and improve type casting
Signed-off-by: -LAN- <[email protected]>
1 parent cf00ee4 commit 2f70567

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import re
2-
from typing import Optional
2+
from typing import Optional, cast
33

44

55
class JiebaKeywordTableHandler:
@@ -8,18 +8,20 @@ def __init__(self):
88

99
from core.rag.datasource.keyword.jieba.stopwords import STOPWORDS
1010

11-
jieba.analyse.default_tfidf.stop_words = STOPWORDS
11+
jieba.analyse.default_tfidf.stop_words = STOPWORDS # type: ignore
1212

1313
def extract_keywords(self, text: str, max_keywords_per_chunk: Optional[int] = 10) -> set[str]:
1414
"""Extract keywords with JIEBA tfidf."""
15-
import jieba # type: ignore
15+
import jieba.analyse # type: ignore
1616

1717
keywords = jieba.analyse.extract_tags(
1818
sentence=text,
1919
topK=max_keywords_per_chunk,
2020
)
21+
# With withFlag left at its default (False), jieba.analyse.extract_tags returns list[Any].
22+
keywords = cast(list[str], keywords)
2123

22-
return set(self._expand_tokens_with_subtokens(keywords))
24+
return set(self._expand_tokens_with_subtokens(set(keywords)))
2325

2426
def _expand_tokens_with_subtokens(self, tokens: set[str]) -> set[str]:
2527
"""Get subtokens from a list of tokens, filtering for stopwords."""

0 commit comments

Comments
 (0)