File tree 1 file changed +6
-4
lines changed
api/core/rag/datasource/keyword/jieba
1 file changed +6
-4
lines changed Original file line number Diff line number Diff line change 1
1
import re
2
- from typing import Optional
2
+ from typing import Optional , cast
3
3
4
4
5
5
class JiebaKeywordTableHandler :
@@ -8,18 +8,20 @@ def __init__(self):
8
8
9
9
from core .rag .datasource .keyword .jieba .stopwords import STOPWORDS
10
10
11
- jieba .analyse .default_tfidf .stop_words = STOPWORDS
11
+ jieba .analyse .default_tfidf .stop_words = STOPWORDS # type: ignore
12
12
13
13
def extract_keywords (self , text : str , max_keywords_per_chunk : Optional [int ] = 10 ) -> set [str ]:
14
14
"""Extract keywords with JIEBA tfidf."""
15
- import jieba
15
+ import jieba . analyse
16
16
17
17
keywords = jieba .analyse .extract_tags (
18
18
sentence = text ,
19
19
topK = max_keywords_per_chunk ,
20
20
)
21
+ # jieba.analyse.extract_tags returns list[Any] when withFlag is False (its default), so cast to list[str] for the type checker.
22
+ keywords = cast (list [str ], keywords )
21
23
22
- return set (self ._expand_tokens_with_subtokens (keywords ))
24
+ return set (self ._expand_tokens_with_subtokens (set ( keywords ) ))
23
25
24
26
def _expand_tokens_with_subtokens (self , tokens : set [str ]) -> set [str ]:
25
27
"""Get subtokens from a list of tokens., filtering for stopwords."""
You can’t perform that action at this time.
0 commit comments