@@ -227,7 +227,10 @@ class SearchChinese(SearchLanguage):
     js_stemmer_code = js_porter_stemmer
     stopwords = english_stopwords
     latin1_letters = re.compile(r'[a-zA-Z0-9_]+')
-    latin_terms: list[str] = []
+
+    def __init__(self, options: dict[str, str]) -> None:
+        super().__init__(options)
+        self.latin_terms: set[str] = set()
 
     def init(self, options: dict[str, str]) -> None:
         if JIEBA:
@@ -238,12 +241,13 @@ def init(self, options: dict[str, str]) -> None:
         self.stemmer = snowballstemmer.stemmer('english')
 
     def split(self, input: str) -> list[str]:
-        chinese: list[str] = []
         if JIEBA:
-            chinese = list(jieba.cut_for_search(input))
+            chinese: list[str] = list(jieba.cut_for_search(input))
+        else:
+            chinese = []
 
         latin1 = [term.strip() for term in self.latin1_letters.findall(input)]
-        self.latin_terms.extend(latin1)
+        self.latin_terms.update(latin1)
         return chinese + latin1
 
     def word_filter(self, stemmed_word: str) -> bool:
@@ -255,7 +259,7 @@ def stem(self, word: str) -> str:
         # avoids some issues with acronyms
         stemmed = self.stemmer.stemWord(word.lower())
         should_not_be_stemmed = (
-            word in self.latin_terms and len(word) >= 3 > len(stemmed)
+            len(word) >= 3 > len(stemmed) and word in self.latin_terms
         )  # fmt: skip
         if should_not_be_stemmed:
             return word.lower()
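
The commit itself does not spell out the motivation, but the pattern is a familiar one: a mutable class attribute is shared by every instance, and a set keeps the later `word in self.latin_terms` membership test cheap. Below is a minimal, self-contained sketch of both points, using hypothetical `SharedTerms` and `PerInstanceTerms` classes rather than Sphinx's `SearchChinese`, and assuming nothing beyond standard Python:

class SharedTerms:
    # old pattern: the list is created once, at class definition time,
    # so every instance appends into the same shared object
    latin_terms: list[str] = []

    def record(self, terms: list[str]) -> None:
        self.latin_terms.extend(terms)


class PerInstanceTerms:
    # new pattern: state is created in __init__, so each instance owns its set;
    # set membership tests are also O(1), which suits "word in self.latin_terms"
    def __init__(self) -> None:
        self.latin_terms: set[str] = set()

    def record(self, terms: list[str]) -> None:
        self.latin_terms.update(terms)


a, b = SharedTerms(), SharedTerms()
a.record(['sphinx'])
print('sphinx' in b.latin_terms)   # True: b sees state recorded through a

c, d = PerInstanceTerms(), PerInstanceTerms()
c.record(['sphinx'])
print('sphinx' in d.latin_terms)   # False: each instance is independent

The reordered condition in the last hunk has the same flavour: the cheap length comparison now runs first, so the membership lookup only happens for words that could actually be affected.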