
Commit 72c0eca

spellcheck (#916)
Co-authored-by: starylan <[email protected]>
1 parent ed75ecd commit 72c0eca

2 files changed: +39 −39 lines changed

GPT_SoVITS/TTS_infer_pack/TTS.py

+34 −34
@@ -140,7 +140,7 @@ def __init__(self, configs: Union[dict, str]=None):
         self.win_length:int = 2048
         self.n_speakers:int = 300

-        self.langauges:list = ["auto", "en", "zh", "ja", "all_zh", "all_ja"]
+        self.languages:list = ["auto", "en", "zh", "ja", "all_zh", "all_ja"]
         # print(self)

     def _load_configs(self, configs_path: str)->dict:
@@ -207,19 +207,19 @@ def __init__(self, configs: Union[dict, str, TTS_Config]):


         self.prompt_cache:dict = {
-            "ref_audio_path":None,
-            "prompt_semantic":None,
-            "refer_spepc":None,
-            "prompt_text":None,
-            "prompt_lang":None,
-            "phones":None,
-            "bert_features":None,
-            "norm_text":None,
+            "ref_audio_path" : None,
+            "prompt_semantic": None,
+            "refer_spec"     : None,
+            "prompt_text"    : None,
+            "prompt_lang"    : None,
+            "phones"         : None,
+            "bert_features"  : None,
+            "norm_text"      : None,
         }


         self.stop_flag:bool = False
-        self.precison:torch.dtype = torch.float16 if self.configs.is_half else torch.float32
+        self.precision:torch.dtype = torch.float16 if self.configs.is_half else torch.float32

     def _init_models(self,):
         self.init_t2s_weights(self.configs.t2s_weights_path)
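The cache-key rename ("refer_spepc" to "refer_spec") is breaking for any external code that indexes prompt_cache directly. A minimal compatibility sketch (the helper name is hypothetical, not part of this commit):

# Hypothetical helper: read the reference spectrogram while tolerating caches
# populated by a pre-#916 version that still used the misspelled key.
def get_refer_spec(prompt_cache: dict):
    return prompt_cache.get("refer_spec", prompt_cache.get("refer_spepc"))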
@@ -312,7 +312,7 @@ def enable_half_precision(self, enable: bool = True):
             return

         self.configs.is_half = enable
-        self.precison = torch.float16 if enable else torch.float32
+        self.precision = torch.float16 if enable else torch.float32
         self.configs.save_configs()
         if enable:
             if self.t2s_model is not None:
@@ -358,9 +358,9 @@ def set_ref_audio(self, ref_audio_path:str):
             ref_audio_path: str, the path of the reference audio.
         '''
         self._set_prompt_semantic(ref_audio_path)
-        self._set_ref_spepc(ref_audio_path)
+        self._set_ref_spec(ref_audio_path)

-    def _set_ref_spepc(self, ref_audio_path):
+    def _set_ref_spec(self, ref_audio_path):
         audio = load_audio(ref_audio_path, int(self.configs.sampling_rate))
         audio = torch.FloatTensor(audio)
         audio_norm = audio
@@ -376,8 +376,8 @@ def _set_ref_spepc(self, ref_audio_path):
         spec = spec.to(self.configs.device)
         if self.configs.is_half:
             spec = spec.half()
-        # self.refer_spepc = spec
-        self.prompt_cache["refer_spepc"] = spec
+        # self.refer_spec = spec
+        self.prompt_cache["refer_spec"] = spec


     def _set_prompt_semantic(self, ref_wav_path:str):
@@ -435,7 +435,7 @@ def to_batch(self, data:list,
                  threshold:float=0.75,
                  split_bucket:bool=True,
                  device:torch.device=torch.device("cpu"),
-                 precison:torch.dtype=torch.float32,
+                 precision:torch.dtype=torch.float32,
                  ):

         _data:list = []
@@ -488,13 +488,13 @@ def to_batch(self, data:list,
             for item in item_list:
                 if prompt_data is not None:
                     all_bert_features = torch.cat([prompt_data["bert_features"], item["bert_features"]], 1)\
-                        .to(dtype=precison, device=device)
+                        .to(dtype=precision, device=device)
                     all_phones = torch.LongTensor(prompt_data["phones"]+item["phones"]).to(device)
                     phones = torch.LongTensor(item["phones"]).to(device)
                     # norm_text = prompt_data["norm_text"]+item["norm_text"]
                 else:
                     all_bert_features = item["bert_features"]\
-                        .to(dtype=precison, device=device)
+                        .to(dtype=precision, device=device)
                     phones = torch.LongTensor(item["phones"]).to(device)
                     all_phones = phones
                     # norm_text = item["norm_text"]
@@ -519,7 +519,7 @@ def to_batch(self, data:list,
             #### Padding phones and bert_features directly here increases the probability of repeated speech.
             # all_phones_batch = self.batch_sequences(all_phones_list, axis=0, pad_value=0, max_length=max_len)
             # all_bert_features_batch = all_bert_features_list
-            # all_bert_features_batch = torch.zeros(len(item_list), 1024, max_len, dtype=precison, device=device)
+            # all_bert_features_batch = torch.zeros(len(item_list), 1024, max_len, dtype=precision, device=device)
             # for idx, item in enumerate(all_bert_features_list):
             #     all_bert_features_batch[idx, :, : item.shape[-1]] = item

@@ -555,8 +555,8 @@ def recovery_order(self, data:list, batch_index_list:list)->list:
         Returns:
             list (List[np.ndarray]): the data in the original order.
         '''
-        lenght = len(sum(batch_index_list, []))
-        _data = [None]*lenght
+        length = len(sum(batch_index_list, []))
+        _data = [None]*length
         for i, index_list in enumerate(batch_index_list):
             for j, index in enumerate(index_list):
                 _data[index] = data[i][j]
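Beyond the lenght/length fix, the hunk above fully defines recovery_order, so a standalone sketch of the same logic can illustrate it (semantics inferred from the diff, with a toy example):

# batch_index_list records, per batch, the original position of each item;
# recovery_order inverts the bucketed batching.
def recovery_order(data: list, batch_index_list: list) -> list:
    length = len(sum(batch_index_list, []))      # total items across all batches
    _data = [None] * length
    for i, index_list in enumerate(batch_index_list):
        for j, index in enumerate(index_list):
            _data[index] = data[i][j]            # item j of batch i returns to `index`
    return _data

# Example: two batches built from original indices [2, 0] and [1]:
# recovery_order([["c", "a"], ["b"]], [[2, 0], [1]]) == ["a", "b", "c"]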
@@ -584,7 +584,7 @@ def run(self, inputs:dict):
                     "top_k": 5,                   # int. top k sampling
                     "top_p": 1,                   # float. top p sampling
                     "temperature": 1,             # float. temperature for sampling
-                    "text_split_method": "cut0",  # str. text split method, see text_segmentaion_method.py for details.
+                    "text_split_method": "cut0",  # str. text split method, see text_segmentation_method.py for details.
                     "batch_size": 1,              # int. batch size for inference
                     "batch_threshold": 0.75,      # float. threshold for batch splitting.
                     "split_bucket: True,          # bool. whether to split the batch into multiple buckets.
@@ -594,7 +594,7 @@ def run(self, inputs:dict):
                     "seed": -1,                   # int. random seed for reproducibility.
                 }
         returns:
-            tulpe[int, np.ndarray]: sampling rate and audio data.
+            tuple[int, np.ndarray]: sampling rate and audio data.
         """
         ########## variables initialization ###########
         self.stop_flag:bool = False
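With the docstring typos fixed, a minimal usage sketch follows from it. The text and paths below are placeholders, and treating run() as a generator of (sampling_rate, audio) pairs is an assumption consistent with how inference_webui.py iterates it further down:

# Assumes an already constructed pipeline: tts_pipeline = TTS(tts_config)
inputs = {
    "text": "Hello there.",                 # placeholder input text
    "text_lang": "en",
    "ref_audio_path": "ref.wav",            # placeholder reference audio path
    "prompt_text": "reference transcript",  # placeholder prompt text
    "prompt_lang": "en",
    "top_k": 5,
    "top_p": 1,
    "temperature": 1,
    "text_split_method": "cut0",
    "batch_size": 1,
    "seed": -1,
}
for sampling_rate, audio_data in tts_pipeline.run(inputs):
    pass  # e.g. write audio_data (np.ndarray) to a file at sampling_rate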
@@ -635,12 +635,12 @@ def run(self, inputs:dict):
         if prompt_text in [None, ""]:
             no_prompt_text = True

-        assert text_lang in self.configs.langauges
+        assert text_lang in self.configs.languages
         if not no_prompt_text:
-            assert prompt_lang in self.configs.langauges
+            assert prompt_lang in self.configs.languages

         if ref_audio_path in [None, ""] and \
-            ((self.prompt_cache["prompt_semantic"] is None) or (self.prompt_cache["refer_spepc"] is None)):
+            ((self.prompt_cache["prompt_semantic"] is None) or (self.prompt_cache["refer_spec"] is None)):
             raise ValueError("ref_audio_path cannot be empty, when the reference audio is not set using set_ref_audio()")


@@ -682,7 +682,7 @@ def run(self, inputs:dict):
                     threshold=batch_threshold,
                     split_bucket=split_bucket,
                     device=self.configs.device,
-                    precison=self.precison
+                    precision=self.precision
                 )
         else:
             print(i18n("############ 切分文本 ############"))
@@ -714,7 +714,7 @@ def make_batch(batch_texts):
                     threshold=batch_threshold,
                     split_bucket=False,
                     device=self.configs.device,
-                    precison=self.precison
+                    precision=self.precision
                 )
                 return batch[0]

@@ -760,8 +760,8 @@ def make_batch(batch_texts):
                 t4 = ttime()
                 t_34 += t4 - t3

-                refer_audio_spepc:torch.Tensor = self.prompt_cache["refer_spepc"]\
-                    .to(dtype=self.precison, device=self.configs.device)
+                refer_audio_spec:torch.Tensor = self.prompt_cache["refer_spec"]\
+                    .to(dtype=self.precision, device=self.configs.device)

                 batch_audio_fragment = []

@@ -775,7 +775,7 @@ def make_batch(batch_texts):
                 # batch_phones = self.batch_sequences(batch_phones, axis=0, pad_value=0, max_length=max_len)
                 # batch_phones = batch_phones.to(self.configs.device)
                 # batch_audio_fragment = (self.vits_model.batched_decode(
-                #         pred_semantic, pred_semantic_len, batch_phones, batch_phones_len,refer_audio_spepc
+                #         pred_semantic, pred_semantic_len, batch_phones, batch_phones_len,refer_audio_spec
                 #     ))

                 # ## VITS parallel inference, method 2
@@ -786,7 +786,7 @@ def make_batch(batch_texts):
                     all_pred_semantic = torch.cat(pred_semantic_list).unsqueeze(0).unsqueeze(0).to(self.configs.device)
                     _batch_phones = torch.cat(batch_phones).unsqueeze(0).to(self.configs.device)
                     _batch_audio_fragment = (self.vits_model.decode(
-                            all_pred_semantic, _batch_phones,refer_audio_spepc
+                            all_pred_semantic, _batch_phones, refer_audio_spec
                         ).detach()[0, 0, :])
                     audio_frag_end_idx.insert(0, 0)
                     batch_audio_fragment= [_batch_audio_fragment[audio_frag_end_idx[i-1]:audio_frag_end_idx[i]] for i in range(1, len(audio_frag_end_idx))]
@@ -797,7 +797,7 @@ def make_batch(batch_texts):
                     #     phones = batch_phones[i].unsqueeze(0).to(self.configs.device)
                     #     _pred_semantic = (pred_semantic_list[i][-idx:].unsqueeze(0).unsqueeze(0))    # .unsqueeze(0)  # mq: one extra unsqueeze is needed here
                     #     audio_fragment =(self.vits_model.decode(
-                    #             _pred_semantic, phones, refer_audio_spepc
+                    #             _pred_semantic, phones, refer_audio_spec
                     #         ).detach()[0, 0, :])
                     #     batch_audio_fragment.append(
                     #         audio_fragment
@@ -866,7 +866,7 @@ def audio_postprocess(self,
                           )->tuple[int, np.ndarray]:
         zero_wav = torch.zeros(
                         int(self.configs.sampling_rate * fragment_interval),
-                        dtype=self.precison,
+                        dtype=self.precision,
                         device=self.configs.device
                     )

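The zero_wav tensor above sizes the silence inserted between fragments; a quick worked example (the 32 kHz rate is an assumption for illustration only):

import torch

sampling_rate, fragment_interval = 32000, 0.3
zero_wav = torch.zeros(int(sampling_rate * fragment_interval), dtype=torch.float32)
print(zero_wav.shape)  # torch.Size([9600]), i.e. 0.3 s of inter-fragment silence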
GPT_SoVITS/inference_webui.py

+5 −5
@@ -82,7 +82,7 @@
 tts_config.bert_base_path = bert_path

 print(tts_config)
-tts_pipline = TTS(tts_config)
+tts_pipeline = TTS(tts_config)
 gpt_path = tts_config.t2s_weights_path
 sovits_path = tts_config.vits_weights_path

@@ -113,7 +113,7 @@ def inference(text, text_lang,
         "fragment_interval":fragment_interval,
         "seed":actual_seed,
     }
-    for item in tts_pipline.run(inputs):
+    for item in tts_pipeline.run(inputs):
         yield item, actual_seed

 def custom_sort_key(s):
@@ -162,8 +162,8 @@ def get_weights_names():
             SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True)
             refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
             refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
-            SoVITS_dropdown.change(tts_pipline.init_vits_weights, [SoVITS_dropdown], [])
-            GPT_dropdown.change(tts_pipline.init_t2s_weights, [GPT_dropdown], [])
+            SoVITS_dropdown.change(tts_pipeline.init_vits_weights, [SoVITS_dropdown], [])
+            GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])

     with gr.Row():
         with gr.Column():
@@ -227,7 +227,7 @@ def get_weights_names():
             ],
             [output, seed],
         )
-        stop_infer.click(tts_pipline.stop, [], [])
+        stop_infer.click(tts_pipeline.stop, [], [])

         with gr.Group():
             gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
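A rename like this only sticks if no stale spellings survive elsewhere; a hedged self-check (an illustrative script, not part of the commit) can scan the tree for the misspellings fixed here:

# Scan GPT_SoVITS/*.py for leftover occurrences of the typos this commit fixes.
import pathlib
import re

TYPOS = re.compile(r"langauges|precison|spepc|lenght|tulpe|segmentaion|tts_pipline")
for path in pathlib.Path("GPT_SoVITS").rglob("*.py"):
    for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
        if TYPOS.search(line):
            print(f"{path}:{lineno}: {line.strip()}")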
