Skip to content

Commit 798b676

Browse files
ko3n1g and blisc authored
Fix some bugs (#12153) (#12201)
* update outdated image to np code; update random sampling
* code cleanup

---------

Signed-off-by: Jason <[email protected]>
Co-authored-by: Jason <[email protected]>
1 parent c5b3112 commit 798b676

File tree

2 files changed

+7
-10
lines changed

2 files changed

+7
-10
lines changed

nemo/collections/tts/data/dataset.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,10 @@ def __init__(
190190
self.phoneme_probability = getattr(self.text_tokenizer, "phoneme_probability", None)
191191
else:
192192
if text_tokenizer_pad_id is None:
193-
raise ValueError(f"text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer")
193+
raise ValueError("text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer")
194194

195195
if tokens is None:
196-
raise ValueError(f"tokens must be specified if text_tokenizer is not BaseTokenizer")
196+
raise ValueError("tokens must be specified if text_tokenizer is not BaseTokenizer")
197197

198198
self.text_tokenizer_pad_id = text_tokenizer_pad_id
199199
self.cache_text = True if self.phoneme_probability is None else False
@@ -496,7 +496,7 @@ def add_reference_audio(self, **kwargs):
496496
speaker_to_index_map[d["speaker_id"]].add(i)
497497
# Random sample a reference audio from the same speaker
498498
self.get_reference_for_sample = lambda sample: self.data[
499-
random.sample(speaker_to_index_map[sample["speaker_id"]], 1)[0]
499+
random.choice(speaker_to_index_map[tuple(sample["speaker_id"])])
500500
]
501501
elif reference_audio_type == "ground-truth":
502502
# Use ground truth audio as reference audio
@@ -679,7 +679,7 @@ def __getitem__(self, index):
679679
sample_pitch_mean = pitch_stats["pitch_mean"]
680680
sample_pitch_std = pitch_stats["pitch_std"]
681681
else:
682-
raise ValueError(f"Missing statistics for pitch normalization.")
682+
raise ValueError("Missing statistics for pitch normalization.")
683683

684684
pitch -= sample_pitch_mean
685685
pitch[pitch == -sample_pitch_mean] = 0.0 # Zero out values that were previously zero

nemo/collections/tts/parts/utils/helpers.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -632,10 +632,8 @@ def plot_gate_outputs_to_numpy(gate_targets, gate_outputs):
632632

633633

634634
def save_figure_to_numpy(fig):
635-
# save it to a numpy array.
636-
data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
637-
data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
638-
return data
635+
img_array = np.array(fig.canvas.renderer.buffer_rgba())
636+
return img_array
639637

640638

641639
@rank_zero_only
@@ -802,8 +800,7 @@ def clip_grad_value_(parameters, clip_value, norm_type=2):
802800

803801

804802
def convert_pad_shape(pad_shape):
805-
l = pad_shape[::-1]
806-
pad_shape = [item for sublist in l for item in sublist]
803+
pad_shape = [item for sublist in pad_shape[::-1] for item in sublist]
807804
return pad_shape
808805

809806

0 commit comments

Comments
 (0)