Skip to content

Commit 571222b

Browse files
authored
Normalize chinese arithmetic operations (RVC-Boss#947)
1 parent 4b500f9 commit 571222b

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

GPT_SoVITS/text/zh_normalization/num.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,29 @@ def replace_default_num(match):
106106
return verbalize_digit(number, alt_one=True)
107107

108108

109+
# Arithmetic: add / subtract / multiply / divide / equals.
# The pattern is "operand operator operand". An operand is either an
# optionally negative integer/decimal or a bare fraction such as ".5".
# Capture groups used by callers: 1 = left operand, 8 = operator symbol,
# 9 = right operand.
RE_ASMD = re.compile(
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))'
    r'([\+\-\×÷=])'
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))')

# Chinese reading for every operator symbol RE_ASMD can capture.
asmd_map = {'+': '加', '-': '减', '×': '乘', '÷': '除', '=': '等于'}
119+
120+
121+
def replace_asmd(match) -> str:
    """Swap the operator symbol of one arithmetic match for its Chinese word.

    Args:
        match (re.Match): a match whose group 1 is the left operand,
            group 8 the operator symbol and group 9 the right operand.
    Returns:
        str: left operand, the asmd_map entry for the operator, and the
            right operand, concatenated. The operands are not converted here.
    """
    left, operator, right = match.group(1, 8, 9)
    return "".join((left, asmd_map[operator], right))
130+
131+
109132
# Numeric expressions
# Pure decimals: an optionally negative "digits.digits" number,
# or a bare ".digits" fraction.
RE_DECIMAL_NUM = re.compile(
    r'(-?)((\d+)(\.\d+))'
    r'|(\.(\d+))')
@@ -155,7 +178,13 @@ def replace_number(match) -> str:
155178
# match.group(1) and match.group(8) are copy from RE_NUMBER
156179

157180
# Numeric range such as 1-2, 0.5~1.5 or -3~-1.
# Group 1 is the range start and group 6 the range end; replace_range reads
# exactly these two groups, so the capture-group layout must not change.
RE_RANGE = re.compile(
    r"""
    (?<![\d+\-×÷=])        # not directly after a digit or an arithmetic operator
    (?<!\d\.)              # not inside a decimal: the 5 of 3+1.5-2 must not start a range
    ((-?)((\d+)(\.\d+)?))  # range start: optionally negative integer or decimal
    [-~]                   # range separator
    ((-?)((\d+)(\.\d+)?))  # range end: optionally negative integer or decimal
    (?![\d+\-×÷=]|\.\d)    # not directly before a digit, an operator or a decimal tail;
                           # the decimal-tail check stops 1-2.5+3 from backtracking
                           # to the bogus range 1-2
    """, re.VERBOSE)
159188

160189

161190
def replace_range(match) -> str:
@@ -165,7 +194,7 @@ def replace_range(match) -> str:
165194
Returns:
166195
str
167196
"""
168-
first, second = match.group(1), match.group(8)
197+
first, second = match.group(1), match.group(6)
169198
first = RE_NUMBER.sub(replace_number, first)
170199
second = RE_NUMBER.sub(replace_number, second)
171200
result = f"{first}{second}"

GPT_SoVITS/text/zh_normalization/text_normlization.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from .num import RE_POSITIVE_QUANTIFIERS
3535
from .num import RE_RANGE
3636
from .num import RE_TO_RANGE
37+
from .num import RE_ASMD
3738
from .num import replace_default_num
3839
from .num import replace_frac
3940
from .num import replace_negative_num
@@ -42,6 +43,7 @@
4243
from .num import replace_positive_quantifier
4344
from .num import replace_range
4445
from .num import replace_to_range
46+
from .num import replace_asmd
4547
from .phonecode import RE_MOBILE_PHONE
4648
from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
4749
from .phonecode import RE_TELEPHONE
@@ -67,7 +69,7 @@ def _split(self, text: str, lang="zh") -> List[str]:
6769
if lang == "zh":
6870
text = text.replace(" ", "")
6971
# 过滤掉特殊字符
70-
text = re.sub(r'[——《》【】<=>{}()()#&@“”^_|\\]', '', text)
72+
text = re.sub(r'[——《》【】<>{}()()#&@“”^_|\\]', '', text)
7173
text = self.SENTENCE_SPLITOR.sub(r'\1\n', text)
7274
text = text.strip()
7375
sentences = [sentence.strip() for sentence in re.split(r'\n+', text)]
@@ -142,6 +144,11 @@ def normalize_sentence(self, sentence: str) -> str:
142144
sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence)
143145

144146
sentence = RE_RANGE.sub(replace_range, sentence)
147+
148+
# 处理加减乘除
149+
while RE_ASMD.search(sentence):
150+
sentence = RE_ASMD.sub(replace_asmd, sentence)
151+
145152
sentence = RE_INTEGER.sub(replace_negative_num, sentence)
146153
sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
147154
sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier,

0 commit comments

Comments
 (0)