Skip to content

Commit fbaf731

Browse files
committed
Added translit generator script
1 parent 41c5c4a commit fbaf731

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

devtools/translit/generate.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python3
2+
3+
# This script generates a Unicode character transliteration
4+
# table into sequences of individual characters.
5+
6+
# For each Unicode character from decimal value 128 up to, but not including, 0xffff,
7+
# its transliteration is obtained using the `unidecode` library.
8+
9+
from unidecode import unidecode
10+
11+
# Characters that need a backslash escape inside a C character literal.
# The double quote does not strictly need one, but it is escaped to match
# the table format this generator has always produced.
_C_NAMED_ESCAPES = {
    "\\": "\\\\",
    "'": "\\'",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
}


def _c_char_literal(ch):
    """Return the single character *ch* as a quoted C character literal."""
    escaped = _C_NAMED_ESCAPES.get(ch)
    if escaped is None:
        code = ord(ch)
        # Remaining control characters would otherwise be written raw
        # between the quotes; emit them as three-digit octal escapes.
        escaped = f"\\{code:03o}" if code < 0x20 or code == 0x7f else ch
    return f"'{escaped}'"


def unicode_table(start=128, stop=0xffff, transliterate=None):
    """Print the transliteration table records and return the index map.

    Every code point in ``range(start, stop)`` is transliterated. Each
    non-empty result is printed to stdout as one record of the form
    ``N, 'c1', 'c2', ..., `` where ``N`` is the number of characters that
    follow. A line break is printed after every sixth record.

    Args:
        start: First code point to process (inclusive).
        stop: Code point to stop at (exclusive). With the default the last
            processed code point is 0xfffe.
        transliterate: Callable taking a one-character string and returning
            its replacement string. Defaults to the ``unidecode`` function
            imported by this file.

    Returns:
        A list with one entry per processed code point: the offset of that
        character's record (its length element) within the printed table,
        or -1 when the character has no transliteration.
    """
    if transliterate is None:
        transliterate = unidecode

    index_map = []
    record_count = 0
    offset = 0
    for code_point in range(start, stop):
        # Only the conversion itself is guarded; printing and bookkeeping
        # stay outside so a failure cannot leave a half-recorded entry.
        try:
            replacement = transliterate(chr(code_point))
        except ValueError:
            index_map.append(-1)
            continue

        if not replacement:
            index_map.append(-1)
            continue

        # NOTE(review): replacements may contain control characters (a
        # newline is expected for some separators — confirm against the
        # unidecode data); _c_char_literal escapes them so the generated
        # C++ source stays valid.
        literals = [_c_char_literal(ch) for ch in replacement]
        length = len(replacement)
        print(f"{length}, " + ", ".join(literals) + ",", end=" ")
        record_count += 1
        index_map.append(offset)
        # Each record occupies its length element plus one slot per char.
        offset += length + 1
        if record_count % 6 == 0:
            print("")
    return index_map
37+
38+
# Emit the C++ definition of the packed transliteration table. The records
# themselves are written to stdout by unicode_table() while it builds the
# index map that it returns.
print('const char Data::translitTab[] = {')
index_map = unicode_table()
print('};')

# Emit the index map, breaking the output line after every tenth entry.
print('const int Data::tranlitIndexMap[] = {')
for emitted, table_offset in enumerate(index_map, start=1):
    print(f"{table_offset}, ", end="")
    if emitted % 10 == 0:
        print("")
print('};')

0 commit comments

Comments
 (0)