-
Notifications
You must be signed in to change notification settings - Fork 0
/
consonant.py
70 lines (60 loc) · 1.86 KB
/
consonant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import re
class Dictionary:
    r"""String-keyed lookup table used to transliterate text.

    The table (``self.map``) is built from a list of rule objects: each
    rule's ``name`` becomes a key and its ``unicode_repr`` (text containing
    literal ``\uXXXX`` escapes) is decoded into the value. The method names
    suggest the values are IPA symbols -- presumably so; confirm against the
    rule data.
    """

    # One literal backslash-u escape followed by exactly four hex digits.
    # Shared by uni2str and unistr so both recognise the same syntax.
    _UNICODE_ESCAPE = re.compile(r'\\u([0-9a-fA-F]{4})')

    def __init__(self, ruleList):
        """Build the lookup table from *ruleList*.

        Args:
            ruleList: iterable of objects exposing ``name`` and
                ``unicode_repr`` attributes.
        """
        # The next four attributes are only initialised here; nothing in
        # this class reads them. They are kept because outside code may.
        self.ipaList = None
        self.vowels = []
        self.consonants = []
        self.list = []
        self.map = {}
        for rule in ruleList:
            self.map[str(rule.name)] = self.uni2str(rule.unicode_repr)

    def chaoshan2IPA(self, data) -> str:
        """Transliterate *data* left to right using greedy longest match.

        The input is lower-cased first. At each position the longest slice
        that is a key of ``self.map`` is replaced by its value and scanning
        resumes right after it. If no key matches at some position the scan
        stops there and whatever was converted so far is returned.

        Args:
            data: text to convert.

        Returns:
            The concatenation of the matched values (``''`` for empty input).
        """
        data = data.lower()
        # Measure after lower(): lower-casing can change the length.
        length = len(data)
        pieces = []
        start = 0
        while start < length:
            # Try the longest candidate first; never look up the empty
            # slice, otherwise an '' key would match without advancing.
            for finish in range(length, start, -1):
                token = self.map.get(data[start:finish])
                if token is not None:
                    pieces.append(token)
                    start = finish
                    break
            else:
                # Nothing in the table starts here: give up on the rest.
                break
        return ''.join(pieces)

    def word2IPA(self, data) -> str:
        """Transliterate *data* by scanning from its end towards its start.

        The input is lower-cased first. A window ending at the current right
        boundary grows one character to the left at a time; as soon as the
        window is a key of ``self.map`` its value is recorded and the right
        boundary moves to the window's start. Characters that never become
        part of a matching window contribute nothing.

        Args:
            data: text to convert.

        Returns:
            The recorded values joined in left-to-right order.
        """
        # NOTE(review): the reviewed copy of this file had lost its
        # indentation. This implements the reading in which the window keeps
        # growing leftwards after a miss (so multi-character keys can
        # match) -- confirm against the original layout.
        data = data.lower()
        found = []
        finish = len(data)
        for start in range(len(data) - 1, -1, -1):
            token = self.map.get(data[start:finish])
            if token is not None:
                found.append(token)
                finish = start
        # Tokens were collected right to left; restore reading order.
        return ''.join(reversed(found))

    def uni2str(self, data) -> str:
        r"""Decode the ``\uXXXX`` escapes found in *data*.

        Only the escapes are kept: any other text in *data* is discarded,
        and sequences that are not a backslash, ``u`` and four hex digits
        are ignored. Escapes are located wherever they occur, so text before
        or between them no longer causes a ``ValueError``.

        Args:
            data: text containing literal ``\uXXXX`` escapes.

        Returns:
            One character per escape, in order (``''`` if there are none).
        """
        return ''.join(
            chr(int(code, 16)) for code in self._UNICODE_ESCAPE.findall(data)
        )

    @staticmethod
    def unistr(data) -> str:
        r"""Return *data* with each literal ``\uXXXX`` escape decoded.

        Unlike ``uni2str``, all text outside the escapes is preserved.

        Args:
            data: text possibly containing literal ``\uXXXX`` escapes.

        Returns:
            The text with every well-formed escape replaced by its character.
        """
        return Dictionary._UNICODE_ESCAPE.sub(
            lambda match: chr(int(match.group(1), 16)), data
        )