-
Notifications
You must be signed in to change notification settings - Fork 0
/
r2h.go
129 lines (115 loc) · 2.54 KB
/
r2h.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
package r2h
import (
"fmt"
"math"
"regexp"
"strings"
"unicode"
"unicode/utf8"
)
// Source patterns for the character-class checks used during conversion.
const (
	// alphaRegexString matches a non-empty string of ASCII letters only.
	alphaRegexString = `^[a-zA-Z]+$`
	// numberRegexString matches a non-empty string of ASCII digits only.
	// NOTE(review): the class lists the range 0-9 twice; the second range may
	// originally have been meant to cover full-width digits — confirm.
	numberRegexString = `^[0-90-9]+$`
)

// The patterns are compiled once at package scope so the helpers below never
// recompile them per call.
var (
	alphaRegex  = regexp.MustCompile(alphaRegexString)
	numberRegex = regexp.MustCompile(numberRegexString)
)
// substr returns the part of s between rune index start (inclusive) and an
// optional rune index end[0] (exclusive). Without an end argument the slice
// runs to the end of s. Indices count runes, not bytes; only the first
// element of end is consulted. Out-of-range indices panic, exactly like the
// underlying slice expression.
func substr(s string, start int, end ...int) string {
	runes := []rune(s)
	stop := len(runes)
	if len(end) > 0 {
		stop = end[0]
	}
	return string(runes[start:stop])
}
// charAt returns the rune at rune position index of s as a one-rune string.
// The position counts runes, not bytes; an out-of-range index panics.
func charAt(s string, index int) string {
	runes := []rune(s)
	return string(runes[index])
}
// isAlpha reports whether s matches alphaRegex, i.e. whether it is a
// non-empty string consisting only of ASCII letters.
func isAlpha(s string) bool {
	matched := alphaRegex.MatchString(s)
	return matched
}
// isNumber reports whether s matches numberRegex, i.e. whether it is a
// non-empty string consisting only of the digits 0-9.
func isNumber(s string) bool {
	matched := numberRegex.MatchString(s)
	return matched
}
// isHiragana reports whether every rune of s is hiragana according to
// isHiraganaRune. The empty string has no offending rune and so yields true.
func isHiragana(s string) bool {
	for _, r := range s {
		if isHiraganaRune(r) {
			continue
		}
		// The first non-hiragana rune settles the answer.
		return false
	}
	return true
}
// isVowel reports whether s is exactly one of the upper-case vowels
// "A", "I", "U", "E" or "O". Lower-case letters and longer strings are not
// vowels for this check.
func isVowel(s string) bool {
	switch s {
	case "A", "I", "U", "E", "O":
		return true
	default:
		return false
	}
}
// isHiraganaRune reports whether r belongs to the Unicode Hiragana script
// table.
func isHiraganaRune(r rune) bool {
	return unicode.Is(unicode.Hiragana, r)
}
// convertLetter looks for the longest prefix of us, at most three runes long,
// that is a key of dict.
//
// It returns the value stored for that prefix together with the number of
// runes the prefix spans. When none of the prefixes of length 3, 2 or 1 is
// present in dict (or us is empty) it returns ("", 0).
func convertLetter(us string) (kana string, length int) {
	// Candidates are tried longest first, capped at three runes.
	longest := int(math.Min(3, float64(utf8.RuneCountInString(us))))
	for n := longest; n > 0; n-- {
		prefix := substr(us, 0, n)
		if hit, ok := dict[prefix]; ok {
			return hit, utf8.RuneCountInString(prefix)
		}
	}
	return "", 0
}
func convertWords(s string, strict bool) (result string, isCompleted bool, err error) {
isCompleted = true
for utf8.RuneCountInString(s) > 0 {
us := strings.ToUpper(s)
kana, l := convertLetter(us)
if kana == "ん" && strings.HasPrefix(us, "NN") {
// issue #1
if utf8.RuneCountInString(us) >= 3 {
third := charAt(us, 2)
if isVowel(third) {
l = 1
} else {
l = 2
}
} else {
l = 2
}
} else if kana == "" {
if utf8.RuneCountInString(us) >= 3 {
head := charAt(us, 0)
next := charAt(us, 1)
if isAlpha(head) && head == next {
kana, l = convertLetter(substr(us, 0))
if kana == "" {
kana = dict["LTU"]
l = 1
}
}
}
if kana == "" {
kana = charAt(s, 0)
l = 1
if kana != " " && !isNumber(kana) && !isHiragana(kana) {
isCompleted = false
if strict {
return "", false, fmt.Errorf("%s is not romaji", kana)
}
}
}
}
result += kana
s = substr(s, l)
}
return
}
// Convert converts the romaji in s to hiragana. Characters that cannot be
// converted are copied to result unchanged. isCompleted is false when at
// least one such character is something other than a space, a digit or
// hiragana.
func Convert(s string) (result string, isCompleted bool) {
	// convertWords only reports an error in strict mode, so it is dropped here.
	kana, done, _ := convertWords(s, false)
	return kana, done
}
// ConvertStrict converts the romaji in s to hiragana. If s contains a
// character that cannot be converted and is not a space, a digit or hiragana,
// it returns an empty result together with an error naming that character.
func ConvertStrict(s string) (result string, err error) {
	kana, _, convErr := convertWords(s, true)
	return kana, convErr
}