Skip to content

Commit

Permalink
Merge pull request #3 from pablodz/feat/pt
Browse files Browse the repository at this point in the history
portuguese
  • Loading branch information
pablodz committed May 18, 2024
2 parents 22c4b23 + 4bb12e0 commit fc1c55f
Show file tree
Hide file tree
Showing 9 changed files with 629 additions and 19 deletions.
15 changes: 15 additions & 0 deletions examples/alpha/pt/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package main

import (
"github.com/pablodz/itn/itn"
)

// main runs the Portuguese example: with debug logging enabled it converts a
// spelled-out number to digits, prints the result, and then prints the
// expected value underneath a separator for visual comparison.
func main() {
	itn.SetDebug(true)

	// Alpha2Digit is declared with a value receiver, so calling it through a
	// nil *Language would crash with a nil-pointer dereference and hide the
	// real cause. Surface the constructor error instead of discarding it.
	processor, err := itn.NewLanguage(itn.Portuguese)
	if err != nil {
		panic(err)
	}

	converted := processor.Alpha2Digit("Trezentos e setenta e oito milhões vinte e sete mil trezentos e doze", false, true, 3)
	println(converted)
	println("-----------------------------------------------------")
	println("378027312")
}
50 changes: 35 additions & 15 deletions itn/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type Language struct {
Simplify_check_coef_appliable bool // Optional
RadMap map[string]string // Optional
Composites map[string]int // Optional
PtOrdinals map[string]string // Only for Portuguese
}

type RelaxTuple struct {
Expand All @@ -36,8 +37,18 @@ type RelaxTuple struct {

func (lg *Language) Ord2Card(word string) string {
switch lg.LangCode {
case Portuguese:
logPrintf(">>>> Ord2Card.0 [word] %s", word)
if len(word) < 1 {
return ""
}
ordinal, ok := lg.PtOrdinals[word[:len(word)-1]]
if !ok {
return ""
}
return ordinal
case English:
logPrintf(">>>> Ord2Card.0 %s", word)
logPrintf(">>>> Ord2Card.1 %s", word)
plurSuff := strings.HasSuffix(word, "ths")
singSuff := strings.HasSuffix(word, "th")
source := ""
Expand All @@ -49,7 +60,7 @@ func (lg *Language) Ord2Card(word string) string {
} else if strings.HasSuffix(word, "third") {
source = strings.ReplaceAll(word, "third", "three")
} else {
logPrintf(">>>> Ord2Card.1 %s", word)
logPrintf(">>>> Ord2Card.2 %s", word)
return ""
}
} else {
Expand All @@ -73,14 +84,13 @@ func (lg *Language) Ord2Card(word string) string {
}

if !containsKey(lg.Numbers, source) {
logPrintf(">>>> Ord2Card.2 %s", source)
logPrintf(">>>> Ord2Card.3 %s", source)
return ""
}

logPrintf(">>>> Ord2Card.3 %s", source)
logPrintf(">>>> Ord2Card.4 %s", source)
return source
case Spanish:
return ""

default:
return ""
}
Expand All @@ -89,6 +99,7 @@ func (lg *Language) Ord2Card(word string) string {
func (lg *Language) NumOrd(digits string, originalWord string) string {
switch lg.LangCode {
case English:
logPrintf(">>>> NumOrd.0 %s", originalWord)
sf := ""
if strings.HasSuffix(originalWord, "s") {
sf = originalWord[len(originalWord)-3:]
Expand All @@ -98,14 +109,16 @@ func (lg *Language) NumOrd(digits string, originalWord string) string {

return fmt.Sprintf("%s%s", digits, sf)

case Spanish:

case Portuguese, Spanish:
logPrintf(">>>> NumOrd.1 %s", originalWord)
if strings.HasSuffix(originalWord, "o") {
return fmt.Sprintf("%sº", digits)
}

return fmt.Sprintf("%sª", digits)
}

logPrintf(">>>> NumOrd.2 ❌ %s", originalWord)
return "ERROR"
}

Expand All @@ -120,7 +133,12 @@ func (lg *Language) NotNumericWord(word string) bool {
return word == "" || word != lg.DecimalSep && !containsKey(lg.Numbers, word) && !contains(lg.Zero, word)
}

var WORDSEP = regexp.MustCompile(`\s*[\.,;\(\)…\[\]:!\?]+\s*|\n`)
// UsePTOrdinalsMerger gates the Portuguese-only post-processing step of
// Alpha2Digit: when true, the converted text is additionally passed through
// omg.MergeCompoundOrdinalsPT.
const UsePTOrdinalsMerger = true

// WORDSEP matches either a run of punctuation characters
// (. , ; ( ) … [ ] : ! ?) together with any surrounding whitespace, or a
// single newline.
// NOTE(review): presumably used to split input text into segments — confirm
// against the callers.
var WORDSEP = regexp.MustCompile(`\s*[\.,;\(\)…\[\]:!\?]+\s*|\n`)

// omg is the package-level OrdinalsMerger whose MergeCompoundOrdinalsPT
// method Alpha2Digit calls for Portuguese text.
var omg = OrdinalsMerger{}

type segmentAndPunct struct {
segment string
Expand Down Expand Up @@ -171,12 +189,7 @@ func (lg Language) Alpha2Digit(text string, relaxed bool, signed bool, ordinalTh

segmentAndPuncts := []segmentAndPunct{}
for i, segment := range segments {
segmentAndPuncts = append(segmentAndPuncts,
segmentAndPunct{
segment,
punct[i],
},
)
segmentAndPuncts = append(segmentAndPuncts, segmentAndPunct{segment, punct[i]})
}

outSegments := []string{}
Expand Down Expand Up @@ -226,5 +239,12 @@ func (lg Language) Alpha2Digit(text string, relaxed bool, signed bool, ordinalTh
}
text = strings.Join(outSegments, "")

logPrintf(">>> [text] %s", text)

// Post-Processing
if lg.LangCode == Portuguese && UsePTOrdinalsMerger {
text = omg.MergeCompoundOrdinalsPT(text)
}

return text
}
186 changes: 186 additions & 0 deletions itn/i18n.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,192 @@ func NewLanguage(LangCode LanguageCode) (*Language, error) {
maps.Copy(l.Numbers, l.Hundred)
maps.Copy(l.Numbers, l.Composites)

return l, nil
case Portuguese:

l := &Language{
LangCode: LangCode,
Multipliers: map[string]int{
"mil": 1000,
"milhar": 1000,
"milhares": 1000,
"milhao": 1000000,
"milhão": 1000000,
"milhoes": 1000000,
"milhões": 1000000,
"bilhao": 1000000000,
"bilhão": 1000000000,
"bilhoes": 1000000000,
"bilhões": 1000000000,
"trilhao": 1000000000000,
"trilhão": 1000000000000,
"trilhoes": 1000000000000,
"trilhões": 1000000000000,
},
Units: map[string]int{
"um": 1,
"dois": 2,
"três": 3,
"quatro": 4,
"cinco": 5,
"seis": 6,
"sete": 7,
"oito": 8,
"nove": 9,
"uma": 1, // optional
"duas": 2, // optional
"tres": 3, // without accent
"catorze": 14, // without accent
"dezesseis": 16, // without accent
"dezessete": 17, // without accent
"dezenove": 19, // without accent
},
STens: map[string]int{
"dez": 10,
"onze": 11,
"doze": 12,
"treze": 13,
"catorze": 14,
"quinze": 15,
"dezasseis": 16,
"dezassete": 17,
"dezoito": 18,
"dezanove": 19,
},
MTens: map[string]int{
"vinte": 20,
"trinta": 30,
"quarenta": 40,
"cinquenta": 50,
"sessenta": 60,
"setenta": 70,
"oitenta": 80,
"noventa": 90,
},
MTensWSTens: []string{},
Hundred: map[string]int{
"cem": 100,
"centena": 100,
"cento": 100,
"centenas": 100,
"duzentos": 200,
"duzentas": 200,
"trezentos": 300,
"trezentas": 300,
"quatrocentos": 400,
"quatrocentas": 400,
"quinhentos": 500,
"quinhentas": 500,
"seiscentos": 600,
"seiscentas": 600,
"setecentos": 700,
"setecentas": 700,
"oitocentos": 800,
"oitocentas": 800,
"novecentos": 900,
"novecentas": 900,
},
Sign: map[string]string{
"mais": "+",
"menos": "-",
},
Zero: []string{
"zero",
},
DecimalSep: "vírgula",
DecimalSYM: ",",
AndNums: []string{
"um",
"uma",
"duas",
"dois",
"três",
"tres",
"quatro",
"cinco",
"seis",
"sete",
"oito",
"nove",
"dez",
"onze",
"doze",
"treze",
"quatorze",
"catorze",
"quinze",
"dezasseis",
"dezesseis",
"dezassete",
"dezessete",
"dezoito",
"dezanove",
"dezenove",
"vinte",
"trinta",
"quarenta",
"cinquenta",
"sessenta",
"setenta",
"oitenta",
"noventa",
"cem",
"duzentos",
"trezentos",
"quatrocentos",
"quinhentos",
"seiscentos",
"setecentos",
"oitocentos",
"novecentos",
},

And: "e",
NeverIfAlone: []string{
"um",
"uma",
},
Relaxed: map[string]RelaxTuple{},
Composites: map[string]int{},
PtOrdinals: map[string]string{
"primeir": "um",
"segund": "dois",
"terceir": "três",
"quart": "quatro",
"quint": "cinco",
"sext": "seis",
"sétim": "sete",
"oitav": "oito",
"non": "nove",
"décim": "dez",
"vigésim": "vinte",
"trigésim": "trinta",
"quadragésim": "quarenta",
"quinquagésim": "cinquenta",
"sexagésim": "sessenta",
"septagésim": "setenta",
"octagésim": "oitenta",
"nonagésim": "noventa",
"centésim": "cem",
"ducentésim": "cem",
"trecentésim": "cem",
"quadrigentésim": "cem",
"quingentésim": "cem",
"sexgentésim": "cem",
"setingentésim": "cem",
"octigentésim": "cem",
"nonigentésim": "mil",
"milionésim": "milhão",
},
}

l.Numbers = maps.Clone(l.Multipliers)
maps.Copy(l.Numbers, l.Units)
maps.Copy(l.Numbers, l.STens)
maps.Copy(l.Numbers, l.MTens)
maps.Copy(l.Numbers, l.Hundred)
maps.Copy(l.Numbers, l.Composites)

return l, nil

default:
Expand Down
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit fc1c55f

Please sign in to comment.