Skip to content

Commit

Permalink
Merge pull request #350 from ohanan/master
Browse files Browse the repository at this point in the history
feat(reader): work for cjk in findSubMatchReader
  • Loading branch information
yuin committed Apr 6, 2023
2 parents c6f0e7e + 023c1d9 commit 8e2127f
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 7 deletions.
17 changes: 10 additions & 7 deletions text/reader.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package text

import (
"bytes"
"io"
"regexp"
"unicode/utf8"
Expand Down Expand Up @@ -537,24 +538,26 @@ func matchReader(r Reader, reg *regexp.Regexp) bool {
}

// findSubMatchReader runs reg against the runes supplied by r and returns one
// byte slice per index pair of the match (the whole match first, then each
// sub-match), or nil when reg does not match. On a match the reader is put
// back at the position it had on entry and then advanced by
// match[1]-match[0].
//
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of the commit (diff markers are not visible): the
// oldline/oldLine pairs, the runes/bb pairs and the two declarations of
// result look like old/new variants of the same statement — confirm against
// the repository before treating this text as a compilable unit.
func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
// Remember where the reader currently is so the position can be restored
// after the regexp has been run against it.
oldline, oldseg := r.Position()
oldLine, oldSeg := r.Position()
match := reg.FindReaderSubmatchIndex(r)
// Rewind to the saved position before re-reading the matched text.
r.SetPosition(oldline, oldseg)
r.SetPosition(oldLine, oldSeg)
if match == nil {
return nil
}
// Storage for the text re-read from the reader: the runes variant keeps one
// element per rune, the bb variant keeps the UTF-8 encoded bytes.
runes := make([]rune, 0, match[1]-match[0])
var bb bytes.Buffer
bb.Grow(match[1] - match[0])
// Re-read runes from the saved position until match[1] worth of rune sizes
// has been consumed. Inside the loop body r is the rune just read and
// shadows the Reader parameter; the third result of readRuneReader is
// discarded.
for i := 0; i < match[1]; {
r, size, _ := readRuneReader(r)
i += size
runes = append(runes, r)
bb.WriteRune(r)
}
result := [][]byte{}
bs := bb.Bytes()
var result [][]byte
// Slice out each (start, end) index pair of the match.
// NOTE(review): i above advances by rune size, so match presumably holds
// byte offsets; slicing runes with them is only equivalent for single-byte
// text, whereas slicing bs uses the offsets as byte positions — confirm.
// NOTE(review): index pairs are sliced without a sign check; verify how
// optional groups that did not participate in the match are reported.
for i := 0; i < len(match); i += 2 {
result = append(result, []byte(string(runes[match[i]:match[i+1]])))
result = append(result, bs[match[i]:match[i+1]])
}

// Restore the entry position, then move past the matched text.
r.SetPosition(oldline, oldseg)
r.SetPosition(oldLine, oldSeg)
r.Advance(match[1] - match[0])
return result
}
Expand Down
16 changes: 16 additions & 0 deletions text/reader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package text

import (
"regexp"
"testing"
)

// TestFindSubMatchReader checks that FindSubMatch yields exactly two entries
// for a single-group pattern and that the captured group equals the
// multi-byte (CJK) text placed between the colons.
func TestFindSubMatchReader(t *testing.T) {
	const want = "微笑"
	pattern := regexp.MustCompile(`:(\p{L}+):`)
	reader := NewReader([]byte(":" + want + ":"))

	groups := reader.FindSubMatch(pattern)
	// Expect the whole match plus the one capture group.
	if len(groups) != 2 {
		t.Fatal("no match cjk")
	}
	if got := string(groups[1]); got != want {
		t.Fatal("no match cjk")
	}
}

0 comments on commit 8e2127f

Please sign in to comment.