123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360 |
- package goja
- import (
- "fmt"
- "github.com/dlclark/regexp2"
- "regexp"
- "unicode/utf16"
- "unicode/utf8"
- )
- type regexpPattern interface {
- FindSubmatchIndex(valueString, int) []int
- FindAllSubmatchIndex(valueString, int) [][]int
- FindAllSubmatchIndexUTF8(string, int) [][]int
- FindAllSubmatchIndexASCII(string, int) [][]int
- MatchString(valueString) bool
- }
- type regexp2Wrapper regexp2.Regexp
- type regexpWrapper regexp.Regexp
- type regexpObject struct {
- baseObject
- pattern regexpPattern
- source valueString
- global, multiline, ignoreCase, sticky bool
- }
- func (r *regexp2Wrapper) FindSubmatchIndex(s valueString, start int) (result []int) {
- wrapped := (*regexp2.Regexp)(r)
- var match *regexp2.Match
- var err error
- switch s := s.(type) {
- case asciiString:
- match, err = wrapped.FindStringMatch(string(s)[start:])
- case unicodeString:
- match, err = wrapped.FindRunesMatch(utf16.Decode(s[start:]))
- default:
- panic(fmt.Errorf("Unknown string type: %T", s))
- }
- if err != nil {
- return
- }
- if match == nil {
- return
- }
- groups := match.Groups()
- result = make([]int, 0, len(groups)<<1)
- for _, group := range groups {
- if len(group.Captures) > 0 {
- result = append(result, group.Index, group.Index+group.Length)
- } else {
- result = append(result, -1, 0)
- }
- }
- return
- }
- func (r *regexp2Wrapper) FindAllSubmatchIndexUTF8(s string, n int) [][]int {
- wrapped := (*regexp2.Regexp)(r)
- if n < 0 {
- n = len(s) + 1
- }
- results := make([][]int, 0, n)
- idxMap := make([]int, 0, len(s))
- runes := make([]rune, 0, len(s))
- for pos, rr := range s {
- runes = append(runes, rr)
- idxMap = append(idxMap, pos)
- }
- idxMap = append(idxMap, len(s))
- match, err := wrapped.FindRunesMatch(runes)
- if err != nil {
- return nil
- }
- i := 0
- for match != nil && i < n {
- groups := match.Groups()
- result := make([]int, 0, len(groups)<<1)
- for _, group := range groups {
- if len(group.Captures) > 0 {
- result = append(result, idxMap[group.Index], idxMap[group.Index+group.Length])
- } else {
- result = append(result, -1, 0)
- }
- }
- results = append(results, result)
- match, err = wrapped.FindNextMatch(match)
- if err != nil {
- return nil
- }
- i++
- }
- return results
- }
- func (r *regexp2Wrapper) FindAllSubmatchIndexASCII(s string, n int) [][]int {
- wrapped := (*regexp2.Regexp)(r)
- if n < 0 {
- n = len(s) + 1
- }
- results := make([][]int, 0, n)
- match, err := wrapped.FindStringMatch(s)
- if err != nil {
- return nil
- }
- i := 0
- for match != nil && i < n {
- groups := match.Groups()
- result := make([]int, 0, len(groups)<<1)
- for _, group := range groups {
- if len(group.Captures) > 0 {
- result = append(result, group.Index, group.Index+group.Length)
- } else {
- result = append(result, -1, 0)
- }
- }
- results = append(results, result)
- match, err = wrapped.FindNextMatch(match)
- if err != nil {
- return nil
- }
- i++
- }
- return results
- }
- func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]int {
- wrapped := (*regexp2.Regexp)(r)
- if n < 0 {
- n = len(s) + 1
- }
- results := make([][]int, 0, n)
- rd := runeReaderReplace{s.reader(0)}
- posMap := make([]int, s.length()+1)
- curPos := 0
- curRuneIdx := 0
- runes := make([]rune, 0, s.length())
- for {
- rn, size, err := rd.ReadRune()
- if err != nil {
- break
- }
- runes = append(runes, rn)
- posMap[curRuneIdx] = curPos
- curRuneIdx++
- curPos += size
- }
- posMap[curRuneIdx] = curPos
- match, err := wrapped.FindRunesMatch(runes)
- if err != nil {
- return nil
- }
- for match != nil {
- groups := match.Groups()
- result := make([]int, 0, len(groups)<<1)
- for _, group := range groups {
- if len(group.Captures) > 0 {
- start := posMap[group.Index]
- end := posMap[group.Index+group.Length]
- result = append(result, start, end)
- } else {
- result = append(result, -1, 0)
- }
- }
- results = append(results, result)
- match, err = wrapped.FindNextMatch(match)
- if err != nil {
- return nil
- }
- }
- return results
- }
- func (r *regexp2Wrapper) FindAllSubmatchIndex(s valueString, n int) [][]int {
- switch s := s.(type) {
- case asciiString:
- return r.FindAllSubmatchIndexASCII(string(s), n)
- case unicodeString:
- return r.findAllSubmatchIndexUTF16(s, n)
- default:
- panic("Unsupported string type")
- }
- }
- func (r *regexp2Wrapper) MatchString(s valueString) bool {
- wrapped := (*regexp2.Regexp)(r)
- switch s := s.(type) {
- case asciiString:
- matched, _ := wrapped.MatchString(string(s))
- return matched
- case unicodeString:
- matched, _ := wrapped.MatchRunes(utf16.Decode(s))
- return matched
- default:
- panic(fmt.Errorf("Unknown string type: %T", s))
- }
- }
- func (r *regexpWrapper) FindSubmatchIndex(s valueString, start int) (result []int) {
- wrapped := (*regexp.Regexp)(r)
- return wrapped.FindReaderSubmatchIndex(runeReaderReplace{s.reader(start)})
- }
- func (r *regexpWrapper) MatchString(s valueString) bool {
- wrapped := (*regexp.Regexp)(r)
- return wrapped.MatchReader(runeReaderReplace{s.reader(0)})
- }
- func (r *regexpWrapper) FindAllSubmatchIndex(s valueString, n int) [][]int {
- wrapped := (*regexp.Regexp)(r)
- switch s := s.(type) {
- case asciiString:
- return wrapped.FindAllStringSubmatchIndex(string(s), n)
- case unicodeString:
- return r.findAllSubmatchIndexUTF16(s, n)
- default:
- panic("Unsupported string type")
- }
- }
- func (r *regexpWrapper) FindAllSubmatchIndexUTF8(s string, n int) [][]int {
- wrapped := (*regexp.Regexp)(r)
- return wrapped.FindAllStringSubmatchIndex(s, n)
- }
- func (r *regexpWrapper) FindAllSubmatchIndexASCII(s string, n int) [][]int {
- return r.FindAllSubmatchIndexUTF8(s, n)
- }
- func (r *regexpWrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]int {
- wrapped := (*regexp.Regexp)(r)
- utf8Bytes := make([]byte, 0, len(s)*2)
- posMap := make(map[int]int)
- curPos := 0
- rd := runeReaderReplace{s.reader(0)}
- for {
- rn, size, err := rd.ReadRune()
- if err != nil {
- break
- }
- l := len(utf8Bytes)
- utf8Bytes = append(utf8Bytes, 0, 0, 0, 0)
- n := utf8.EncodeRune(utf8Bytes[l:], rn)
- utf8Bytes = utf8Bytes[:l+n]
- posMap[l] = curPos
- curPos += size
- }
- posMap[len(utf8Bytes)] = curPos
- rr := wrapped.FindAllSubmatchIndex(utf8Bytes, n)
- for _, res := range rr {
- for j, pos := range res {
- mapped, exists := posMap[pos]
- if !exists {
- panic("Unicode match is not on rune boundary")
- }
- res[j] = mapped
- }
- }
- return rr
- }
- func (r *regexpObject) execResultToArray(target valueString, result []int) Value {
- captureCount := len(result) >> 1
- valueArray := make([]Value, captureCount)
- matchIndex := result[0]
- lowerBound := matchIndex
- for index := 0; index < captureCount; index++ {
- offset := index << 1
- if result[offset] >= lowerBound {
- valueArray[index] = target.substring(int64(result[offset]), int64(result[offset+1]))
- lowerBound = result[offset]
- } else {
- valueArray[index] = _undefined
- }
- }
- match := r.val.runtime.newArrayValues(valueArray)
- match.self.setOwnStr("input", target, false)
- match.self.setOwnStr("index", intToValue(int64(matchIndex)), false)
- return match
- }
- func (r *regexpObject) execRegexp(target valueString) (match bool, result []int) {
- lastIndex := int64(0)
- if p := r.getStr("lastIndex", nil); p != nil {
- lastIndex = p.ToInteger()
- if lastIndex < 0 {
- lastIndex = 0
- }
- }
- index := lastIndex
- if !r.global && !r.sticky {
- index = 0
- }
- if index >= 0 && index <= target.length() {
- result = r.pattern.FindSubmatchIndex(target, int(index))
- }
- if result == nil || r.sticky && result[0] != 0 {
- r.setOwnStr("lastIndex", intToValue(0), true)
- return
- }
- match = true
- // We do this shift here because the .FindStringSubmatchIndex above
- // was done on a local subordinate slice of the string, not the whole string
- for i := range result {
- result[i] += int(index)
- }
- if r.global || r.sticky {
- r.setOwnStr("lastIndex", intToValue(int64(result[1])), true)
- }
- return
- }
- func (r *regexpObject) exec(target valueString) Value {
- match, result := r.execRegexp(target)
- if match {
- return r.execResultToArray(target, result)
- }
- return _null
- }
- func (r *regexpObject) test(target valueString) bool {
- match, _ := r.execRegexp(target)
- return match
- }
- func (r *regexpObject) clone() *Object {
- r1 := r.val.runtime.newRegexpObject(r.prototype)
- r1.source = r.source
- r1.pattern = r.pattern
- r1.global = r.global
- r1.ignoreCase = r.ignoreCase
- r1.multiline = r.multiline
- r1.sticky = r.sticky
- return r1.val
- }
- func (r *regexpObject) init() {
- r.baseObject.init()
- r._putProp("lastIndex", intToValue(0), true, false, false)
- }
|