|
@@ -11,7 +11,17 @@ import (
|
|
|
"unicode/utf16"
|
|
|
)
|
|
|
|
|
|
-type regexp2Wrapper regexp2.Regexp
|
|
|
+type regexp2MatchCache struct {
|
|
|
+ target valueString
|
|
|
+ runes []rune
|
|
|
+ posMap []int
|
|
|
+}
|
|
|
+
|
|
|
+type regexp2Wrapper struct {
|
|
|
+ rx *regexp2.Regexp
|
|
|
+ cache *regexp2MatchCache
|
|
|
+}
|
|
|
+
|
|
|
type regexpWrapper regexp.Regexp
|
|
|
|
|
|
type positionMapItem struct {
|
|
@@ -68,7 +78,7 @@ func compileRegexp2(src string, multiline, ignoreCase bool) (*regexp2Wrapper, er
|
|
|
return nil, fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", src, err1)
|
|
|
}
|
|
|
|
|
|
- return (*regexp2Wrapper)(regexp2Pattern), nil
|
|
|
+ return ®exp2Wrapper{rx: regexp2Pattern}, nil
|
|
|
}
|
|
|
|
|
|
func (p *regexpPattern) createRegexp2() {
|
|
@@ -107,14 +117,14 @@ func buildUTF8PosMap(s valueString) (positionMap, string) {
|
|
|
|
|
|
func (p *regexpPattern) findSubmatchIndex(s valueString, start int) []int {
|
|
|
if p.regexpWrapper == nil {
|
|
|
- return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode)
|
|
|
+ return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode, p.global || p.sticky)
|
|
|
}
|
|
|
if start != 0 {
|
|
|
// Unfortunately Go's regexp library does not allow starting from an arbitrary position.
|
|
|
// If we just drop the first _start_ characters of the string the assertions (^, $, \b and \B) will not
|
|
|
// work correctly.
|
|
|
p.createRegexp2()
|
|
|
- return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode)
|
|
|
+ return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode, p.global || p.sticky)
|
|
|
}
|
|
|
return p.regexpWrapper.findSubmatchIndex(s, p.unicode)
|
|
|
}
|
|
@@ -163,16 +173,41 @@ type regexpObject struct {
|
|
|
standard bool
|
|
|
}
|
|
|
|
|
|
-func (r *regexp2Wrapper) findSubmatchIndex(s valueString, start int, fullUnicode bool) (result []int) {
|
|
|
+func (r *regexp2Wrapper) findSubmatchIndex(s valueString, start int, fullUnicode, doCache bool) (result []int) {
|
|
|
if fullUnicode {
|
|
|
- return r.findSubmatchIndexUnicode(s, start)
|
|
|
+ return r.findSubmatchIndexUnicode(s, start, doCache)
|
|
|
+ }
|
|
|
+ return r.findSubmatchIndexUTF16(s, start, doCache)
|
|
|
+}
|
|
|
+
|
|
|
+func (r *regexp2Wrapper) findUTF16Cached(s valueString, start int, doCache bool) (match *regexp2.Match, runes []rune, err error) {
|
|
|
+ wrapped := r.rx
|
|
|
+ cache := r.cache
|
|
|
+ if cache != nil && cache.posMap == nil && cache.target.SameAs(s) {
|
|
|
+ runes = cache.runes
|
|
|
+ } else {
|
|
|
+ runes = s.utf16Runes()
|
|
|
+ cache = nil
|
|
|
+ }
|
|
|
+ match, err = wrapped.FindRunesMatchStartingAt(runes, start)
|
|
|
+ if doCache && match != nil && err == nil {
|
|
|
+ if cache == nil {
|
|
|
+ if r.cache == nil {
|
|
|
+ r.cache = new(regexp2MatchCache)
|
|
|
+ }
|
|
|
+ *r.cache = regexp2MatchCache{
|
|
|
+ target: s,
|
|
|
+ runes: runes,
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ r.cache = nil
|
|
|
}
|
|
|
- return r.findSubmatchIndexUTF16(s, start)
|
|
|
+ return
|
|
|
}
|
|
|
|
|
|
-func (r *regexp2Wrapper) findSubmatchIndexUTF16(s valueString, start int) (result []int) {
|
|
|
- wrapped := (*regexp2.Regexp)(r)
|
|
|
- match, err := wrapped.FindRunesMatchStartingAt(s.utf16Runes(), start)
|
|
|
+func (r *regexp2Wrapper) findSubmatchIndexUTF16(s valueString, start int, doCache bool) (result []int) {
|
|
|
+ match, _, err := r.findUTF16Cached(s, start, doCache)
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
@@ -193,17 +228,55 @@ func (r *regexp2Wrapper) findSubmatchIndexUTF16(s valueString, start int) (resul
|
|
|
return
|
|
|
}
|
|
|
|
|
|
-func (r *regexp2Wrapper) findSubmatchIndexUnicode(s valueString, start int) (result []int) {
|
|
|
- wrapped := (*regexp2.Regexp)(r)
|
|
|
- posMap, runes, mappedStart := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), start)
|
|
|
- match, err := wrapped.FindRunesMatchStartingAt(runes, mappedStart)
|
|
|
- if err != nil {
|
|
|
- return
|
|
|
+func (r *regexp2Wrapper) findUnicodeCached(s valueString, start int, doCache bool) (match *regexp2.Match, posMap []int, err error) {
|
|
|
+ var (
|
|
|
+ runes []rune
|
|
|
+ mappedStart int
|
|
|
+ splitPair bool
|
|
|
+ savedRune rune
|
|
|
+ )
|
|
|
+ wrapped := r.rx
|
|
|
+ cache := r.cache
|
|
|
+ if cache != nil && cache.posMap != nil && cache.target.SameAs(s) {
|
|
|
+ runes, posMap = cache.runes, cache.posMap
|
|
|
+ mappedStart, splitPair = posMapReverseLookup(posMap, start)
|
|
|
+ } else {
|
|
|
+ posMap, runes, mappedStart, splitPair = buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), start)
|
|
|
+ cache = nil
|
|
|
+ }
|
|
|
+ if splitPair {
|
|
|
+ // temporarily set the rune at mappedStart to the second code point of the pair
|
|
|
+ _, second := utf16.EncodeRune(runes[mappedStart])
|
|
|
+ savedRune, runes[mappedStart] = runes[mappedStart], second
|
|
|
+ }
|
|
|
+ match, err = wrapped.FindRunesMatchStartingAt(runes, mappedStart)
|
|
|
+ if doCache && match != nil && err == nil {
|
|
|
+ if splitPair {
|
|
|
+ runes[mappedStart] = savedRune
|
|
|
+ }
|
|
|
+ if cache == nil {
|
|
|
+ if r.cache == nil {
|
|
|
+ r.cache = new(regexp2MatchCache)
|
|
|
+ }
|
|
|
+ *r.cache = regexp2MatchCache{
|
|
|
+ target: s,
|
|
|
+ runes: runes,
|
|
|
+ posMap: posMap,
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ r.cache = nil
|
|
|
}
|
|
|
|
|
|
- if match == nil {
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func (r *regexp2Wrapper) findSubmatchIndexUnicode(s valueString, start int, doCache bool) (result []int) {
|
|
|
+ match, posMap, err := r.findUnicodeCached(s, start, doCache)
|
|
|
+ if match == nil || err != nil {
|
|
|
return
|
|
|
}
|
|
|
+
|
|
|
groups := match.Groups()
|
|
|
|
|
|
result = make([]int, 0, len(groups)<<1)
|
|
@@ -218,10 +291,9 @@ func (r *regexp2Wrapper) findSubmatchIndexUnicode(s valueString, start int) (res
|
|
|
}
|
|
|
|
|
|
func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s valueString, start, limit int, sticky bool) [][]int {
|
|
|
- wrapped := (*regexp2.Regexp)(r)
|
|
|
- runes := s.utf16Runes()
|
|
|
- match, err := wrapped.FindRunesMatchStartingAt(runes, start)
|
|
|
- if err != nil {
|
|
|
+ wrapped := r.rx
|
|
|
+ match, runes, err := r.findUTF16Cached(s, start, false)
|
|
|
+ if match == nil || err != nil {
|
|
|
return nil
|
|
|
}
|
|
|
if limit < 0 {
|
|
@@ -263,7 +335,7 @@ func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s valueString, start, limit i
|
|
|
return results
|
|
|
}
|
|
|
|
|
|
-func buildPosMap(rd io.RuneReader, l, start int) (posMap []int, runes []rune, mappedStart int) {
|
|
|
+func buildPosMap(rd io.RuneReader, l, start int) (posMap []int, runes []rune, mappedStart int, splitPair bool) {
|
|
|
posMap = make([]int, 0, l+1)
|
|
|
curPos := 0
|
|
|
runes = make([]rune, 0, l)
|
|
@@ -277,8 +349,7 @@ func buildPosMap(rd io.RuneReader, l, start int) (posMap []int, runes []rune, ma
|
|
|
if curPos > start {
|
|
|
// start position splits a surrogate pair
|
|
|
mappedStart = len(runes) - 1
|
|
|
- _, second := utf16.EncodeRune(runes[mappedStart])
|
|
|
- runes[mappedStart] = second
|
|
|
+ splitPair = true
|
|
|
startFound = true
|
|
|
}
|
|
|
}
|
|
@@ -294,15 +365,21 @@ func buildPosMap(rd io.RuneReader, l, start int) (posMap []int, runes []rune, ma
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+func posMapReverseLookup(posMap []int, pos int) (int, bool) {
|
|
|
+ mapped := sort.SearchInts(posMap, pos)
|
|
|
+ if mapped < len(posMap) && posMap[mapped] != pos {
|
|
|
+ return mapped - 1, true
|
|
|
+ }
|
|
|
+ return mapped, false
|
|
|
+}
|
|
|
+
|
|
|
func (r *regexp2Wrapper) findAllSubmatchIndexUnicode(s unicodeString, start, limit int, sticky bool) [][]int {
|
|
|
- wrapped := (*regexp2.Regexp)(r)
|
|
|
+ wrapped := r.rx
|
|
|
if limit < 0 {
|
|
|
limit = len(s) + 1
|
|
|
}
|
|
|
results := make([][]int, 0, limit)
|
|
|
- posMap, runes, mappedStart := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), start)
|
|
|
-
|
|
|
- match, err := wrapped.FindRunesMatchStartingAt(runes, mappedStart)
|
|
|
+ match, posMap, err := r.findUnicodeCached(s, start, false)
|
|
|
if err != nil {
|
|
|
return nil
|
|
|
}
|
|
@@ -371,7 +448,7 @@ func (r *regexpWrapper) findAllSubmatchIndex(s string, limit int, sticky bool) (
|
|
|
func (r *regexpWrapper) findSubmatchIndex(s valueString, fullUnicode bool) (result []int) {
|
|
|
wrapped := (*regexp.Regexp)(r)
|
|
|
if fullUnicode {
|
|
|
- posMap, runes, _ := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), 0)
|
|
|
+ posMap, runes, _, _ := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), 0)
|
|
|
res := wrapped.FindReaderSubmatchIndex(&arrayRuneReader{runes: runes})
|
|
|
for i, item := range res {
|
|
|
res[i] = posMap[item]
|