regexp.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. package goja
  2. import (
  3. "fmt"
  4. "github.com/dlclark/regexp2"
  5. "regexp"
  6. "unicode/utf16"
  7. "unicode/utf8"
  8. )
  9. type regexpPattern interface {
  10. FindSubmatchIndex(valueString, int) []int
  11. FindAllSubmatchIndex(valueString, int) [][]int
  12. FindAllSubmatchIndexUTF8(string, int) [][]int
  13. FindAllSubmatchIndexASCII(string, int) [][]int
  14. MatchString(valueString) bool
  15. }
  16. type regexp2Wrapper regexp2.Regexp
  17. type regexpWrapper regexp.Regexp
  18. type regexpObject struct {
  19. baseObject
  20. pattern regexpPattern
  21. source valueString
  22. global, multiline, ignoreCase, sticky bool
  23. }
  24. func (r *regexp2Wrapper) FindSubmatchIndex(s valueString, start int) (result []int) {
  25. wrapped := (*regexp2.Regexp)(r)
  26. var match *regexp2.Match
  27. var err error
  28. switch s := s.(type) {
  29. case asciiString:
  30. match, err = wrapped.FindStringMatch(string(s)[start:])
  31. case unicodeString:
  32. match, err = wrapped.FindRunesMatch(utf16.Decode(s[start:]))
  33. default:
  34. panic(fmt.Errorf("Unknown string type: %T", s))
  35. }
  36. if err != nil {
  37. return
  38. }
  39. if match == nil {
  40. return
  41. }
  42. groups := match.Groups()
  43. result = make([]int, 0, len(groups)<<1)
  44. for _, group := range groups {
  45. if len(group.Captures) > 0 {
  46. result = append(result, group.Index, group.Index+group.Length)
  47. } else {
  48. result = append(result, -1, 0)
  49. }
  50. }
  51. return
  52. }
  53. func (r *regexp2Wrapper) FindAllSubmatchIndexUTF8(s string, n int) [][]int {
  54. wrapped := (*regexp2.Regexp)(r)
  55. if n < 0 {
  56. n = len(s) + 1
  57. }
  58. results := make([][]int, 0, n)
  59. idxMap := make([]int, 0, len(s))
  60. runes := make([]rune, 0, len(s))
  61. for pos, rr := range s {
  62. runes = append(runes, rr)
  63. idxMap = append(idxMap, pos)
  64. }
  65. idxMap = append(idxMap, len(s))
  66. match, err := wrapped.FindRunesMatch(runes)
  67. if err != nil {
  68. return nil
  69. }
  70. i := 0
  71. for match != nil && i < n {
  72. groups := match.Groups()
  73. result := make([]int, 0, len(groups)<<1)
  74. for _, group := range groups {
  75. if len(group.Captures) > 0 {
  76. result = append(result, idxMap[group.Index], idxMap[group.Index+group.Length])
  77. } else {
  78. result = append(result, -1, 0)
  79. }
  80. }
  81. results = append(results, result)
  82. match, err = wrapped.FindNextMatch(match)
  83. if err != nil {
  84. return nil
  85. }
  86. i++
  87. }
  88. return results
  89. }
  90. func (r *regexp2Wrapper) FindAllSubmatchIndexASCII(s string, n int) [][]int {
  91. wrapped := (*regexp2.Regexp)(r)
  92. if n < 0 {
  93. n = len(s) + 1
  94. }
  95. results := make([][]int, 0, n)
  96. match, err := wrapped.FindStringMatch(s)
  97. if err != nil {
  98. return nil
  99. }
  100. i := 0
  101. for match != nil && i < n {
  102. groups := match.Groups()
  103. result := make([]int, 0, len(groups)<<1)
  104. for _, group := range groups {
  105. if len(group.Captures) > 0 {
  106. result = append(result, group.Index, group.Index+group.Length)
  107. } else {
  108. result = append(result, -1, 0)
  109. }
  110. }
  111. results = append(results, result)
  112. match, err = wrapped.FindNextMatch(match)
  113. if err != nil {
  114. return nil
  115. }
  116. i++
  117. }
  118. return results
  119. }
  120. func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]int {
  121. wrapped := (*regexp2.Regexp)(r)
  122. if n < 0 {
  123. n = len(s) + 1
  124. }
  125. results := make([][]int, 0, n)
  126. rd := runeReaderReplace{s.reader(0)}
  127. posMap := make([]int, s.length()+1)
  128. curPos := 0
  129. curRuneIdx := 0
  130. runes := make([]rune, 0, s.length())
  131. for {
  132. rn, size, err := rd.ReadRune()
  133. if err != nil {
  134. break
  135. }
  136. runes = append(runes, rn)
  137. posMap[curRuneIdx] = curPos
  138. curRuneIdx++
  139. curPos += size
  140. }
  141. posMap[curRuneIdx] = curPos
  142. match, err := wrapped.FindRunesMatch(runes)
  143. if err != nil {
  144. return nil
  145. }
  146. for match != nil {
  147. groups := match.Groups()
  148. result := make([]int, 0, len(groups)<<1)
  149. for _, group := range groups {
  150. if len(group.Captures) > 0 {
  151. start := posMap[group.Index]
  152. end := posMap[group.Index+group.Length]
  153. result = append(result, start, end)
  154. } else {
  155. result = append(result, -1, 0)
  156. }
  157. }
  158. results = append(results, result)
  159. match, err = wrapped.FindNextMatch(match)
  160. if err != nil {
  161. return nil
  162. }
  163. }
  164. return results
  165. }
  166. func (r *regexp2Wrapper) FindAllSubmatchIndex(s valueString, n int) [][]int {
  167. switch s := s.(type) {
  168. case asciiString:
  169. return r.FindAllSubmatchIndexASCII(string(s), n)
  170. case unicodeString:
  171. return r.findAllSubmatchIndexUTF16(s, n)
  172. default:
  173. panic("Unsupported string type")
  174. }
  175. }
  176. func (r *regexp2Wrapper) MatchString(s valueString) bool {
  177. wrapped := (*regexp2.Regexp)(r)
  178. switch s := s.(type) {
  179. case asciiString:
  180. matched, _ := wrapped.MatchString(string(s))
  181. return matched
  182. case unicodeString:
  183. matched, _ := wrapped.MatchRunes(utf16.Decode(s))
  184. return matched
  185. default:
  186. panic(fmt.Errorf("Unknown string type: %T", s))
  187. }
  188. }
  189. func (r *regexpWrapper) FindSubmatchIndex(s valueString, start int) (result []int) {
  190. wrapped := (*regexp.Regexp)(r)
  191. return wrapped.FindReaderSubmatchIndex(runeReaderReplace{s.reader(start)})
  192. }
  193. func (r *regexpWrapper) MatchString(s valueString) bool {
  194. wrapped := (*regexp.Regexp)(r)
  195. return wrapped.MatchReader(runeReaderReplace{s.reader(0)})
  196. }
  197. func (r *regexpWrapper) FindAllSubmatchIndex(s valueString, n int) [][]int {
  198. wrapped := (*regexp.Regexp)(r)
  199. switch s := s.(type) {
  200. case asciiString:
  201. return wrapped.FindAllStringSubmatchIndex(string(s), n)
  202. case unicodeString:
  203. return r.findAllSubmatchIndexUTF16(s, n)
  204. default:
  205. panic("Unsupported string type")
  206. }
  207. }
  208. func (r *regexpWrapper) FindAllSubmatchIndexUTF8(s string, n int) [][]int {
  209. wrapped := (*regexp.Regexp)(r)
  210. return wrapped.FindAllStringSubmatchIndex(s, n)
  211. }
  212. func (r *regexpWrapper) FindAllSubmatchIndexASCII(s string, n int) [][]int {
  213. return r.FindAllSubmatchIndexUTF8(s, n)
  214. }
  215. func (r *regexpWrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]int {
  216. wrapped := (*regexp.Regexp)(r)
  217. utf8Bytes := make([]byte, 0, len(s)*2)
  218. posMap := make(map[int]int)
  219. curPos := 0
  220. rd := runeReaderReplace{s.reader(0)}
  221. for {
  222. rn, size, err := rd.ReadRune()
  223. if err != nil {
  224. break
  225. }
  226. l := len(utf8Bytes)
  227. utf8Bytes = append(utf8Bytes, 0, 0, 0, 0)
  228. n := utf8.EncodeRune(utf8Bytes[l:], rn)
  229. utf8Bytes = utf8Bytes[:l+n]
  230. posMap[l] = curPos
  231. curPos += size
  232. }
  233. posMap[len(utf8Bytes)] = curPos
  234. rr := wrapped.FindAllSubmatchIndex(utf8Bytes, n)
  235. for _, res := range rr {
  236. for j, pos := range res {
  237. mapped, exists := posMap[pos]
  238. if !exists {
  239. panic("Unicode match is not on rune boundary")
  240. }
  241. res[j] = mapped
  242. }
  243. }
  244. return rr
  245. }
  246. func (r *regexpObject) execResultToArray(target valueString, result []int) Value {
  247. captureCount := len(result) >> 1
  248. valueArray := make([]Value, captureCount)
  249. matchIndex := result[0]
  250. lowerBound := matchIndex
  251. for index := 0; index < captureCount; index++ {
  252. offset := index << 1
  253. if result[offset] >= lowerBound {
  254. valueArray[index] = target.substring(int64(result[offset]), int64(result[offset+1]))
  255. lowerBound = result[offset]
  256. } else {
  257. valueArray[index] = _undefined
  258. }
  259. }
  260. match := r.val.runtime.newArrayValues(valueArray)
  261. match.self.setOwnStr("input", target, false)
  262. match.self.setOwnStr("index", intToValue(int64(matchIndex)), false)
  263. return match
  264. }
  265. func (r *regexpObject) execRegexp(target valueString) (match bool, result []int) {
  266. lastIndex := int64(0)
  267. if p := r.getStr("lastIndex", nil); p != nil {
  268. lastIndex = p.ToInteger()
  269. if lastIndex < 0 {
  270. lastIndex = 0
  271. }
  272. }
  273. index := lastIndex
  274. if !r.global && !r.sticky {
  275. index = 0
  276. }
  277. if index >= 0 && index <= target.length() {
  278. result = r.pattern.FindSubmatchIndex(target, int(index))
  279. }
  280. if result == nil || r.sticky && result[0] != 0 {
  281. r.setOwnStr("lastIndex", intToValue(0), true)
  282. return
  283. }
  284. match = true
  285. // We do this shift here because the .FindStringSubmatchIndex above
  286. // was done on a local subordinate slice of the string, not the whole string
  287. for i := range result {
  288. result[i] += int(index)
  289. }
  290. if r.global || r.sticky {
  291. r.setOwnStr("lastIndex", intToValue(int64(result[1])), true)
  292. }
  293. return
  294. }
  295. func (r *regexpObject) exec(target valueString) Value {
  296. match, result := r.execRegexp(target)
  297. if match {
  298. return r.execResultToArray(target, result)
  299. }
  300. return _null
  301. }
  302. func (r *regexpObject) test(target valueString) bool {
  303. match, _ := r.execRegexp(target)
  304. return match
  305. }
  306. func (r *regexpObject) clone() *Object {
  307. r1 := r.val.runtime.newRegexpObject(r.prototype)
  308. r1.source = r.source
  309. r1.pattern = r.pattern
  310. r1.global = r.global
  311. r1.ignoreCase = r.ignoreCase
  312. r1.multiline = r.multiline
  313. r1.sticky = r.sticky
  314. return r1.val
  315. }
  316. func (r *regexpObject) init() {
  317. r.baseObject.init()
  318. r._putProp("lastIndex", intToValue(0), true, false, false)
  319. }