builtin_regexp.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163
  1. package goja
  2. import (
  3. "fmt"
  4. "github.com/dop251/goja/parser"
  5. "regexp"
  6. "strings"
  7. "unicode/utf16"
  8. "unicode/utf8"
  9. )
  10. func (r *Runtime) newRegexpObject(proto *Object) *regexpObject {
  11. v := &Object{runtime: r}
  12. o := &regexpObject{}
  13. o.class = classRegExp
  14. o.val = v
  15. o.extensible = true
  16. v.self = o
  17. o.prototype = proto
  18. o.init()
  19. return o
  20. }
  21. func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr valueString, proto *Object) *Object {
  22. o := r.newRegexpObject(proto)
  23. o.pattern = pattern
  24. o.source = patternStr
  25. return o.val
  26. }
  // decodeHex interprets s as a sequence of hexadecimal digits (either case)
  // and returns its numeric value. It returns (0, false) as soon as a byte that
  // is not a hex digit is seen. An empty string decodes to (0, true).
  func decodeHex(s string) (int, bool) {
  	value := 0
  	for _, c := range []byte(s) {
  		var digit byte
  		if c >= '0' && c <= '9' {
  			digit = c - '0'
  		} else if c >= 'a' && c <= 'f' {
  			digit = c - 'a' + 10
  		} else if c >= 'A' && c <= 'F' {
  			digit = c - 'A' + 10
  		} else {
  			return 0, false
  		}
  		value = value*16 + int(digit)
  	}
  	return value, true
  }
  46. func writeHex4(b *strings.Builder, i int) {
  47. b.WriteByte(hex[i>>12])
  48. b.WriteByte(hex[(i>>8)&0xF])
  49. b.WriteByte(hex[(i>>4)&0xF])
  50. b.WriteByte(hex[i&0xF])
  51. }
  52. // Convert any valid surrogate pairs in the form of \uXXXX\uXXXX to unicode characters
  53. func convertRegexpToUnicode(patternStr string) string {
  54. var sb strings.Builder
  55. pos := 0
  56. for i := 0; i < len(patternStr)-11; {
  57. r, size := utf8.DecodeRuneInString(patternStr[i:])
  58. if r == '\\' {
  59. i++
  60. if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' {
  61. if first, ok := decodeHex(patternStr[i+1 : i+5]); ok {
  62. if isUTF16FirstSurrogate(rune(first)) {
  63. if second, ok := decodeHex(patternStr[i+7 : i+11]); ok {
  64. if isUTF16SecondSurrogate(rune(second)) {
  65. r = utf16.DecodeRune(rune(first), rune(second))
  66. sb.WriteString(patternStr[pos : i-1])
  67. sb.WriteRune(r)
  68. i += 11
  69. pos = i
  70. continue
  71. }
  72. }
  73. }
  74. }
  75. }
  76. i++
  77. } else {
  78. i += size
  79. }
  80. }
  81. if pos > 0 {
  82. sb.WriteString(patternStr[pos:])
  83. return sb.String()
  84. }
  85. return patternStr
  86. }
  87. // Convert any extended unicode characters to UTF-16 in the form of \uXXXX\uXXXX
  88. func convertRegexpToUtf16(patternStr string) string {
  89. var sb strings.Builder
  90. pos := 0
  91. var prevRune rune
  92. for i := 0; i < len(patternStr); {
  93. r, size := utf8.DecodeRuneInString(patternStr[i:])
  94. if r > 0xFFFF {
  95. sb.WriteString(patternStr[pos:i])
  96. if prevRune == '\\' {
  97. sb.WriteRune('\\')
  98. }
  99. first, second := utf16.EncodeRune(r)
  100. sb.WriteString(`\u`)
  101. writeHex4(&sb, int(first))
  102. sb.WriteString(`\u`)
  103. writeHex4(&sb, int(second))
  104. pos = i + size
  105. }
  106. i += size
  107. prevRune = r
  108. }
  109. if pos > 0 {
  110. sb.WriteString(patternStr[pos:])
  111. return sb.String()
  112. }
  113. return patternStr
  114. }
  115. // convert any broken UTF-16 surrogate pairs to \uXXXX
  116. func escapeInvalidUtf16(s valueString) string {
  117. if ascii, ok := s.(asciiString); ok {
  118. return ascii.String()
  119. }
  120. var sb strings.Builder
  121. rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}
  122. pos := 0
  123. utf8Size := 0
  124. var utf8Buf [utf8.UTFMax]byte
  125. for {
  126. c, size, err := rd.ReadRune()
  127. if err != nil {
  128. break
  129. }
  130. if utf16.IsSurrogate(c) {
  131. if sb.Len() == 0 {
  132. sb.Grow(utf8Size + 7)
  133. hrd := s.reader(0)
  134. var c rune
  135. for p := 0; p < pos; {
  136. var size int
  137. var err error
  138. c, size, err = hrd.ReadRune()
  139. if err != nil {
  140. // will not happen
  141. panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err))
  142. }
  143. sb.WriteRune(c)
  144. p += size
  145. }
  146. if c == '\\' {
  147. sb.WriteRune(c)
  148. }
  149. }
  150. sb.WriteString(`\u`)
  151. writeHex4(&sb, int(c))
  152. } else {
  153. if sb.Len() > 0 {
  154. sb.WriteRune(c)
  155. } else {
  156. utf8Size += utf8.EncodeRune(utf8Buf[:], c)
  157. pos += size
  158. }
  159. }
  160. }
  161. if sb.Len() > 0 {
  162. return sb.String()
  163. }
  164. return s.String()
  165. }
  166. func compileRegexpFromValueString(patternStr valueString, flags string) (*regexpPattern, error) {
  167. return compileRegexp(escapeInvalidUtf16(patternStr), flags)
  168. }
  169. func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
  170. var global, ignoreCase, multiline, sticky, unicode bool
  171. var wrapper *regexpWrapper
  172. var wrapper2 *regexp2Wrapper
  173. if flags != "" {
  174. invalidFlags := func() {
  175. err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags)
  176. }
  177. for _, chr := range flags {
  178. switch chr {
  179. case 'g':
  180. if global {
  181. invalidFlags()
  182. return
  183. }
  184. global = true
  185. case 'm':
  186. if multiline {
  187. invalidFlags()
  188. return
  189. }
  190. multiline = true
  191. case 'i':
  192. if ignoreCase {
  193. invalidFlags()
  194. return
  195. }
  196. ignoreCase = true
  197. case 'y':
  198. if sticky {
  199. invalidFlags()
  200. return
  201. }
  202. sticky = true
  203. case 'u':
  204. if unicode {
  205. invalidFlags()
  206. }
  207. unicode = true
  208. default:
  209. invalidFlags()
  210. return
  211. }
  212. }
  213. }
  214. if unicode {
  215. patternStr = convertRegexpToUnicode(patternStr)
  216. } else {
  217. patternStr = convertRegexpToUtf16(patternStr)
  218. }
  219. re2Str, err1 := parser.TransformRegExp(patternStr)
  220. if err1 == nil {
  221. re2flags := ""
  222. if multiline {
  223. re2flags += "m"
  224. }
  225. if ignoreCase {
  226. re2flags += "i"
  227. }
  228. if len(re2flags) > 0 {
  229. re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str)
  230. }
  231. pattern, err1 := regexp.Compile(re2Str)
  232. if err1 != nil {
  233. err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1)
  234. return
  235. }
  236. wrapper = (*regexpWrapper)(pattern)
  237. } else {
  238. wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase)
  239. if err != nil {
  240. err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err1)
  241. }
  242. }
  243. p = &regexpPattern{
  244. src: patternStr,
  245. regexpWrapper: wrapper,
  246. regexp2Wrapper: wrapper2,
  247. global: global,
  248. ignoreCase: ignoreCase,
  249. multiline: multiline,
  250. sticky: sticky,
  251. unicode: unicode,
  252. }
  253. return
  254. }
  255. func (r *Runtime) _newRegExp(patternStr valueString, flags string, proto *Object) *Object {
  256. pattern, err := compileRegexpFromValueString(patternStr, flags)
  257. if err != nil {
  258. panic(r.newSyntaxError(err.Error(), -1))
  259. }
  260. return r.newRegExpp(pattern, patternStr, proto)
  261. }
  262. func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object {
  263. var patternVal, flagsVal Value
  264. if len(args) > 0 {
  265. patternVal = args[0]
  266. }
  267. if len(args) > 1 {
  268. flagsVal = args[1]
  269. }
  270. return r.newRegExp(patternVal, flagsVal, proto)
  271. }
  272. func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *Object {
  273. var pattern valueString
  274. var flags string
  275. if obj, ok := patternVal.(*Object); ok {
  276. if rx, ok := obj.self.(*regexpObject); ok {
  277. if flagsVal == nil || flagsVal == _undefined {
  278. return rx.clone()
  279. } else {
  280. return r._newRegExp(rx.source, flagsVal.toString().String(), proto)
  281. }
  282. } else {
  283. if isRegexp(patternVal) {
  284. pattern = nilSafe(obj.self.getStr("source", nil)).toString()
  285. if flagsVal == nil || flagsVal == _undefined {
  286. flags = nilSafe(obj.self.getStr("flags", nil)).toString().String()
  287. } else {
  288. flags = flagsVal.toString().String()
  289. }
  290. goto exit
  291. }
  292. }
  293. }
  294. if patternVal != nil && patternVal != _undefined {
  295. pattern = patternVal.toString()
  296. }
  297. if flagsVal != nil && flagsVal != _undefined {
  298. flags = flagsVal.toString().String()
  299. }
  300. if pattern == nil {
  301. pattern = stringEmpty
  302. }
  303. exit:
  304. return r._newRegExp(pattern, flags, proto)
  305. }
  306. func (r *Runtime) builtin_RegExp(call FunctionCall) Value {
  307. pattern := call.Argument(0)
  308. patternIsRegExp := isRegexp(pattern)
  309. flags := call.Argument(1)
  310. if patternIsRegExp && flags == _undefined {
  311. if obj, ok := call.Argument(0).(*Object); ok {
  312. patternConstructor := obj.self.getStr("constructor", nil)
  313. if patternConstructor == r.global.RegExp {
  314. return pattern
  315. }
  316. }
  317. }
  318. return r.newRegExp(pattern, flags, r.global.RegExpPrototype)
  319. }
  320. func (r *Runtime) regexpproto_compile(call FunctionCall) Value {
  321. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  322. var (
  323. pattern *regexpPattern
  324. source valueString
  325. flags string
  326. err error
  327. )
  328. patternVal := call.Argument(0)
  329. flagsVal := call.Argument(1)
  330. if o, ok := patternVal.(*Object); ok {
  331. if p, ok := o.self.(*regexpObject); ok {
  332. if flagsVal != _undefined {
  333. panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another"))
  334. }
  335. this.pattern = p.pattern
  336. this.source = p.source
  337. goto exit
  338. }
  339. }
  340. if patternVal != _undefined {
  341. source = patternVal.toString()
  342. } else {
  343. source = stringEmpty
  344. }
  345. if flagsVal != _undefined {
  346. flags = flagsVal.toString().String()
  347. }
  348. pattern, err = compileRegexpFromValueString(source, flags)
  349. if err != nil {
  350. panic(r.newSyntaxError(err.Error(), -1))
  351. }
  352. this.pattern = pattern
  353. this.source = source
  354. exit:
  355. this.setOwnStr("lastIndex", intToValue(0), true)
  356. return call.This
  357. }
  358. panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", call.This.toString()))
  359. }
  360. func (r *Runtime) regexpproto_exec(call FunctionCall) Value {
  361. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  362. return this.exec(call.Argument(0).toString())
  363. } else {
  364. r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", call.This.toString())
  365. return nil
  366. }
  367. }
  368. func (r *Runtime) regexpproto_test(call FunctionCall) Value {
  369. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  370. if this.test(call.Argument(0).toString()) {
  371. return valueTrue
  372. } else {
  373. return valueFalse
  374. }
  375. } else {
  376. r.typeErrorResult(true, "Method RegExp.prototype.test called on incompatible receiver %s", call.This.toString())
  377. return nil
  378. }
  379. }
  380. func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
  381. obj := r.toObject(call.This)
  382. if this := r.checkStdRegexp(obj); this != nil {
  383. var sb valueStringBuilder
  384. sb.WriteRune('/')
  385. if !this.writeEscapedSource(&sb) {
  386. sb.WriteString(this.source)
  387. }
  388. sb.WriteRune('/')
  389. if this.pattern.global {
  390. sb.WriteRune('g')
  391. }
  392. if this.pattern.ignoreCase {
  393. sb.WriteRune('i')
  394. }
  395. if this.pattern.multiline {
  396. sb.WriteRune('m')
  397. }
  398. if this.pattern.unicode {
  399. sb.WriteRune('u')
  400. }
  401. if this.pattern.sticky {
  402. sb.WriteRune('y')
  403. }
  404. return sb.String()
  405. }
  406. pattern := nilSafe(obj.self.getStr("source", nil)).toString()
  407. flags := nilSafe(obj.self.getStr("flags", nil)).toString()
  408. var sb valueStringBuilder
  409. sb.WriteRune('/')
  410. sb.WriteString(pattern)
  411. sb.WriteRune('/')
  412. sb.WriteString(flags)
  413. return sb.String()
  414. }
  415. func (r *regexpObject) writeEscapedSource(sb *valueStringBuilder) bool {
  416. if r.source.length() == 0 {
  417. sb.WriteString(asciiString("(?:)"))
  418. return true
  419. }
  420. pos := 0
  421. lastPos := 0
  422. rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader(0)}
  423. L:
  424. for {
  425. c, size, err := rd.ReadRune()
  426. if err != nil {
  427. break
  428. }
  429. switch c {
  430. case '\\':
  431. pos++
  432. _, size, err = rd.ReadRune()
  433. if err != nil {
  434. break L
  435. }
  436. case '/', '\u000a', '\u000d', '\u2028', '\u2029':
  437. sb.WriteSubstring(r.source, lastPos, pos)
  438. sb.WriteRune('\\')
  439. switch c {
  440. case '\u000a':
  441. sb.WriteRune('n')
  442. case '\u000d':
  443. sb.WriteRune('r')
  444. default:
  445. sb.WriteRune('u')
  446. sb.WriteRune(rune(hex[c>>12]))
  447. sb.WriteRune(rune(hex[(c>>8)&0xF]))
  448. sb.WriteRune(rune(hex[(c>>4)&0xF]))
  449. sb.WriteRune(rune(hex[c&0xF]))
  450. }
  451. lastPos = pos + size
  452. }
  453. pos += size
  454. }
  455. if lastPos > 0 {
  456. sb.WriteSubstring(r.source, lastPos, r.source.length())
  457. return true
  458. }
  459. return false
  460. }
  461. func (r *Runtime) regexpproto_getSource(call FunctionCall) Value {
  462. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  463. var sb valueStringBuilder
  464. if this.writeEscapedSource(&sb) {
  465. return sb.String()
  466. }
  467. return this.source
  468. } else {
  469. r.typeErrorResult(true, "Method RegExp.prototype.source getter called on incompatible receiver")
  470. return nil
  471. }
  472. }
  473. func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value {
  474. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  475. if this.pattern.global {
  476. return valueTrue
  477. } else {
  478. return valueFalse
  479. }
  480. } else {
  481. r.typeErrorResult(true, "Method RegExp.prototype.global getter called on incompatible receiver %s", call.This.toString())
  482. return nil
  483. }
  484. }
  485. func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
  486. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  487. if this.pattern.multiline {
  488. return valueTrue
  489. } else {
  490. return valueFalse
  491. }
  492. } else {
  493. r.typeErrorResult(true, "Method RegExp.prototype.multiline getter called on incompatible receiver %s", call.This.toString())
  494. return nil
  495. }
  496. }
  497. func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
  498. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  499. if this.pattern.ignoreCase {
  500. return valueTrue
  501. } else {
  502. return valueFalse
  503. }
  504. } else {
  505. r.typeErrorResult(true, "Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", call.This.toString())
  506. return nil
  507. }
  508. }
  509. func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value {
  510. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  511. if this.pattern.unicode {
  512. return valueTrue
  513. } else {
  514. return valueFalse
  515. }
  516. } else {
  517. r.typeErrorResult(true, "Method RegExp.prototype.unicode getter called on incompatible receiver %s", call.This.toString())
  518. return nil
  519. }
  520. }
  521. func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
  522. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  523. if this.pattern.sticky {
  524. return valueTrue
  525. } else {
  526. return valueFalse
  527. }
  528. } else {
  529. r.typeErrorResult(true, "Method RegExp.prototype.sticky getter called on incompatible receiver %s", call.This.toString())
  530. return nil
  531. }
  532. }
  533. func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
  534. var global, ignoreCase, multiline, sticky, unicode bool
  535. thisObj := r.toObject(call.This)
  536. size := 0
  537. if v := thisObj.self.getStr("global", nil); v != nil {
  538. global = v.ToBoolean()
  539. if global {
  540. size++
  541. }
  542. }
  543. if v := thisObj.self.getStr("ignoreCase", nil); v != nil {
  544. ignoreCase = v.ToBoolean()
  545. if ignoreCase {
  546. size++
  547. }
  548. }
  549. if v := thisObj.self.getStr("multiline", nil); v != nil {
  550. multiline = v.ToBoolean()
  551. if multiline {
  552. size++
  553. }
  554. }
  555. if v := thisObj.self.getStr("sticky", nil); v != nil {
  556. sticky = v.ToBoolean()
  557. if sticky {
  558. size++
  559. }
  560. }
  561. if v := thisObj.self.getStr("unicode", nil); v != nil {
  562. unicode = v.ToBoolean()
  563. if unicode {
  564. size++
  565. }
  566. }
  567. var sb strings.Builder
  568. sb.Grow(size)
  569. if global {
  570. sb.WriteByte('g')
  571. }
  572. if ignoreCase {
  573. sb.WriteByte('i')
  574. }
  575. if multiline {
  576. sb.WriteByte('m')
  577. }
  578. if unicode {
  579. sb.WriteByte('u')
  580. }
  581. if sticky {
  582. sb.WriteByte('y')
  583. }
  584. return asciiString(sb.String())
  585. }
  586. func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value {
  587. res := execFn(FunctionCall{
  588. This: rxObj,
  589. Arguments: []Value{arg},
  590. })
  591. if res != _null {
  592. if _, ok := res.(*Object); !ok {
  593. panic(r.NewTypeError("RegExp exec method returned something other than an Object or null"))
  594. }
  595. }
  596. return res
  597. }
  598. func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s valueString) []Value {
  599. fullUnicode := nilSafe(rxObj.self.getStr("unicode", nil)).ToBoolean()
  600. rxObj.self.setOwnStr("lastIndex", intToValue(0), true)
  601. execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable()
  602. if !ok {
  603. panic(r.NewTypeError("exec is not a function"))
  604. }
  605. var a []Value
  606. for {
  607. res := r.regExpExec(execFn, rxObj, s)
  608. if res == _null {
  609. break
  610. }
  611. a = append(a, res)
  612. matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString()
  613. if matchStr.length() == 0 {
  614. thisIndex := toInt(nilSafe(rxObj.self.getStr("lastIndex", nil)).ToInteger())
  615. rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex(s, thisIndex, fullUnicode)), true)
  616. }
  617. }
  618. return a
  619. }
  620. func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s valueString) Value {
  621. rx := rxObj.self
  622. global := rx.getStr("global", nil)
  623. if global != nil && global.ToBoolean() {
  624. a := r.getGlobalRegexpMatches(rxObj, s)
  625. if len(a) == 0 {
  626. return _null
  627. }
  628. ar := make([]Value, 0, len(a))
  629. for _, result := range a {
  630. obj := r.toObject(result)
  631. matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString()
  632. ar = append(ar, matchStr)
  633. }
  634. return r.newArrayValues(ar)
  635. }
  636. execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
  637. if !ok {
  638. panic(r.NewTypeError("exec is not a function"))
  639. }
  640. return r.regExpExec(execFn, rxObj, s)
  641. }
  642. func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject {
  643. if deoptimiseRegexp {
  644. return nil
  645. }
  646. rx, ok := rxObj.self.(*regexpObject)
  647. if !ok {
  648. return nil
  649. }
  650. if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto {
  651. return nil
  652. }
  653. return rx
  654. }
  655. func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value {
  656. thisObj := r.toObject(call.This)
  657. s := call.Argument(0).toString()
  658. rx := r.checkStdRegexp(thisObj)
  659. if rx == nil {
  660. return r.regexpproto_stdMatcherGeneric(thisObj, s)
  661. }
  662. if rx.pattern.global {
  663. rx.setOwnStr("lastIndex", intToValue(0), true)
  664. var a []Value
  665. var previousLastIndex int64
  666. for {
  667. match, result := rx.execRegexp(s)
  668. if !match {
  669. break
  670. }
  671. thisIndex := rx.getStr("lastIndex", nil).ToInteger()
  672. if thisIndex == previousLastIndex {
  673. previousLastIndex = int64(advanceStringIndex(s, toInt(previousLastIndex), rx.pattern.unicode))
  674. rx.setOwnStr("lastIndex", intToValue(previousLastIndex), true)
  675. } else {
  676. previousLastIndex = thisIndex
  677. }
  678. a = append(a, s.substring(result[0], result[1]))
  679. }
  680. if len(a) == 0 {
  681. return _null
  682. }
  683. return r.newArrayValues(a)
  684. } else {
  685. return rx.exec(s)
  686. }
  687. }
  688. func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg valueString) Value {
  689. rx := rxObj.self
  690. previousLastIndex := nilSafe(rx.getStr("lastIndex", nil))
  691. zero := intToValue(0)
  692. if !previousLastIndex.SameAs(zero) {
  693. rx.setOwnStr("lastIndex", zero, true)
  694. }
  695. execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
  696. if !ok {
  697. panic(r.NewTypeError("exec is not a function"))
  698. }
  699. result := r.regExpExec(execFn, rxObj, arg)
  700. currentLastIndex := nilSafe(rx.getStr("lastIndex", nil))
  701. if !currentLastIndex.SameAs(previousLastIndex) {
  702. rx.setOwnStr("lastIndex", previousLastIndex, true)
  703. }
  704. if result == _null {
  705. return intToValue(-1)
  706. }
  707. return r.toObject(result).self.getStr("index", nil)
  708. }
  709. func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value {
  710. thisObj := r.toObject(call.This)
  711. s := call.Argument(0).toString()
  712. rx := r.checkStdRegexp(thisObj)
  713. if rx == nil {
  714. return r.regexpproto_stdSearchGeneric(thisObj, s)
  715. }
  716. previousLastIndex := rx.getStr("lastIndex", nil)
  717. rx.setOwnStr("lastIndex", intToValue(0), true)
  718. match, result := rx.execRegexp(s)
  719. rx.setOwnStr("lastIndex", previousLastIndex, true)
  720. if !match {
  721. return intToValue(-1)
  722. }
  723. return intToValue(int64(result[0]))
  724. }
  725. func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s valueString, limit Value, unicodeMatching bool) Value {
  726. var a []Value
  727. var lim int64
  728. if limit == nil || limit == _undefined {
  729. lim = maxInt - 1
  730. } else {
  731. lim = toLength(limit)
  732. }
  733. if lim == 0 {
  734. return r.newArrayValues(a)
  735. }
  736. size := s.length()
  737. p := 0
  738. execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil)) // must be non-nil
  739. if size == 0 {
  740. if r.regExpExec(execFn, splitter, s) == _null {
  741. a = append(a, s)
  742. }
  743. return r.newArrayValues(a)
  744. }
  745. q := p
  746. for q < size {
  747. splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true)
  748. z := r.regExpExec(execFn, splitter, s)
  749. if z == _null {
  750. q = advanceStringIndex(s, q, unicodeMatching)
  751. } else {
  752. z := r.toObject(z)
  753. e := toLength(splitter.self.getStr("lastIndex", nil))
  754. if e == int64(p) {
  755. q = advanceStringIndex(s, q, unicodeMatching)
  756. } else {
  757. a = append(a, s.substring(p, q))
  758. if int64(len(a)) == lim {
  759. return r.newArrayValues(a)
  760. }
  761. if e > int64(size) {
  762. p = size
  763. } else {
  764. p = int(e)
  765. }
  766. numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0)
  767. for i := int64(1); i <= numberOfCaptures; i++ {
  768. a = append(a, z.self.getIdx(valueInt(i), nil))
  769. if int64(len(a)) == lim {
  770. return r.newArrayValues(a)
  771. }
  772. }
  773. q = p
  774. }
  775. }
  776. }
  777. a = append(a, s.substring(p, size))
  778. return r.newArrayValues(a)
  779. }
  780. func advanceStringIndex(s valueString, pos int, unicode bool) int {
  781. next := pos + 1
  782. if !unicode {
  783. return next
  784. }
  785. l := s.length()
  786. if next >= l {
  787. return next
  788. }
  789. if !isUTF16FirstSurrogate(s.charAt(pos)) {
  790. return next
  791. }
  792. if !isUTF16SecondSurrogate(s.charAt(next)) {
  793. return next
  794. }
  795. return next + 1
  796. }
  797. func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value {
  798. rxObj := r.toObject(call.This)
  799. c := r.speciesConstructor(rxObj, r.global.RegExp)
  800. flags := nilSafe(rxObj.self.getStr("flags", nil)).toString()
  801. flagsStr := flags.String()
  802. // Add 'y' flag if missing
  803. if !strings.Contains(flagsStr, "y") {
  804. flags = newStringValue(flagsStr + "y")
  805. }
  806. splitter := c([]Value{rxObj, flags}, nil)
  807. s := call.Argument(0).toString()
  808. limitValue := call.Argument(1)
  809. search := r.checkStdRegexp(splitter)
  810. if search == nil {
  811. return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u"))
  812. }
  813. limit := -1
  814. if limitValue != _undefined {
  815. limit = int(toUint32(limitValue))
  816. }
  817. if limit == 0 {
  818. return r.newArrayValues(nil)
  819. }
  820. targetLength := s.length()
  821. var valueArray []Value
  822. lastIndex := 0
  823. found := 0
  824. result := search.pattern.findAllSubmatchIndex(s, 0, -1, false)
  825. if targetLength == 0 {
  826. if result == nil {
  827. valueArray = append(valueArray, s)
  828. }
  829. goto RETURN
  830. }
  831. for _, match := range result {
  832. if match[0] == match[1] {
  833. // FIXME Ugh, this is a hack
  834. if match[0] == 0 || match[0] == targetLength {
  835. continue
  836. }
  837. }
  838. if lastIndex != match[0] {
  839. valueArray = append(valueArray, s.substring(lastIndex, match[0]))
  840. found++
  841. } else if lastIndex == match[0] {
  842. if lastIndex != -1 {
  843. valueArray = append(valueArray, stringEmpty)
  844. found++
  845. }
  846. }
  847. lastIndex = match[1]
  848. if found == limit {
  849. goto RETURN
  850. }
  851. captureCount := len(match) / 2
  852. for index := 1; index < captureCount; index++ {
  853. offset := index * 2
  854. var value Value
  855. if match[offset] != -1 {
  856. value = s.substring(match[offset], match[offset+1])
  857. } else {
  858. value = _undefined
  859. }
  860. valueArray = append(valueArray, value)
  861. found++
  862. if found == limit {
  863. goto RETURN
  864. }
  865. }
  866. }
  867. if found != limit {
  868. if lastIndex != targetLength {
  869. valueArray = append(valueArray, s.substring(lastIndex, targetLength))
  870. } else {
  871. valueArray = append(valueArray, stringEmpty)
  872. }
  873. }
  874. RETURN:
  875. return r.newArrayValues(valueArray)
  876. }
  877. func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr valueString, rcall func(FunctionCall) Value) Value {
  878. var results []Value
  879. if nilSafe(rxObj.self.getStr("global", nil)).ToBoolean() {
  880. results = r.getGlobalRegexpMatches(rxObj, s)
  881. } else {
  882. execFn := toMethod(rxObj.self.getStr("exec", nil)) // must be non-nil
  883. result := r.regExpExec(execFn, rxObj, s)
  884. if result != _null {
  885. results = append(results, result)
  886. }
  887. }
  888. lengthS := s.length()
  889. nextSourcePosition := 0
  890. var resultBuf valueStringBuilder
  891. for _, result := range results {
  892. obj := r.toObject(result)
  893. nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0)
  894. matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString()
  895. matchLength := matched.length()
  896. position := toInt(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0))
  897. var captures []Value
  898. if rcall != nil {
  899. captures = make([]Value, 0, nCaptures+3)
  900. } else {
  901. captures = make([]Value, 0, nCaptures+1)
  902. }
  903. captures = append(captures, matched)
  904. for n := int64(1); n <= nCaptures; n++ {
  905. capN := nilSafe(obj.self.getIdx(valueInt(n), nil))
  906. if capN != _undefined {
  907. capN = capN.ToString()
  908. }
  909. captures = append(captures, capN)
  910. }
  911. var replacement valueString
  912. if rcall != nil {
  913. captures = append(captures, intToValue(int64(position)), s)
  914. replacement = rcall(FunctionCall{
  915. This: _undefined,
  916. Arguments: captures,
  917. }).toString()
  918. if position >= nextSourcePosition {
  919. resultBuf.WriteString(s.substring(nextSourcePosition, position))
  920. resultBuf.WriteString(replacement)
  921. nextSourcePosition = position + matchLength
  922. }
  923. } else {
  924. if position >= nextSourcePosition {
  925. resultBuf.WriteString(s.substring(nextSourcePosition, position))
  926. writeSubstitution(s, position, len(captures), func(idx int) valueString {
  927. capture := captures[idx]
  928. if capture != _undefined {
  929. return capture.toString()
  930. }
  931. return stringEmpty
  932. }, replaceStr, &resultBuf)
  933. nextSourcePosition = position + matchLength
  934. }
  935. }
  936. }
  937. if nextSourcePosition < lengthS {
  938. resultBuf.WriteString(s.substring(nextSourcePosition, lengthS))
  939. }
  940. return resultBuf.String()
  941. }
  942. func writeSubstitution(s valueString, position int, numCaptures int, getCapture func(int) valueString, replaceStr valueString, buf *valueStringBuilder) {
  943. l := s.length()
  944. rl := replaceStr.length()
  945. matched := getCapture(0)
  946. tailPos := position + matched.length()
  947. for i := 0; i < rl; i++ {
  948. c := replaceStr.charAt(i)
  949. if c == '$' && i < rl-1 {
  950. ch := replaceStr.charAt(i + 1)
  951. switch ch {
  952. case '$':
  953. buf.WriteRune('$')
  954. case '`':
  955. buf.WriteString(s.substring(0, position))
  956. case '\'':
  957. if tailPos < l {
  958. buf.WriteString(s.substring(tailPos, l))
  959. }
  960. case '&':
  961. buf.WriteString(matched)
  962. default:
  963. matchNumber := 0
  964. j := i + 1
  965. for j < rl {
  966. ch := replaceStr.charAt(j)
  967. if ch >= '0' && ch <= '9' {
  968. m := matchNumber*10 + int(ch-'0')
  969. if m >= numCaptures {
  970. break
  971. }
  972. matchNumber = m
  973. j++
  974. } else {
  975. break
  976. }
  977. }
  978. if matchNumber > 0 {
  979. buf.WriteString(getCapture(matchNumber))
  980. i = j - 1
  981. continue
  982. } else {
  983. buf.WriteRune('$')
  984. buf.WriteRune(ch)
  985. }
  986. }
  987. i++
  988. } else {
  989. buf.WriteRune(c)
  990. }
  991. }
  992. }
  993. func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value {
  994. rxObj := r.toObject(call.This)
  995. s := call.Argument(0).toString()
  996. replaceStr, rcall := getReplaceValue(call.Argument(1))
  997. rx := r.checkStdRegexp(rxObj)
  998. if rx == nil {
  999. return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall)
  1000. }
  1001. var index int64
  1002. find := 1
  1003. if rx.pattern.global {
  1004. find = -1
  1005. rx.setOwnStr("lastIndex", intToValue(0), true)
  1006. } else {
  1007. index = rx.getLastIndex()
  1008. }
  1009. found := rx.pattern.findAllSubmatchIndex(s, toInt(index), find, rx.pattern.sticky)
  1010. if len(found) > 0 {
  1011. if !rx.updateLastIndex(index, found[0], found[len(found)-1]) {
  1012. found = nil
  1013. }
  1014. } else {
  1015. rx.updateLastIndex(index, nil, nil)
  1016. }
  1017. return stringReplace(s, found, replaceStr, rcall)
  1018. }
  1019. func (r *Runtime) initRegExp() {
  1020. o := r.newGuardedObject(r.global.ObjectPrototype, classObject)
  1021. r.global.RegExpPrototype = o.val
  1022. r.global.stdRegexpProto = o
  1023. o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, nil, "compile", nil, 2), true, false, true)
  1024. o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, nil, "exec", nil, 1), true, false, true)
  1025. o._putProp("test", r.newNativeFunc(r.regexpproto_test, nil, "test", nil, 1), true, false, true)
  1026. o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, nil, "toString", nil, 0), true, false, true)
  1027. o.setOwnStr("source", &valueProperty{
  1028. configurable: true,
  1029. getterFunc: r.newNativeFunc(r.regexpproto_getSource, nil, "get source", nil, 0),
  1030. accessor: true,
  1031. }, false)
  1032. o.setOwnStr("global", &valueProperty{
  1033. configurable: true,
  1034. getterFunc: r.newNativeFunc(r.regexpproto_getGlobal, nil, "get global", nil, 0),
  1035. accessor: true,
  1036. }, false)
  1037. o.setOwnStr("multiline", &valueProperty{
  1038. configurable: true,
  1039. getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, nil, "get multiline", nil, 0),
  1040. accessor: true,
  1041. }, false)
  1042. o.setOwnStr("ignoreCase", &valueProperty{
  1043. configurable: true,
  1044. getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, nil, "get ignoreCase", nil, 0),
  1045. accessor: true,
  1046. }, false)
  1047. o.setOwnStr("unicode", &valueProperty{
  1048. configurable: true,
  1049. getterFunc: r.newNativeFunc(r.regexpproto_getUnicode, nil, "get unicode", nil, 0),
  1050. accessor: true,
  1051. }, false)
  1052. o.setOwnStr("sticky", &valueProperty{
  1053. configurable: true,
  1054. getterFunc: r.newNativeFunc(r.regexpproto_getSticky, nil, "get sticky", nil, 0),
  1055. accessor: true,
  1056. }, false)
  1057. o.setOwnStr("flags", &valueProperty{
  1058. configurable: true,
  1059. getterFunc: r.newNativeFunc(r.regexpproto_getFlags, nil, "get flags", nil, 0),
  1060. accessor: true,
  1061. }, false)
  1062. o._putSym(symMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, nil, "[Symbol.match]", nil, 1), true, false, true))
  1063. o._putSym(symSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, nil, "[Symbol.search]", nil, 1), true, false, true))
  1064. o._putSym(symSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, nil, "[Symbol.split]", nil, 2), true, false, true))
  1065. o._putSym(symReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, nil, "[Symbol.replace]", nil, 2), true, false, true))
  1066. o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky")
  1067. r.global.RegExp = r.newNativeFunc(r.builtin_RegExp, r.builtin_newRegExp, "RegExp", r.global.RegExpPrototype, 2)
  1068. rx := r.global.RegExp.self
  1069. rx._putSym(symSpecies, &valueProperty{
  1070. getterFunc: r.newNativeFunc(r.returnThis, nil, "get [Symbol.species]", nil, 0),
  1071. accessor: true,
  1072. configurable: true,
  1073. })
  1074. r.addToGlobal("RegExp", r.global.RegExp)
  1075. }