builtin_regexp.go 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278
  1. package goja
  2. import (
  3. "fmt"
  4. "github.com/dop251/goja/parser"
  5. "regexp"
  6. "strings"
  7. "unicode/utf16"
  8. "unicode/utf8"
  9. )
  10. func (r *Runtime) newRegexpObject(proto *Object) *regexpObject {
  11. v := &Object{runtime: r}
  12. o := &regexpObject{}
  13. o.class = classRegExp
  14. o.val = v
  15. o.extensible = true
  16. v.self = o
  17. o.prototype = proto
  18. o.init()
  19. return o
  20. }
  21. func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr valueString, proto *Object) *regexpObject {
  22. o := r.newRegexpObject(proto)
  23. o.pattern = pattern
  24. o.source = patternStr
  25. return o
  26. }
  27. func decodeHex(s string) (int, bool) {
  28. var hex int
  29. for i := 0; i < len(s); i++ {
  30. var n byte
  31. chr := s[i]
  32. switch {
  33. case '0' <= chr && chr <= '9':
  34. n = chr - '0'
  35. case 'a' <= chr && chr <= 'f':
  36. n = chr - 'a' + 10
  37. case 'A' <= chr && chr <= 'F':
  38. n = chr - 'A' + 10
  39. default:
  40. return 0, false
  41. }
  42. hex = hex*16 + int(n)
  43. }
  44. return hex, true
  45. }
  46. func writeHex4(b *strings.Builder, i int) {
  47. b.WriteByte(hex[i>>12])
  48. b.WriteByte(hex[(i>>8)&0xF])
  49. b.WriteByte(hex[(i>>4)&0xF])
  50. b.WriteByte(hex[i&0xF])
  51. }
  52. // Convert any valid surrogate pairs in the form of \uXXXX\uXXXX to unicode characters
  53. func convertRegexpToUnicode(patternStr string) string {
  54. var sb strings.Builder
  55. pos := 0
  56. for i := 0; i < len(patternStr)-11; {
  57. r, size := utf8.DecodeRuneInString(patternStr[i:])
  58. if r == '\\' {
  59. i++
  60. if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' {
  61. if first, ok := decodeHex(patternStr[i+1 : i+5]); ok {
  62. if isUTF16FirstSurrogate(rune(first)) {
  63. if second, ok := decodeHex(patternStr[i+7 : i+11]); ok {
  64. if isUTF16SecondSurrogate(rune(second)) {
  65. r = utf16.DecodeRune(rune(first), rune(second))
  66. sb.WriteString(patternStr[pos : i-1])
  67. sb.WriteRune(r)
  68. i += 11
  69. pos = i
  70. continue
  71. }
  72. }
  73. }
  74. }
  75. }
  76. i++
  77. } else {
  78. i += size
  79. }
  80. }
  81. if pos > 0 {
  82. sb.WriteString(patternStr[pos:])
  83. return sb.String()
  84. }
  85. return patternStr
  86. }
  87. // Convert any extended unicode characters to UTF-16 in the form of \uXXXX\uXXXX
  88. func convertRegexpToUtf16(patternStr string) string {
  89. var sb strings.Builder
  90. pos := 0
  91. var prevRune rune
  92. for i := 0; i < len(patternStr); {
  93. r, size := utf8.DecodeRuneInString(patternStr[i:])
  94. if r > 0xFFFF {
  95. sb.WriteString(patternStr[pos:i])
  96. if prevRune == '\\' {
  97. sb.WriteRune('\\')
  98. }
  99. first, second := utf16.EncodeRune(r)
  100. sb.WriteString(`\u`)
  101. writeHex4(&sb, int(first))
  102. sb.WriteString(`\u`)
  103. writeHex4(&sb, int(second))
  104. pos = i + size
  105. }
  106. i += size
  107. prevRune = r
  108. }
  109. if pos > 0 {
  110. sb.WriteString(patternStr[pos:])
  111. return sb.String()
  112. }
  113. return patternStr
  114. }
  115. // convert any broken UTF-16 surrogate pairs to \uXXXX
  116. func escapeInvalidUtf16(s valueString) string {
  117. if ascii, ok := s.(asciiString); ok {
  118. return ascii.String()
  119. }
  120. var sb strings.Builder
  121. rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}
  122. pos := 0
  123. utf8Size := 0
  124. var utf8Buf [utf8.UTFMax]byte
  125. for {
  126. c, size, err := rd.ReadRune()
  127. if err != nil {
  128. break
  129. }
  130. if utf16.IsSurrogate(c) {
  131. if sb.Len() == 0 {
  132. sb.Grow(utf8Size + 7)
  133. hrd := s.reader(0)
  134. var c rune
  135. for p := 0; p < pos; {
  136. var size int
  137. var err error
  138. c, size, err = hrd.ReadRune()
  139. if err != nil {
  140. // will not happen
  141. panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err))
  142. }
  143. sb.WriteRune(c)
  144. p += size
  145. }
  146. if c == '\\' {
  147. sb.WriteRune(c)
  148. }
  149. }
  150. sb.WriteString(`\u`)
  151. writeHex4(&sb, int(c))
  152. } else {
  153. if sb.Len() > 0 {
  154. sb.WriteRune(c)
  155. } else {
  156. utf8Size += utf8.EncodeRune(utf8Buf[:], c)
  157. pos += size
  158. }
  159. }
  160. }
  161. if sb.Len() > 0 {
  162. return sb.String()
  163. }
  164. return s.String()
  165. }
  166. func compileRegexpFromValueString(patternStr valueString, flags string) (*regexpPattern, error) {
  167. return compileRegexp(escapeInvalidUtf16(patternStr), flags)
  168. }
  169. func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
  170. var global, ignoreCase, multiline, sticky, unicode bool
  171. var wrapper *regexpWrapper
  172. var wrapper2 *regexp2Wrapper
  173. if flags != "" {
  174. invalidFlags := func() {
  175. err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags)
  176. }
  177. for _, chr := range flags {
  178. switch chr {
  179. case 'g':
  180. if global {
  181. invalidFlags()
  182. return
  183. }
  184. global = true
  185. case 'm':
  186. if multiline {
  187. invalidFlags()
  188. return
  189. }
  190. multiline = true
  191. case 'i':
  192. if ignoreCase {
  193. invalidFlags()
  194. return
  195. }
  196. ignoreCase = true
  197. case 'y':
  198. if sticky {
  199. invalidFlags()
  200. return
  201. }
  202. sticky = true
  203. case 'u':
  204. if unicode {
  205. invalidFlags()
  206. }
  207. unicode = true
  208. default:
  209. invalidFlags()
  210. return
  211. }
  212. }
  213. }
  214. if unicode {
  215. patternStr = convertRegexpToUnicode(patternStr)
  216. } else {
  217. patternStr = convertRegexpToUtf16(patternStr)
  218. }
  219. re2Str, err1 := parser.TransformRegExp(patternStr)
  220. if err1 == nil {
  221. re2flags := ""
  222. if multiline {
  223. re2flags += "m"
  224. }
  225. if ignoreCase {
  226. re2flags += "i"
  227. }
  228. if len(re2flags) > 0 {
  229. re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str)
  230. }
  231. pattern, err1 := regexp.Compile(re2Str)
  232. if err1 != nil {
  233. err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1)
  234. return
  235. }
  236. wrapper = (*regexpWrapper)(pattern)
  237. } else {
  238. if _, incompat := err1.(parser.RegexpErrorIncompatible); !incompat {
  239. err = err1
  240. return
  241. }
  242. wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase)
  243. if err != nil {
  244. err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
  245. return
  246. }
  247. }
  248. p = &regexpPattern{
  249. src: patternStr,
  250. regexpWrapper: wrapper,
  251. regexp2Wrapper: wrapper2,
  252. global: global,
  253. ignoreCase: ignoreCase,
  254. multiline: multiline,
  255. sticky: sticky,
  256. unicode: unicode,
  257. }
  258. return
  259. }
  260. func (r *Runtime) _newRegExp(patternStr valueString, flags string, proto *Object) *regexpObject {
  261. pattern, err := compileRegexpFromValueString(patternStr, flags)
  262. if err != nil {
  263. panic(r.newSyntaxError(err.Error(), -1))
  264. }
  265. return r.newRegExpp(pattern, patternStr, proto)
  266. }
  267. func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object {
  268. var patternVal, flagsVal Value
  269. if len(args) > 0 {
  270. patternVal = args[0]
  271. }
  272. if len(args) > 1 {
  273. flagsVal = args[1]
  274. }
  275. return r.newRegExp(patternVal, flagsVal, proto).val
  276. }
  277. func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *regexpObject {
  278. var pattern valueString
  279. var flags string
  280. if isRegexp(patternVal) { // this may have side effects so need to call it anyway
  281. if obj, ok := patternVal.(*Object); ok {
  282. if rx, ok := obj.self.(*regexpObject); ok {
  283. if flagsVal == nil || flagsVal == _undefined {
  284. return rx.clone()
  285. } else {
  286. return r._newRegExp(rx.source, flagsVal.toString().String(), proto)
  287. }
  288. } else {
  289. pattern = nilSafe(obj.self.getStr("source", nil)).toString()
  290. if flagsVal == nil || flagsVal == _undefined {
  291. flags = nilSafe(obj.self.getStr("flags", nil)).toString().String()
  292. } else {
  293. flags = flagsVal.toString().String()
  294. }
  295. goto exit
  296. }
  297. }
  298. }
  299. if patternVal != nil && patternVal != _undefined {
  300. pattern = patternVal.toString()
  301. }
  302. if flagsVal != nil && flagsVal != _undefined {
  303. flags = flagsVal.toString().String()
  304. }
  305. if pattern == nil {
  306. pattern = stringEmpty
  307. }
  308. exit:
  309. return r._newRegExp(pattern, flags, proto)
  310. }
  311. func (r *Runtime) builtin_RegExp(call FunctionCall) Value {
  312. pattern := call.Argument(0)
  313. patternIsRegExp := isRegexp(pattern)
  314. flags := call.Argument(1)
  315. if patternIsRegExp && flags == _undefined {
  316. if obj, ok := call.Argument(0).(*Object); ok {
  317. patternConstructor := obj.self.getStr("constructor", nil)
  318. if patternConstructor == r.global.RegExp {
  319. return pattern
  320. }
  321. }
  322. }
  323. return r.newRegExp(pattern, flags, r.global.RegExpPrototype).val
  324. }
  325. func (r *Runtime) regexpproto_compile(call FunctionCall) Value {
  326. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  327. var (
  328. pattern *regexpPattern
  329. source valueString
  330. flags string
  331. err error
  332. )
  333. patternVal := call.Argument(0)
  334. flagsVal := call.Argument(1)
  335. if o, ok := patternVal.(*Object); ok {
  336. if p, ok := o.self.(*regexpObject); ok {
  337. if flagsVal != _undefined {
  338. panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another"))
  339. }
  340. this.pattern = p.pattern
  341. this.source = p.source
  342. goto exit
  343. }
  344. }
  345. if patternVal != _undefined {
  346. source = patternVal.toString()
  347. } else {
  348. source = stringEmpty
  349. }
  350. if flagsVal != _undefined {
  351. flags = flagsVal.toString().String()
  352. }
  353. pattern, err = compileRegexpFromValueString(source, flags)
  354. if err != nil {
  355. panic(r.newSyntaxError(err.Error(), -1))
  356. }
  357. this.pattern = pattern
  358. this.source = source
  359. exit:
  360. this.setOwnStr("lastIndex", intToValue(0), true)
  361. return call.This
  362. }
  363. panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", call.This.toString()))
  364. }
  365. func (r *Runtime) regexpproto_exec(call FunctionCall) Value {
  366. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  367. return this.exec(call.Argument(0).toString())
  368. } else {
  369. r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", call.This.toString())
  370. return nil
  371. }
  372. }
  373. func (r *Runtime) regexpproto_test(call FunctionCall) Value {
  374. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  375. if this.test(call.Argument(0).toString()) {
  376. return valueTrue
  377. } else {
  378. return valueFalse
  379. }
  380. } else {
  381. r.typeErrorResult(true, "Method RegExp.prototype.test called on incompatible receiver %s", call.This.toString())
  382. return nil
  383. }
  384. }
  385. func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
  386. obj := r.toObject(call.This)
  387. if this := r.checkStdRegexp(obj); this != nil {
  388. var sb valueStringBuilder
  389. sb.WriteRune('/')
  390. if !this.writeEscapedSource(&sb) {
  391. sb.WriteString(this.source)
  392. }
  393. sb.WriteRune('/')
  394. if this.pattern.global {
  395. sb.WriteRune('g')
  396. }
  397. if this.pattern.ignoreCase {
  398. sb.WriteRune('i')
  399. }
  400. if this.pattern.multiline {
  401. sb.WriteRune('m')
  402. }
  403. if this.pattern.unicode {
  404. sb.WriteRune('u')
  405. }
  406. if this.pattern.sticky {
  407. sb.WriteRune('y')
  408. }
  409. return sb.String()
  410. }
  411. pattern := nilSafe(obj.self.getStr("source", nil)).toString()
  412. flags := nilSafe(obj.self.getStr("flags", nil)).toString()
  413. var sb valueStringBuilder
  414. sb.WriteRune('/')
  415. sb.WriteString(pattern)
  416. sb.WriteRune('/')
  417. sb.WriteString(flags)
  418. return sb.String()
  419. }
  420. func (r *regexpObject) writeEscapedSource(sb *valueStringBuilder) bool {
  421. if r.source.length() == 0 {
  422. sb.WriteString(asciiString("(?:)"))
  423. return true
  424. }
  425. pos := 0
  426. lastPos := 0
  427. rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader(0)}
  428. L:
  429. for {
  430. c, size, err := rd.ReadRune()
  431. if err != nil {
  432. break
  433. }
  434. switch c {
  435. case '\\':
  436. pos++
  437. _, size, err = rd.ReadRune()
  438. if err != nil {
  439. break L
  440. }
  441. case '/', '\u000a', '\u000d', '\u2028', '\u2029':
  442. sb.WriteSubstring(r.source, lastPos, pos)
  443. sb.WriteRune('\\')
  444. switch c {
  445. case '\u000a':
  446. sb.WriteRune('n')
  447. case '\u000d':
  448. sb.WriteRune('r')
  449. default:
  450. sb.WriteRune('u')
  451. sb.WriteRune(rune(hex[c>>12]))
  452. sb.WriteRune(rune(hex[(c>>8)&0xF]))
  453. sb.WriteRune(rune(hex[(c>>4)&0xF]))
  454. sb.WriteRune(rune(hex[c&0xF]))
  455. }
  456. lastPos = pos + size
  457. }
  458. pos += size
  459. }
  460. if lastPos > 0 {
  461. sb.WriteSubstring(r.source, lastPos, r.source.length())
  462. return true
  463. }
  464. return false
  465. }
  466. func (r *Runtime) regexpproto_getSource(call FunctionCall) Value {
  467. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  468. var sb valueStringBuilder
  469. if this.writeEscapedSource(&sb) {
  470. return sb.String()
  471. }
  472. return this.source
  473. } else if call.This == r.global.RegExpPrototype {
  474. return asciiString("(?:)")
  475. } else {
  476. panic(r.NewTypeError("Method RegExp.prototype.source getter called on incompatible receiver"))
  477. }
  478. }
  479. func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value {
  480. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  481. if this.pattern.global {
  482. return valueTrue
  483. } else {
  484. return valueFalse
  485. }
  486. } else if call.This == r.global.RegExpPrototype {
  487. return _undefined
  488. } else {
  489. panic(r.NewTypeError("Method RegExp.prototype.global getter called on incompatible receiver %s", call.This.toString()))
  490. }
  491. }
  492. func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
  493. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  494. if this.pattern.multiline {
  495. return valueTrue
  496. } else {
  497. return valueFalse
  498. }
  499. } else if call.This == r.global.RegExpPrototype {
  500. return _undefined
  501. } else {
  502. panic(r.NewTypeError("Method RegExp.prototype.multiline getter called on incompatible receiver %s", call.This.toString()))
  503. }
  504. }
  505. func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
  506. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  507. if this.pattern.ignoreCase {
  508. return valueTrue
  509. } else {
  510. return valueFalse
  511. }
  512. } else if call.This == r.global.RegExpPrototype {
  513. return _undefined
  514. } else {
  515. panic(r.NewTypeError("Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", call.This.toString()))
  516. }
  517. }
  518. func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value {
  519. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  520. if this.pattern.unicode {
  521. return valueTrue
  522. } else {
  523. return valueFalse
  524. }
  525. } else if call.This == r.global.RegExpPrototype {
  526. return _undefined
  527. } else {
  528. panic(r.NewTypeError("Method RegExp.prototype.unicode getter called on incompatible receiver %s", call.This.toString()))
  529. }
  530. }
  531. func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
  532. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  533. if this.pattern.sticky {
  534. return valueTrue
  535. } else {
  536. return valueFalse
  537. }
  538. } else if call.This == r.global.RegExpPrototype {
  539. return _undefined
  540. } else {
  541. panic(r.NewTypeError("Method RegExp.prototype.sticky getter called on incompatible receiver %s", call.This.toString()))
  542. }
  543. }
  544. func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
  545. var global, ignoreCase, multiline, sticky, unicode bool
  546. thisObj := r.toObject(call.This)
  547. size := 0
  548. if v := thisObj.self.getStr("global", nil); v != nil {
  549. global = v.ToBoolean()
  550. if global {
  551. size++
  552. }
  553. }
  554. if v := thisObj.self.getStr("ignoreCase", nil); v != nil {
  555. ignoreCase = v.ToBoolean()
  556. if ignoreCase {
  557. size++
  558. }
  559. }
  560. if v := thisObj.self.getStr("multiline", nil); v != nil {
  561. multiline = v.ToBoolean()
  562. if multiline {
  563. size++
  564. }
  565. }
  566. if v := thisObj.self.getStr("sticky", nil); v != nil {
  567. sticky = v.ToBoolean()
  568. if sticky {
  569. size++
  570. }
  571. }
  572. if v := thisObj.self.getStr("unicode", nil); v != nil {
  573. unicode = v.ToBoolean()
  574. if unicode {
  575. size++
  576. }
  577. }
  578. var sb strings.Builder
  579. sb.Grow(size)
  580. if global {
  581. sb.WriteByte('g')
  582. }
  583. if ignoreCase {
  584. sb.WriteByte('i')
  585. }
  586. if multiline {
  587. sb.WriteByte('m')
  588. }
  589. if unicode {
  590. sb.WriteByte('u')
  591. }
  592. if sticky {
  593. sb.WriteByte('y')
  594. }
  595. return asciiString(sb.String())
  596. }
  597. func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value {
  598. res := execFn(FunctionCall{
  599. This: rxObj,
  600. Arguments: []Value{arg},
  601. })
  602. if res != _null {
  603. if _, ok := res.(*Object); !ok {
  604. panic(r.NewTypeError("RegExp exec method returned something other than an Object or null"))
  605. }
  606. }
  607. return res
  608. }
  609. func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s valueString) []Value {
  610. fullUnicode := nilSafe(rxObj.self.getStr("unicode", nil)).ToBoolean()
  611. rxObj.self.setOwnStr("lastIndex", intToValue(0), true)
  612. execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable()
  613. if !ok {
  614. panic(r.NewTypeError("exec is not a function"))
  615. }
  616. var a []Value
  617. for {
  618. res := r.regExpExec(execFn, rxObj, s)
  619. if res == _null {
  620. break
  621. }
  622. a = append(a, res)
  623. matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString()
  624. if matchStr.length() == 0 {
  625. thisIndex := toLength(rxObj.self.getStr("lastIndex", nil))
  626. rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(s, thisIndex, fullUnicode)), true)
  627. }
  628. }
  629. return a
  630. }
  631. func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s valueString) Value {
  632. rx := rxObj.self
  633. global := rx.getStr("global", nil)
  634. if global != nil && global.ToBoolean() {
  635. a := r.getGlobalRegexpMatches(rxObj, s)
  636. if len(a) == 0 {
  637. return _null
  638. }
  639. ar := make([]Value, 0, len(a))
  640. for _, result := range a {
  641. obj := r.toObject(result)
  642. matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString()
  643. ar = append(ar, matchStr)
  644. }
  645. return r.newArrayValues(ar)
  646. }
  647. execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
  648. if !ok {
  649. panic(r.NewTypeError("exec is not a function"))
  650. }
  651. return r.regExpExec(execFn, rxObj, s)
  652. }
  653. func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject {
  654. if deoptimiseRegexp {
  655. return nil
  656. }
  657. rx, ok := rxObj.self.(*regexpObject)
  658. if !ok {
  659. return nil
  660. }
  661. if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto {
  662. return nil
  663. }
  664. return rx
  665. }
  666. func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value {
  667. thisObj := r.toObject(call.This)
  668. s := call.Argument(0).toString()
  669. rx := r.checkStdRegexp(thisObj)
  670. if rx == nil {
  671. return r.regexpproto_stdMatcherGeneric(thisObj, s)
  672. }
  673. if rx.pattern.global {
  674. res := rx.pattern.findAllSubmatchIndex(s, 0, -1, rx.pattern.sticky)
  675. if len(res) == 0 {
  676. rx.setOwnStr("lastIndex", intToValue(0), true)
  677. return _null
  678. }
  679. a := make([]Value, 0, len(res))
  680. for _, result := range res {
  681. a = append(a, s.substring(result[0], result[1]))
  682. }
  683. rx.setOwnStr("lastIndex", intToValue(int64(res[len(res)-1][1])), true)
  684. return r.newArrayValues(a)
  685. } else {
  686. return rx.exec(s)
  687. }
  688. }
  689. func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg valueString) Value {
  690. rx := rxObj.self
  691. previousLastIndex := nilSafe(rx.getStr("lastIndex", nil))
  692. zero := intToValue(0)
  693. if !previousLastIndex.SameAs(zero) {
  694. rx.setOwnStr("lastIndex", zero, true)
  695. }
  696. execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
  697. if !ok {
  698. panic(r.NewTypeError("exec is not a function"))
  699. }
  700. result := r.regExpExec(execFn, rxObj, arg)
  701. currentLastIndex := nilSafe(rx.getStr("lastIndex", nil))
  702. if !currentLastIndex.SameAs(previousLastIndex) {
  703. rx.setOwnStr("lastIndex", previousLastIndex, true)
  704. }
  705. if result == _null {
  706. return intToValue(-1)
  707. }
  708. return r.toObject(result).self.getStr("index", nil)
  709. }
  710. func (r *Runtime) regexpproto_stdMatcherAll(call FunctionCall) Value {
  711. thisObj := r.toObject(call.This)
  712. s := call.Argument(0).toString()
  713. flags := nilSafe(thisObj.self.getStr("flags", nil)).toString()
  714. c := r.speciesConstructorObj(call.This.(*Object), r.global.RegExp)
  715. matcher := r.toConstructor(c)([]Value{call.This, flags}, nil)
  716. matcher.self.setOwnStr("lastIndex", valueInt(toLength(thisObj.self.getStr("lastIndex", nil))), true)
  717. flagsStr := flags.String()
  718. global := strings.Contains(flagsStr, "g")
  719. fullUnicode := strings.Contains(flagsStr, "u")
  720. return r.createRegExpStringIterator(matcher, s, global, fullUnicode)
  721. }
  722. func (r *Runtime) createRegExpStringIterator(matcher *Object, s valueString, global, fullUnicode bool) Value {
  723. o := &Object{runtime: r}
  724. ri := &regExpStringIterObject{
  725. matcher: matcher,
  726. s: s,
  727. global: global,
  728. fullUnicode: fullUnicode,
  729. }
  730. ri.class = classRegExpStringIterator
  731. ri.val = o
  732. ri.extensible = true
  733. o.self = ri
  734. ri.prototype = r.global.RegExpStringIteratorPrototype
  735. ri.init()
  736. return o
  737. }
  738. type regExpStringIterObject struct {
  739. baseObject
  740. matcher *Object
  741. s valueString
  742. global, fullUnicode, done bool
  743. }
  744. // RegExpExec as defined in 21.2.5.2.1
  745. func regExpExec(r *Object, s valueString) Value {
  746. exec := r.self.getStr("exec", nil)
  747. if execObject, ok := exec.(*Object); ok {
  748. if execFn, ok := execObject.self.assertCallable(); ok {
  749. return r.runtime.regExpExec(execFn, r, s)
  750. }
  751. }
  752. if rx, ok := r.self.(*regexpObject); ok {
  753. return rx.exec(s)
  754. }
  755. panic(r.runtime.NewTypeError("no RegExpMatcher internal slot"))
  756. }
  757. func (ri *regExpStringIterObject) next() (v Value) {
  758. if ri.done {
  759. return ri.val.runtime.createIterResultObject(_undefined, true)
  760. }
  761. match := regExpExec(ri.matcher, ri.s)
  762. if IsNull(match) {
  763. ri.done = true
  764. return ri.val.runtime.createIterResultObject(_undefined, true)
  765. }
  766. if !ri.global {
  767. ri.done = true
  768. return ri.val.runtime.createIterResultObject(match, false)
  769. }
  770. matchStr := nilSafe(ri.val.runtime.toObject(match).self.getIdx(valueInt(0), nil)).toString()
  771. if matchStr.length() == 0 {
  772. thisIndex := toLength(ri.matcher.self.getStr("lastIndex", nil))
  773. ri.matcher.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(ri.s, thisIndex, ri.fullUnicode)), true)
  774. }
  775. return ri.val.runtime.createIterResultObject(match, false)
  776. }
  777. func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value {
  778. thisObj := r.toObject(call.This)
  779. s := call.Argument(0).toString()
  780. rx := r.checkStdRegexp(thisObj)
  781. if rx == nil {
  782. return r.regexpproto_stdSearchGeneric(thisObj, s)
  783. }
  784. previousLastIndex := rx.getStr("lastIndex", nil)
  785. rx.setOwnStr("lastIndex", intToValue(0), true)
  786. match, result := rx.execRegexp(s)
  787. rx.setOwnStr("lastIndex", previousLastIndex, true)
  788. if !match {
  789. return intToValue(-1)
  790. }
  791. return intToValue(int64(result[0]))
  792. }
  793. func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s valueString, limit Value, unicodeMatching bool) Value {
  794. var a []Value
  795. var lim int64
  796. if limit == nil || limit == _undefined {
  797. lim = maxInt - 1
  798. } else {
  799. lim = toLength(limit)
  800. }
  801. if lim == 0 {
  802. return r.newArrayValues(a)
  803. }
  804. size := s.length()
  805. p := 0
  806. execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil)) // must be non-nil
  807. if size == 0 {
  808. if r.regExpExec(execFn, splitter, s) == _null {
  809. a = append(a, s)
  810. }
  811. return r.newArrayValues(a)
  812. }
  813. q := p
  814. for q < size {
  815. splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true)
  816. z := r.regExpExec(execFn, splitter, s)
  817. if z == _null {
  818. q = advanceStringIndex(s, q, unicodeMatching)
  819. } else {
  820. z := r.toObject(z)
  821. e := toLength(splitter.self.getStr("lastIndex", nil))
  822. if e == int64(p) {
  823. q = advanceStringIndex(s, q, unicodeMatching)
  824. } else {
  825. a = append(a, s.substring(p, q))
  826. if int64(len(a)) == lim {
  827. return r.newArrayValues(a)
  828. }
  829. if e > int64(size) {
  830. p = size
  831. } else {
  832. p = int(e)
  833. }
  834. numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0)
  835. for i := int64(1); i <= numberOfCaptures; i++ {
  836. a = append(a, nilSafe(z.self.getIdx(valueInt(i), nil)))
  837. if int64(len(a)) == lim {
  838. return r.newArrayValues(a)
  839. }
  840. }
  841. q = p
  842. }
  843. }
  844. }
  845. a = append(a, s.substring(p, size))
  846. return r.newArrayValues(a)
  847. }
  848. func advanceStringIndex(s valueString, pos int, unicode bool) int {
  849. next := pos + 1
  850. if !unicode {
  851. return next
  852. }
  853. l := s.length()
  854. if next >= l {
  855. return next
  856. }
  857. if !isUTF16FirstSurrogate(s.charAt(pos)) {
  858. return next
  859. }
  860. if !isUTF16SecondSurrogate(s.charAt(next)) {
  861. return next
  862. }
  863. return next + 1
  864. }
  865. func advanceStringIndex64(s valueString, pos int64, unicode bool) int64 {
  866. next := pos + 1
  867. if !unicode {
  868. return next
  869. }
  870. l := int64(s.length())
  871. if next >= l {
  872. return next
  873. }
  874. if !isUTF16FirstSurrogate(s.charAt(int(pos))) {
  875. return next
  876. }
  877. if !isUTF16SecondSurrogate(s.charAt(int(next))) {
  878. return next
  879. }
  880. return next + 1
  881. }
  882. func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value {
  883. rxObj := r.toObject(call.This)
  884. s := call.Argument(0).toString()
  885. limitValue := call.Argument(1)
  886. var splitter *Object
  887. search := r.checkStdRegexp(rxObj)
  888. c := r.speciesConstructorObj(rxObj, r.global.RegExp)
  889. if search == nil || c != r.global.RegExp {
  890. flags := nilSafe(rxObj.self.getStr("flags", nil)).toString()
  891. flagsStr := flags.String()
  892. // Add 'y' flag if missing
  893. if !strings.Contains(flagsStr, "y") {
  894. flags = flags.concat(asciiString("y"))
  895. }
  896. splitter = r.toConstructor(c)([]Value{rxObj, flags}, nil)
  897. search = r.checkStdRegexp(splitter)
  898. if search == nil {
  899. return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u"))
  900. }
  901. }
  902. pattern := search.pattern // toUint32() may recompile the pattern, but we still need to use the original
  903. limit := -1
  904. if limitValue != _undefined {
  905. limit = int(toUint32(limitValue))
  906. }
  907. if limit == 0 {
  908. return r.newArrayValues(nil)
  909. }
  910. targetLength := s.length()
  911. var valueArray []Value
  912. lastIndex := 0
  913. found := 0
  914. result := pattern.findAllSubmatchIndex(s, 0, -1, false)
  915. if targetLength == 0 {
  916. if result == nil {
  917. valueArray = append(valueArray, s)
  918. }
  919. goto RETURN
  920. }
  921. for _, match := range result {
  922. if match[0] == match[1] {
  923. // FIXME Ugh, this is a hack
  924. if match[0] == 0 || match[0] == targetLength {
  925. continue
  926. }
  927. }
  928. if lastIndex != match[0] {
  929. valueArray = append(valueArray, s.substring(lastIndex, match[0]))
  930. found++
  931. } else if lastIndex == match[0] {
  932. if lastIndex != -1 {
  933. valueArray = append(valueArray, stringEmpty)
  934. found++
  935. }
  936. }
  937. lastIndex = match[1]
  938. if found == limit {
  939. goto RETURN
  940. }
  941. captureCount := len(match) / 2
  942. for index := 1; index < captureCount; index++ {
  943. offset := index * 2
  944. var value Value
  945. if match[offset] != -1 {
  946. value = s.substring(match[offset], match[offset+1])
  947. } else {
  948. value = _undefined
  949. }
  950. valueArray = append(valueArray, value)
  951. found++
  952. if found == limit {
  953. goto RETURN
  954. }
  955. }
  956. }
  957. if found != limit {
  958. if lastIndex != targetLength {
  959. valueArray = append(valueArray, s.substring(lastIndex, targetLength))
  960. } else {
  961. valueArray = append(valueArray, stringEmpty)
  962. }
  963. }
  964. RETURN:
  965. return r.newArrayValues(valueArray)
  966. }
  967. func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr valueString, rcall func(FunctionCall) Value) Value {
  968. var results []Value
  969. if nilSafe(rxObj.self.getStr("global", nil)).ToBoolean() {
  970. results = r.getGlobalRegexpMatches(rxObj, s)
  971. } else {
  972. execFn := toMethod(rxObj.self.getStr("exec", nil)) // must be non-nil
  973. result := r.regExpExec(execFn, rxObj, s)
  974. if result != _null {
  975. results = append(results, result)
  976. }
  977. }
  978. lengthS := s.length()
  979. nextSourcePosition := 0
  980. var resultBuf valueStringBuilder
  981. for _, result := range results {
  982. obj := r.toObject(result)
  983. nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0)
  984. matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString()
  985. matchLength := matched.length()
  986. position := toIntStrict(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0))
  987. var captures []Value
  988. if rcall != nil {
  989. captures = make([]Value, 0, nCaptures+3)
  990. } else {
  991. captures = make([]Value, 0, nCaptures+1)
  992. }
  993. captures = append(captures, matched)
  994. for n := int64(1); n <= nCaptures; n++ {
  995. capN := nilSafe(obj.self.getIdx(valueInt(n), nil))
  996. if capN != _undefined {
  997. capN = capN.ToString()
  998. }
  999. captures = append(captures, capN)
  1000. }
  1001. var replacement valueString
  1002. if rcall != nil {
  1003. captures = append(captures, intToValue(int64(position)), s)
  1004. replacement = rcall(FunctionCall{
  1005. This: _undefined,
  1006. Arguments: captures,
  1007. }).toString()
  1008. if position >= nextSourcePosition {
  1009. resultBuf.WriteString(s.substring(nextSourcePosition, position))
  1010. resultBuf.WriteString(replacement)
  1011. nextSourcePosition = position + matchLength
  1012. }
  1013. } else {
  1014. if position >= nextSourcePosition {
  1015. resultBuf.WriteString(s.substring(nextSourcePosition, position))
  1016. writeSubstitution(s, position, len(captures), func(idx int) valueString {
  1017. capture := captures[idx]
  1018. if capture != _undefined {
  1019. return capture.toString()
  1020. }
  1021. return stringEmpty
  1022. }, replaceStr, &resultBuf)
  1023. nextSourcePosition = position + matchLength
  1024. }
  1025. }
  1026. }
  1027. if nextSourcePosition < lengthS {
  1028. resultBuf.WriteString(s.substring(nextSourcePosition, lengthS))
  1029. }
  1030. return resultBuf.String()
  1031. }
  1032. func writeSubstitution(s valueString, position int, numCaptures int, getCapture func(int) valueString, replaceStr valueString, buf *valueStringBuilder) {
  1033. l := s.length()
  1034. rl := replaceStr.length()
  1035. matched := getCapture(0)
  1036. tailPos := position + matched.length()
  1037. for i := 0; i < rl; i++ {
  1038. c := replaceStr.charAt(i)
  1039. if c == '$' && i < rl-1 {
  1040. ch := replaceStr.charAt(i + 1)
  1041. switch ch {
  1042. case '$':
  1043. buf.WriteRune('$')
  1044. case '`':
  1045. buf.WriteString(s.substring(0, position))
  1046. case '\'':
  1047. if tailPos < l {
  1048. buf.WriteString(s.substring(tailPos, l))
  1049. }
  1050. case '&':
  1051. buf.WriteString(matched)
  1052. default:
  1053. matchNumber := 0
  1054. j := i + 1
  1055. for j < rl {
  1056. ch := replaceStr.charAt(j)
  1057. if ch >= '0' && ch <= '9' {
  1058. m := matchNumber*10 + int(ch-'0')
  1059. if m >= numCaptures {
  1060. break
  1061. }
  1062. matchNumber = m
  1063. j++
  1064. } else {
  1065. break
  1066. }
  1067. }
  1068. if matchNumber > 0 {
  1069. buf.WriteString(getCapture(matchNumber))
  1070. i = j - 1
  1071. continue
  1072. } else {
  1073. buf.WriteRune('$')
  1074. buf.WriteRune(ch)
  1075. }
  1076. }
  1077. i++
  1078. } else {
  1079. buf.WriteRune(c)
  1080. }
  1081. }
  1082. }
  1083. func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value {
  1084. rxObj := r.toObject(call.This)
  1085. s := call.Argument(0).toString()
  1086. replaceStr, rcall := getReplaceValue(call.Argument(1))
  1087. rx := r.checkStdRegexp(rxObj)
  1088. if rx == nil {
  1089. return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall)
  1090. }
  1091. var index int64
  1092. find := 1
  1093. if rx.pattern.global {
  1094. find = -1
  1095. rx.setOwnStr("lastIndex", intToValue(0), true)
  1096. } else {
  1097. index = rx.getLastIndex()
  1098. }
  1099. found := rx.pattern.findAllSubmatchIndex(s, toIntStrict(index), find, rx.pattern.sticky)
  1100. if len(found) > 0 {
  1101. if !rx.updateLastIndex(index, found[0], found[len(found)-1]) {
  1102. found = nil
  1103. }
  1104. } else {
  1105. rx.updateLastIndex(index, nil, nil)
  1106. }
  1107. return stringReplace(s, found, replaceStr, rcall)
  1108. }
  1109. func (r *Runtime) regExpStringIteratorProto_next(call FunctionCall) Value {
  1110. thisObj := r.toObject(call.This)
  1111. if iter, ok := thisObj.self.(*regExpStringIterObject); ok {
  1112. return iter.next()
  1113. }
  1114. panic(r.NewTypeError("Method RegExp String Iterator.prototype.next called on incompatible receiver %s", thisObj.String()))
  1115. }
  1116. func (r *Runtime) createRegExpStringIteratorPrototype(val *Object) objectImpl {
  1117. o := newBaseObjectObj(val, r.global.IteratorPrototype, classObject)
  1118. o._putProp("next", r.newNativeFunc(r.regExpStringIteratorProto_next, nil, "next", nil, 0), true, false, true)
  1119. o._putSym(SymToStringTag, valueProp(asciiString(classRegExpStringIterator), false, false, true))
  1120. return o
  1121. }
  1122. func (r *Runtime) initRegExp() {
  1123. o := r.newGuardedObject(r.global.ObjectPrototype, classObject)
  1124. r.global.RegExpPrototype = o.val
  1125. r.global.stdRegexpProto = o
  1126. r.global.RegExpStringIteratorPrototype = r.newLazyObject(r.createRegExpStringIteratorPrototype)
  1127. o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, nil, "compile", nil, 2), true, false, true)
  1128. o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, nil, "exec", nil, 1), true, false, true)
  1129. o._putProp("test", r.newNativeFunc(r.regexpproto_test, nil, "test", nil, 1), true, false, true)
  1130. o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, nil, "toString", nil, 0), true, false, true)
  1131. o.setOwnStr("source", &valueProperty{
  1132. configurable: true,
  1133. getterFunc: r.newNativeFunc(r.regexpproto_getSource, nil, "get source", nil, 0),
  1134. accessor: true,
  1135. }, false)
  1136. o.setOwnStr("global", &valueProperty{
  1137. configurable: true,
  1138. getterFunc: r.newNativeFunc(r.regexpproto_getGlobal, nil, "get global", nil, 0),
  1139. accessor: true,
  1140. }, false)
  1141. o.setOwnStr("multiline", &valueProperty{
  1142. configurable: true,
  1143. getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, nil, "get multiline", nil, 0),
  1144. accessor: true,
  1145. }, false)
  1146. o.setOwnStr("ignoreCase", &valueProperty{
  1147. configurable: true,
  1148. getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, nil, "get ignoreCase", nil, 0),
  1149. accessor: true,
  1150. }, false)
  1151. o.setOwnStr("unicode", &valueProperty{
  1152. configurable: true,
  1153. getterFunc: r.newNativeFunc(r.regexpproto_getUnicode, nil, "get unicode", nil, 0),
  1154. accessor: true,
  1155. }, false)
  1156. o.setOwnStr("sticky", &valueProperty{
  1157. configurable: true,
  1158. getterFunc: r.newNativeFunc(r.regexpproto_getSticky, nil, "get sticky", nil, 0),
  1159. accessor: true,
  1160. }, false)
  1161. o.setOwnStr("flags", &valueProperty{
  1162. configurable: true,
  1163. getterFunc: r.newNativeFunc(r.regexpproto_getFlags, nil, "get flags", nil, 0),
  1164. accessor: true,
  1165. }, false)
  1166. o._putSym(SymMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, nil, "[Symbol.match]", nil, 1), true, false, true))
  1167. o._putSym(SymMatchAll, valueProp(r.newNativeFunc(r.regexpproto_stdMatcherAll, nil, "[Symbol.matchAll]", nil, 1), true, false, true))
  1168. o._putSym(SymSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, nil, "[Symbol.search]", nil, 1), true, false, true))
  1169. o._putSym(SymSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, nil, "[Symbol.split]", nil, 2), true, false, true))
  1170. o._putSym(SymReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, nil, "[Symbol.replace]", nil, 2), true, false, true))
  1171. o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky")
  1172. r.global.RegExp = r.newNativeFunc(r.builtin_RegExp, r.builtin_newRegExp, "RegExp", r.global.RegExpPrototype, 2)
  1173. rx := r.global.RegExp.self
  1174. r.putSpeciesReturnThis(rx)
  1175. r.addToGlobal("RegExp", r.global.RegExp)
  1176. }