builtin_regexp.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. package goja
  2. import (
  3. "fmt"
  4. "github.com/dlclark/regexp2"
  5. "github.com/dop251/goja/parser"
  6. "regexp"
  7. "strings"
  8. )
  9. func (r *Runtime) newRegexpObject(proto *Object) *regexpObject {
  10. v := &Object{runtime: r}
  11. o := &regexpObject{}
  12. o.class = classRegExp
  13. o.val = v
  14. o.extensible = true
  15. v.self = o
  16. o.prototype = proto
  17. o.init()
  18. return o
  19. }
  20. func (r *Runtime) newRegExpp(pattern regexpPattern, patternStr valueString, global, ignoreCase, multiline, sticky bool, proto *Object) *Object {
  21. o := r.newRegexpObject(proto)
  22. o.pattern = pattern
  23. o.source = patternStr
  24. o.global = global
  25. o.ignoreCase = ignoreCase
  26. o.multiline = multiline
  27. o.sticky = sticky
  28. return o.val
  29. }
  30. func compileRegexp(patternStr, flags string) (p regexpPattern, global, ignoreCase, multiline, sticky bool, err error) {
  31. if flags != "" {
  32. invalidFlags := func() {
  33. err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags)
  34. }
  35. for _, chr := range flags {
  36. switch chr {
  37. case 'g':
  38. if global {
  39. invalidFlags()
  40. return
  41. }
  42. global = true
  43. case 'm':
  44. if multiline {
  45. invalidFlags()
  46. return
  47. }
  48. multiline = true
  49. case 'i':
  50. if ignoreCase {
  51. invalidFlags()
  52. return
  53. }
  54. ignoreCase = true
  55. case 'y':
  56. if sticky {
  57. invalidFlags()
  58. return
  59. }
  60. sticky = true
  61. default:
  62. invalidFlags()
  63. return
  64. }
  65. }
  66. }
  67. re2Str, err1 := parser.TransformRegExp(patternStr)
  68. if /*false &&*/ err1 == nil {
  69. re2flags := ""
  70. if multiline {
  71. re2flags += "m"
  72. }
  73. if ignoreCase {
  74. re2flags += "i"
  75. }
  76. if len(re2flags) > 0 {
  77. re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str)
  78. }
  79. pattern, err1 := regexp.Compile(re2Str)
  80. if err1 != nil {
  81. err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1)
  82. return
  83. }
  84. p = (*regexpWrapper)(pattern)
  85. } else {
  86. var opts regexp2.RegexOptions = regexp2.ECMAScript
  87. if multiline {
  88. opts |= regexp2.Multiline
  89. }
  90. if ignoreCase {
  91. opts |= regexp2.IgnoreCase
  92. }
  93. regexp2Pattern, err1 := regexp2.Compile(patternStr, opts)
  94. if err1 != nil {
  95. err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err1)
  96. return
  97. }
  98. p = (*regexp2Wrapper)(regexp2Pattern)
  99. }
  100. return
  101. }
  102. func (r *Runtime) newRegExp(patternStr valueString, flags string, proto *Object) *Object {
  103. pattern, global, ignoreCase, multiline, sticky, err := compileRegexp(patternStr.String(), flags)
  104. if err != nil {
  105. panic(r.newSyntaxError(err.Error(), -1))
  106. }
  107. return r.newRegExpp(pattern, patternStr, global, ignoreCase, multiline, sticky, proto)
  108. }
  109. func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object {
  110. var pattern valueString
  111. var flags string
  112. if len(args) > 0 {
  113. if obj, ok := args[0].(*Object); ok {
  114. if rx, ok := obj.self.(*regexpObject); ok {
  115. if len(args) < 2 || args[1] == _undefined {
  116. return rx.clone()
  117. } else {
  118. return r.newRegExp(rx.source, args[1].String(), proto)
  119. }
  120. }
  121. }
  122. if args[0] != _undefined {
  123. pattern = args[0].toString()
  124. }
  125. }
  126. if len(args) > 1 {
  127. if a := args[1]; a != _undefined {
  128. flags = a.String()
  129. }
  130. }
  131. if pattern == nil {
  132. pattern = stringEmpty
  133. }
  134. return r.newRegExp(pattern, flags, proto)
  135. }
  136. func (r *Runtime) builtin_RegExp(call FunctionCall) Value {
  137. flags := call.Argument(1)
  138. if flags == _undefined {
  139. if obj, ok := call.Argument(0).(*Object); ok {
  140. if _, ok := obj.self.(*regexpObject); ok {
  141. return call.Arguments[0]
  142. }
  143. }
  144. }
  145. return r.builtin_newRegExp(call.Arguments, r.global.RegExpPrototype)
  146. }
  147. func (r *Runtime) regexpproto_exec(call FunctionCall) Value {
  148. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  149. return this.exec(call.Argument(0).toString())
  150. } else {
  151. r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", call.This.toString())
  152. return nil
  153. }
  154. }
  155. func (r *Runtime) regexpproto_test(call FunctionCall) Value {
  156. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  157. if this.test(call.Argument(0).toString()) {
  158. return valueTrue
  159. } else {
  160. return valueFalse
  161. }
  162. } else {
  163. r.typeErrorResult(true, "Method RegExp.prototype.test called on incompatible receiver %s", call.This.toString())
  164. return nil
  165. }
  166. }
  167. func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
  168. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  169. var g, i, m, y string
  170. if this.global {
  171. g = "g"
  172. }
  173. if this.ignoreCase {
  174. i = "i"
  175. }
  176. if this.multiline {
  177. m = "m"
  178. }
  179. if this.sticky {
  180. y = "y"
  181. }
  182. return newStringValue(fmt.Sprintf("/%s/%s%s%s%s", this.source.String(), g, i, m, y))
  183. } else {
  184. r.typeErrorResult(true, "Method RegExp.prototype.toString called on incompatible receiver %s", call.This)
  185. return nil
  186. }
  187. }
  188. func (r *Runtime) regexpproto_getSource(call FunctionCall) Value {
  189. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  190. return this.source
  191. } else {
  192. r.typeErrorResult(true, "Method RegExp.prototype.source getter called on incompatible receiver %s", call.This.toString())
  193. return nil
  194. }
  195. }
  196. func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value {
  197. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  198. if this.global {
  199. return valueTrue
  200. } else {
  201. return valueFalse
  202. }
  203. } else {
  204. r.typeErrorResult(true, "Method RegExp.prototype.global getter called on incompatible receiver %s", call.This.toString())
  205. return nil
  206. }
  207. }
  208. func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
  209. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  210. if this.multiline {
  211. return valueTrue
  212. } else {
  213. return valueFalse
  214. }
  215. } else {
  216. r.typeErrorResult(true, "Method RegExp.prototype.multiline getter called on incompatible receiver %s", call.This.toString())
  217. return nil
  218. }
  219. }
  220. func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
  221. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  222. if this.ignoreCase {
  223. return valueTrue
  224. } else {
  225. return valueFalse
  226. }
  227. } else {
  228. r.typeErrorResult(true, "Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", call.This.toString())
  229. return nil
  230. }
  231. }
  232. func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
  233. if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
  234. if this.sticky {
  235. return valueTrue
  236. } else {
  237. return valueFalse
  238. }
  239. } else {
  240. r.typeErrorResult(true, "Method RegExp.prototype.sticky getter called on incompatible receiver %s", call.This.toString())
  241. return nil
  242. }
  243. }
  244. func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
  245. var global, ignoreCase, multiline, sticky bool
  246. thisObj := r.toObject(call.This)
  247. if this, ok := thisObj.self.(*regexpObject); ok {
  248. global, ignoreCase, multiline, sticky = this.global, this.ignoreCase, this.multiline, this.sticky
  249. } else {
  250. if v := thisObj.self.getStr("global", nil); v != nil {
  251. global = v.ToBoolean()
  252. }
  253. if v := thisObj.self.getStr("ignoreCase", nil); v != nil {
  254. ignoreCase = v.ToBoolean()
  255. }
  256. if v := thisObj.self.getStr("multiline", nil); v != nil {
  257. multiline = v.ToBoolean()
  258. }
  259. if v := thisObj.self.getStr("sticky", nil); v != nil {
  260. sticky = v.ToBoolean()
  261. }
  262. }
  263. var sb strings.Builder
  264. if global {
  265. sb.WriteByte('g')
  266. }
  267. if ignoreCase {
  268. sb.WriteByte('i')
  269. }
  270. if multiline {
  271. sb.WriteByte('m')
  272. }
  273. if sticky {
  274. sb.WriteByte('y')
  275. }
  276. return asciiString(sb.String())
  277. }
  278. func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value {
  279. res := execFn(FunctionCall{
  280. This: rxObj,
  281. Arguments: []Value{arg},
  282. })
  283. if res != _null {
  284. if _, ok := res.(*Object); !ok {
  285. panic(r.NewTypeError("RegExp exec method returned something other than an Object or null"))
  286. }
  287. }
  288. return res
  289. }
  290. func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, arg Value) Value {
  291. rx := rxObj.self
  292. global := rx.getStr("global", nil)
  293. if global != nil && global.ToBoolean() {
  294. rx.setOwnStr("lastIndex", intToValue(0), true)
  295. execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
  296. if !ok {
  297. panic(r.NewTypeError("exec is not a function"))
  298. }
  299. var a []Value
  300. for {
  301. res := r.regExpExec(execFn, rxObj, arg)
  302. if res == _null {
  303. break
  304. }
  305. matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString()
  306. a = append(a, matchStr)
  307. if matchStr.length() == 0 {
  308. thisIndex := rx.getStr("lastIndex", nil).ToInteger()
  309. rx.setOwnStr("lastIndex", intToValue(thisIndex+1), true) // TODO fullUnicode
  310. }
  311. }
  312. if len(a) == 0 {
  313. return _null
  314. }
  315. return r.newArrayValues(a)
  316. }
  317. execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
  318. if !ok {
  319. panic(r.NewTypeError("exec is not a function"))
  320. }
  321. return r.regExpExec(execFn, rxObj, arg)
  322. }
  323. func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject {
  324. rx, ok := rxObj.self.(*regexpObject)
  325. if !ok {
  326. return nil
  327. }
  328. execFn := rx.getStr("exec", nil)
  329. if execFn != nil && execFn != r.global.regexpProtoExec {
  330. return nil
  331. }
  332. return rx
  333. }
  334. func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value {
  335. thisObj := r.toObject(call.This)
  336. s := call.Argument(0).toString()
  337. rx := r.checkStdRegexp(thisObj)
  338. if rx == nil {
  339. return r.regexpproto_stdMatcherGeneric(thisObj, s)
  340. }
  341. if rx.global {
  342. rx.setOwnStr("lastIndex", intToValue(0), true)
  343. var a []Value
  344. var previousLastIndex int64
  345. for {
  346. match, result := rx.execRegexp(s)
  347. if !match {
  348. break
  349. }
  350. thisIndex := rx.getStr("lastIndex", nil).ToInteger()
  351. if thisIndex == previousLastIndex {
  352. previousLastIndex++
  353. rx.setOwnStr("lastIndex", intToValue(previousLastIndex), true)
  354. } else {
  355. previousLastIndex = thisIndex
  356. }
  357. a = append(a, s.substring(int64(result[0]), int64(result[1])))
  358. }
  359. if len(a) == 0 {
  360. return _null
  361. }
  362. return r.newArrayValues(a)
  363. } else {
  364. return rx.exec(s)
  365. }
  366. }
  367. func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg valueString) Value {
  368. rx := rxObj.self
  369. previousLastIndex := rx.getStr("lastIndex", nil)
  370. rx.setOwnStr("lastIndex", intToValue(0), true)
  371. execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
  372. if !ok {
  373. panic(r.NewTypeError("exec is not a function"))
  374. }
  375. result := r.regExpExec(execFn, rxObj, arg)
  376. rx.setOwnStr("lastIndex", previousLastIndex, true)
  377. if result == _null {
  378. return intToValue(-1)
  379. }
  380. return r.toObject(result).self.getStr("index", nil)
  381. }
  382. func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value {
  383. thisObj := r.toObject(call.This)
  384. s := call.Argument(0).toString()
  385. rx := r.checkStdRegexp(thisObj)
  386. if rx == nil {
  387. return r.regexpproto_stdSearchGeneric(thisObj, s)
  388. }
  389. previousLastIndex := rx.getStr("lastIndex", nil)
  390. rx.setOwnStr("lastIndex", intToValue(0), true)
  391. match, result := rx.execRegexp(s)
  392. rx.setOwnStr("lastIndex", previousLastIndex, true)
  393. if !match {
  394. return intToValue(-1)
  395. }
  396. return intToValue(int64(result[0]))
  397. }
  398. func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s valueString, limit Value) Value {
  399. var a []Value
  400. var lim int64
  401. if limit == nil || limit == _undefined {
  402. lim = maxInt - 1
  403. } else {
  404. lim = toLength(limit)
  405. }
  406. size := s.length()
  407. p := int64(0)
  408. if lim == 0 {
  409. return r.newArrayValues(a)
  410. }
  411. execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil)) // must be non-nil
  412. if size == 0 {
  413. if r.regExpExec(execFn, splitter, s) == _null {
  414. a = append(a, s)
  415. }
  416. return r.newArrayValues(a)
  417. }
  418. q := p
  419. for q < size {
  420. splitter.self.setOwnStr("lastIndex", intToValue(q), true)
  421. z := r.regExpExec(execFn, splitter, s)
  422. if z == _null {
  423. q++
  424. } else {
  425. z := r.toObject(z)
  426. e := toLength(splitter.self.getStr("lastIndex", nil))
  427. if e == p {
  428. q++
  429. } else {
  430. a = append(a, s.substring(p, q))
  431. if int64(len(a)) == lim {
  432. return r.newArrayValues(a)
  433. }
  434. p = e
  435. numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0)
  436. for i := int64(1); i <= numberOfCaptures; i++ {
  437. a = append(a, z.self.getIdx(valueInt(i), nil))
  438. if int64(len(a)) == lim {
  439. return r.newArrayValues(a)
  440. }
  441. }
  442. q = p
  443. }
  444. }
  445. }
  446. a = append(a, s.substring(p, size))
  447. return r.newArrayValues(a)
  448. }
// regexpproto_stdSplitter is registered as RegExp.prototype[Symbol.split].
//
// It builds a splitter object through the receiver's species constructor,
// passing the receiver and its "flags" with 'y' appended when missing. When
// the splitter fails checkStdRegexp the work is delegated to
// regexpproto_stdSplitterGeneric. Otherwise all matches are collected at once
// with FindAllSubmatchIndex and the result array is assembled from the text
// between matches plus the capture groups of each match.
//
// call.Argument(0) is the string to split; call.Argument(1) is the optional
// limit on the number of result elements.
func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value {
	rxObj := r.toObject(call.This)
	c := r.speciesConstructor(rxObj, r.global.RegExp)
	flags := nilSafe(rxObj.self.getStr("flags", nil)).toString()
	// Add 'y' flag if missing
	if flagsStr := flags.String(); !strings.Contains(flagsStr, "y") {
		flags = newStringValue(flagsStr + "y")
	}
	splitter := c([]Value{rxObj, flags}, nil)
	s := call.Argument(0).toString()
	limitValue := call.Argument(1)
	search := r.checkStdRegexp(splitter)
	if search == nil {
		return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue)
	}
	// limit stays -1 when no limit was given; found counts up from 0, so the
	// "found == limit" checks below never fire in that case.
	limit := -1
	if limitValue != _undefined {
		limit = int(toUInt32(limitValue))
	}
	if limit == 0 {
		return r.newArrayValues(nil)
	}
	targetLength := s.length()
	var valueArray []Value
	result := search.pattern.FindAllSubmatchIndex(s, -1)
	// lastIndex is the end offset of the previous match; found is the number
	// of elements appended to valueArray so far.
	lastIndex := 0
	found := 0
	for _, match := range result {
		// Empty matches at the very start or very end of the string are
		// skipped.
		if match[0] == match[1] {
			// FIXME Ugh, this is a hack
			if match[0] == 0 || int64(match[0]) == targetLength {
				continue
			}
		}
		// Emit the text between the previous match and this one, or an empty
		// string when this match starts exactly where the previous one ended.
		if lastIndex != match[0] {
			valueArray = append(valueArray, s.substring(int64(lastIndex), int64(match[0])))
			found++
		} else if lastIndex == match[0] {
			// NOTE(review): lastIndex starts at 0 and is only ever assigned
			// match[1], so this guard looks like it is always true — confirm.
			if lastIndex != -1 {
				valueArray = append(valueArray, stringEmpty)
				found++
			}
		}
		lastIndex = match[1]
		if found == limit {
			goto RETURN
		}
		// match holds start/end offset pairs; pair 0 is the whole match and
		// the remaining pairs are capture groups. A start offset of -1 is
		// emitted as undefined.
		captureCount := len(match) / 2
		for index := 1; index < captureCount; index++ {
			offset := index * 2
			var value Value
			if match[offset] != -1 {
				value = s.substring(int64(match[offset]), int64(match[offset+1]))
			} else {
				value = _undefined
			}
			valueArray = append(valueArray, value)
			found++
			if found == limit {
				goto RETURN
			}
		}
	}
	// Append the text after the last match, or an empty string when the last
	// match ended exactly at the end of the input.
	if found != limit {
		if int64(lastIndex) != targetLength {
			valueArray = append(valueArray, s.substring(int64(lastIndex), targetLength))
		} else {
			valueArray = append(valueArray, stringEmpty)
		}
	}
RETURN:
	return r.newArrayValues(valueArray)
}
// initRegExp creates RegExp.prototype and the RegExp function, populates
// them with their methods, accessors and symbol-keyed properties, and
// registers RegExp on the global object.
func (r *Runtime) initRegExp() {
	r.global.RegExpPrototype = r.NewObject()
	o := r.global.RegExpPrototype.self
	// The original exec property value is kept in r.global.regexpProtoExec so
	// that checkStdRegexp can compare an object's current "exec" against it.
	r.global.regexpProtoExec = valueProp(r.newNativeFunc(r.regexpproto_exec, nil, "exec", nil, 1), true, false, true)
	o.setOwnStr("exec", r.global.regexpProtoExec, true)
	o._putProp("test", r.newNativeFunc(r.regexpproto_test, nil, "test", nil, 1), true, false, true)
	o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, nil, "toString", nil, 0), true, false, true)
	// source, the individual flag properties and flags are defined as
	// configurable accessor properties with a getter and no setter.
	o.setOwnStr("source", &valueProperty{
		configurable: true,
		getterFunc: r.newNativeFunc(r.regexpproto_getSource, nil, "get source", nil, 0),
		accessor: true,
	}, false)
	o.setOwnStr("global", &valueProperty{
		configurable: true,
		getterFunc: r.newNativeFunc(r.regexpproto_getGlobal, nil, "get global", nil, 0),
		accessor: true,
	}, false)
	o.setOwnStr("multiline", &valueProperty{
		configurable: true,
		getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, nil, "get multiline", nil, 0),
		accessor: true,
	}, false)
	o.setOwnStr("ignoreCase", &valueProperty{
		configurable: true,
		getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, nil, "get ignoreCase", nil, 0),
		accessor: true,
	}, false)
	o.setOwnStr("sticky", &valueProperty{
		configurable: true,
		getterFunc: r.newNativeFunc(r.regexpproto_getSticky, nil, "get sticky", nil, 0),
		accessor: true,
	}, false)
	o.setOwnStr("flags", &valueProperty{
		configurable: true,
		getterFunc: r.newNativeFunc(r.regexpproto_getFlags, nil, "get flags", nil, 0),
		accessor: true,
	}, false)
	// Symbol-keyed methods: Symbol.match, Symbol.search and Symbol.split.
	o._putSym(symMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, nil, "[Symbol.match]", nil, 1), true, false, true))
	o._putSym(symSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, nil, "[Symbol.search]", nil, 1), true, false, true))
	o._putSym(symSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, nil, "[Symbol.split]", nil, 2), true, false, true))
	// The RegExp function itself: builtin_RegExp handles plain calls and
	// builtin_newRegExp handles construction.
	r.global.RegExp = r.newNativeFunc(r.builtin_RegExp, r.builtin_newRegExp, "RegExp", r.global.RegExpPrototype, 2)
	o = r.global.RegExp.self
	// RegExp[Symbol.species] is an accessor whose getter is r.returnThis.
	o._putSym(symSpecies, &valueProperty{
		getterFunc: r.newNativeFunc(r.returnThis, nil, "get [Symbol.species]", nil, 0),
		accessor: true,
		configurable: true,
	})
	r.addToGlobal("RegExp", r.global.RegExp)
}