12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348 |
- package goja
- import (
- "fmt"
- "github.com/dop251/goja/parser"
- "regexp"
- "strings"
- "unicode/utf16"
- "unicode/utf8"
- )
- func (r *Runtime) newRegexpObject(proto *Object) *regexpObject {
- v := &Object{runtime: r}
- o := ®expObject{}
- o.class = classRegExp
- o.val = v
- o.extensible = true
- v.self = o
- o.prototype = proto
- o.init()
- return o
- }
- func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr String, proto *Object) *regexpObject {
- o := r.newRegexpObject(proto)
- o.pattern = pattern
- o.source = patternStr
- return o
- }
// decodeHex parses s as an unsigned hexadecimal number (upper- or lower-case
// digits, no prefix). It returns the value and true, or 0 and false as soon
// as a byte that is not a hexadecimal digit is met. The empty string decodes
// to 0, true.
func decodeHex(s string) (int, bool) {
	value := 0
	for _, c := range []byte(s) {
		var digit int
		if c >= '0' && c <= '9' {
			digit = int(c - '0')
		} else if c >= 'a' && c <= 'f' {
			digit = int(c-'a') + 10
		} else if c >= 'A' && c <= 'F' {
			digit = int(c-'A') + 10
		} else {
			return 0, false
		}
		// Shifting by four bits is the same as multiplying by 16; the low
		// nibble is then free for the new digit.
		value = value<<4 | digit
	}
	return value, true
}
- func writeHex4(b *strings.Builder, i int) {
- b.WriteByte(hex[i>>12])
- b.WriteByte(hex[(i>>8)&0xF])
- b.WriteByte(hex[(i>>4)&0xF])
- b.WriteByte(hex[i&0xF])
- }
- // Convert any valid surrogate pairs in the form of \uXXXX\uXXXX to unicode characters
- func convertRegexpToUnicode(patternStr string) string {
- var sb strings.Builder
- pos := 0
- for i := 0; i < len(patternStr)-11; {
- r, size := utf8.DecodeRuneInString(patternStr[i:])
- if r == '\\' {
- i++
- if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' {
- if first, ok := decodeHex(patternStr[i+1 : i+5]); ok {
- if isUTF16FirstSurrogate(uint16(first)) {
- if second, ok := decodeHex(patternStr[i+7 : i+11]); ok {
- if isUTF16SecondSurrogate(uint16(second)) {
- r = utf16.DecodeRune(rune(first), rune(second))
- sb.WriteString(patternStr[pos : i-1])
- sb.WriteRune(r)
- i += 11
- pos = i
- continue
- }
- }
- }
- }
- }
- i++
- } else {
- i += size
- }
- }
- if pos > 0 {
- sb.WriteString(patternStr[pos:])
- return sb.String()
- }
- return patternStr
- }
- // Convert any extended unicode characters to UTF-16 in the form of \uXXXX\uXXXX
- func convertRegexpToUtf16(patternStr string) string {
- var sb strings.Builder
- pos := 0
- var prevRune rune
- for i := 0; i < len(patternStr); {
- r, size := utf8.DecodeRuneInString(patternStr[i:])
- if r > 0xFFFF {
- sb.WriteString(patternStr[pos:i])
- if prevRune == '\\' {
- sb.WriteRune('\\')
- }
- first, second := utf16.EncodeRune(r)
- sb.WriteString(`\u`)
- writeHex4(&sb, int(first))
- sb.WriteString(`\u`)
- writeHex4(&sb, int(second))
- pos = i + size
- }
- i += size
- prevRune = r
- }
- if pos > 0 {
- sb.WriteString(patternStr[pos:])
- return sb.String()
- }
- return patternStr
- }
- // convert any broken UTF-16 surrogate pairs to \uXXXX
- func escapeInvalidUtf16(s String) string {
- if imported, ok := s.(*importedString); ok {
- return imported.s
- }
- if ascii, ok := s.(asciiString); ok {
- return ascii.String()
- }
- var sb strings.Builder
- rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader()}
- pos := 0
- utf8Size := 0
- var utf8Buf [utf8.UTFMax]byte
- for {
- c, size, err := rd.ReadRune()
- if err != nil {
- break
- }
- if utf16.IsSurrogate(c) {
- if sb.Len() == 0 {
- sb.Grow(utf8Size + 7)
- hrd := s.Reader()
- var c rune
- for p := 0; p < pos; {
- var size int
- var err error
- c, size, err = hrd.ReadRune()
- if err != nil {
- // will not happen
- panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err))
- }
- sb.WriteRune(c)
- p += size
- }
- if c == '\\' {
- sb.WriteRune(c)
- }
- }
- sb.WriteString(`\u`)
- writeHex4(&sb, int(c))
- } else {
- if sb.Len() > 0 {
- sb.WriteRune(c)
- } else {
- utf8Size += utf8.EncodeRune(utf8Buf[:], c)
- pos += size
- }
- }
- }
- if sb.Len() > 0 {
- return sb.String()
- }
- return s.String()
- }
- func compileRegexpFromValueString(patternStr String, flags string) (*regexpPattern, error) {
- return compileRegexp(escapeInvalidUtf16(patternStr), flags)
- }
- func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
- var global, ignoreCase, multiline, dotAll, sticky, unicode bool
- var wrapper *regexpWrapper
- var wrapper2 *regexp2Wrapper
- if flags != "" {
- invalidFlags := func() {
- err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags)
- }
- for _, chr := range flags {
- switch chr {
- case 'g':
- if global {
- invalidFlags()
- return
- }
- global = true
- case 'm':
- if multiline {
- invalidFlags()
- return
- }
- multiline = true
- case 's':
- if dotAll {
- invalidFlags()
- return
- }
- dotAll = true
- case 'i':
- if ignoreCase {
- invalidFlags()
- return
- }
- ignoreCase = true
- case 'y':
- if sticky {
- invalidFlags()
- return
- }
- sticky = true
- case 'u':
- if unicode {
- invalidFlags()
- }
- unicode = true
- default:
- invalidFlags()
- return
- }
- }
- }
- if unicode {
- patternStr = convertRegexpToUnicode(patternStr)
- } else {
- patternStr = convertRegexpToUtf16(patternStr)
- }
- re2Str, err1 := parser.TransformRegExp(patternStr, dotAll, unicode)
- if err1 == nil {
- re2flags := ""
- if multiline {
- re2flags += "m"
- }
- if dotAll {
- re2flags += "s"
- }
- if ignoreCase {
- re2flags += "i"
- }
- if len(re2flags) > 0 {
- re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str)
- }
- pattern, err1 := regexp.Compile(re2Str)
- if err1 != nil {
- err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1)
- return
- }
- wrapper = (*regexpWrapper)(pattern)
- } else {
- if _, incompat := err1.(parser.RegexpErrorIncompatible); !incompat {
- err = err1
- return
- }
- wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase, unicode)
- if err != nil {
- err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
- return
- }
- }
- p = ®expPattern{
- src: patternStr,
- regexpWrapper: wrapper,
- regexp2Wrapper: wrapper2,
- global: global,
- ignoreCase: ignoreCase,
- multiline: multiline,
- dotAll: dotAll,
- sticky: sticky,
- unicode: unicode,
- }
- return
- }
- func (r *Runtime) _newRegExp(patternStr String, flags string, proto *Object) *regexpObject {
- pattern, err := compileRegexpFromValueString(patternStr, flags)
- if err != nil {
- panic(r.newSyntaxError(err.Error(), -1))
- }
- return r.newRegExpp(pattern, patternStr, proto)
- }
- func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object {
- var patternVal, flagsVal Value
- if len(args) > 0 {
- patternVal = args[0]
- }
- if len(args) > 1 {
- flagsVal = args[1]
- }
- return r.newRegExp(patternVal, flagsVal, proto).val
- }
- func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *regexpObject {
- var pattern String
- var flags string
- if isRegexp(patternVal) { // this may have side effects so need to call it anyway
- if obj, ok := patternVal.(*Object); ok {
- if rx, ok := obj.self.(*regexpObject); ok {
- if flagsVal == nil || flagsVal == _undefined {
- return rx.clone()
- } else {
- return r._newRegExp(rx.source, flagsVal.toString().String(), proto)
- }
- } else {
- pattern = nilSafe(obj.self.getStr("source", nil)).toString()
- if flagsVal == nil || flagsVal == _undefined {
- flags = nilSafe(obj.self.getStr("flags", nil)).toString().String()
- } else {
- flags = flagsVal.toString().String()
- }
- goto exit
- }
- }
- }
- if patternVal != nil && patternVal != _undefined {
- pattern = patternVal.toString()
- }
- if flagsVal != nil && flagsVal != _undefined {
- flags = flagsVal.toString().String()
- }
- if pattern == nil {
- pattern = stringEmpty
- }
- exit:
- return r._newRegExp(pattern, flags, proto)
- }
- func (r *Runtime) builtin_RegExp(call FunctionCall) Value {
- pattern := call.Argument(0)
- patternIsRegExp := isRegexp(pattern)
- flags := call.Argument(1)
- if patternIsRegExp && flags == _undefined {
- if obj, ok := call.Argument(0).(*Object); ok {
- patternConstructor := obj.self.getStr("constructor", nil)
- if patternConstructor == r.global.RegExp {
- return pattern
- }
- }
- }
- return r.newRegExp(pattern, flags, r.getRegExpPrototype()).val
- }
- func (r *Runtime) regexpproto_compile(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- var (
- pattern *regexpPattern
- source String
- flags string
- err error
- )
- patternVal := call.Argument(0)
- flagsVal := call.Argument(1)
- if o, ok := patternVal.(*Object); ok {
- if p, ok := o.self.(*regexpObject); ok {
- if flagsVal != _undefined {
- panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another"))
- }
- this.pattern = p.pattern
- this.source = p.source
- goto exit
- }
- }
- if patternVal != _undefined {
- source = patternVal.toString()
- } else {
- source = stringEmpty
- }
- if flagsVal != _undefined {
- flags = flagsVal.toString().String()
- }
- pattern, err = compileRegexpFromValueString(source, flags)
- if err != nil {
- panic(r.newSyntaxError(err.Error(), -1))
- }
- this.pattern = pattern
- this.source = source
- exit:
- this.setOwnStr("lastIndex", intToValue(0), true)
- return call.This
- }
- panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- func (r *Runtime) regexpproto_exec(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- return this.exec(call.Argument(0).toString())
- } else {
- r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))
- return nil
- }
- }
- func (r *Runtime) regexpproto_test(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- if this.test(call.Argument(0).toString()) {
- return valueTrue
- } else {
- return valueFalse
- }
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.test called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- }
- func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
- obj := r.toObject(call.This)
- if this := r.checkStdRegexp(obj); this != nil {
- var sb StringBuilder
- sb.WriteRune('/')
- if !this.writeEscapedSource(&sb) {
- sb.WriteString(this.source)
- }
- sb.WriteRune('/')
- if this.pattern.global {
- sb.WriteRune('g')
- }
- if this.pattern.ignoreCase {
- sb.WriteRune('i')
- }
- if this.pattern.multiline {
- sb.WriteRune('m')
- }
- if this.pattern.dotAll {
- sb.WriteRune('s')
- }
- if this.pattern.unicode {
- sb.WriteRune('u')
- }
- if this.pattern.sticky {
- sb.WriteRune('y')
- }
- return sb.String()
- }
- pattern := nilSafe(obj.self.getStr("source", nil)).toString()
- flags := nilSafe(obj.self.getStr("flags", nil)).toString()
- var sb StringBuilder
- sb.WriteRune('/')
- sb.WriteString(pattern)
- sb.WriteRune('/')
- sb.WriteString(flags)
- return sb.String()
- }
- func (r *regexpObject) writeEscapedSource(sb *StringBuilder) bool {
- if r.source.Length() == 0 {
- sb.WriteString(asciiString("(?:)"))
- return true
- }
- pos := 0
- lastPos := 0
- rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader()}
- L:
- for {
- c, size, err := rd.ReadRune()
- if err != nil {
- break
- }
- switch c {
- case '\\':
- pos++
- _, size, err = rd.ReadRune()
- if err != nil {
- break L
- }
- case '/', '\u000a', '\u000d', '\u2028', '\u2029':
- sb.WriteSubstring(r.source, lastPos, pos)
- sb.WriteRune('\\')
- switch c {
- case '\u000a':
- sb.WriteRune('n')
- case '\u000d':
- sb.WriteRune('r')
- default:
- sb.WriteRune('u')
- sb.WriteRune(rune(hex[c>>12]))
- sb.WriteRune(rune(hex[(c>>8)&0xF]))
- sb.WriteRune(rune(hex[(c>>4)&0xF]))
- sb.WriteRune(rune(hex[c&0xF]))
- }
- lastPos = pos + size
- }
- pos += size
- }
- if lastPos > 0 {
- sb.WriteSubstring(r.source, lastPos, r.source.Length())
- return true
- }
- return false
- }
- func (r *Runtime) regexpproto_getSource(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- var sb StringBuilder
- if this.writeEscapedSource(&sb) {
- return sb.String()
- }
- return this.source
- } else if call.This == r.global.RegExpPrototype {
- return asciiString("(?:)")
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.source getter called on incompatible receiver"))
- }
- }
- func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- if this.pattern.global {
- return valueTrue
- } else {
- return valueFalse
- }
- } else if call.This == r.global.RegExpPrototype {
- return _undefined
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.global getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- }
- func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- if this.pattern.multiline {
- return valueTrue
- } else {
- return valueFalse
- }
- } else if call.This == r.global.RegExpPrototype {
- return _undefined
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.multiline getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- }
- func (r *Runtime) regexpproto_getDotAll(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- if this.pattern.dotAll {
- return valueTrue
- } else {
- return valueFalse
- }
- } else if call.This == r.global.RegExpPrototype {
- return _undefined
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.dotAll getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- }
- func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- if this.pattern.ignoreCase {
- return valueTrue
- } else {
- return valueFalse
- }
- } else if call.This == r.global.RegExpPrototype {
- return _undefined
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- }
- func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- if this.pattern.unicode {
- return valueTrue
- } else {
- return valueFalse
- }
- } else if call.This == r.global.RegExpPrototype {
- return _undefined
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.unicode getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- }
- func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
- if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
- if this.pattern.sticky {
- return valueTrue
- } else {
- return valueFalse
- }
- } else if call.This == r.global.RegExpPrototype {
- return _undefined
- } else {
- panic(r.NewTypeError("Method RegExp.prototype.sticky getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
- }
- }
- func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
- var global, ignoreCase, multiline, dotAll, sticky, unicode bool
- thisObj := r.toObject(call.This)
- size := 0
- if v := thisObj.self.getStr("global", nil); v != nil {
- global = v.ToBoolean()
- if global {
- size++
- }
- }
- if v := thisObj.self.getStr("ignoreCase", nil); v != nil {
- ignoreCase = v.ToBoolean()
- if ignoreCase {
- size++
- }
- }
- if v := thisObj.self.getStr("multiline", nil); v != nil {
- multiline = v.ToBoolean()
- if multiline {
- size++
- }
- }
- if v := thisObj.self.getStr("dotAll", nil); v != nil {
- dotAll = v.ToBoolean()
- if dotAll {
- size++
- }
- }
- if v := thisObj.self.getStr("sticky", nil); v != nil {
- sticky = v.ToBoolean()
- if sticky {
- size++
- }
- }
- if v := thisObj.self.getStr("unicode", nil); v != nil {
- unicode = v.ToBoolean()
- if unicode {
- size++
- }
- }
- var sb strings.Builder
- sb.Grow(size)
- if global {
- sb.WriteByte('g')
- }
- if ignoreCase {
- sb.WriteByte('i')
- }
- if multiline {
- sb.WriteByte('m')
- }
- if dotAll {
- sb.WriteByte('s')
- }
- if unicode {
- sb.WriteByte('u')
- }
- if sticky {
- sb.WriteByte('y')
- }
- return asciiString(sb.String())
- }
- func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value {
- res := execFn(FunctionCall{
- This: rxObj,
- Arguments: []Value{arg},
- })
- if res != _null {
- if _, ok := res.(*Object); !ok {
- panic(r.NewTypeError("RegExp exec method returned something other than an Object or null"))
- }
- }
- return res
- }
- func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s String, fullUnicode bool) []Value {
- rxObj.self.setOwnStr("lastIndex", intToValue(0), true)
- execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable()
- if !ok {
- panic(r.NewTypeError("exec is not a function"))
- }
- var a []Value
- for {
- res := r.regExpExec(execFn, rxObj, s)
- if res == _null {
- break
- }
- a = append(a, res)
- matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString()
- if matchStr.Length() == 0 {
- thisIndex := toLength(rxObj.self.getStr("lastIndex", nil))
- rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(s, thisIndex, fullUnicode)), true)
- }
- }
- return a
- }
- func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s String) Value {
- rx := rxObj.self
- flags := nilSafe(rx.getStr("flags", nil)).String()
- global := strings.ContainsRune(flags, 'g')
- if global {
- a := r.getGlobalRegexpMatches(rxObj, s, strings.ContainsRune(flags, 'u'))
- if len(a) == 0 {
- return _null
- }
- ar := make([]Value, 0, len(a))
- for _, result := range a {
- obj := r.toObject(result)
- matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString()
- ar = append(ar, matchStr)
- }
- return r.newArrayValues(ar)
- }
- execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
- if !ok {
- panic(r.NewTypeError("exec is not a function"))
- }
- return r.regExpExec(execFn, rxObj, s)
- }
- func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject {
- if deoptimiseRegexp {
- return nil
- }
- rx, ok := rxObj.self.(*regexpObject)
- if !ok {
- return nil
- }
- if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto {
- return nil
- }
- return rx
- }
- func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value {
- thisObj := r.toObject(call.This)
- s := call.Argument(0).toString()
- rx := r.checkStdRegexp(thisObj)
- if rx == nil {
- return r.regexpproto_stdMatcherGeneric(thisObj, s)
- }
- if rx.pattern.global {
- res := rx.pattern.findAllSubmatchIndex(s, 0, -1, rx.pattern.sticky)
- if len(res) == 0 {
- rx.setOwnStr("lastIndex", intToValue(0), true)
- return _null
- }
- a := make([]Value, 0, len(res))
- for _, result := range res {
- a = append(a, s.Substring(result[0], result[1]))
- }
- rx.setOwnStr("lastIndex", intToValue(int64(res[len(res)-1][1])), true)
- return r.newArrayValues(a)
- } else {
- return rx.exec(s)
- }
- }
- func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg String) Value {
- rx := rxObj.self
- previousLastIndex := nilSafe(rx.getStr("lastIndex", nil))
- zero := intToValue(0)
- if !previousLastIndex.SameAs(zero) {
- rx.setOwnStr("lastIndex", zero, true)
- }
- execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
- if !ok {
- panic(r.NewTypeError("exec is not a function"))
- }
- result := r.regExpExec(execFn, rxObj, arg)
- currentLastIndex := nilSafe(rx.getStr("lastIndex", nil))
- if !currentLastIndex.SameAs(previousLastIndex) {
- rx.setOwnStr("lastIndex", previousLastIndex, true)
- }
- if result == _null {
- return intToValue(-1)
- }
- return r.toObject(result).self.getStr("index", nil)
- }
- func (r *Runtime) regexpproto_stdMatcherAll(call FunctionCall) Value {
- thisObj := r.toObject(call.This)
- s := call.Argument(0).toString()
- flags := nilSafe(thisObj.self.getStr("flags", nil)).toString()
- c := r.speciesConstructorObj(call.This.(*Object), r.getRegExp())
- matcher := r.toConstructor(c)([]Value{call.This, flags}, nil)
- matcher.self.setOwnStr("lastIndex", valueInt(toLength(thisObj.self.getStr("lastIndex", nil))), true)
- flagsStr := flags.String()
- global := strings.Contains(flagsStr, "g")
- fullUnicode := strings.Contains(flagsStr, "u")
- return r.createRegExpStringIterator(matcher, s, global, fullUnicode)
- }
- func (r *Runtime) createRegExpStringIterator(matcher *Object, s String, global, fullUnicode bool) Value {
- o := &Object{runtime: r}
- ri := ®ExpStringIterObject{
- matcher: matcher,
- s: s,
- global: global,
- fullUnicode: fullUnicode,
- }
- ri.class = classObject
- ri.val = o
- ri.extensible = true
- o.self = ri
- ri.prototype = r.getRegExpStringIteratorPrototype()
- ri.init()
- return o
- }
- type regExpStringIterObject struct {
- baseObject
- matcher *Object
- s String
- global, fullUnicode, done bool
- }
- // RegExpExec as defined in 21.2.5.2.1
- func regExpExec(r *Object, s String) Value {
- exec := r.self.getStr("exec", nil)
- if execObject, ok := exec.(*Object); ok {
- if execFn, ok := execObject.self.assertCallable(); ok {
- return r.runtime.regExpExec(execFn, r, s)
- }
- }
- if rx, ok := r.self.(*regexpObject); ok {
- return rx.exec(s)
- }
- panic(r.runtime.NewTypeError("no RegExpMatcher internal slot"))
- }
- func (ri *regExpStringIterObject) next() (v Value) {
- if ri.done {
- return ri.val.runtime.createIterResultObject(_undefined, true)
- }
- match := regExpExec(ri.matcher, ri.s)
- if IsNull(match) {
- ri.done = true
- return ri.val.runtime.createIterResultObject(_undefined, true)
- }
- if !ri.global {
- ri.done = true
- return ri.val.runtime.createIterResultObject(match, false)
- }
- matchStr := nilSafe(ri.val.runtime.toObject(match).self.getIdx(valueInt(0), nil)).toString()
- if matchStr.Length() == 0 {
- thisIndex := toLength(ri.matcher.self.getStr("lastIndex", nil))
- ri.matcher.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(ri.s, thisIndex, ri.fullUnicode)), true)
- }
- return ri.val.runtime.createIterResultObject(match, false)
- }
- func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value {
- thisObj := r.toObject(call.This)
- s := call.Argument(0).toString()
- rx := r.checkStdRegexp(thisObj)
- if rx == nil {
- return r.regexpproto_stdSearchGeneric(thisObj, s)
- }
- previousLastIndex := rx.getStr("lastIndex", nil)
- rx.setOwnStr("lastIndex", intToValue(0), true)
- match, result := rx.execRegexp(s)
- rx.setOwnStr("lastIndex", previousLastIndex, true)
- if !match {
- return intToValue(-1)
- }
- return intToValue(int64(result[0]))
- }
- func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s String, limit Value, unicodeMatching bool) Value {
- var a []Value
- var lim int64
- if limit == nil || limit == _undefined {
- lim = maxInt - 1
- } else {
- lim = toLength(limit)
- }
- if lim == 0 {
- return r.newArrayValues(a)
- }
- size := s.Length()
- p := 0
- execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil)) // must be non-nil
- if size == 0 {
- if r.regExpExec(execFn, splitter, s) == _null {
- a = append(a, s)
- }
- return r.newArrayValues(a)
- }
- q := p
- for q < size {
- splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true)
- z := r.regExpExec(execFn, splitter, s)
- if z == _null {
- q = advanceStringIndex(s, q, unicodeMatching)
- } else {
- z := r.toObject(z)
- e := toLength(splitter.self.getStr("lastIndex", nil))
- if e == int64(p) {
- q = advanceStringIndex(s, q, unicodeMatching)
- } else {
- a = append(a, s.Substring(p, q))
- if int64(len(a)) == lim {
- return r.newArrayValues(a)
- }
- if e > int64(size) {
- p = size
- } else {
- p = int(e)
- }
- numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0)
- for i := int64(1); i <= numberOfCaptures; i++ {
- a = append(a, nilSafe(z.self.getIdx(valueInt(i), nil)))
- if int64(len(a)) == lim {
- return r.newArrayValues(a)
- }
- }
- q = p
- }
- }
- }
- a = append(a, s.Substring(p, size))
- return r.newArrayValues(a)
- }
- func advanceStringIndex(s String, pos int, unicode bool) int {
- next := pos + 1
- if !unicode {
- return next
- }
- l := s.Length()
- if next >= l {
- return next
- }
- if !isUTF16FirstSurrogate(s.CharAt(pos)) {
- return next
- }
- if !isUTF16SecondSurrogate(s.CharAt(next)) {
- return next
- }
- return next + 1
- }
- func advanceStringIndex64(s String, pos int64, unicode bool) int64 {
- next := pos + 1
- if !unicode {
- return next
- }
- l := int64(s.Length())
- if next >= l {
- return next
- }
- if !isUTF16FirstSurrogate(s.CharAt(int(pos))) {
- return next
- }
- if !isUTF16SecondSurrogate(s.CharAt(int(next))) {
- return next
- }
- return next + 1
- }
- func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value {
- rxObj := r.toObject(call.This)
- s := call.Argument(0).toString()
- limitValue := call.Argument(1)
- var splitter *Object
- search := r.checkStdRegexp(rxObj)
- c := r.speciesConstructorObj(rxObj, r.getRegExp())
- if search == nil || c != r.global.RegExp {
- flags := nilSafe(rxObj.self.getStr("flags", nil)).toString()
- flagsStr := flags.String()
- // Add 'y' flag if missing
- if !strings.Contains(flagsStr, "y") {
- flags = flags.Concat(asciiString("y"))
- }
- splitter = r.toConstructor(c)([]Value{rxObj, flags}, nil)
- search = r.checkStdRegexp(splitter)
- if search == nil {
- return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u"))
- }
- }
- pattern := search.pattern // toUint32() may recompile the pattern, but we still need to use the original
- limit := -1
- if limitValue != _undefined {
- limit = int(toUint32(limitValue))
- }
- if limit == 0 {
- return r.newArrayValues(nil)
- }
- targetLength := s.Length()
- var valueArray []Value
- lastIndex := 0
- found := 0
- result := pattern.findAllSubmatchIndex(s, 0, -1, false)
- if targetLength == 0 {
- if result == nil {
- valueArray = append(valueArray, s)
- }
- goto RETURN
- }
- for _, match := range result {
- if match[0] == match[1] {
- // FIXME Ugh, this is a hack
- if match[0] == 0 || match[0] == targetLength {
- continue
- }
- }
- if lastIndex != match[0] {
- valueArray = append(valueArray, s.Substring(lastIndex, match[0]))
- found++
- } else if lastIndex == match[0] {
- if lastIndex != -1 {
- valueArray = append(valueArray, stringEmpty)
- found++
- }
- }
- lastIndex = match[1]
- if found == limit {
- goto RETURN
- }
- captureCount := len(match) / 2
- for index := 1; index < captureCount; index++ {
- offset := index * 2
- var value Value
- if match[offset] != -1 {
- value = s.Substring(match[offset], match[offset+1])
- } else {
- value = _undefined
- }
- valueArray = append(valueArray, value)
- found++
- if found == limit {
- goto RETURN
- }
- }
- }
- if found != limit {
- if lastIndex != targetLength {
- valueArray = append(valueArray, s.Substring(lastIndex, targetLength))
- } else {
- valueArray = append(valueArray, stringEmpty)
- }
- }
- RETURN:
- return r.newArrayValues(valueArray)
- }
- func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr String, rcall func(FunctionCall) Value) Value {
- var results []Value
- flags := nilSafe(rxObj.self.getStr("flags", nil)).String()
- isGlobal := strings.ContainsRune(flags, 'g')
- isUnicode := strings.ContainsRune(flags, 'u')
- if isGlobal {
- results = r.getGlobalRegexpMatches(rxObj, s, isUnicode)
- } else {
- execFn := toMethod(rxObj.self.getStr("exec", nil)) // must be non-nil
- result := r.regExpExec(execFn, rxObj, s)
- if result != _null {
- results = append(results, result)
- }
- }
- lengthS := s.Length()
- nextSourcePosition := 0
- var resultBuf StringBuilder
- for _, result := range results {
- obj := r.toObject(result)
- nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0)
- matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString()
- matchLength := matched.Length()
- position := toIntStrict(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0))
- var captures []Value
- if rcall != nil {
- captures = make([]Value, 0, nCaptures+3)
- } else {
- captures = make([]Value, 0, nCaptures+1)
- }
- captures = append(captures, matched)
- for n := int64(1); n <= nCaptures; n++ {
- capN := nilSafe(obj.self.getIdx(valueInt(n), nil))
- if capN != _undefined {
- capN = capN.ToString()
- }
- captures = append(captures, capN)
- }
- var replacement String
- if rcall != nil {
- captures = append(captures, intToValue(int64(position)), s)
- replacement = rcall(FunctionCall{
- This: _undefined,
- Arguments: captures,
- }).toString()
- if position >= nextSourcePosition {
- resultBuf.WriteString(s.Substring(nextSourcePosition, position))
- resultBuf.WriteString(replacement)
- nextSourcePosition = position + matchLength
- }
- } else {
- if position >= nextSourcePosition {
- resultBuf.WriteString(s.Substring(nextSourcePosition, position))
- writeSubstitution(s, position, len(captures), func(idx int) String {
- capture := captures[idx]
- if capture != _undefined {
- return capture.toString()
- }
- return stringEmpty
- }, replaceStr, &resultBuf)
- nextSourcePosition = position + matchLength
- }
- }
- }
- if nextSourcePosition < lengthS {
- resultBuf.WriteString(s.Substring(nextSourcePosition, lengthS))
- }
- return resultBuf.String()
- }
- func writeSubstitution(s String, position int, numCaptures int, getCapture func(int) String, replaceStr String, buf *StringBuilder) {
- l := s.Length()
- rl := replaceStr.Length()
- matched := getCapture(0)
- tailPos := position + matched.Length()
- for i := 0; i < rl; i++ {
- c := replaceStr.CharAt(i)
- if c == '$' && i < rl-1 {
- ch := replaceStr.CharAt(i + 1)
- switch ch {
- case '$':
- buf.WriteRune('$')
- case '`':
- buf.WriteString(s.Substring(0, position))
- case '\'':
- if tailPos < l {
- buf.WriteString(s.Substring(tailPos, l))
- }
- case '&':
- buf.WriteString(matched)
- default:
- matchNumber := 0
- j := i + 1
- for j < rl {
- ch := replaceStr.CharAt(j)
- if ch >= '0' && ch <= '9' {
- m := matchNumber*10 + int(ch-'0')
- if m >= numCaptures {
- break
- }
- matchNumber = m
- j++
- } else {
- break
- }
- }
- if matchNumber > 0 {
- buf.WriteString(getCapture(matchNumber))
- i = j - 1
- continue
- } else {
- buf.WriteRune('$')
- buf.WriteRune(rune(ch))
- }
- }
- i++
- } else {
- buf.WriteRune(rune(c))
- }
- }
- }
- func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value {
- rxObj := r.toObject(call.This)
- s := call.Argument(0).toString()
- replaceStr, rcall := getReplaceValue(call.Argument(1))
- rx := r.checkStdRegexp(rxObj)
- if rx == nil {
- return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall)
- }
- var index int64
- find := 1
- if rx.pattern.global {
- find = -1
- rx.setOwnStr("lastIndex", intToValue(0), true)
- } else {
- index = rx.getLastIndex()
- }
- found := rx.pattern.findAllSubmatchIndex(s, toIntStrict(index), find, rx.pattern.sticky)
- if len(found) > 0 {
- if !rx.updateLastIndex(index, found[0], found[len(found)-1]) {
- found = nil
- }
- } else {
- rx.updateLastIndex(index, nil, nil)
- }
- return stringReplace(s, found, replaceStr, rcall)
- }
- func (r *Runtime) regExpStringIteratorProto_next(call FunctionCall) Value {
- thisObj := r.toObject(call.This)
- if iter, ok := thisObj.self.(*regExpStringIterObject); ok {
- return iter.next()
- }
- panic(r.NewTypeError("Method RegExp String Iterator.prototype.next called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: thisObj})))
- }
- func (r *Runtime) createRegExpStringIteratorPrototype(val *Object) objectImpl {
- o := newBaseObjectObj(val, r.getIteratorPrototype(), classObject)
- o._putProp("next", r.newNativeFunc(r.regExpStringIteratorProto_next, "next", 0), true, false, true)
- o._putSym(SymToStringTag, valueProp(asciiString(classRegExpStringIterator), false, false, true))
- return o
- }
- func (r *Runtime) getRegExpStringIteratorPrototype() *Object {
- var o *Object
- if o = r.global.RegExpStringIteratorPrototype; o == nil {
- o = &Object{runtime: r}
- r.global.RegExpStringIteratorPrototype = o
- o.self = r.createRegExpStringIteratorPrototype(o)
- }
- return o
- }
- func (r *Runtime) getRegExp() *Object {
- ret := r.global.RegExp
- if ret == nil {
- ret = &Object{runtime: r}
- r.global.RegExp = ret
- proto := r.getRegExpPrototype()
- r.newNativeFuncAndConstruct(ret, r.builtin_RegExp,
- r.wrapNativeConstruct(r.builtin_newRegExp, ret, proto), proto, "RegExp", intToValue(2))
- rx := ret.self
- r.putSpeciesReturnThis(rx)
- }
- return ret
- }
- func (r *Runtime) getRegExpPrototype() *Object {
- ret := r.global.RegExpPrototype
- if ret == nil {
- o := r.newGuardedObject(r.global.ObjectPrototype, classObject)
- ret = o.val
- r.global.RegExpPrototype = ret
- r.global.stdRegexpProto = o
- o._putProp("constructor", r.getRegExp(), true, false, true)
- o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, "compile", 2), true, false, true)
- o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, "exec", 1), true, false, true)
- o._putProp("test", r.newNativeFunc(r.regexpproto_test, "test", 1), true, false, true)
- o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, "toString", 0), true, false, true)
- o.setOwnStr("source", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getSource, "get source", 0),
- accessor: true,
- }, false)
- o.setOwnStr("global", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getGlobal, "get global", 0),
- accessor: true,
- }, false)
- o.setOwnStr("multiline", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, "get multiline", 0),
- accessor: true,
- }, false)
- o.setOwnStr("dotAll", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getDotAll, "get dotAll", 0),
- accessor: true,
- }, false)
- o.setOwnStr("ignoreCase", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, "get ignoreCase", 0),
- accessor: true,
- }, false)
- o.setOwnStr("unicode", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getUnicode, "get unicode", 0),
- accessor: true,
- }, false)
- o.setOwnStr("sticky", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getSticky, "get sticky", 0),
- accessor: true,
- }, false)
- o.setOwnStr("flags", &valueProperty{
- configurable: true,
- getterFunc: r.newNativeFunc(r.regexpproto_getFlags, "get flags", 0),
- accessor: true,
- }, false)
- o._putSym(SymMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, "[Symbol.match]", 1), true, false, true))
- o._putSym(SymMatchAll, valueProp(r.newNativeFunc(r.regexpproto_stdMatcherAll, "[Symbol.matchAll]", 1), true, false, true))
- o._putSym(SymSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, "[Symbol.search]", 1), true, false, true))
- o._putSym(SymSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, "[Symbol.split]", 2), true, false, true))
- o._putSym(SymReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, "[Symbol.replace]", 2), true, false, true))
- o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky")
- }
- return ret
- }
|