Browse Source

feat: regular expressions support DotAll flag (#593)

Levi 1 year ago
parent
commit
3491d4a58f
8 changed files with 83 additions and 18 deletions
  1. 45 4
      builtin_regexp.go
  2. 3 2
      go.mod
  3. 4 0
      go.sum
  4. 8 1
      parser/regexp.go
  5. 7 7
      parser/regexp_test.go
  6. 7 3
      regexp.go
  7. 9 0
      regexp_test.go
  8. 0 1
      tc39_test.go

+ 45 - 4
builtin_regexp.go

@@ -183,7 +183,7 @@ func compileRegexpFromValueString(patternStr String, flags string) (*regexpPatte
 }
 }
 
 
 func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
 func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
-	var global, ignoreCase, multiline, sticky, unicode bool
+	var global, ignoreCase, multiline, dotAll, sticky, unicode bool
 	var wrapper *regexpWrapper
 	var wrapper *regexpWrapper
 	var wrapper2 *regexp2Wrapper
 	var wrapper2 *regexp2Wrapper
 
 
@@ -205,6 +205,12 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
 					return
 					return
 				}
 				}
 				multiline = true
 				multiline = true
+			case 's':
+				if dotAll {
+					invalidFlags()
+					return
+				}
+				dotAll = true
 			case 'i':
 			case 'i':
 				if ignoreCase {
 				if ignoreCase {
 					invalidFlags()
 					invalidFlags()
@@ -235,12 +241,15 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
 		patternStr = convertRegexpToUtf16(patternStr)
 		patternStr = convertRegexpToUtf16(patternStr)
 	}
 	}
 
 
-	re2Str, err1 := parser.TransformRegExp(patternStr)
+	re2Str, err1 := parser.TransformRegExp(patternStr, dotAll)
 	if err1 == nil {
 	if err1 == nil {
 		re2flags := ""
 		re2flags := ""
 		if multiline {
 		if multiline {
 			re2flags += "m"
 			re2flags += "m"
 		}
 		}
+		if dotAll {
+			re2flags += "s"
+		}
 		if ignoreCase {
 		if ignoreCase {
 			re2flags += "i"
 			re2flags += "i"
 		}
 		}
@@ -259,7 +268,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
 			err = err1
 			err = err1
 			return
 			return
 		}
 		}
-		wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase)
+		wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase)
 		if err != nil {
 		if err != nil {
 			err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
 			err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
 			return
 			return
@@ -273,6 +282,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
 		global:         global,
 		global:         global,
 		ignoreCase:     ignoreCase,
 		ignoreCase:     ignoreCase,
 		multiline:      multiline,
 		multiline:      multiline,
+		dotAll:         dotAll,
 		sticky:         sticky,
 		sticky:         sticky,
 		unicode:        unicode,
 		unicode:        unicode,
 	}
 	}
@@ -431,6 +441,9 @@ func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
 		if this.pattern.multiline {
 		if this.pattern.multiline {
 			sb.WriteRune('m')
 			sb.WriteRune('m')
 		}
 		}
+		if this.pattern.dotAll {
+			sb.WriteRune('s')
+		}
 		if this.pattern.unicode {
 		if this.pattern.unicode {
 			sb.WriteRune('u')
 			sb.WriteRune('u')
 		}
 		}
@@ -538,6 +551,20 @@ func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
 	}
 	}
 }
 }
 
 
+func (r *Runtime) regexpproto_getDotAll(call FunctionCall) Value { // Getter installed as the "dotAll" accessor on the RegExp prototype.
+	if this, ok := r.toObject(call.This).self.(*regexpObject); ok { // receiver is a RegExp object: report its pattern's dotAll flag
+		if this.pattern.dotAll {
+			return valueTrue
+		} else {
+			return valueFalse
+		}
+	} else if call.This == r.global.RegExpPrototype { // invoked on RegExp.prototype itself: result is undefined rather than an error
+		return _undefined
+	} else { // any other receiver is rejected with a TypeError
+		panic(r.NewTypeError("Method RegExp.prototype.dotAll getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
+	}
+}
+
 func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
 func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
 	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
 	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
 		if this.pattern.ignoreCase {
 		if this.pattern.ignoreCase {
@@ -581,7 +608,7 @@ func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
 }
 }
 
 
 func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
 func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
-	var global, ignoreCase, multiline, sticky, unicode bool
+	var global, ignoreCase, multiline, dotAll, sticky, unicode bool
 
 
 	thisObj := r.toObject(call.This)
 	thisObj := r.toObject(call.This)
 	size := 0
 	size := 0
@@ -603,6 +630,12 @@ func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
 			size++
 			size++
 		}
 		}
 	}
 	}
+	if v := thisObj.self.getStr("dotAll", nil); v != nil {
+		dotAll = v.ToBoolean()
+		if dotAll {
+			size++
+		}
+	}
 	if v := thisObj.self.getStr("sticky", nil); v != nil {
 	if v := thisObj.self.getStr("sticky", nil); v != nil {
 		sticky = v.ToBoolean()
 		sticky = v.ToBoolean()
 		if sticky {
 		if sticky {
@@ -627,6 +660,9 @@ func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
 	if multiline {
 	if multiline {
 		sb.WriteByte('m')
 		sb.WriteByte('m')
 	}
 	}
+	if dotAll {
+		sb.WriteByte('s')
+	}
 	if unicode {
 	if unicode {
 		sb.WriteByte('u')
 		sb.WriteByte('u')
 	}
 	}
@@ -1272,6 +1308,11 @@ func (r *Runtime) getRegExpPrototype() *Object {
 			getterFunc:   r.newNativeFunc(r.regexpproto_getMultiline, "get multiline", 0),
 			getterFunc:   r.newNativeFunc(r.regexpproto_getMultiline, "get multiline", 0),
 			accessor:     true,
 			accessor:     true,
 		}, false)
 		}, false)
+		o.setOwnStr("dotAll", &valueProperty{
+			configurable: true,
+			getterFunc:   r.newNativeFunc(r.regexpproto_getDotAll, "get dotAll", 0),
+			accessor:     true,
+		}, false)
 		o.setOwnStr("ignoreCase", &valueProperty{
 		o.setOwnStr("ignoreCase", &valueProperty{
 			configurable: true,
 			configurable: true,
 			getterFunc:   r.newNativeFunc(r.regexpproto_getIgnoreCase, "get ignoreCase", 0),
 			getterFunc:   r.newNativeFunc(r.regexpproto_getIgnoreCase, "get ignoreCase", 0),

+ 3 - 2
go.mod

@@ -3,11 +3,12 @@ module github.com/dop251/goja
 go 1.20
 go 1.20
 
 
 require (
 require (
-	github.com/dlclark/regexp2 v1.7.0
+	github.com/dlclark/regexp2 v1.11.4
 	github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d
 	github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d
 	github.com/go-sourcemap/sourcemap v2.1.3+incompatible
 	github.com/go-sourcemap/sourcemap v2.1.3+incompatible
 	github.com/google/pprof v0.0.0-20230207041349-798e818bf904
 	github.com/google/pprof v0.0.0-20230207041349-798e818bf904
-	github.com/kr/pretty v0.3.0 // indirect
 	golang.org/x/text v0.3.8
 	golang.org/x/text v0.3.8
 	gopkg.in/yaml.v2 v2.4.0
 	gopkg.in/yaml.v2 v2.4.0
 )
 )
+
+require github.com/kr/pretty v0.3.0 // indirect

+ 4 - 0
go.sum

@@ -5,6 +5,10 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo=
 github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo=
 github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dlclark/regexp2 v1.11.3 h1:tdwMFLz4VxHteujuVYHzG5Bje3M2ORsvv2jvbCTufTA=
+github.com/dlclark/regexp2 v1.11.3/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
+github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk=
 github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk=
 github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y=
 github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y=
 github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d h1:W1n4DvpzZGOISgp7wWNtraLcHtnmnTwBlJidqtMIuwQ=
 github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d h1:W1n4DvpzZGOISgp7wWNtraLcHtnmnTwBlJidqtMIuwQ=

+ 8 - 1
parser/regexp.go

@@ -40,6 +40,8 @@ type _RegExp_parser struct {
 
 
 	goRegexp   strings.Builder
 	goRegexp   strings.Builder
 	passOffset int
 	passOffset int
+
+	dotAll bool // Enable dotAll mode
 }
 }
 
 
 // TransformRegExp transforms a JavaScript pattern into  a Go "regexp" pattern.
 // TransformRegExp transforms a JavaScript pattern into  a Go "regexp" pattern.
@@ -55,7 +57,7 @@ type _RegExp_parser struct {
 //
 //
 // If the pattern is invalid (not valid even in JavaScript), then this function
 // If the pattern is invalid (not valid even in JavaScript), then this function
 // returns an empty string and a generic error.
 // returns an empty string and a generic error.
-func TransformRegExp(pattern string) (transformed string, err error) {
+func TransformRegExp(pattern string, dotAll bool) (transformed string, err error) {
 
 
 	if pattern == "" {
 	if pattern == "" {
 		return "", nil
 		return "", nil
@@ -64,6 +66,7 @@ func TransformRegExp(pattern string) (transformed string, err error) {
 	parser := _RegExp_parser{
 	parser := _RegExp_parser{
 		str:    pattern,
 		str:    pattern,
 		length: len(pattern),
 		length: len(pattern),
+		dotAll: dotAll,
 	}
 	}
 	err = parser.parse()
 	err = parser.parse()
 	if err != nil {
 	if err != nil {
@@ -147,6 +150,10 @@ func (self *_RegExp_parser) scan() {
 			self.error(true, "Unmatched ')'")
 			self.error(true, "Unmatched ')'")
 			return
 			return
 		case '.':
 		case '.':
+			if self.dotAll {
+				self.pass()
+				break
+			}
 			self.writeString(Re2Dot)
 			self.writeString(Re2Dot)
 			self.read()
 			self.read()
 		default:
 		default:

+ 7 - 7
parser/regexp_test.go

@@ -10,7 +10,7 @@ func TestRegExp(t *testing.T) {
 		{
 		{
 			// err
 			// err
 			test := func(input string, expect interface{}) {
 			test := func(input string, expect interface{}) {
-				_, err := TransformRegExp(input)
+				_, err := TransformRegExp(input, false)
 				_, incompat := err.(RegexpErrorIncompatible)
 				_, incompat := err.(RegexpErrorIncompatible)
 				is(incompat, false)
 				is(incompat, false)
 				is(err, expect)
 				is(err, expect)
@@ -33,7 +33,7 @@ func TestRegExp(t *testing.T) {
 		{
 		{
 			// incompatible
 			// incompatible
 			test := func(input string, expectErr interface{}) {
 			test := func(input string, expectErr interface{}) {
-				_, err := TransformRegExp(input)
+				_, err := TransformRegExp(input, false)
 				_, incompat := err.(RegexpErrorIncompatible)
 				_, incompat := err.(RegexpErrorIncompatible)
 				is(incompat, true)
 				is(incompat, true)
 				is(err, expectErr)
 				is(err, expectErr)
@@ -54,7 +54,7 @@ func TestRegExp(t *testing.T) {
 		{
 		{
 			// err
 			// err
 			test := func(input string, expect string) {
 			test := func(input string, expect string) {
-				result, err := TransformRegExp(input)
+				result, err := TransformRegExp(input, false)
 				is(err, nil)
 				is(err, nil)
 				_, incompat := err.(RegexpErrorIncompatible)
 				_, incompat := err.(RegexpErrorIncompatible)
 				is(incompat, false)
 				is(incompat, false)
@@ -151,18 +151,18 @@ func TestRegExp(t *testing.T) {
 
 
 func TestTransformRegExp(t *testing.T) {
 func TestTransformRegExp(t *testing.T) {
 	tt(t, func() {
 	tt(t, func() {
-		pattern, err := TransformRegExp(`\s+abc\s+`)
+		pattern, err := TransformRegExp(`\s+abc\s+`, false)
 		is(err, nil)
 		is(err, nil)
 		is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`)
 		is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`)
 		is(regexp.MustCompile(pattern).MatchString("\t abc def"), true)
 		is(regexp.MustCompile(pattern).MatchString("\t abc def"), true)
 	})
 	})
 	tt(t, func() {
 	tt(t, func() {
-		pattern, err := TransformRegExp(`\u{1d306}`)
+		pattern, err := TransformRegExp(`\u{1d306}`, false)
 		is(err, nil)
 		is(err, nil)
 		is(pattern, `\x{1d306}`)
 		is(pattern, `\x{1d306}`)
 	})
 	})
 	tt(t, func() {
 	tt(t, func() {
-		pattern, err := TransformRegExp(`\u1234`)
+		pattern, err := TransformRegExp(`\u1234`, false)
 		is(err, nil)
 		is(err, nil)
 		is(pattern, `\x{1234}`)
 		is(pattern, `\x{1234}`)
 	})
 	})
@@ -173,7 +173,7 @@ func BenchmarkTransformRegExp(b *testing.B) {
 		b.ResetTimer()
 		b.ResetTimer()
 		b.ReportAllocs()
 		b.ReportAllocs()
 		for i := 0; i < b.N; i++ {
 		for i := 0; i < b.N; i++ {
-			_, _ = TransformRegExp(reStr)
+			_, _ = TransformRegExp(reStr, false)
 		}
 		}
 	}
 	}
 
 

+ 7 - 3
regexp.go

@@ -61,17 +61,20 @@ func (rd *arrayRuneReader) ReadRune() (r rune, size int, err error) {
 type regexpPattern struct {
 type regexpPattern struct {
 	src string
 	src string
 
 
-	global, ignoreCase, multiline, sticky, unicode bool
+	global, ignoreCase, multiline, dotAll, sticky, unicode bool
 
 
 	regexpWrapper  *regexpWrapper
 	regexpWrapper  *regexpWrapper
 	regexp2Wrapper *regexp2Wrapper
 	regexp2Wrapper *regexp2Wrapper
 }
 }
 
 
-func compileRegexp2(src string, multiline, ignoreCase bool) (*regexp2Wrapper, error) {
+func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wrapper, error) {
 	var opts regexp2.RegexOptions = regexp2.ECMAScript
 	var opts regexp2.RegexOptions = regexp2.ECMAScript
 	if multiline {
 	if multiline {
 		opts |= regexp2.Multiline
 		opts |= regexp2.Multiline
 	}
 	}
+	if dotAll {
+		opts |= regexp2.Singleline
+	}
 	if ignoreCase {
 	if ignoreCase {
 		opts |= regexp2.IgnoreCase
 		opts |= regexp2.IgnoreCase
 	}
 	}
@@ -87,7 +90,7 @@ func (p *regexpPattern) createRegexp2() {
 	if p.regexp2Wrapper != nil {
 	if p.regexp2Wrapper != nil {
 		return
 		return
 	}
 	}
-	rx, err := compileRegexp2(p.src, p.multiline, p.ignoreCase)
+	rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase)
 	if err != nil {
 	if err != nil {
 		// At this point the regexp should have been successfully converted to re2, if it fails now, it's a bug.
 		// At this point the regexp should have been successfully converted to re2, if it fails now, it's a bug.
 		panic(err)
 		panic(err)
@@ -175,6 +178,7 @@ func (p *regexpPattern) clone() *regexpPattern {
 		global:     p.global,
 		global:     p.global,
 		ignoreCase: p.ignoreCase,
 		ignoreCase: p.ignoreCase,
 		multiline:  p.multiline,
 		multiline:  p.multiline,
+		dotAll:     p.dotAll,
 		sticky:     p.sticky,
 		sticky:     p.sticky,
 		unicode:    p.unicode,
 		unicode:    p.unicode,
 	}
 	}

+ 9 - 0
regexp_test.go

@@ -712,6 +712,15 @@ func TestRegexpConcurrentLiterals(t *testing.T) {
 	_, _ = vm.RunProgram(prg)
 	_, _ = vm.RunProgram(prg)
 }
 }
 
 
+func TestRegexpDotAll(t *testing.T) { // Checks that with the "s" flag a lone "." matches both "\r" and "\n".
+	const SCRIPT = `
+	var re = /./s;
+	re.test("\r") && re.test("\n")
+	`
+	testScript(SCRIPT, valueTrue, t) // the script's final expression must evaluate to true
+
+}
+
 func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
 func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
 	const SCRIPT = `
 	const SCRIPT = `
 	"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)
 	"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)

+ 0 - 1
tc39_test.go

@@ -207,7 +207,6 @@ var (
 		"BigInt",
 		"BigInt",
 		"resizable-arraybuffer",
 		"resizable-arraybuffer",
 		"regexp-named-groups",
 		"regexp-named-groups",
-		"regexp-dotall",
 		"regexp-unicode-property-escapes",
 		"regexp-unicode-property-escapes",
 		"regexp-match-indices",
 		"regexp-match-indices",
 		"legacy-regexp",
 		"legacy-regexp",