Browse Source

Numeric separator literal (#603)

* Added support for separators in numeric literals.

* Improved support for Unicode code point escapes (\u{...}) in regexes.

* Improved number parsing compatibility.

---------

Co-authored-by: Joan López de la Franca Beltran <[email protected]>
Dmitry Panov 1 year ago
parent
commit
016eb72565
10 changed files with 137 additions and 62 deletions
  1. 2 2
      builtin_regexp.go
  2. 12 8
      parser/lexer.go
  3. 5 0
      parser/lexer_test.go
  4. 10 7
      parser/regexp.go
  5. 7 7
      parser/regexp_test.go
  6. 5 2
      regexp.go
  7. 26 0
      regexp_test.go
  8. 25 0
      runtime_test.go
  9. 45 24
      string_ascii.go
  10. 0 12
      tc39_test.go

+ 2 - 2
builtin_regexp.go

@@ -241,7 +241,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
 		patternStr = convertRegexpToUtf16(patternStr)
 	}
 
-	re2Str, err1 := parser.TransformRegExp(patternStr, dotAll)
+	re2Str, err1 := parser.TransformRegExp(patternStr, dotAll, unicode)
 	if err1 == nil {
 		re2flags := ""
 		if multiline {
@@ -268,7 +268,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
 			err = err1
 			return
 		}
-		wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase)
+		wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase, unicode)
 		if err != nil {
 			err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
 			return

+ 12 - 8
parser/lexer.go

@@ -633,9 +633,13 @@ func (self *_parser) skipWhiteSpace() {
 	}
 }
 
-func (self *_parser) scanMantissa(base int) {
-	for digitValue(self.chr) < base {
+func (self *_parser) scanMantissa(base int, allowSeparator bool) {
+	for digitValue(self.chr) < base || (allowSeparator && self.chr == '_') {
+		afterUnderscore := self.chr == '_'
 		self.read()
+		if afterUnderscore && !isDigit(self.chr, base) {
+			self.error(self.chrOffset, "Only one underscore is allowed as numeric separator")
+		}
 	}
 }
 
@@ -1140,7 +1144,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string)
 
 	if decimalPoint {
 		offset--
-		self.scanMantissa(10)
+		self.scanMantissa(10, true)
 	} else {
 		if self.chr == '0' {
 			self.read()
@@ -1156,7 +1160,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string)
 				// no-op
 			default:
 				// legacy octal
-				self.scanMantissa(8)
+				self.scanMantissa(8, false)
 				goto end
 			}
 			if base > 0 {
@@ -1164,15 +1168,15 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string)
 				if !isDigit(self.chr, base) {
 					return token.ILLEGAL, self.str[offset:self.chrOffset]
 				}
-				self.scanMantissa(base)
+				self.scanMantissa(base, true)
 				goto end
 			}
 		} else {
-			self.scanMantissa(10)
+			self.scanMantissa(10, true)
 		}
 		if self.chr == '.' {
 			self.read()
-			self.scanMantissa(10)
+			self.scanMantissa(10, true)
 		}
 	}
 
@@ -1183,7 +1187,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string)
 		}
 		if isDecimalDigit(self.chr) {
 			self.read()
-			self.scanMantissa(10)
+			self.scanMantissa(10, true)
 		} else {
 			return token.ILLEGAL, self.str[offset:self.chrOffset]
 		}

+ 5 - 0
parser/lexer_test.go

@@ -264,6 +264,11 @@ Second line \
 			token.NUMBER, "12.3", 5,
 		)
 
+		test("1_000 1_000_000",
+			token.NUMBER, "1_000", 1,
+			token.NUMBER, "1_000_000", 7,
+		)
+
 		test(`1n`,
 			token.NUMBER, "1n", 1,
 		)

+ 10 - 7
parser/regexp.go

@@ -41,7 +41,8 @@ type _RegExp_parser struct {
 	goRegexp   strings.Builder
 	passOffset int
 
-	dotAll bool // Enable dotAll mode
+	dotAll  bool // Enable dotAll mode
+	unicode bool
 }
 
 // TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern.
@@ -57,16 +58,17 @@ type _RegExp_parser struct {
 //
 // If the pattern is invalid (not valid even in JavaScript), then this function
 // returns an empty string and a generic error.
-func TransformRegExp(pattern string, dotAll bool) (transformed string, err error) {
+func TransformRegExp(pattern string, dotAll, unicode bool) (transformed string, err error) {
 
 	if pattern == "" {
 		return "", nil
 	}
 
 	parser := _RegExp_parser{
-		str:    pattern,
-		length: len(pattern),
-		dotAll: dotAll,
+		str:     pattern,
+		length:  len(pattern),
+		dotAll:  dotAll,
+		unicode: unicode,
 	}
 	err = parser.parse()
 	if err != nil {
@@ -292,7 +294,7 @@ func (self *_RegExp_parser) scanEscape(inClass bool) {
 
 	case 'u':
 		self.read()
-		if self.chr == '{' {
+		if self.chr == '{' && self.unicode {
 			self.read()
 			length, base = 0, 16
 		} else {
@@ -392,7 +394,8 @@ func (self *_RegExp_parser) scanEscape(inClass bool) {
 			digit := uint32(digitValue(self.chr))
 			if digit >= base {
 				// Not a valid digit
-				goto skip
+				self.error(true, "Invalid Unicode escape")
+				return
 			}
 			self.read()
 		}

+ 7 - 7
parser/regexp_test.go

@@ -10,7 +10,7 @@ func TestRegExp(t *testing.T) {
 		{
 			// err
 			test := func(input string, expect interface{}) {
-				_, err := TransformRegExp(input, false)
+				_, err := TransformRegExp(input, false, false)
 				_, incompat := err.(RegexpErrorIncompatible)
 				is(incompat, false)
 				is(err, expect)
@@ -33,7 +33,7 @@ func TestRegExp(t *testing.T) {
 		{
 			// incompatible
 			test := func(input string, expectErr interface{}) {
-				_, err := TransformRegExp(input, false)
+				_, err := TransformRegExp(input, false, false)
 				_, incompat := err.(RegexpErrorIncompatible)
 				is(incompat, true)
 				is(err, expectErr)
@@ -54,7 +54,7 @@ func TestRegExp(t *testing.T) {
 		{
 			// err
 			test := func(input string, expect string) {
-				result, err := TransformRegExp(input, false)
+				result, err := TransformRegExp(input, false, false)
 				is(err, nil)
 				_, incompat := err.(RegexpErrorIncompatible)
 				is(incompat, false)
@@ -151,18 +151,18 @@ func TestRegExp(t *testing.T) {
 
 func TestTransformRegExp(t *testing.T) {
 	tt(t, func() {
-		pattern, err := TransformRegExp(`\s+abc\s+`, false)
+		pattern, err := TransformRegExp(`\s+abc\s+`, false, false)
 		is(err, nil)
 		is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`)
 		is(regexp.MustCompile(pattern).MatchString("\t abc def"), true)
 	})
 	tt(t, func() {
-		pattern, err := TransformRegExp(`\u{1d306}`, false)
+		pattern, err := TransformRegExp(`\u{1d306}`, false, true)
 		is(err, nil)
 		is(pattern, `\x{1d306}`)
 	})
 	tt(t, func() {
-		pattern, err := TransformRegExp(`\u1234`, false)
+		pattern, err := TransformRegExp(`\u1234`, false, false)
 		is(err, nil)
 		is(pattern, `\x{1234}`)
 	})
@@ -173,7 +173,7 @@ func BenchmarkTransformRegExp(b *testing.B) {
 		b.ResetTimer()
 		b.ReportAllocs()
 		for i := 0; i < b.N; i++ {
-			_, _ = TransformRegExp(reStr, false)
+			_, _ = TransformRegExp(reStr, false, false)
 		}
 	}
 

+ 5 - 2
regexp.go

@@ -67,7 +67,7 @@ type regexpPattern struct {
 	regexp2Wrapper *regexp2Wrapper
 }
 
-func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wrapper, error) {
+func compileRegexp2(src string, multiline, dotAll, ignoreCase, unicode bool) (*regexp2Wrapper, error) {
 	var opts regexp2.RegexOptions = regexp2.ECMAScript
 	if multiline {
 		opts |= regexp2.Multiline
@@ -78,6 +78,9 @@ func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wra
 	if ignoreCase {
 		opts |= regexp2.IgnoreCase
 	}
+	if unicode {
+		opts |= regexp2.Unicode
+	}
 	regexp2Pattern, err1 := regexp2.Compile(src, opts)
 	if err1 != nil {
 		return nil, fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", src, err1)
@@ -90,7 +93,7 @@ func (p *regexpPattern) createRegexp2() {
 	if p.regexp2Wrapper != nil {
 		return
 	}
-	rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase)
+	rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase, p.unicode)
 	if err != nil {
 		// At this point the regexp should have been successfully converted to re2, if it fails now, it's a bug.
 		panic(err)

+ 26 - 0
regexp_test.go

@@ -721,6 +721,32 @@ func TestRegexpDotAll(t *testing.T) {
 
 }
 
+func TestRegexpNumSeparators(t *testing.T) {
+	const SCRIPT = `
+	const re = /(?<=a)\u{65}_/u;
+	assert(re.test("ae_") && !re.test("e_"));
+
+	assert.throws(SyntaxError, () => {
+		new RegExp("(?<=a)\\u{6_5}", "u");
+	});
+
+	assert.throws(SyntaxError, () => {
+		new RegExp("a\\u{6_5}", "u");
+	});
+
+	`
+	testScriptWithTestLib(SCRIPT, _undefined, t)
+}
+
+func TestRegexpUnicodeEscape(t *testing.T) {
+	const SCRIPT = `
+	assert.sameValue("u{0_2}".match(/\u{0_2}/)[0], "u{0_2}");
+	assert.sameValue("uu\x02".match(/\u{2}/u)[0], '\x02');
+	assert.sameValue("uu\x02".match(/\u{2}/)[0], "uu");
+	`
+	testScriptWithTestLib(SCRIPT, _undefined, t)
+}
+
 func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
 	const SCRIPT = `
 	"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)

+ 25 - 0
runtime_test.go

@@ -2979,6 +2979,31 @@ func TestDestructAssignToSymbol(t *testing.T) {
 	testScriptWithTestLib(SCRIPT, _undefined, t)
 }
 
+func TestToNumber(t *testing.T) {
+	const SCRIPT = `
+	assert(isNaN(Number("+")));
+	assert(isNaN(Number("++")));
+	assert(isNaN(Number("-")));
+	assert(isNaN(Number("0xfp1")));
+	assert(isNaN(Number("0Xfp1")));
+	assert(isNaN(Number("+0xfp1")));
+	assert(isNaN(Number(" +0xfp1")));
+	assert(isNaN(Number(" + 0xfp1")));
+	assert(isNaN(Number(" 0xfp1")));
+	assert(isNaN(Number("-0xfp1")));
+	assert(isNaN(Number("- 0xfp1")));
+	assert(isNaN(Number(" - 0xfp1")));
+	assert.sameValue(Number("0."), 0);
+	assert.sameValue(Number(" "), 0);
+	assert.sameValue(Number(" Infinity"), Infinity);
+
+	let a = [1];
+	assert.sameValue(1, a.at("0xfp1"));
+	assert.sameValue(1, a.at(" 0xfp1"));
+	`
+	testScriptWithTestLib(SCRIPT, _undefined, t)
+}
+
 /*
 func TestArrayConcatSparse(t *testing.T) {
 function foo(a,b,c)

+ 45 - 24
string_ascii.go

@@ -103,8 +103,8 @@ func stringToInt(ss string) (int64, error) {
 	return strconv.ParseInt(ss, 10, 64)
 }
 
-func (s asciiString) _toInt() (int64, error) {
-	return stringToInt(strings.TrimSpace(string(s)))
+func (s asciiString) _toInt(trimmed string) (int64, error) {
+	return stringToInt(trimmed)
 }
 
 func isRangeErr(err error) bool {
@@ -114,18 +114,36 @@ func isRangeErr(err error) bool {
 	return false
 }
 
-func (s asciiString) _toFloat() (float64, error) {
-	ss := strings.ToLower(strings.TrimSpace(string(s)))
-	if ss == "" {
+func (s asciiString) _toFloat(trimmed string) (float64, error) {
+	if trimmed == "" {
 		return 0, nil
 	}
-	if ss == "-0" {
+	if trimmed == "-0" {
 		var f float64
 		return -f, nil
 	}
 
-	f, err := strconv.ParseFloat(ss, 64)
+	// Go allows underscores in numbers when they are parsed as floats, but ECMAScript expects such strings to be interpreted as NaN.
+	if strings.ContainsRune(trimmed, '_') {
+		return 0, strconv.ErrSyntax
+	}
+
+	// Hexadecimal floats are not supported by ECMAScript.
+	if len(trimmed) >= 2 {
+		var prefix string
+		if trimmed[0] == '-' || trimmed[0] == '+' {
+			prefix = trimmed[1:]
+		} else {
+			prefix = trimmed
+		}
+		if len(prefix) >= 2 && prefix[0] == '0' && (prefix[1] == 'x' || prefix[1] == 'X') {
+			return 0, strconv.ErrSyntax
+		}
+	}
+
+	f, err := strconv.ParseFloat(trimmed, 64)
 	if err == nil && math.IsInf(f, 0) {
+		ss := strings.ToLower(trimmed)
 		if strings.HasPrefix(ss, "inf") || strings.HasPrefix(ss, "-inf") || strings.HasPrefix(ss, "+inf") {
 			// We handle "Infinity" separately; prevent other spellings from being parsed as Infinity due to strconv.ParseFloat()'s permissive syntax
 			return 0, strconv.ErrSyntax
@@ -138,18 +156,19 @@ func (s asciiString) _toFloat() (float64, error) {
 }
 
 func (s asciiString) ToInteger() int64 {
-	if s == "" {
+	ss := strings.TrimSpace(string(s))
+	if ss == "" {
 		return 0
 	}
-	if s == "Infinity" || s == "+Infinity" {
+	if ss == "Infinity" || ss == "+Infinity" {
 		return math.MaxInt64
 	}
-	if s == "-Infinity" {
+	if ss == "-Infinity" {
 		return math.MinInt64
 	}
-	i, err := s._toInt()
+	i, err := s._toInt(ss)
 	if err != nil {
-		f, err := s._toFloat()
+		f, err := s._toFloat(ss)
 		if err == nil {
 			return int64(f)
 		}
@@ -170,18 +189,19 @@ func (s asciiString) String() string {
 }
 
 func (s asciiString) ToFloat() float64 {
-	if s == "" {
+	ss := strings.TrimSpace(string(s))
+	if ss == "" {
 		return 0
 	}
-	if s == "Infinity" || s == "+Infinity" {
+	if ss == "Infinity" || ss == "+Infinity" {
 		return math.Inf(1)
 	}
-	if s == "-Infinity" {
+	if ss == "-Infinity" {
 		return math.Inf(-1)
 	}
-	f, err := s._toFloat()
+	f, err := s._toFloat(ss)
 	if err != nil {
-		i, err := s._toInt()
+		i, err := s._toInt(ss)
 		if err == nil {
 			return float64(i)
 		}
@@ -195,21 +215,22 @@ func (s asciiString) ToBoolean() bool {
 }
 
 func (s asciiString) ToNumber() Value {
-	if s == "" {
+	ss := strings.TrimSpace(string(s))
+	if ss == "" {
 		return intToValue(0)
 	}
-	if s == "Infinity" || s == "+Infinity" {
+	if ss == "Infinity" || ss == "+Infinity" {
 		return _positiveInf
 	}
-	if s == "-Infinity" {
+	if ss == "-Infinity" {
 		return _negativeInf
 	}
 
-	if i, err := s._toInt(); err == nil {
+	if i, err := s._toInt(ss); err == nil {
 		return intToValue(i)
 	}
 
-	if f, err := s._toFloat(); err == nil {
+	if f, err := s._toFloat(ss); err == nil {
 		return floatToValue(f)
 	}
 
@@ -230,7 +251,7 @@ func (s asciiString) Equals(other Value) bool {
 	}
 
 	if o, ok := other.(valueInt); ok {
-		if o1, e := s._toInt(); e == nil {
+		if o1, e := s._toInt(strings.TrimSpace(string(s))); e == nil {
 			return o1 == int64(o)
 		}
 		return false
@@ -241,7 +262,7 @@ func (s asciiString) Equals(other Value) bool {
 	}
 
 	if o, ok := other.(valueBool); ok {
-		if o1, e := s._toFloat(); e == nil {
+		if o1, e := s._toFloat(strings.TrimSpace(string(s))); e == nil {
 			return o1 == o.ToFloat()
 		}
 		return false

+ 0 - 12
tc39_test.go

@@ -169,17 +169,6 @@ var (
 		"test/language/literals/string/S7.8.4_A4.3_T2.js":             true,
 		"test/language/literals/string/S7.8.4_A4.3_T1.js":             true,
 
-		// integer separators
-		"test/language/expressions/object/cpn-obj-lit-computed-property-name-from-integer-separators.js":                  true,
-		"test/language/expressions/class/cpn-class-expr-accessors-computed-property-name-from-integer-separators.js":      true,
-		"test/language/statements/class/cpn-class-decl-fields-computed-property-name-from-integer-separators.js":          true,
-		"test/language/statements/class/cpn-class-decl-computed-property-name-from-integer-separators.js":                 true,
-		"test/language/statements/class/cpn-class-decl-accessors-computed-property-name-from-integer-separators.js":       true,
-		"test/language/statements/class/cpn-class-decl-fields-methods-computed-property-name-from-integer-separators.js":  true,
-		"test/language/expressions/class/cpn-class-expr-fields-computed-property-name-from-integer-separators.js":         true,
-		"test/language/expressions/class/cpn-class-expr-computed-property-name-from-integer-separators.js":                true,
-		"test/language/expressions/class/cpn-class-expr-fields-methods-computed-property-name-from-integer-separators.js": true,
-
 		// Regexp
 		"test/language/literals/regexp/invalid-range-negative-lookbehind.js":    true,
 		"test/language/literals/regexp/invalid-range-lookbehind.js":             true,
@@ -227,7 +216,6 @@ var (
 		"Atomics.pause",
 		"FinalizationRegistry",
 		"WeakRef",
-		"numeric-separator-literal",
 		"__getter__",
 		"__setter__",
 		"ShadowRealm",