瀏覽代碼

Added missing Regexp functionality, enhanced unicode and UTF-16 support (#171)

Dmitry Panov 5 年之前
父節點
當前提交
b206dd5e2c
共有 20 個文件被更改，包括 1648 次插入和 560 次刪除
  1. 14 15
      builtin_array.go
  2. 1 1
      builtin_date.go
  3. 14 10
      builtin_function.go
  4. 1 1
      builtin_global.go
  5. 12 3
      builtin_json.go
  6. 4 0
      builtin_json_test.go
  7. 630 111
      builtin_regexp.go
  8. 85 161
      builtin_string.go
  9. 58 0
      builtin_string_test.go
  10. 10 11
      builtin_typedarrays.go
  11. 4 10
      compiler_expr.go
  12. 44 0
      object.go
  13. 326 182
      regexp.go
  14. 160 0
      regexp_test.go
  15. 52 24
      runtime.go
  16. 6 3
      string.go
  17. 16 0
      string_ascii.go
  18. 170 14
      string_unicode.go
  19. 38 9
      tc39_test.go
  20. 3 5
      vm.go

+ 14 - 15
builtin_array.go

@@ -3,7 +3,6 @@ package goja
 import (
 	"math"
 	"sort"
-	"strings"
 )
 
 func (r *Runtime) newArray(prototype *Object) (a *arrayObject) {
@@ -174,32 +173,32 @@ func (r *Runtime) arrayproto_pop(call FunctionCall) Value {
 func (r *Runtime) arrayproto_join(call FunctionCall) Value {
 	o := call.This.ToObject(r)
 	l := int(toLength(o.self.getStr("length", nil)))
-	sep := ""
+	var sep valueString = asciiString("")
 	if s := call.Argument(0); s != _undefined {
-		sep = s.toString().String()
+		sep = s.toString()
 	} else {
-		sep = ","
+		sep = asciiString(",")
 	}
 	if l == 0 {
 		return stringEmpty
 	}
 
-	var buf strings.Builder
+	var buf valueStringBuilder
 
 	element0 := o.self.getIdx(valueInt(0), nil)
 	if element0 != nil && element0 != _undefined && element0 != _null {
-		buf.WriteString(element0.String())
+		buf.WriteString(element0.toString())
 	}
 
 	for i := 1; i < l; i++ {
 		buf.WriteString(sep)
 		element := o.self.getIdx(valueInt(int64(i)), nil)
 		if element != nil && element != _undefined && element != _null {
-			buf.WriteString(element.String())
+			buf.WriteString(element.toString())
 		}
 	}
 
-	return newStringValue(buf.String())
+	return buf.String()
 }
 
 func (r *Runtime) arrayproto_toString(call FunctionCall) Value {
@@ -217,14 +216,14 @@ func (r *Runtime) arrayproto_toString(call FunctionCall) Value {
 	})
 }
 
-func (r *Runtime) writeItemLocaleString(item Value, buf *strings.Builder) {
+func (r *Runtime) writeItemLocaleString(item Value, buf *valueStringBuilder) {
 	if item != nil && item != _undefined && item != _null {
 		if f, ok := r.getVStr(item, "toLocaleString").(*Object); ok {
 			if c, ok := f.self.assertCallable(); ok {
 				strVal := c(FunctionCall{
 					This: item,
 				})
-				buf.WriteString(strVal.ToString().String())
+				buf.WriteString(strVal.toString())
 				return
 			}
 		}
@@ -234,11 +233,11 @@ func (r *Runtime) writeItemLocaleString(item Value, buf *strings.Builder) {
 
 func (r *Runtime) arrayproto_toLocaleString(call FunctionCall) Value {
 	array := call.This.ToObject(r)
-	var buf strings.Builder
+	var buf valueStringBuilder
 	if a := r.checkStdArrayObj(array); a != nil {
 		for i, item := range a.values {
 			if i > 0 {
-				buf.WriteByte(',')
+				buf.WriteRune(',')
 			}
 			r.writeItemLocaleString(item, &buf)
 		}
@@ -246,14 +245,14 @@ func (r *Runtime) arrayproto_toLocaleString(call FunctionCall) Value {
 		length := toLength(array.self.getStr("length", nil))
 		for i := int64(0); i < length; i++ {
 			if i > 0 {
-				buf.WriteByte(',')
+				buf.WriteRune(',')
 			}
 			item := array.self.getIdx(valueInt(i), nil)
 			r.writeItemLocaleString(item, &buf)
 		}
 	}
 
-	return newStringValue(buf.String())
+	return buf.String()
 }
 
 func isConcatSpreadable(obj *Object) bool {
@@ -1331,7 +1330,7 @@ func (a *arraySortCtx) sortCompare(x, y Value) int {
 		}
 		return 0
 	}
-	return strings.Compare(x.String(), y.String())
+	return x.toString().compareTo(y.toString())
 }
 
 // sort.Interface

+ 1 - 1
builtin_date.go

@@ -72,7 +72,7 @@ func (r *Runtime) builtin_date(FunctionCall) Value {
 }
 
 func (r *Runtime) date_parse(call FunctionCall) Value {
-	t, set := dateParse(call.Argument(0).String())
+	t, set := dateParse(call.Argument(0).toString().String())
 	if set {
 		return intToValue(timeToMsec(t))
 	}

+ 14 - 10
builtin_function.go

@@ -5,20 +5,24 @@ import (
 )
 
 func (r *Runtime) builtin_Function(args []Value, proto *Object) *Object {
-	src := "(function anonymous("
+	var sb valueStringBuilder
+	sb.WriteString(asciiString("(function anonymous("))
 	if len(args) > 1 {
-		for _, arg := range args[:len(args)-1] {
-			src += arg.String() + ","
+		ar := args[:len(args)-1]
+		for i, arg := range ar {
+			sb.WriteString(arg.toString())
+			if i < len(ar)-1 {
+				sb.WriteRune(',')
+			}
 		}
-		src = src[:len(src)-1]
 	}
-	body := ""
+	sb.WriteString(asciiString("){"))
 	if len(args) > 0 {
-		body = args[len(args)-1].String()
+		sb.WriteString(args[len(args)-1].toString())
 	}
-	src += "){" + body + "})"
+	sb.WriteString(asciiString("})"))
 
-	ret := r.toObject(r.eval(src, false, false, _undefined))
+	ret := r.toObject(r.eval(sb.String(), false, false, _undefined))
 	ret.self.setProto(proto, true)
 	return ret
 }
@@ -43,9 +47,9 @@ repeat:
 		case *funcObject:
 			name = c.src
 		case *nativeFuncObject:
-			name = c.nameProp.get(call.This).String()
+			name = nilSafe(c.nameProp.get(call.This)).toString().String()
 		case *boundFuncObject:
-			name = c.nameProp.get(call.This).String()
+			name = nilSafe(c.nameProp.get(call.This)).toString().String()
 		case *lazyObject:
 			f.target.self = c.create(obj)
 			goto repeat2

+ 1 - 1
builtin_global.go

@@ -117,7 +117,7 @@ func (r *Runtime) _encode(uriString valueString, unescaped *[256]bool) valueStri
 			i++
 		}
 	}
-	return asciiString(string(buf))
+	return asciiString(buf)
 }
 
 func (r *Runtime) _decode(sv valueString, reservedSet *[256]bool) valueString {

+ 12 - 3
builtin_json.go

@@ -7,6 +7,7 @@ import (
 	"io"
 	"math"
 	"strings"
+	"unicode/utf16"
 
 	"github.com/dop251/goja/unistring"
 )
@@ -14,7 +15,7 @@ import (
 const hex = "0123456789abcdef"
 
 func (r *Runtime) builtinJSON_parse(call FunctionCall) Value {
-	d := json.NewDecoder(bytes.NewBufferString(call.Argument(0).String()))
+	d := json.NewDecoder(bytes.NewBufferString(call.Argument(0).toString().String()))
 
 	value, err := r.builtinJSON_decodeValue(d)
 	if err != nil {
@@ -456,7 +457,7 @@ func (ctx *_builtinJSON_stringifyContext) jo(object *Object) {
 
 func (ctx *_builtinJSON_stringifyContext) quote(str valueString) {
 	ctx.buf.WriteByte('"')
-	reader := str.reader(0)
+	reader := &lenientUtf16Decoder{utf16Reader: str.utf16Reader(0)}
 	for {
 		r, _, err := reader.ReadRune()
 		if err != nil {
@@ -482,7 +483,15 @@ func (ctx *_builtinJSON_stringifyContext) quote(str valueString) {
 				ctx.buf.WriteByte(hex[r>>4])
 				ctx.buf.WriteByte(hex[r&0xF])
 			} else {
-				ctx.buf.WriteRune(r)
+				if utf16.IsSurrogate(r) {
+					ctx.buf.WriteString(`\u`)
+					ctx.buf.WriteByte(hex[r>>12])
+					ctx.buf.WriteByte(hex[(r>>8)&0xF])
+					ctx.buf.WriteByte(hex[(r>>4)&0xF])
+					ctx.buf.WriteByte(hex[r&0xF])
+				} else {
+					ctx.buf.WriteRune(r)
+				}
 			}
 		}
 	}

+ 4 - 0
builtin_json_test.go

@@ -61,6 +61,10 @@ func TestJSONParseReviver(t *testing.T) {
 	testScript1(SCRIPT, intToValue(10), t)
 }
 
+// TestQuoteMalformedSurrogatePair runs JSON.stringify on a string that holds
+// only the lone high surrogate U+D800 and expects the quoted result to contain
+// the textual escape sequence \ud800 rather than the raw code unit.
+func TestQuoteMalformedSurrogatePair(t *testing.T) {
+	testScript1(`JSON.stringify("\uD800")`, asciiString(`"\ud800"`), t)
+}
+
 func BenchmarkJSONStringify(b *testing.B) {
 	b.StopTimer()
 	vm := New()

文件差異過大導致無法顯示
+ 630 - 111
builtin_regexp.go


+ 85 - 161
builtin_string.go

@@ -1,7 +1,6 @@
 package goja
 
 import (
-	"bytes"
 	"github.com/dop251/goja/unistring"
 	"math"
 	"strings"
@@ -67,20 +66,6 @@ func (r *Runtime) builtin_newString(args []Value, proto *Object) *Object {
 	return r._newString(s, proto)
 }
 
-func searchSubstringUTF8(str, search string) (ret [][]int) {
-	searchPos := 0
-	l := len(str)
-	if searchPos < l {
-		p := strings.Index(str[searchPos:], search)
-		if p != -1 {
-			p += searchPos
-			searchPos = p + len(search)
-			ret = append(ret, []int{p, searchPos})
-		}
-	}
-	return
-}
-
 func (r *Runtime) stringproto_toStringValueOf(this Value, funcName string) Value {
 	if str, ok := this.(valueString); ok {
 		return str
@@ -132,10 +117,8 @@ func (r *Runtime) string_fromcharcode(call FunctionCall) Value {
 }
 
 func (r *Runtime) string_fromcodepoint(call FunctionCall) Value {
-	var b []byte
-	var sb unicodeStringBuilder
-	unicode := false
-	for i, arg := range call.Arguments {
+	var sb valueStringBuilder
+	for _, arg := range call.Arguments {
 		num := arg.ToNumber()
 		var c rune
 		if numInt, ok := num.(valueInt); ok {
@@ -146,27 +129,9 @@ func (r *Runtime) string_fromcodepoint(call FunctionCall) Value {
 		} else {
 			panic(r.newError(r.global.RangeError, "Invalid code point %s", num))
 		}
-		if c >= utf8.RuneSelf {
-			if !unicode {
-				unicode = true
-				sb.Grow(len(call.Arguments))
-				sb.writeASCII(b[:i])
-				b = nil
-			}
-		}
-		if unicode {
-			sb.writeRune(c)
-		} else {
-			if b == nil {
-				b = make([]byte, 0, len(call.Arguments))
-			}
-			b = append(b, byte(c))
-		}
-	}
-	if !unicode {
-		return asciiString(b)
+		sb.WriteRune(c)
 	}
-	return sb.string()
+	return sb.String()
 }
 
 func (r *Runtime) string_raw(call FunctionCall) Value {
@@ -176,17 +141,17 @@ func (r *Runtime) string_raw(call FunctionCall) Value {
 	if literalSegments <= 0 {
 		return stringEmpty
 	}
-	var stringElements unicodeStringBuilder
+	var stringElements valueStringBuilder
 	nextIndex := int64(0)
 	numberOfSubstitutions := int64(len(call.Arguments) - 1)
 	for {
 		nextSeg := nilSafe(raw.self.getIdx(valueInt(nextIndex), nil)).toString()
-		stringElements.writeString(nextSeg)
+		stringElements.WriteString(nextSeg)
 		if nextIndex+1 == literalSegments {
-			return stringElements.string()
+			return stringElements.String()
 		}
 		if nextIndex < numberOfSubstitutions {
-			stringElements.writeString(nilSafe(call.Arguments[nextIndex+1]).toString())
+			stringElements.WriteString(nilSafe(call.Arguments[nextIndex+1]).toString())
 		}
 		nextIndex++
 	}
@@ -250,7 +215,8 @@ func (r *Runtime) stringproto_concat(call FunctionCall) Value {
 	}
 
 	if allAscii {
-		buf := bytes.NewBuffer(make([]byte, 0, totalLen))
+		var buf strings.Builder
+		buf.Grow(totalLen)
 		for _, s := range strs {
 			buf.WriteString(s.String())
 		}
@@ -369,8 +335,8 @@ func (r *Runtime) stringproto_lastIndexOf(call FunctionCall) Value {
 
 func (r *Runtime) stringproto_localeCompare(call FunctionCall) Value {
 	r.checkObjectCoercible(call.This)
-	this := norm.NFD.String(call.This.String())
-	that := norm.NFD.String(call.Argument(0).String())
+	this := norm.NFD.String(call.This.toString().String())
+	that := norm.NFD.String(call.Argument(0).toString().String())
 	return intToValue(int64(r.collator().CompareString(this, that)))
 }
 
@@ -392,7 +358,7 @@ func (r *Runtime) stringproto_match(call FunctionCall) Value {
 	}
 
 	if rx == nil {
-		rx = r.builtin_newRegExp([]Value{regexp}, r.global.RegExpPrototype).self.(*regexpObject)
+		rx = r.newRegExp(regexp, nil, r.global.RegExpPrototype).self.(*regexpObject)
 	}
 
 	if matcher, ok := r.toObject(rx.getSym(symMatch, nil)).self.assertCallable(); ok {
@@ -410,7 +376,7 @@ func (r *Runtime) stringproto_normalize(call FunctionCall) Value {
 	s := call.This.toString()
 	var form string
 	if formArg := call.Argument(0); formArg != _undefined {
-		form = formArg.toString().String()
+		form = formArg.toString().toString().String()
 	} else {
 		form = "NFC"
 	}
@@ -475,17 +441,17 @@ func (r *Runtime) stringproto_padEnd(call FunctionCall) Value {
 	}
 	var sb unicodeStringBuilder
 	sb.Grow(toInt(maxLength))
-	sb.writeString(s)
+	sb.WriteString(s)
 	fl := filler.length()
 	for remaining >= fl {
-		sb.writeString(filler)
+		sb.WriteString(filler)
 		remaining -= fl
 	}
 	if remaining > 0 {
-		sb.writeString(filler.substring(0, remaining))
+		sb.WriteString(filler.substring(0, remaining))
 	}
 
-	return sb.string()
+	return sb.String()
 }
 
 func (r *Runtime) stringproto_padStart(call FunctionCall) Value {
@@ -529,15 +495,15 @@ func (r *Runtime) stringproto_padStart(call FunctionCall) Value {
 	sb.Grow(toInt(maxLength))
 	fl := filler.length()
 	for remaining >= fl {
-		sb.writeString(filler)
+		sb.WriteString(filler)
 		remaining -= fl
 	}
 	if remaining > 0 {
-		sb.writeString(filler.substring(0, remaining))
+		sb.WriteString(filler.substring(0, remaining))
 	}
-	sb.writeString(s)
+	sb.WriteString(s)
 
-	return sb.string()
+	return sb.String()
 }
 
 func (r *Runtime) stringproto_repeat(call FunctionCall) Value {
@@ -567,76 +533,42 @@ func (r *Runtime) stringproto_repeat(call FunctionCall) Value {
 	var sb unicodeStringBuilder
 	sb.Grow(s.length() * num)
 	for i := 0; i < num; i++ {
-		sb.writeString(s)
+		sb.WriteString(s)
 	}
-	return sb.string()
+	return sb.String()
 }
 
-func (r *Runtime) stringproto_replace(call FunctionCall) Value {
-	r.checkObjectCoercible(call.This)
-	searchValue := call.Argument(0)
-	replaceValue := call.Argument(1)
-	if searchValue != _undefined && searchValue != _null {
-		if replacer := toMethod(r.getV(searchValue, symReplace)); replacer != nil {
-			return replacer(FunctionCall{
-				This:      searchValue,
-				Arguments: []Value{call.This, replaceValue},
-			})
+// getReplaceValue classifies the replacement argument of a replace operation.
+// If replaceValue is an *Object whose implementation reports itself callable,
+// the callable is returned in rcall and str is left at its zero value.
+// Otherwise rcall stays nil and str is replaceValue converted with toString().
+func getReplaceValue(replaceValue Value) (str valueString, rcall func(FunctionCall) Value) {
+	// The inner replaceValue shadows the parameter only inside this if statement.
+	if replaceValue, ok := replaceValue.(*Object); ok {
+		if c, ok := replaceValue.self.assertCallable(); ok {
+			rcall = c
+			return
 		}
 	}
+	// Not callable (or not an object at all): fall back to the string form.
+	str = replaceValue.toString()
+	return
+}
+
+func stringReplace(s valueString, found [][]int, newstring valueString, rcall func(FunctionCall) Value) Value {
+	if len(found) == 0 {
+		return s
+	}
 
-	s := call.This.toString()
 	var str string
 	var isASCII bool
 	if astr, ok := s.(asciiString); ok {
 		str = string(astr)
 		isASCII = true
-	} else {
-		str = s.String()
-	}
-
-	var found [][]int
-
-	if searchValue, ok := searchValue.(*Object); ok {
-		if regexp, ok := searchValue.self.(*regexpObject); ok {
-			find := 1
-			if regexp.global {
-				find = -1
-			}
-			if isASCII {
-				found = regexp.pattern.FindAllSubmatchIndexASCII(str, find)
-			} else {
-				found = regexp.pattern.FindAllSubmatchIndexUTF8(str, find)
-			}
-			if found == nil {
-				return s
-			}
-		}
 	}
 
-	if found == nil {
-		found = searchSubstringUTF8(str, searchValue.String())
-	}
-
-	if len(found) == 0 {
-		return s
-	}
+	var buf valueStringBuilder
 
-	var buf bytes.Buffer
 	lastIndex := 0
-
-	var rcall func(FunctionCall) Value
-
-	if replaceValue, ok := replaceValue.(*Object); ok {
-		if c, ok := replaceValue.self.assertCallable(); ok {
-			rcall = c
-		}
-	}
-
+	lengthS := s.length()
 	if rcall != nil {
 		for _, item := range found {
 			if item[0] != lastIndex {
-				buf.WriteString(str[lastIndex:item[0]])
+				buf.WriteSubstring(s, lastIndex, item[0])
 			}
 			matchCount := len(item) / 2
 			argumentList := make([]Value, matchCount+2)
@@ -646,7 +578,7 @@ func (r *Runtime) stringproto_replace(call FunctionCall) Value {
 					if isASCII {
 						argumentList[index] = asciiString(str[item[offset]:item[offset+1]])
 					} else {
-						argumentList[index] = newStringValue(str[item[offset]:item[offset+1]])
+						argumentList[index] = s.substring(item[offset], item[offset+1])
 					}
 				} else {
 					argumentList[index] = _undefined
@@ -657,71 +589,59 @@ func (r *Runtime) stringproto_replace(call FunctionCall) Value {
 			replacement := rcall(FunctionCall{
 				This:      _undefined,
 				Arguments: argumentList,
-			}).String()
+			}).toString()
 			buf.WriteString(replacement)
 			lastIndex = item[1]
 		}
 	} else {
-		newstring := replaceValue.String()
-
 		for _, item := range found {
 			if item[0] != lastIndex {
-				buf.WriteString(str[lastIndex:item[0]])
+				buf.WriteString(s.substring(lastIndex, item[0]))
 			}
-			matches := len(item) / 2
-			for i := 0; i < len(newstring); i++ {
-				if newstring[i] == '$' && i < len(newstring)-1 {
-					ch := newstring[i+1]
-					switch ch {
-					case '$':
-						buf.WriteByte('$')
-					case '`':
-						buf.WriteString(str[0:item[0]])
-					case '\'':
-						buf.WriteString(str[item[1]:])
-					case '&':
-						buf.WriteString(str[item[0]:item[1]])
-					default:
-						matchNumber := 0
-						l := 0
-						for _, ch := range newstring[i+1:] {
-							if ch >= '0' && ch <= '9' {
-								m := matchNumber*10 + int(ch-'0')
-								if m >= matches {
-									break
-								}
-								matchNumber = m
-								l++
-							} else {
-								break
-							}
-						}
-						if l > 0 {
-							offset := 2 * matchNumber
-							if offset < len(item) && item[offset] != -1 {
-								buf.WriteString(str[item[offset]:item[offset+1]])
-							}
-							i += l - 1
-						} else {
-							buf.WriteByte('$')
-							buf.WriteByte(ch)
-						}
-
+			matchCount := len(item) / 2
+			writeSubstitution(s, item[0], matchCount, func(idx int) valueString {
+				if item[idx*2] != -1 {
+					if isASCII {
+						return asciiString(str[item[idx*2]:item[idx*2+1]])
 					}
-					i++
-				} else {
-					buf.WriteByte(newstring[i])
+					return s.substring(item[idx*2], item[idx*2+1])
 				}
-			}
+				return stringEmpty
+			}, newstring, &buf)
 			lastIndex = item[1]
 		}
 	}
 
-	if lastIndex != len(str) {
-		buf.WriteString(str[lastIndex:])
+	if lastIndex != lengthS {
+		buf.WriteString(s.substring(lastIndex, lengthS))
+	}
+
+	return buf.String()
+}
+
+func (r *Runtime) stringproto_replace(call FunctionCall) Value {
+	r.checkObjectCoercible(call.This)
+	searchValue := call.Argument(0)
+	replaceValue := call.Argument(1)
+	if searchValue != _undefined && searchValue != _null {
+		if replacer := toMethod(r.getV(searchValue, symReplace)); replacer != nil {
+			return replacer(FunctionCall{
+				This:      searchValue,
+				Arguments: []Value{call.This, replaceValue},
+			})
+		}
+	}
+
+	s := call.This.toString()
+	var found [][]int
+	searchStr := searchValue.toString()
+	pos := s.index(searchStr, 0)
+	if pos != -1 {
+		found = append(found, []int{pos, pos + searchStr.length()})
 	}
 
-	return newStringValue(buf.String())
+	str, rcall := getReplaceValue(replaceValue)
+	return stringReplace(s, found, str, rcall)
 }
 
 func (r *Runtime) stringproto_search(call FunctionCall) Value {
@@ -742,7 +662,7 @@ func (r *Runtime) stringproto_search(call FunctionCall) Value {
 	}
 
 	if rx == nil {
-		rx = r.builtin_newRegExp([]Value{regexp}, r.global.RegExpPrototype).self.(*regexpObject)
+		rx = r.newRegExp(regexp, nil, r.global.RegExpPrototype).self.(*regexpObject)
 	}
 
 	if searcher, ok := r.toObject(rx.getSym(symSearch, nil)).self.assertCallable(); ok {
@@ -823,7 +743,7 @@ func (r *Runtime) stringproto_split(call FunctionCall) Value {
 		return r.newArrayValues([]Value{s})
 	}
 
-	separator := separatorValue.String()
+	separator := separatorValue.toString().String()
 
 	excess := false
 	str := s.String()
@@ -836,6 +756,7 @@ func (r *Runtime) stringproto_split(call FunctionCall) Value {
 		excess = true
 	}
 
+	// TODO handle invalid UTF-16
 	split := strings.SplitN(str, separator, splitLimit)
 
 	if excess && len(split) > limit {
@@ -925,6 +846,7 @@ func (r *Runtime) stringproto_trim(call FunctionCall) Value {
 	r.checkObjectCoercible(call.This)
 	s := call.This.toString()
 
+	// TODO handle invalid UTF-16
 	return newStringValue(strings.Trim(s.String(), parser.WhitespaceChars))
 }
 
@@ -932,6 +854,7 @@ func (r *Runtime) stringproto_trimEnd(call FunctionCall) Value {
 	r.checkObjectCoercible(call.This)
 	s := call.This.toString()
 
+	// TODO handle invalid UTF-16
 	return newStringValue(strings.TrimRight(s.String(), parser.WhitespaceChars))
 }
 
@@ -939,6 +862,7 @@ func (r *Runtime) stringproto_trimStart(call FunctionCall) Value {
 	r.checkObjectCoercible(call.This)
 	s := call.This.toString()
 
+	// TODO handle invalid UTF-16
 	return newStringValue(strings.TrimLeft(s.String(), parser.WhitespaceChars))
 }
 

+ 58 - 0
builtin_string_test.go

@@ -167,3 +167,61 @@ if (result.value !== pair) {
 `
 	testScript1(SCRIPT, _undefined, t)
 }
+
+// TestValueStringBuilder exercises valueStringBuilder.WriteSubstring followed by
+// String(). Most subtests use "a\U00010000b"; judging by the expected values the
+// start/end arguments index UTF-16 code units, with U+10000 occupying units 1-2
+// as the surrogate pair 0xD800, 0xDC00. Every subtest runs in parallel with its
+// own builder.
+func TestValueStringBuilder(t *testing.T) {
+	// An ASCII-only slice of a non-ASCII string must come back as an asciiString.
+	t.Run("substringASCII", func(t *testing.T) {
+		t.Parallel()
+		var sb valueStringBuilder
+		str := newStringValue("a\U00010000b")
+		sb.WriteSubstring(str, 0, 1)
+		res := sb.String()
+		if res != asciiString("a") {
+			t.Fatal(res)
+		}
+	})
+
+	// Same slice taken from a pure-ASCII source string.
+	t.Run("substringASCIIPure", func(t *testing.T) {
+		t.Parallel()
+		var sb valueStringBuilder
+		str := newStringValue("ab")
+		sb.WriteSubstring(str, 0, 1)
+		res := sb.String()
+		if res != asciiString("a") {
+			t.Fatal(res)
+		}
+	})
+
+	// Units [1, 3) cover the whole surrogate pair, i.e. the single rune U+10000.
+	t.Run("substringUnicode", func(t *testing.T) {
+		t.Parallel()
+		var sb valueStringBuilder
+		str := newStringValue("a\U00010000b")
+		sb.WriteSubstring(str, 1, 3)
+		res := sb.String()
+		if !res.SameAs(unicodeStringFromRunes([]rune{0x10000})) {
+			t.Fatal(res)
+		}
+	})
+
+	// Units [0, 2) cut the pair in half: 'a' followed by the lone high surrogate.
+	t.Run("substringASCIIUnicode", func(t *testing.T) {
+		t.Parallel()
+		var sb valueStringBuilder
+		str := newStringValue("a\U00010000b")
+		sb.WriteSubstring(str, 0, 2)
+		res := sb.String()
+		if !res.SameAs(unicodeStringFromRunes([]rune{'a', 0xD800})) {
+			t.Fatal(res)
+		}
+	})
+
+	// Units [2, 4) start inside the pair: the lone low surrogate followed by 'b'.
+	t.Run("substringUnicodeASCII", func(t *testing.T) {
+		t.Parallel()
+		var sb valueStringBuilder
+		str := newStringValue("a\U00010000b")
+		sb.WriteSubstring(str, 2, 4)
+		res := sb.String()
+		if !res.SameAs(unicodeStringFromRunes([]rune{0xDC00, 'b'})) {
+			t.Fatal(res)
+		}
+	})
+
+}

+ 10 - 11
builtin_typedarrays.go

@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"math"
 	"sort"
-	"strings"
 	"unsafe"
 
 	"github.com/dop251/goja/unistring"
@@ -644,23 +643,23 @@ func (r *Runtime) typedArrayProto_join(call FunctionCall) Value {
 	if ta, ok := r.toObject(call.This).self.(*typedArrayObject); ok {
 		ta.viewedArrayBuf.ensureNotDetached()
 		s := call.Argument(0)
-		sep := ""
+		sep := stringEmpty
 		if s != _undefined {
-			sep = s.toString().String()
+			sep = s.toString()
 		} else {
-			sep = ","
+			sep = asciiString(",")
 		}
 		l := ta.length
 		if l == 0 {
 			return stringEmpty
 		}
 
-		var buf strings.Builder
+		var buf valueStringBuilder
 
 		ta.viewedArrayBuf.ensureNotDetached()
 		element0 := ta.typedArray.get(0)
 		if element0 != nil && element0 != _undefined && element0 != _null {
-			buf.WriteString(element0.String())
+			buf.WriteString(element0.toString())
 		}
 
 		for i := 1; i < l; i++ {
@@ -668,11 +667,11 @@ func (r *Runtime) typedArrayProto_join(call FunctionCall) Value {
 			buf.WriteString(sep)
 			element := ta.typedArray.get(i)
 			if element != nil && element != _undefined && element != _null {
-				buf.WriteString(element.String())
+				buf.WriteString(element.toString())
 			}
 		}
 
-		return newStringValue(buf.String())
+		return buf.String()
 	}
 	panic(r.NewTypeError("Method TypedArray.prototype.join called on incompatible receiver"))
 }
@@ -1002,16 +1001,16 @@ func (r *Runtime) typedArrayProto_subarray(call FunctionCall) Value {
 func (r *Runtime) typedArrayProto_toLocaleString(call FunctionCall) Value {
 	if ta, ok := r.toObject(call.This).self.(*typedArrayObject); ok {
 		length := ta.length
-		var buf strings.Builder
+		var buf valueStringBuilder
 		for i := 0; i < length; i++ {
 			ta.viewedArrayBuf.ensureNotDetached()
 			if i > 0 {
-				buf.WriteByte(',')
+				buf.WriteRune(',')
 			}
 			item := ta.typedArray.get(i)
 			r.writeItemLocaleString(item, &buf)
 		}
-		return newStringValue(buf.String())
+		return buf.String()
 	}
 	panic(r.NewTypeError("Method TypedArray.prototype.toLocaleString called on incompatible receiver %s", call.This.String()))
 }

+ 4 - 10
compiler_expr.go

@@ -993,7 +993,7 @@ func (c *compiler) compileSequenceExpression(v *ast.SequenceExpression) compiled
 
 func (c *compiler) emitThrow(v Value) {
 	if o, ok := v.(*Object); ok {
-		t := o.self.getStr("name", nil).String()
+		t := nilSafe(o.self.getStr("name", nil)).toString().String()
 		switch t {
 		case "TypeError":
 			c.emit(getVar1(t))
@@ -1008,7 +1008,7 @@ func (c *compiler) emitThrow(v Value) {
 			return
 		}
 	}
-	panic(fmt.Errorf("Unknown exception type thrown while evaliating constant expression: %s", v.String()))
+	panic(fmt.Errorf("unknown exception type thrown while evaliating constant expression: %s", v.String()))
 }
 
 func (c *compiler) emitConst(expr compiledExpr, putOnStack bool) {
@@ -1440,18 +1440,12 @@ func (c *compiler) compileArrayLiteral(v *ast.ArrayLiteral) compiledExpr {
 
 func (e *compiledRegexpLiteral) emitGetter(putOnStack bool) {
 	if putOnStack {
-		pattern, global, ignoreCase, multiline, sticky, err := compileRegexp(e.expr.Pattern, e.expr.Flags)
+		pattern, err := compileRegexp(e.expr.Pattern, e.expr.Flags)
 		if err != nil {
 			e.c.throwSyntaxError(e.offset, err.Error())
 		}
 
-		e.c.emit(&newRegexp{pattern: pattern,
-			src:        newStringValue(e.expr.Pattern),
-			global:     global,
-			ignoreCase: ignoreCase,
-			multiline:  multiline,
-			sticky:     sticky,
-		})
+		e.c.emit(&newRegexp{pattern: pattern, src: newStringValue(e.expr.Pattern)})
 	}
 }
 

+ 44 - 0
object.go

@@ -252,6 +252,11 @@ type baseObject struct {
 	symValues *orderedMap
 }
 
+// guardedObject is a baseObject that additionally tracks a set of property
+// names. Its setOwnStr, defineOwnPropertyStr and deleteStr overrides call check
+// after a successful operation; when the affected name is in the set, check
+// replaces the object's implementation with the plain embedded baseObject.
+type guardedObject struct {
+	baseObject
+	// guardedProps is the set of guarded property names; nil until guard is called.
+	guardedProps map[unistring.String]struct{}
+}
+
 type primitiveValueObject struct {
 	baseObject
 	pValue Value
@@ -1405,3 +1410,42 @@ func (o *Object) getId() uint64 {
 	}
 	return o.id
 }
+
+// guard adds the given property names to the guarded set, allocating the set on
+// first use. Names that are already present are simply overwritten.
+func (o *guardedObject) guard(props ...unistring.String) {
+	if o.guardedProps == nil {
+		o.guardedProps = make(map[unistring.String]struct{})
+	}
+	for _, p := range props {
+		o.guardedProps[p] = struct{}{}
+	}
+}
+
+// check tests whether p is a guarded property name and, if so, points
+// o.val.self at the embedded baseObject, so the guardedObject overrides are no
+// longer the object's implementation. Looking up a key in a nil map is safe, so
+// this also works when guard was never called.
+// NOTE(review): val is presumably the owning *Object — confirm against baseObject.
+func (o *guardedObject) check(p unistring.String) {
+	if _, exists := o.guardedProps[p]; exists {
+		o.val.self = &o.baseObject
+	}
+}
+
+// setOwnStr delegates to baseObject.setOwnStr and, only when that reports
+// success, runs check on the property name. The delegate's result is returned
+// unchanged.
+func (o *guardedObject) setOwnStr(p unistring.String, v Value, throw bool) bool {
+	res := o.baseObject.setOwnStr(p, v, throw)
+	if res {
+		o.check(p)
+	}
+	return res
+}
+
+// defineOwnPropertyStr delegates to baseObject.defineOwnPropertyStr and, only
+// when that reports success, runs check on the property name. The delegate's
+// result is returned unchanged.
+func (o *guardedObject) defineOwnPropertyStr(name unistring.String, desc PropertyDescriptor, throw bool) bool {
+	res := o.baseObject.defineOwnPropertyStr(name, desc, throw)
+	if res {
+		o.check(name)
+	}
+	return res
+}
+
+// deleteStr delegates to baseObject.deleteStr and, only when that reports
+// success, runs check on the property name. The delegate's result is returned
+// unchanged.
+func (o *guardedObject) deleteStr(name unistring.String, throw bool) bool {
+	res := o.baseObject.deleteStr(name, throw)
+	if res {
+		o.check(name)
+	}
+	return res
+}

+ 326 - 182
regexp.go

@@ -3,42 +3,176 @@ package goja
 import (
 	"fmt"
 	"github.com/dlclark/regexp2"
+	"github.com/dop251/goja/unistring"
+	"io"
 	"regexp"
+	"sort"
+	"strings"
 	"unicode/utf16"
-	"unicode/utf8"
 )
 
-type regexpPattern interface {
-	FindSubmatchIndex(valueString, int) []int
-	FindAllSubmatchIndex(valueString, int) [][]int
-	FindAllSubmatchIndexUTF8(string, int) [][]int
-	FindAllSubmatchIndexASCII(string, int) [][]int
-	MatchString(valueString) bool
-}
-
 type regexp2Wrapper regexp2.Regexp
 type regexpWrapper regexp.Regexp
 
+// positionMapItem pairs an offset in one representation of a string (src) with
+// the corresponding offset in another (dst). buildUTF8PosMap fills src with a
+// UTF-8 byte offset and dst with the offset reported by the valueString reader.
+type positionMapItem struct {
+	src, dst int
+}
+// positionMap is a list of positionMapItem. get looks entries up with a binary
+// search on src, so the items must be ordered by ascending src.
+type positionMap []positionMapItem
+
+// get translates the offset src into its mapped dst offset. Offset 0 always maps
+// to 0 without a lookup (buildUTF8PosMap stores no entry for it). Any other
+// value must match the src of an entry exactly; otherwise get panics with
+// "index not found".
+// NOTE(review): a negative src (for example -1 for an unmatched capture group)
+// is not special-cased and would reach the panic — confirm that callers never
+// pass one.
+func (m positionMap) get(src int) int {
+	if src == 0 {
+		return 0
+	}
+	// Binary search for the first entry whose src is not below the requested one.
+	res := sort.Search(len(m), func(n int) bool { return m[n].src >= src })
+	if res >= len(m) || m[res].src != src {
+		panic("index not found")
+	}
+	return m[res].dst
+}
+
+// arrayRuneReader reads runes sequentially from an in-memory slice.
+type arrayRuneReader struct {
+	runes []rune // the runes to read from
+	pos   int    // index of the next rune ReadRune will return
+}
+
+// ReadRune returns the next rune of the slice and advances the position. The
+// reported size is always 1, i.e. one slice element, not a byte count. Once the
+// slice is exhausted it returns io.EOF with zero rune and size. The signature
+// matches io.RuneReader.
+func (rd *arrayRuneReader) ReadRune() (r rune, size int, err error) {
+	if rd.pos < len(rd.runes) {
+		r = rd.runes[rd.pos]
+		size = 1
+		rd.pos++
+	} else {
+		err = io.EOF
+	}
+	return
+}
+
+// regexpPattern holds a regular expression's source, its flags and up to two
+// compiled forms of it. The find methods use regexpWrapper when it is set and
+// the call can be served by it, and otherwise fall back to regexp2Wrapper,
+// which createRegexp2 compiles on demand from src.
+type regexpPattern struct {
+	// src is the pattern text handed to compileRegexp2 by createRegexp2.
+	src string
+
+	// Flags of the expression — presumably the JavaScript g, i, m, y and u
+	// flags; confirm against compileRegexp.
+	global, ignoreCase, multiline, sticky, unicode bool
+
+	// regexpWrapper wraps Go's regexp.Regexp; nil when it is not available.
+	regexpWrapper  *regexpWrapper
+	// regexp2Wrapper wraps regexp2.Regexp; may be nil until createRegexp2 runs.
+	regexp2Wrapper *regexp2Wrapper
+}
+
+// compileRegexp2 compiles src with the regexp2 engine. The options always
+// include regexp2.ECMAScript; Multiline and IgnoreCase are added when the
+// corresponding argument is true. On failure it returns a nil wrapper and an
+// error that quotes both the pattern and the underlying regexp2 error.
+func compileRegexp2(src string, multiline, ignoreCase bool) (*regexp2Wrapper, error) {
+	var opts regexp2.RegexOptions = regexp2.ECMAScript
+	if multiline {
+		opts |= regexp2.Multiline
+	}
+	if ignoreCase {
+		opts |= regexp2.IgnoreCase
+	}
+	regexp2Pattern, err1 := regexp2.Compile(src, opts)
+	if err1 != nil {
+		return nil, fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", src, err1)
+	}
+
+	return (*regexp2Wrapper)(regexp2Pattern), nil
+}
+
+// createRegexp2 makes sure p.regexp2Wrapper is populated. It does nothing when
+// the wrapper already exists; otherwise it compiles p.src with the pattern's
+// multiline and ignoreCase flags and stores the result. A compilation error is
+// treated as an internal bug and causes a panic.
+func (p *regexpPattern) createRegexp2() {
+	if p.regexp2Wrapper != nil {
+		return
+	}
+	rx, err := compileRegexp2(p.src, p.multiline, p.ignoreCase)
+	if err != nil {
+		// At this point the regexp should have been successfully converted to re2, if it fails now, it's a bug.
+		panic(err)
+	}
+	p.regexp2Wrapper = rx
+}
+
+// buildUTF8PosMap re-encodes s as a UTF-8 Go string and builds a positionMap
+// from UTF-8 byte offsets (src) to offsets in s (dst). One entry is appended
+// after each rune, holding the offsets just past that rune; no entry is stored
+// for offset 0. If the reader returns any error other than io.EOF, the function
+// gives up and returns a nil map and an empty string. For a successfully
+// processed (even empty) string the returned map is non-nil, which lets callers
+// tell the two outcomes apart.
+// NOTE(review): dst is presumably measured in UTF-16 code units, as reported by
+// s.reader — confirm.
+func buildUTF8PosMap(s valueString) (positionMap, string) {
+	pm := make(positionMap, 0, s.length())
+	rd := s.reader(0)
+	sPos, utf8Pos := 0, 0
+	var sb strings.Builder
+	for {
+		r, size, err := rd.ReadRune()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			// the string contains invalid UTF-16, bailing out
+			return nil, ""
+		}
+		// strings.Builder.WriteRune reports the number of bytes written.
+		utf8Size, _ := sb.WriteRune(r)
+		sPos += size
+		utf8Pos += utf8Size
+		pm = append(pm, positionMapItem{src: utf8Pos, dst: sPos})
+	}
+	return pm, sb.String()
+}
+
+// findSubmatchIndex searches s from position start and returns the index slice
+// produced by the engine that handled the call. The Go regexp wrapper is used
+// only when it exists and start is 0; in every other case the regexp2 wrapper
+// handles the search, being compiled on demand when the Go wrapper exists but
+// start is non-zero.
+func (p *regexpPattern) findSubmatchIndex(s valueString, start int) []int {
+	if p.regexpWrapper == nil {
+		return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode)
+	}
+	if start != 0 {
+		// Unfortunately Go's regexp library does not allow starting from an arbitrary position.
+		// If we just drop the first _start_ characters of the string the assertions (^, $, \b and \B) will not
+		// work correctly.
+		p.createRegexp2()
+		return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode)
+	}
+	return p.regexpWrapper.findSubmatchIndex(s, p.unicode)
+}
+
+// findAllSubmatchIndex collects matches of the pattern in s, passing start,
+// limit and sticky on to the engine that handles the call. Engine selection:
+//   - no Go regexp wrapper: regexp2 handles everything;
+//   - start == 0 and s is an asciiString: Go regexp on the string directly;
+//   - start == 0 and limit == 1: a single Go regexp findSubmatchIndex call;
+//   - start == 0, unicode flag set and s converts cleanly to UTF-8: Go regexp on
+//     the UTF-8 copy, with every returned index translated back via positionMap;
+//   - anything else: regexp2, compiled on demand.
+func (p *regexpPattern) findAllSubmatchIndex(s valueString, start int, limit int, sticky bool) [][]int {
+	if p.regexpWrapper == nil {
+		return p.regexp2Wrapper.findAllSubmatchIndex(s, start, limit, sticky, p.unicode)
+	}
+	if start == 0 {
+		// The inner s shadows the parameter only inside this if statement.
+		if s, ok := s.(asciiString); ok {
+			return p.regexpWrapper.findAllSubmatchIndex(s.String(), limit, sticky)
+		}
+		if limit == 1 {
+			result := p.regexpWrapper.findSubmatchIndex(s, p.unicode)
+			if result == nil {
+				return nil
+			}
+			return [][]int{result}
+		}
+		// Unfortunately Go's regexp library lacks FindAllReaderSubmatchIndex(), so we have to use a UTF-8 string as an
+		// input.
+		if p.unicode {
+			// Try to convert s to UTF-8. If it does not contain any invalid UTF-16 we can do the matching in UTF-8.
+			pm, str := buildUTF8PosMap(s)
+			if pm != nil {
+				res := p.regexpWrapper.findAllSubmatchIndex(str, limit, sticky)
+				// Rewrite the UTF-8 byte offsets in place into offsets within s.
+				for _, result := range res {
+					for i, idx := range result {
+						result[i] = pm.get(idx)
+					}
+				}
+				return res
+			}
+		}
+	}
+
+	// Fallback for every case the Go regexp engine could not serve above.
+	p.createRegexp2()
+	return p.regexp2Wrapper.findAllSubmatchIndex(s, start, limit, sticky, p.unicode)
+}
+
 type regexpObject struct {
 	baseObject
-	pattern regexpPattern
+	pattern *regexpPattern
 	source  valueString
 
-	global, multiline, ignoreCase, sticky bool
+	standard bool
 }
 
-func (r *regexp2Wrapper) FindSubmatchIndex(s valueString, start int) (result []int) {
-	wrapped := (*regexp2.Regexp)(r)
-	var match *regexp2.Match
-	var err error
-	switch s := s.(type) {
-	case asciiString:
-		match, err = wrapped.FindStringMatch(string(s)[start:])
-	case unicodeString:
-		match, err = wrapped.FindRunesMatch(utf16.Decode(s[start+1:]))
-	default:
-		panic(fmt.Errorf("Unknown string type: %T", s))
+func (r *regexp2Wrapper) findSubmatchIndex(s valueString, start int, fullUnicode bool) (result []int) {
+	if fullUnicode {
+		return r.findSubmatchIndexUnicode(s, start)
 	}
+	return r.findSubmatchIndexUTF16(s, start)
+}
+
+func (r *regexp2Wrapper) findSubmatchIndexUTF16(s valueString, start int) (result []int) {
+	wrapped := (*regexp2.Regexp)(r)
+	match, err := wrapped.FindRunesMatchStartingAt(s.utf16Runes(), start)
 	if err != nil {
 		return
 	}
@@ -59,109 +193,116 @@ func (r *regexp2Wrapper) FindSubmatchIndex(s valueString, start int) (result []i
 	return
 }
 
-func (r *regexp2Wrapper) FindAllSubmatchIndexUTF8(s string, n int) [][]int {
+func (r *regexp2Wrapper) findSubmatchIndexUnicode(s valueString, start int) (result []int) {
 	wrapped := (*regexp2.Regexp)(r)
-	if n < 0 {
-		n = len(s) + 1
-	}
-	results := make([][]int, 0, n)
-
-	idxMap := make([]int, 0, len(s))
-	runes := make([]rune, 0, len(s))
-	for pos, rr := range s {
-		runes = append(runes, rr)
-		idxMap = append(idxMap, pos)
-	}
-	idxMap = append(idxMap, len(s))
-
-	match, err := wrapped.FindRunesMatch(runes)
+	posMap, runes, mappedStart := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), start)
+	match, err := wrapped.FindRunesMatchStartingAt(runes, mappedStart)
 	if err != nil {
-		return nil
+		return
 	}
-	i := 0
-	for match != nil && i < n {
-		groups := match.Groups()
-
-		result := make([]int, 0, len(groups)<<1)
 
-		for _, group := range groups {
-			if len(group.Captures) > 0 {
-				result = append(result, idxMap[group.Index], idxMap[group.Index+group.Length])
-			} else {
-				result = append(result, -1, 0)
-			}
-		}
+	if match == nil {
+		return
+	}
+	groups := match.Groups()
 
-		results = append(results, result)
-		match, err = wrapped.FindNextMatch(match)
-		if err != nil {
-			return nil
+	result = make([]int, 0, len(groups)<<1)
+	for _, group := range groups {
+		if len(group.Captures) > 0 {
+			result = append(result, posMap[group.Index], posMap[group.Index+group.Length])
+		} else {
+			result = append(result, -1, 0)
 		}
-		i++
 	}
-	return results
+	return
 }
 
-func (r *regexp2Wrapper) FindAllSubmatchIndexASCII(s string, n int) [][]int {
+func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s valueString, start, limit int, sticky bool) [][]int {
 	wrapped := (*regexp2.Regexp)(r)
-	if n < 0 {
-		n = len(s) + 1
-	}
-	results := make([][]int, 0, n)
-
-	match, err := wrapped.FindStringMatch(s)
+	runes := s.utf16Runes()
+	match, err := wrapped.FindRunesMatchStartingAt(runes, start)
 	if err != nil {
 		return nil
 	}
-	i := 0
-	for match != nil && i < n {
+	if limit < 0 {
+		limit = len(runes) + 1
+	}
+	results := make([][]int, 0, limit)
+	for match != nil {
 		groups := match.Groups()
 
 		result := make([]int, 0, len(groups)<<1)
 
 		for _, group := range groups {
 			if len(group.Captures) > 0 {
-				result = append(result, group.Index, group.Index+group.Length)
+				startPos := group.Index
+				endPos := group.Index + group.Length
+				result = append(result, startPos, endPos)
 			} else {
 				result = append(result, -1, 0)
 			}
 		}
 
+		if sticky && len(result) > 1 {
+			if result[0] != start {
+				break
+			}
+			start = result[1]
+		}
+
 		results = append(results, result)
+		limit--
+		if limit <= 0 {
+			break
+		}
 		match, err = wrapped.FindNextMatch(match)
 		if err != nil {
 			return nil
 		}
-		i++
 	}
 	return results
 }
 
-func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]int {
-	wrapped := (*regexp2.Regexp)(r)
-	if n < 0 {
-		n = len(s) + 1
-	}
-	results := make([][]int, 0, n)
-
-	rd := runeReaderReplace{s.reader(0)}
-	posMap := make([]int, s.length()+1)
+func buildPosMap(rd io.RuneReader, l, start int) (posMap []int, runes []rune, mappedStart int) {
+	posMap = make([]int, 0, l+1)
 	curPos := 0
-	curRuneIdx := 0
-	runes := make([]rune, 0, s.length())
+	runes = make([]rune, 0, l)
+	startFound := false
 	for {
+		if !startFound {
+			if curPos == start {
+				mappedStart = len(runes)
+				startFound = true
+			}
+			if curPos > start {
+				// start position splits a surrogate pair
+				mappedStart = len(runes) - 1
+				_, second := utf16.EncodeRune(runes[mappedStart])
+				runes[mappedStart] = second
+				startFound = true
+			}
+		}
 		rn, size, err := rd.ReadRune()
 		if err != nil {
 			break
 		}
 		runes = append(runes, rn)
-		posMap[curRuneIdx] = curPos
-		curRuneIdx++
+		posMap = append(posMap, curPos)
 		curPos += size
 	}
-	posMap[curRuneIdx] = curPos
+	posMap = append(posMap, curPos)
+	return
+}
 
-	match, err := wrapped.FindRunesMatch(runes)
+func (r *regexp2Wrapper) findAllSubmatchIndexUnicode(s unicodeString, start, limit int, sticky bool) [][]int {
+	wrapped := (*regexp2.Regexp)(r)
+	if limit < 0 {
+		limit = len(s) + 1
+	}
+	results := make([][]int, 0, limit)
+	posMap, runes, mappedStart := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), start)
+
+	match, err := wrapped.FindRunesMatchStartingAt(runes, mappedStart)
 	if err != nil {
 		return nil
 	}
@@ -180,6 +321,13 @@ func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]i
 			}
 		}
 
+		if sticky && len(result) > 1 {
+			if result[0] != start {
+				break
+			}
+			start = result[1]
+		}
+
 		results = append(results, result)
 		match, err = wrapped.FindNextMatch(match)
 		if err != nil {
@@ -189,94 +337,48 @@ func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]i
 	return results
 }
 
-func (r *regexp2Wrapper) FindAllSubmatchIndex(s valueString, n int) [][]int {
+func (r *regexp2Wrapper) findAllSubmatchIndex(s valueString, start, limit int, sticky, fullUnicode bool) [][]int {
 	switch s := s.(type) {
 	case asciiString:
-		return r.FindAllSubmatchIndexASCII(string(s), n)
+		return r.findAllSubmatchIndexUTF16(s, start, limit, sticky)
 	case unicodeString:
-		return r.findAllSubmatchIndexUTF16(s, n)
+		if fullUnicode {
+			return r.findAllSubmatchIndexUnicode(s, start, limit, sticky)
+		}
+		return r.findAllSubmatchIndexUTF16(s, start, limit, sticky)
 	default:
 		panic("Unsupported string type")
 	}
 }
 
-func (r *regexp2Wrapper) MatchString(s valueString) bool {
-	wrapped := (*regexp2.Regexp)(r)
-
-	switch s := s.(type) {
-	case asciiString:
-		matched, _ := wrapped.MatchString(string(s))
-		return matched
-	case unicodeString:
-		matched, _ := wrapped.MatchRunes(utf16.Decode(s[1:]))
-		return matched
-	default:
-		panic(fmt.Errorf("Unknown string type: %T", s))
-	}
-}
-
-func (r *regexpWrapper) FindSubmatchIndex(s valueString, start int) (result []int) {
-	wrapped := (*regexp.Regexp)(r)
-	return wrapped.FindReaderSubmatchIndex(runeReaderReplace{s.reader(start)})
-}
-
-func (r *regexpWrapper) MatchString(s valueString) bool {
-	wrapped := (*regexp.Regexp)(r)
-	return wrapped.MatchReader(runeReaderReplace{s.reader(0)})
-}
-
-func (r *regexpWrapper) FindAllSubmatchIndex(s valueString, n int) [][]int {
+func (r *regexpWrapper) findAllSubmatchIndex(s string, limit int, sticky bool) (results [][]int) {
 	wrapped := (*regexp.Regexp)(r)
-	switch s := s.(type) {
-	case asciiString:
-		return wrapped.FindAllStringSubmatchIndex(string(s), n)
-	case unicodeString:
-		return r.findAllSubmatchIndexUTF16(s, n)
-	default:
-		panic("Unsupported string type")
+	results = wrapped.FindAllStringSubmatchIndex(s, limit)
+	pos := 0
+	if sticky {
+		for i, result := range results {
+			if len(result) > 1 {
+				if result[0] != pos {
+					return results[:i]
+				}
+				pos = result[1]
+			}
+		}
 	}
+	return
 }
 
-func (r *regexpWrapper) FindAllSubmatchIndexUTF8(s string, n int) [][]int {
-	wrapped := (*regexp.Regexp)(r)
-	return wrapped.FindAllStringSubmatchIndex(s, n)
-}
-
-func (r *regexpWrapper) FindAllSubmatchIndexASCII(s string, n int) [][]int {
-	return r.FindAllSubmatchIndexUTF8(s, n)
-}
-
-func (r *regexpWrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]int {
+// findSubmatchIndex returns the submatch index pairs of the first match of
+// the wrapped Go regexp in s, searching from the beginning of the string, or
+// nil when there is no match.
+//
+// When fullUnicode is set the string is decoded leniently into code points
+// for matching and the resulting offsets are translated back through the
+// position map built by buildPosMap. Otherwise every UTF-16 code unit is
+// matched as an individual rune and the offsets are returned as reported.
+func (r *regexpWrapper) findSubmatchIndex(s valueString, fullUnicode bool) (result []int) {
 	wrapped := (*regexp.Regexp)(r)
-	utf8Bytes := make([]byte, 0, len(s)*2)
-	posMap := make(map[int]int)
-	curPos := 0
-	rd := runeReaderReplace{s.reader(0)}
-	for {
-		rn, size, err := rd.ReadRune()
-		if err != nil {
-			break
+	if fullUnicode {
+		posMap, runes, _ := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), 0)
+		res := wrapped.FindReaderSubmatchIndex(&arrayRuneReader{runes: runes})
+		for i, item := range res {
+			// A capture group that did not participate in the match is
+			// reported as -1; it must be kept as is, indexing posMap with
+			// it would panic.
+			if item >= 0 {
+				res[i] = posMap[item]
+			}
 		}
-		l := len(utf8Bytes)
-		utf8Bytes = append(utf8Bytes, 0, 0, 0, 0)
-		n := utf8.EncodeRune(utf8Bytes[l:], rn)
-		utf8Bytes = utf8Bytes[:l+n]
-		posMap[l] = curPos
-		curPos += size
+		return res
 	}
-	posMap[len(utf8Bytes)] = curPos
-
-	rr := wrapped.FindAllSubmatchIndex(utf8Bytes, n)
-	for _, res := range rr {
-		for j, pos := range res {
-			mapped, exists := posMap[pos]
-			if !exists {
-				panic("Unicode match is not on rune boundary")
-			}
-			res[j] = mapped
-		}
-	}
-	return rr
+	return wrapped.FindReaderSubmatchIndex(s.utf16Reader(0))
 }
 
 func (r *regexpObject) execResultToArray(target valueString, result []int) Value {
@@ -299,34 +401,41 @@ func (r *regexpObject) execResultToArray(target valueString, result []int) Value
 	return match
 }
 
-func (r *regexpObject) execRegexp(target valueString) (match bool, result []int) {
-	lastIndex := int64(0)
-	if p := r.getStr("lastIndex", nil); p != nil {
-		lastIndex = p.ToInteger()
-		if lastIndex < 0 {
-			lastIndex = 0
+// getLastIndex reads the "lastIndex" property, converts it with toLength and
+// returns it for global or sticky patterns. For any other pattern the result
+// is 0; the property is still read and converted first.
+func (r *regexpObject) getLastIndex() int64 {
+	idx := toLength(r.getStr("lastIndex", nil))
+	if r.pattern.global || r.pattern.sticky {
+		return idx
+	}
+	return 0
+}
+
+func (r *regexpObject) updateLastIndex(index int64, firstResult, lastResult []int) bool {
+	if r.pattern.sticky {
+		if firstResult == nil || int64(firstResult[0]) != index {
+			r.setOwnStr("lastIndex", intToValue(0), true)
+			return false
+		}
+	} else {
+		if firstResult == nil {
+			if r.pattern.global {
+				r.setOwnStr("lastIndex", intToValue(0), true)
+			}
+			return false
 		}
 	}
-	index := lastIndex
-	if !r.global && !r.sticky {
-		index = 0
+
+	if r.pattern.global || r.pattern.sticky {
+		r.setOwnStr("lastIndex", intToValue(int64(lastResult[1])), true)
 	}
+	return true
+}
+
+func (r *regexpObject) execRegexp(target valueString) (match bool, result []int) {
+	index := r.getLastIndex()
 	if index >= 0 && index <= int64(target.length()) {
-		result = r.pattern.FindSubmatchIndex(target, int(index))
-	}
-	if result == nil || r.sticky && result[0] != 0 {
-		r.setOwnStr("lastIndex", intToValue(0), true)
-		return
-	}
-	match = true
-	// We do this shift here because the .FindStringSubmatchIndex above
-	// was done on a local subordinate slice of the string, not the whole string
-	for i := range result {
-		result[i] += int(index)
-	}
-	if r.global || r.sticky {
-		r.setOwnStr("lastIndex", intToValue(int64(result[1])), true)
+		result = r.pattern.findSubmatchIndex(target, int(index))
 	}
+	match = r.updateLastIndex(index, result, result)
 	return
 }
 
@@ -347,14 +456,49 @@ func (r *regexpObject) clone() *Object {
 	r1 := r.val.runtime.newRegexpObject(r.prototype)
 	r1.source = r.source
 	r1.pattern = r.pattern
-	r1.global = r.global
-	r1.ignoreCase = r.ignoreCase
-	r1.multiline = r.multiline
-	r1.sticky = r.sticky
+
 	return r1.val
 }
 
 func (r *regexpObject) init() {
 	r.baseObject.init()
+	r.standard = true
 	r._putProp("lastIndex", intToValue(0), true, false, false)
 }
+
+// setProto delegates to baseObject.setProto and, when the prototype was
+// actually changed, clears the standard flag of this object.
+func (r *regexpObject) setProto(proto *Object, throw bool) bool {
+	if !r.baseObject.setProto(proto, throw) {
+		return false
+	}
+	r.standard = false
+	return true
+}
+
+// defineOwnPropertyStr delegates to baseObject.defineOwnPropertyStr and
+// clears the standard flag once a property has been successfully defined.
+func (r *regexpObject) defineOwnPropertyStr(name unistring.String, desc PropertyDescriptor, throw bool) bool {
+	if !r.baseObject.defineOwnPropertyStr(name, desc, throw) {
+		return false
+	}
+	r.standard = false
+	return true
+}
+
+// deleteStr delegates to baseObject.deleteStr and clears the standard flag
+// when the delete succeeded.
+func (r *regexpObject) deleteStr(name unistring.String, throw bool) bool {
+	if !r.baseObject.deleteStr(name, throw) {
+		return false
+	}
+	r.standard = false
+	return true
+}
+
+// setOwnStr stores the value through baseObject.setOwnStr. A successful
+// write to "exec" on an object that was still marked standard clears that
+// mark; writes to any other property leave it untouched.
+func (r *regexpObject) setOwnStr(name unistring.String, value Value, throw bool) bool {
+	// Decide before delegating, the same way the flag is consulted up front.
+	deopt := r.standard && name == "exec"
+	ok := r.baseObject.setOwnStr(name, value, throw)
+	if deopt && ok {
+		r.standard = false
+	}
+	return ok
+}

+ 160 - 0
regexp_test.go

@@ -190,6 +190,166 @@ func TestEscapeNonASCII(t *testing.T) {
 	testScript1(SCRIPT, valueTrue, t)
 }
 
+// TestRegexpUTF16 runs a script exercising regular expressions without the
+// "u" flag against strings that contain surrogate code units.
+func TestRegexpUTF16(t *testing.T) {
+	const script = `
+	var str = "\uD800\uDC00";
+
+	assert(/\uD800/g.test(str), "#1");
+	assert(/\uD800/.test(str), "#2");
+	assert(/𐀀/.test(str), "#3");
+
+	var re = /\uD800/;
+
+	assert(compareArray(str.replace(re, "X"), ["X", "\uDC00"]), "#4");
+	assert(compareArray(str.split(re), ["", "\uDC00"]), "#5");
+	assert(compareArray("a\uD800\uDC00b".split(/\uD800/g), ["a", "\uDC00b"]), "#6");
+	assert(compareArray("a\uD800\uDC00b".split(/(?:)/g), ["a", "\uD800", "\uDC00", "b"]), "#7");
+
+	re = /(?=)a/; // a hack to use regexp2
+	assert.sameValue(re.exec('\ud83d\ude02a').index, 2, "#8");
+
+	assert.sameValue(/./.exec('\ud83d\ude02')[0], '\ud83d', "#9");
+
+	assert(RegExp("\uD800").test("\uD800"), "#10");
+
+	var cu = 0xD800;
+	var xx = "a\\" + String.fromCharCode(cu);
+	var pattern = eval("/" + xx + "/");
+	assert.sameValue(pattern.source, "a\\\\\\ud800", "Code unit: " + cu.toString(16), "#11");
+	assert(pattern.test("a\\\uD800"), "#12");
+	`
+
+	testScript1(TESTLIB+script, _undefined, t)
+}
+
+// TestRegexpUnicode runs a script exercising regular expressions that carry
+// the "u" flag against strings containing surrogate pairs and lone surrogates.
+func TestRegexpUnicode(t *testing.T) {
+	const script = `
+
+	assert(!/\uD800/u.test("\uD800\uDC00"), "#1");
+	assert(!/\uFFFD/u.test("\uD800\uDC00"), "#2");
+
+	assert(/\uD800\uDC00/u.test("\uD800\uDC00"), "#3");
+
+	assert(/\uD800/u.test("\uD800"), "#4");
+
+	assert(compareArray("a\uD800\uDC00b".split(/\uD800/gu), ["a\uD800\uDC00b"]), "#5");
+
+	assert(compareArray("a\uD800\uDC00b".split(/(?:)/gu), ["a", "𐀀", "b"]), "#6");
+
+	var re = eval('/' + /\ud834\udf06/u.source + '/u');
+	assert(re.test('\ud834\udf06'), "#9");
+
+	/*re = RegExp("\\p{L}", "u");
+	if (!re.test("A")) {
+		throw new Error("Test 9 failed");
+	}*/
+	`
+
+	testScript1(TESTLIB+script, _undefined, t)
+}
+
+// TestConvertRegexpToUnicode checks convertRegexpToUnicode against a list of
+// patterns, stopping at the first mismatch and reporting the value obtained.
+func TestConvertRegexpToUnicode(t *testing.T) {
+	cases := [...]struct{ in, want string }{
+		{`test\uD800\u0C00passed`, `test\uD800\u0C00passed`},
+		{`test\uD800\uDC00passed`, `test𐀀passed`},
+		{`test\u0023passed`, `test\u0023passed`},
+		{`test\u0passed`, `test\u0passed`},
+		{`test\uD800passed`, `test\uD800passed`},
+		{`test\uD800`, `test\uD800`},
+		{`test\uD80`, `test\uD80`},
+		{`\\uD800\uDC00passed`, `\\uD800\uDC00passed`},
+		{`testpassed`, `testpassed`},
+	}
+	for _, c := range cases {
+		if got := convertRegexpToUnicode(c.in); got != c.want {
+			t.Fatal(got)
+		}
+	}
+}
+
+// TestConvertRegexpToUtf16 checks convertRegexpToUtf16 on a supplementary
+// character, both bare and preceded by a backslash.
+func TestConvertRegexpToUtf16(t *testing.T) {
+	cases := [...]struct{ in, want string }{
+		{`𐀀`, `\ud800\udc00`},
+		{`\𐀀`, `\\\ud800\udc00`},
+	}
+	for _, c := range cases {
+		if got := convertRegexpToUtf16(c.in); got != c.want {
+			t.Fatal(got)
+		}
+	}
+}
+
+// TestEscapeInvalidUtf16 checks escapeInvalidUtf16 on ASCII input, on a valid
+// supplementary character and on strings holding a lone 0xD800 code unit,
+// stopping at the first mismatch.
+func TestEscapeInvalidUtf16(t *testing.T) {
+	cases := []struct {
+		in   valueString
+		want string
+	}{
+		{asciiString("test"), "test"},
+		{newStringValue("test\U00010000"), "test\U00010000"},
+		{unicodeStringFromRunes([]rune{'t', 0xD800}), "t\\ud800"},
+		{unicodeStringFromRunes([]rune{'t', 0xD800, 'p'}), "t\\ud800p"},
+		{unicodeStringFromRunes([]rune{0xD800, 'p'}), "\\ud800p"},
+		{unicodeStringFromRunes([]rune{'t', '\\', 0xD800, 'p'}), `t\\\ud800p`},
+	}
+	for _, c := range cases {
+		if got := escapeInvalidUtf16(c.in); got != c.want {
+			t.Fatal(got)
+		}
+	}
+}
+
+// TestRegexpAssertion checks that a global match with a start-of-input
+// assertion yields exactly one element and that this element is "a".
+func TestRegexpAssertion(t *testing.T) {
+	// Both conditions must hold. Joined with "||" the script would also
+	// evaluate to true for a result such as ['a', 'a', 'a'].
+	const SCRIPT = `
+	var res = 'aaa'.match(/^a/g);
+	res.length === 1 && res[0] === 'a';
+	`
+	testScript1(SCRIPT, valueTrue, t)
+}
+
+// TestRegexpUnicodeAdvanceStringIndex replaces RegExp.prototype.exec with a
+// script-level wrapper and then checks split() and exec() of an empty /gu
+// pattern at several lastIndex values around a surrogate pair.
+func TestRegexpUnicodeAdvanceStringIndex(t *testing.T) {
+	const script = `
+	// deoptimise RegExp
+	var origExec = RegExp.prototype.exec;
+	RegExp.prototype.exec = function(s) {
+		return origExec.call(this, s);
+	};
+
+	var re = /(?:)/gu;
+	var str = "a\uD800\uDC00b";
+	assert(compareArray(str.split(re), ["a", "𐀀", "b"]), "#1");
+
+	re.lastIndex = 3;
+	assert.sameValue(re.exec(str).index, 3, "#2");
+
+	re.lastIndex = 2;
+	assert.sameValue(re.exec(str).index, 1, "#3");
+
+	re.lastIndex = 4;
+	assert.sameValue(re.exec(str).index, 4, "#4");
+
+	re.lastIndex = 5;
+	assert.sameValue(re.exec(str), null, "#5");
+	`
+	testScript1(TESTLIB+script, _undefined, t)
+}
+
+// TestRegexpInit checks that lastIndex of a RegExp created by calling the
+// constructor as a function is 0.
+func TestRegexpInit(t *testing.T) {
+	const script = `
+	RegExp(".").lastIndex;
+	`
+	testScript1(script, intToValue(0), t)
+}
+
 func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
 	const SCRIPT = `
 	"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)

+ 52 - 24
runtime.go

@@ -23,6 +23,8 @@ import (
 
 const (
 	sqrt1_2 float64 = math.Sqrt2 / 2
+
+	deoptimiseRegexp = false
 )
 
 var (
@@ -119,13 +121,14 @@ type global struct {
 	thrower         *Object
 	throwerProperty Value
 
-	regexpProtoExec Value
-	weakSetAdder    *Object
-	weakMapAdder    *Object
-	mapAdder        *Object
-	setAdder        *Object
-	arrayValues     *Object
-	arrayToString   *Object
+	stdRegexpProto *guardedObject
+
+	weakSetAdder  *Object
+	weakMapAdder  *Object
+	mapAdder      *Object
+	setAdder      *Object
+	arrayValues   *Object
+	arrayToString *Object
 }
 
 type Flag int
@@ -406,11 +409,30 @@ func newBaseObjectObj(obj, proto *Object, class string) *baseObject {
 	return o
 }
 
+// newGuardedObj allocates a guardedObject whose base object is extensible and
+// carries the given class and prototype. No other field is set here.
+func newGuardedObj(proto *Object, class string) *guardedObject {
+	o := &guardedObject{}
+	base := &o.baseObject
+	base.class = class
+	base.extensible = true
+	base.prototype = proto
+	return o
+}
+
 func (r *Runtime) newBaseObject(proto *Object, class string) (o *baseObject) {
 	v := &Object{runtime: r}
 	return newBaseObjectObj(v, proto, class)
 }
 
+// newGuardedObject creates a guardedObject via newGuardedObj, links it with a
+// fresh Object owned by this runtime and initialises it.
+func (r *Runtime) newGuardedObject(proto *Object, class string) (o *guardedObject) {
+	o = newGuardedObj(proto, class)
+	o.val = &Object{runtime: r, self: o}
+	o.init()
+	return o
+}
+
 func (r *Runtime) NewObject() (v *Object) {
 	return r.newBaseObject(r.global.ObjectPrototype, classObject).val
 }
@@ -665,25 +687,31 @@ func (r *Runtime) builtin_newBoolean(args []Value, proto *Object) *Object {
 }
 
 func (r *Runtime) error_toString(call FunctionCall) Value {
+	var nameStr, msgStr valueString
 	obj := call.This.ToObject(r).self
-	msg := obj.getStr("message", nil)
 	name := obj.getStr("name", nil)
-	var nameStr, msgStr string
-	if name != nil && name != _undefined {
-		nameStr = name.String()
-	}
-	if msg != nil && msg != _undefined {
-		msgStr = msg.String()
+	if name == nil || name == _undefined {
+		nameStr = asciiString("Error")
+	} else {
+		nameStr = name.toString()
 	}
-	if nameStr != "" && msgStr != "" {
-		return newStringValue(fmt.Sprintf("%s: %s", name.String(), msgStr))
+	msg := obj.getStr("message", nil)
+	if msg == nil || msg == _undefined {
+		msgStr = stringEmpty
 	} else {
-		if nameStr != "" {
-			return name.toString()
-		} else {
-			return msg.toString()
-		}
+		msgStr = msg.toString()
+	}
+	if nameStr.length() == 0 {
+		return msgStr
 	}
+	if msgStr.length() == 0 {
+		return nameStr
+	}
+	var sb valueStringBuilder
+	sb.WriteString(nameStr)
+	sb.WriteString(asciiString(": "))
+	sb.WriteString(msgStr)
+	return sb.String()
 }
 
 func (r *Runtime) builtin_Error(args []Value, proto *Object) *Object {
@@ -707,8 +735,8 @@ func (r *Runtime) builtin_thrower(FunctionCall) Value {
 	return nil
 }
 
-func (r *Runtime) eval(src string, direct, strict bool, this Value) Value {
-
+func (r *Runtime) eval(srcVal valueString, direct, strict bool, this Value) Value {
+	src := escapeInvalidUtf16(srcVal)
 	p, err := r.compile("<eval>", src, strict, true)
 	if err != nil {
 		panic(err)
@@ -742,7 +770,7 @@ func (r *Runtime) builtin_eval(call FunctionCall) Value {
 		return _undefined
 	}
 	if str, ok := call.Arguments[0].(valueString); ok {
-		return r.eval(str.String(), false, false, r.globalObject)
+		return r.eval(str, false, false, r.globalObject)
 	}
 	return call.Arguments[0]
 }

+ 6 - 3
string.go

@@ -56,6 +56,9 @@ type valueString interface {
 	substring(start, end int) valueString
 	compareTo(valueString) int
 	reader(start int) io.RuneReader
+	utf16Reader(start int) io.RuneReader
+	runes() []rune
+	utf16Runes() []rune
 	index(valueString, int) int
 	lastIndex(valueString, int) int
 	toLower() valueString
@@ -101,15 +104,15 @@ func stringFromRune(r rune) valueString {
 	} else {
 		sb.Grow(2)
 	}
-	sb.writeRune(r)
-	return sb.string()
+	sb.WriteRune(r)
+	return sb.String()
 }
 
 func (r *Runtime) createStringIterator(s valueString) Value {
 	o := &Object{runtime: r}
 
 	si := &stringIterObject{
-		reader: s.reader(0),
+		reader: &lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)},
 	}
 	si.class = classStringIterator
 	si.val = o

+ 16 - 0
string_ascii.go

@@ -36,6 +36,22 @@ func (s asciiString) reader(start int) io.RuneReader {
 	}
 }
 
+// utf16Reader returns the same reader as reader(start).
+func (s asciiString) utf16Reader(start int) io.RuneReader {
+	rd := s.reader(start)
+	return rd
+}
+
+// runes returns a new slice holding one rune per byte of the string.
+func (s asciiString) runes() []rune {
+	out := make([]rune, len(s))
+	for i := range out {
+		out[i] = rune(s[i])
+	}
+	return out
+}
+
+// utf16Runes returns the same slice contents as runes().
+func (s asciiString) utf16Runes() []rune {
+	out := s.runes()
+	return out
+}
+
 // ss must be trimmed
 func strToInt(ss string) (int64, error) {
 	if ss == "" {

+ 170 - 14
string_unicode.go

@@ -24,8 +24,21 @@ type unicodeRuneReader struct {
 	pos int
 }
 
-type runeReaderReplace struct {
-	wrapped io.RuneReader
+type utf16RuneReader struct {
+	s   unicodeString
+	pos int
+}
+
+// lenientUtf16Decoder reads code units from utf16Reader, combines a first surrogate followed by a second surrogate into one rune and passes any other code unit through unchanged.
+type lenientUtf16Decoder struct {
+	utf16Reader io.RuneReader // source of individual code units
+	prev        rune          // code unit read ahead after a first surrogate that turned out not to be a second surrogate
+	prevSet     bool          // true while prev is waiting to be returned by the next ReadRune
+}
+
+type valueStringBuilder struct {
+	asciiBuilder   strings.Builder
+	unicodeBuilder unicodeStringBuilder
 }
 
 type unicodeStringBuilder struct {
@@ -34,15 +47,48 @@ type unicodeStringBuilder struct {
 }
 
 var (
-	InvalidRuneError = errors.New("Invalid rune")
+	InvalidRuneError = errors.New("invalid rune")
 )
 
-func (rr runeReaderReplace) ReadRune() (r rune, size int, err error) {
-	r, size, err = rr.wrapped.ReadRune()
-	if err == InvalidRuneError {
-		err = nil
-		r = utf8.RuneError
+// ReadRune returns the next code unit of the string as a rune of size 1 and
+// advances the position, or io.EOF once every code unit has been consumed.
+func (rr *utf16RuneReader) ReadRune() (r rune, size int, err error) {
+	if rr.pos >= len(rr.s) {
+		return 0, 0, io.EOF
+	}
+	r = rune(rr.s[rr.pos])
+	rr.pos++
+	return r, 1, nil
+}
+
+// ReadRune returns the next rune from the underlying code unit reader. A
+// first surrogate immediately followed by a second surrogate is decoded into
+// a single rune and the size is increased by one. Any other code unit is
+// returned unchanged; a code unit that was read ahead is kept and returned
+// by the following call.
+func (rr *lenientUtf16Decoder) ReadRune() (r rune, size int, err error) {
+	if !rr.prevSet {
+		if r, size, err = rr.utf16Reader.ReadRune(); err != nil {
+			return
+		}
+	} else {
+		// Replay the code unit read ahead by the previous call.
+		r, size = rr.prev, 1
+		rr.prevSet = false
 	}
+	if !isUTF16FirstSurrogate(r) {
+		return
+	}
+	low, _, lookErr := rr.utf16Reader.ReadRune()
+	switch {
+	case lookErr == io.EOF:
+		// A first surrogate at the very end of the input is returned as is.
+	case lookErr != nil:
+		err = lookErr
+	case isUTF16SecondSurrogate(low):
+		r = utf16.DecodeRune(r, low)
+		size++
+	default:
+		// Not a pair: keep the look-ahead code unit for the next call.
+		rr.prev = low
+		rr.prevSet = true
+	}
+
 	return
 }
 
@@ -92,8 +138,8 @@ func (b *unicodeStringBuilder) ensureStarted(initialSize int) {
 	}
 }
 
-func (b *unicodeStringBuilder) writeString(s valueString) {
-	b.ensureStarted(int(s.length()))
+func (b *unicodeStringBuilder) WriteString(s valueString) {
+	b.ensureStarted(s.length())
 	switch s := s.(type) {
 	case unicodeString:
 		b.buf = append(b.buf, s[1:]...)
@@ -107,7 +153,7 @@ func (b *unicodeStringBuilder) writeString(s valueString) {
 	}
 }
 
-func (b *unicodeStringBuilder) string() valueString {
+func (b *unicodeStringBuilder) String() valueString {
 	if b.unicode {
 		return unicodeString(b.buf)
 	}
@@ -121,11 +167,13 @@ func (b *unicodeStringBuilder) string() valueString {
 	return asciiString(buf)
 }
 
-func (b *unicodeStringBuilder) writeRune(r rune) {
+func (b *unicodeStringBuilder) WriteRune(r rune) {
 	if r <= 0xFFFF {
 		b.ensureStarted(1)
 		b.buf = append(b.buf, uint16(r))
-		b.unicode = r >= utf8.RuneSelf
+		if !b.unicode && r >= utf8.RuneSelf {
+			b.unicode = true
+		}
 	} else {
 		b.ensureStarted(2)
 		first, second := utf16.EncodeRune(r)
@@ -134,19 +182,126 @@ func (b *unicodeStringBuilder) writeRune(r rune) {
 	}
 }
 
-func (b *unicodeStringBuilder) writeASCII(bytes []byte) {
+func (b *unicodeStringBuilder) writeASCIIString(bytes string) {
 	b.ensureStarted(len(bytes))
 	for _, c := range bytes {
 		b.buf = append(b.buf, uint16(c))
 	}
 }
 
+// ascii reports whether the buffer of the unicode builder is still empty.
+func (b *valueStringBuilder) ascii() bool {
+	pending := len(b.unicodeBuilder.buf)
+	return pending == 0
+}
+
+func (b *valueStringBuilder) WriteString(s valueString) {
+	if ascii, ok := s.(asciiString); ok {
+		if b.ascii() {
+			b.asciiBuilder.WriteString(string(ascii))
+		} else {
+			b.unicodeBuilder.writeASCIIString(string(ascii))
+		}
+	} else {
+		b.switchToUnicode(s.length())
+		b.unicodeBuilder.WriteString(s)
+	}
+}
+
+func (b *valueStringBuilder) WriteRune(r rune) {
+	if r < utf8.RuneSelf {
+		if b.ascii() {
+			b.asciiBuilder.WriteByte(byte(r))
+		} else {
+			b.unicodeBuilder.WriteRune(r)
+		}
+	} else {
+		var extraLen int
+		if r <= 0xFFFF {
+			extraLen = 1
+		} else {
+			extraLen = 2
+		}
+		b.switchToUnicode(extraLen)
+		b.unicodeBuilder.WriteRune(r)
+	}
+}
+
+// String returns the accumulated content: the result of the unicode builder
+// once it is in use, otherwise the ASCII content as an asciiString.
+func (b *valueStringBuilder) String() valueString {
+	if !b.ascii() {
+		return b.unicodeBuilder.String()
+	}
+	return asciiString(b.asciiBuilder.String())
+}
+
+// Grow forwards the request to whichever builder is currently in use.
+func (b *valueStringBuilder) Grow(n int) {
+	if !b.ascii() {
+		b.unicodeBuilder.Grow(n)
+		return
+	}
+	b.asciiBuilder.Grow(n)
+}
+
+// switchToUnicode moves the content collected so far from the ASCII builder
+// into the unicode builder, passing the current length plus extraLen to
+// ensureStarted. It does nothing when the unicode builder is already in use.
+func (b *valueStringBuilder) switchToUnicode(extraLen int) {
+	if !b.ascii() {
+		return
+	}
+	b.unicodeBuilder.ensureStarted(b.asciiBuilder.Len() + extraLen)
+	b.unicodeBuilder.writeASCIIString(b.asciiBuilder.String())
+	b.asciiBuilder.Reset()
+}
+
+// WriteSubstring appends the part of source between the indices start
+// (inclusive) and end (exclusive).
+//
+// An asciiString source is written to whichever builder is currently in use.
+// A unicodeString source is scanned first while the builder is still ASCII:
+// if the requested range holds only values below utf8.RuneSelf it is appended
+// byte by byte, otherwise the builder switches to the unicode buffer and the
+// code units are copied.
+func (b *valueStringBuilder) WriteSubstring(source valueString, start int, end int) {
+	if ascii, ok := source.(asciiString); ok {
+		// Handle both builder modes here and return: an asciiString must
+		// never reach the unicodeString type assertion below, which would
+		// panic once the builder has already switched to unicode.
+		if b.ascii() {
+			b.asciiBuilder.WriteString(string(ascii[start:end]))
+		} else {
+			b.unicodeBuilder.writeASCIIString(string(ascii[start:end]))
+		}
+		return
+	}
+	us := source.(unicodeString)
+	if b.ascii() {
+		uc := false
+		for i := start; i < end; i++ {
+			if us.charAt(i) >= utf8.RuneSelf {
+				uc = true
+				break
+			}
+		}
+		if uc {
+			b.switchToUnicode(end - start + 1)
+		} else {
+			b.asciiBuilder.Grow(end - start + 1)
+			for i := start; i < end; i++ {
+				b.asciiBuilder.WriteByte(byte(us.charAt(i)))
+			}
+			return
+		}
+	}
+	// The slice offsets are shifted by one, the same way the other
+	// unicodeString accessors skip the first element.
+	b.unicodeBuilder.buf = append(b.unicodeBuilder.buf, us[start+1:end+1]...)
+	b.unicodeBuilder.unicode = true
+}
+
 func (s unicodeString) reader(start int) io.RuneReader {
 	return &unicodeRuneReader{
 		s: s[start+1:],
 	}
 }
 
+// utf16Reader returns a reader yielding the individual code units of the
+// string starting at index start (the first element of s is skipped).
+func (s unicodeString) utf16Reader(start int) io.RuneReader {
+	rd := new(utf16RuneReader)
+	rd.s = s[start+1:]
+	return rd
+}
+
+// runes decodes the string content (everything after the first element) with
+// utf16.Decode.
+func (s unicodeString) runes() []rune {
+	units := s[1:]
+	return utf16.Decode(units)
+}
+
+// utf16Runes returns one rune per code unit of the string content (everything
+// after the first element), without combining surrogate pairs.
+func (s unicodeString) utf16Runes() []rune {
+	units := s[1:]
+	out := make([]rune, len(units))
+	for i := range units {
+		out[i] = rune(units[i])
+	}
+	return out
+}
+
 func (s unicodeString) ToInteger() int64 {
 	return 0
 }
@@ -274,6 +429,7 @@ func (s unicodeString) String() string {
 }
 
 func (s unicodeString) compareTo(other valueString) int {
+	// TODO handle invalid UTF-16
 	return strings.Compare(s.String(), other.String())
 }
 

+ 38 - 9
tc39_test.go

@@ -37,7 +37,10 @@ var (
 		"test/built-ins/Date/prototype/toISOString/15.9.5.43-0-8.js":  true, // timezone
 		"test/built-ins/Date/prototype/toISOString/15.9.5.43-0-9.js":  true, // timezone
 		"test/built-ins/Date/prototype/toISOString/15.9.5.43-0-10.js": true, // timezone
-		"test/annexB/built-ins/escape/escape-above-astral.js":         true, // \u{xxxxx}
+
+		// \u{xxxxx}
+		"test/annexB/built-ins/escape/escape-above-astral.js": true,
+		"test/built-ins/RegExp/prototype/source/value-u.js":   true,
 
 		// SharedArrayBuffer
 		"test/built-ins/ArrayBuffer/prototype/slice/this-is-sharedarraybuffer.js": true,
@@ -103,13 +106,9 @@ var (
 		"test/language/statements/class/subclass/builtin-objects/ArrayBuffer/regular-subclassing.js":                 true,
 		"test/built-ins/ArrayBuffer/isView/arg-is-typedarray-subclass-instance.js":                                   true,
 		"test/built-ins/ArrayBuffer/isView/arg-is-dataview-subclass-instance.js":                                     true,
-
-		// full unicode regexp flag
-		"test/built-ins/RegExp/prototype/Symbol.match/u-advance-after-empty.js":               true,
-		"test/built-ins/RegExp/prototype/Symbol.match/get-unicode-error.js":                   true,
-		"test/built-ins/RegExp/prototype/Symbol.match/builtin-success-u-return-val-groups.js": true,
-		"test/built-ins/RegExp/prototype/Symbol.match/builtin-infer-unicode.js":               true,
-		"test/built-ins/RegExp/unicode_identity_escape.js":                                    true,
+		"test/language/statements/class/subclass/builtin-objects/RegExp/super-must-be-called.js":                     true,
+		"test/language/statements/class/subclass/builtin-objects/RegExp/regular-subclassing.js":                      true,
+		"test/language/statements/class/subclass/builtin-objects/RegExp/lastIndex.js":                                true,
 
 		// object literals
 		"test/built-ins/Array/from/source-object-iterator-1.js":                   true,
@@ -133,6 +132,33 @@ var (
 		"test/built-ins/String/raw/template-substitutions-are-appended-on-same-index.js": true,
 		"test/built-ins/String/raw/special-characters.js":                                true,
 		"test/built-ins/String/raw/return-the-string-value-from-template.js":             true,
+
+		// restricted unicode regexp syntax
+		"test/built-ins/RegExp/unicode_restricted_quantifiable_assertion.js":         true,
+		"test/built-ins/RegExp/unicode_restricted_octal_escape.js":                   true,
+		"test/built-ins/RegExp/unicode_restricted_incomple_quantifier.js":            true,
+		"test/built-ins/RegExp/unicode_restricted_identity_escape_x.js":              true,
+		"test/built-ins/RegExp/unicode_restricted_identity_escape_u.js":              true,
+		"test/built-ins/RegExp/unicode_restricted_identity_escape_c.js":              true,
+		"test/built-ins/RegExp/unicode_restricted_identity_escape_alpha.js":          true,
+		"test/built-ins/RegExp/unicode_restricted_identity_escape.js":                true,
+		"test/built-ins/RegExp/unicode_restricted_brackets.js":                       true,
+		"test/built-ins/RegExp/unicode_restricted_character_class_escape.js":         true,
+		"test/annexB/built-ins/RegExp/prototype/compile/pattern-string-invalid-u.js": true,
+
+		// Because goja parser works in UTF-8 it is not possible to pass strings containing invalid UTF-16 code points.
+		// This is mitigated by escaping them as \uXXXX, however because of this the RegExp source becomes
+		// `\uXXXX` instead of `<the actual UTF-16 code point of XXXX>`.
+		// The resulting RegExp will work exactly the same, but it causes these two tests to fail.
+		"test/annexB/built-ins/RegExp/RegExp-leading-escape-BMP.js":  true,
+		"test/annexB/built-ins/RegExp/RegExp-trailing-escape-BMP.js": true,
+
+		// Looks like a bug in regexp2: decimal escapes that do not represent a capture are simply ignored instead
+		// of being treated as a character with the specified code.
+		"test/annexB/built-ins/RegExp/RegExp-decimal-escape-not-capturing.js": true,
+
+		// Promise
+		"test/built-ins/Symbol/species/builtin-getter-name.js": true,
 	}
 
 	featuresBlackList = []string{
@@ -155,7 +181,7 @@ var (
 		"20.2",
 		"20.3",
 		"21.1",
-		"21.2.5.6",
+		"21.2",
 		"22.1",
 		"22.2",
 		"23.1",
@@ -181,6 +207,7 @@ var (
 		"sec-math",
 		"sec-arraybuffer-length",
 		"sec-arraybuffer",
+		"sec-regexp",
 	}
 )
 
@@ -293,6 +320,7 @@ func (ctx *tc39TestCtx) runTC39Test(name, src string, meta *tc39Meta, t testing.
 	_262.Set("createRealm", ctx.throwIgnorableTestError)
 	vm.Set("$262", _262)
 	vm.Set("IgnorableTestError", ignorableTestError)
+	vm.Set("print", t.Log)
 	vm.RunProgram(sabStub)
 	err, early := ctx.runTC39Script(name, src, meta.Includes, vm)
 
@@ -574,6 +602,7 @@ func TestTC39(t *testing.T) {
 		ctx.runTC39Tests("test/annexB/built-ins/String/prototype/substr")
 		ctx.runTC39Tests("test/annexB/built-ins/escape")
 		ctx.runTC39Tests("test/annexB/built-ins/unescape")
+		ctx.runTC39Tests("test/annexB/built-ins/RegExp")
 
 		ctx.flush()
 	})

+ 3 - 5
vm.go

@@ -1272,14 +1272,12 @@ func (n *newArraySparse) exec(vm *vm) {
 }
 
 type newRegexp struct {
-	pattern regexpPattern
+	pattern *regexpPattern
 	src     valueString
-
-	global, ignoreCase, multiline, sticky bool
 }
 
 func (n *newRegexp) exec(vm *vm) {
-	vm.push(vm.r.newRegExpp(n.pattern, n.src, n.global, n.ignoreCase, n.multiline, n.sticky, vm.r.global.RegExpPrototype))
+	vm.push(vm.r.newRegExpp(n.pattern, n.src, vm.r.global.RegExpPrototype))
 	vm.pc++
 }
 
@@ -1723,7 +1721,7 @@ func (vm *vm) callEval(n int, strict bool) {
 				} else {
 					this = vm.r.globalObject
 				}
-				ret := vm.r.eval(src.String(), true, strict, this)
+				ret := vm.r.eval(src, true, strict, this)
 				vm.stack[vm.sp-n-2] = ret
 			} else {
 				vm.stack[vm.sp-n-2] = srcVal

部分文件因文件數量過多而無法顯示