Browse Source

Optimized CPU and memory usage when importing strings and using JSON.{parse,stringify}().

Dmitry Panov 3 years ago
parent
commit
c5c52bd8d6
14 changed files with 754 additions and 287 deletions
  1. 4 3
      builtin_global.go
  2. 17 4
      builtin_json.go
  3. 6 3
      builtin_regexp.go
  4. 71 90
      builtin_string.go
  5. 23 0
      builtin_string_test.go
  6. 19 24
      regexp.go
  7. 12 1
      runtime.go
  8. 27 38
      string.go
  9. 27 25
      string_ascii.go
  10. 284 0
      string_imported.go
  11. 136 1
      string_test.go
  12. 76 70
      string_unicode.go
  13. 35 24
      unistring/string.go
  14. 17 4
      vm.go

+ 4 - 3
builtin_global.go

@@ -62,7 +62,7 @@ func (r *Runtime) builtin_isFinite(call FunctionCall) Value {
 }
 
 func (r *Runtime) _encode(uriString valueString, unescaped *[256]bool) valueString {
-	reader := uriString.reader(0)
+	reader := uriString.reader()
 	utf8Buf := make([]byte, utf8.UTFMax)
 	needed := false
 	l := 0
@@ -92,7 +92,7 @@ func (r *Runtime) _encode(uriString valueString, unescaped *[256]bool) valueStri
 
 	buf := make([]byte, l)
 	i := 0
-	reader = uriString.reader(0)
+	reader = uriString.reader()
 	for {
 		rn, _, err := reader.ReadRune()
 		if err == io.EOF {
@@ -263,9 +263,10 @@ func (r *Runtime) builtin_escape(call FunctionCall) Value {
 func (r *Runtime) builtin_unescape(call FunctionCall) Value {
 	s := call.Argument(0).toString()
 	l := s.length()
-	_, unicode := s.(unicodeString)
 	var asciiBuf []byte
 	var unicodeBuf []uint16
+	_, u := devirtualizeString(s)
+	unicode := u != nil
 	if unicode {
 		unicodeBuf = make([]uint16, 1, l+1)
 		unicodeBuf[0] = unistring.BOM

+ 17 - 4
builtin_json.go

@@ -9,6 +9,7 @@ import (
 	"strconv"
 	"strings"
 	"unicode/utf16"
+	"unicode/utf8"
 
 	"github.com/dop251/goja/unistring"
 )
@@ -16,7 +17,7 @@ import (
 const hex = "0123456789abcdef"
 
 func (r *Runtime) builtinJSON_parse(call FunctionCall) Value {
-	d := json.NewDecoder(bytes.NewBufferString(call.Argument(0).toString().String()))
+	d := json.NewDecoder(strings.NewReader(call.Argument(0).toString().String()))
 
 	value, err := r.builtinJSON_decodeValue(d)
 	if err != nil {
@@ -171,11 +172,13 @@ type _builtinJSON_stringifyContext struct {
 	replacerFunction func(FunctionCall) Value
 	gap, indent      string
 	buf              bytes.Buffer
+	allAscii         bool
 }
 
 func (r *Runtime) builtinJSON_stringify(call FunctionCall) Value {
 	ctx := _builtinJSON_stringifyContext{
-		r: r,
+		r:        r,
+		allAscii: true,
 	}
 
 	replacer, _ := call.Argument(1).(*Object)
@@ -254,7 +257,13 @@ func (r *Runtime) builtinJSON_stringify(call FunctionCall) Value {
 	}
 
 	if ctx.do(call.Argument(0)) {
-		return newStringValue(ctx.buf.String())
+		if ctx.allAscii {
+			return asciiString(ctx.buf.String())
+		} else {
+			return &importedString{
+				s: ctx.buf.String(),
+			}
+		}
 	}
 	return _undefined
 }
@@ -306,6 +315,7 @@ func (ctx *_builtinJSON_stringifyContext) str(key Value, holder *Object) bool {
 					panic(err)
 				}
 				ctx.buf.Write(b)
+				ctx.allAscii = false
 				return true
 			} else {
 				switch o1.className() {
@@ -464,7 +474,7 @@ func (ctx *_builtinJSON_stringifyContext) jo(object *Object) {
 
 func (ctx *_builtinJSON_stringifyContext) quote(str valueString) {
 	ctx.buf.WriteByte('"')
-	reader := &lenientUtf16Decoder{utf16Reader: str.utf16Reader(0)}
+	reader := &lenientUtf16Decoder{utf16Reader: str.utf16Reader()}
 	for {
 		r, _, err := reader.ReadRune()
 		if err != nil {
@@ -498,6 +508,9 @@ func (ctx *_builtinJSON_stringifyContext) quote(str valueString) {
 					ctx.buf.WriteByte(hex[r&0xF])
 				} else {
 					ctx.buf.WriteRune(r)
+					if ctx.allAscii && r >= utf8.RuneSelf {
+						ctx.allAscii = false
+					}
 				}
 			}
 		}

+ 6 - 3
builtin_regexp.go

@@ -125,11 +125,14 @@ func convertRegexpToUtf16(patternStr string) string {
 
 // convert any broken UTF-16 surrogate pairs to \uXXXX
 func escapeInvalidUtf16(s valueString) string {
+	if imported, ok := s.(*importedString); ok {
+		return imported.s
+	}
 	if ascii, ok := s.(asciiString); ok {
 		return ascii.String()
 	}
 	var sb strings.Builder
-	rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}
+	rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader()}
 	pos := 0
 	utf8Size := 0
 	var utf8Buf [utf8.UTFMax]byte
@@ -141,7 +144,7 @@ func escapeInvalidUtf16(s valueString) string {
 		if utf16.IsSurrogate(c) {
 			if sb.Len() == 0 {
 				sb.Grow(utf8Size + 7)
-				hrd := s.reader(0)
+				hrd := s.reader()
 				var c rune
 				for p := 0; p < pos; {
 					var size int
@@ -453,7 +456,7 @@ func (r *regexpObject) writeEscapedSource(sb *valueStringBuilder) bool {
 	}
 	pos := 0
 	lastPos := 0
-	rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader(0)}
+	rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader()}
 L:
 	for {
 		c, size, err := rd.ReadRune()

+ 71 - 90
builtin_string.go

@@ -216,16 +216,27 @@ func (r *Runtime) stringproto_codePointAt(call FunctionCall) Value {
 func (r *Runtime) stringproto_concat(call FunctionCall) Value {
 	r.checkObjectCoercible(call.This)
 	strs := make([]valueString, len(call.Arguments)+1)
-	strs[0] = call.This.toString()
-	_, allAscii := strs[0].(asciiString)
-	totalLen := strs[0].length()
+	a, u := devirtualizeString(call.This.toString())
+	allAscii := true
+	totalLen := 0
+	if u == nil {
+		strs[0] = a
+		totalLen = len(a)
+	} else {
+		strs[0] = u
+		totalLen = u.length()
+		allAscii = false
+	}
 	for i, arg := range call.Arguments {
-		s := arg.toString()
-		if allAscii {
-			_, allAscii = s.(asciiString)
+		a, u := devirtualizeString(arg.toString())
+		if u != nil {
+			allAscii = false
+			totalLen += u.length()
+			strs[i+1] = u
+		} else {
+			totalLen += a.length()
+			strs[i+1] = a
 		}
-		strs[i+1] = s
-		totalLen += s.length()
 	}
 
 	if allAscii {
@@ -441,15 +452,23 @@ func (r *Runtime) stringproto_normalize(call FunctionCall) Value {
 		panic(r.newError(r.global.RangeError, "The normalization form should be one of NFC, NFD, NFKC, NFKD"))
 	}
 
-	if s, ok := s.(unicodeString); ok {
+	switch s := s.(type) {
+	case asciiString:
+		return s
+	case unicodeString:
 		ss := s.String()
 		return newStringValue(f.String(ss))
+	case *importedString:
+		if s.scanned && s.u == nil {
+			return asciiString(s.s)
+		}
+		return newStringValue(f.String(s.s))
+	default:
+		panic(unknownStringTypeErr(s))
 	}
-
-	return s
 }
 
-func (r *Runtime) stringproto_padEnd(call FunctionCall) Value {
+func (r *Runtime) _stringPad(call FunctionCall, start bool) Value {
 	r.checkObjectCoercible(call.This)
 	s := call.This.toString()
 	maxLength := toLength(call.Argument(0))
@@ -457,38 +476,45 @@ func (r *Runtime) stringproto_padEnd(call FunctionCall) Value {
 	if maxLength <= stringLength {
 		return s
 	}
+	strAscii, strUnicode := devirtualizeString(s)
 	var filler valueString
-	var fillerASCII bool
+	var fillerAscii asciiString
+	var fillerUnicode unicodeString
 	if fillString := call.Argument(1); fillString != _undefined {
 		filler = fillString.toString()
 		if filler.length() == 0 {
 			return s
 		}
-		_, fillerASCII = filler.(asciiString)
+		fillerAscii, fillerUnicode = devirtualizeString(filler)
 	} else {
-		filler = asciiString(" ")
-		fillerASCII = true
+		fillerAscii = " "
+		filler = fillerAscii
 	}
 	remaining := toIntStrict(maxLength - stringLength)
-	_, stringASCII := s.(asciiString)
-	if fillerASCII && stringASCII {
-		fl := filler.length()
+	if fillerUnicode == nil && strUnicode == nil {
+		fl := fillerAscii.length()
 		var sb strings.Builder
 		sb.Grow(toIntStrict(maxLength))
-		sb.WriteString(s.String())
-		fs := filler.String()
+		if !start {
+			sb.WriteString(string(strAscii))
+		}
 		for remaining >= fl {
-			sb.WriteString(fs)
+			sb.WriteString(string(fillerAscii))
 			remaining -= fl
 		}
 		if remaining > 0 {
-			sb.WriteString(fs[:remaining])
+			sb.WriteString(string(fillerAscii[:remaining]))
+		}
+		if start {
+			sb.WriteString(string(strAscii))
 		}
 		return asciiString(sb.String())
 	}
 	var sb unicodeStringBuilder
 	sb.Grow(toIntStrict(maxLength))
-	sb.WriteString(s)
+	if !start {
+		sb.WriteString(s)
+	}
 	fl := filler.length()
 	for remaining >= fl {
 		sb.WriteString(filler)
@@ -497,60 +523,19 @@ func (r *Runtime) stringproto_padEnd(call FunctionCall) Value {
 	if remaining > 0 {
 		sb.WriteString(filler.substring(0, remaining))
 	}
+	if start {
+		sb.WriteString(s)
+	}
 
 	return sb.String()
 }
 
-func (r *Runtime) stringproto_padStart(call FunctionCall) Value {
-	r.checkObjectCoercible(call.This)
-	s := call.This.toString()
-	maxLength := toLength(call.Argument(0))
-	stringLength := int64(s.length())
-	if maxLength <= stringLength {
-		return s
-	}
-	var filler valueString
-	var fillerASCII bool
-	if fillString := call.Argument(1); fillString != _undefined {
-		filler = fillString.toString()
-		if filler.length() == 0 {
-			return s
-		}
-		_, fillerASCII = filler.(asciiString)
-	} else {
-		filler = asciiString(" ")
-		fillerASCII = true
-	}
-	remaining := toIntStrict(maxLength - stringLength)
-	_, stringASCII := s.(asciiString)
-	if fillerASCII && stringASCII {
-		fl := filler.length()
-		var sb strings.Builder
-		sb.Grow(toIntStrict(maxLength))
-		fs := filler.String()
-		for remaining >= fl {
-			sb.WriteString(fs)
-			remaining -= fl
-		}
-		if remaining > 0 {
-			sb.WriteString(fs[:remaining])
-		}
-		sb.WriteString(s.String())
-		return asciiString(sb.String())
-	}
-	var sb unicodeStringBuilder
-	sb.Grow(toIntStrict(maxLength))
-	fl := filler.length()
-	for remaining >= fl {
-		sb.WriteString(filler)
-		remaining -= fl
-	}
-	if remaining > 0 {
-		sb.WriteString(filler.substring(0, remaining))
-	}
-	sb.WriteString(s)
+func (r *Runtime) stringproto_padEnd(call FunctionCall) Value {
+	return r._stringPad(call, false)
+}
 
-	return sb.String()
+func (r *Runtime) stringproto_padStart(call FunctionCall) Value {
+	return r._stringPad(call, true)
 }
 
 func (r *Runtime) stringproto_repeat(call FunctionCall) Value {
@@ -568,19 +553,20 @@ func (r *Runtime) stringproto_repeat(call FunctionCall) Value {
 		return stringEmpty
 	}
 	num := toIntStrict(numInt)
-	if s, ok := s.(asciiString); ok {
+	a, u := devirtualizeString(s)
+	if u == nil {
 		var sb strings.Builder
-		sb.Grow(len(s) * num)
+		sb.Grow(len(a) * num)
 		for i := 0; i < num; i++ {
-			sb.WriteString(string(s))
+			sb.WriteString(string(a))
 		}
 		return asciiString(sb.String())
 	}
 
 	var sb unicodeStringBuilder
-	sb.Grow(s.length() * num)
+	sb.Grow(u.length() * num)
 	for i := 0; i < num; i++ {
-		sb.WriteString(s)
+		sb.writeUnicodeString(u)
 	}
 	return sb.String()
 }
@@ -601,12 +587,7 @@ func stringReplace(s valueString, found [][]int, newstring valueString, rcall fu
 		return s
 	}
 
-	var str string
-	var isASCII bool
-	if astr, ok := s.(asciiString); ok {
-		str = string(astr)
-		isASCII = true
-	}
+	a, u := devirtualizeString(s)
 
 	var buf valueStringBuilder
 
@@ -622,10 +603,10 @@ func stringReplace(s valueString, found [][]int, newstring valueString, rcall fu
 			for index := 0; index < matchCount; index++ {
 				offset := 2 * index
 				if item[offset] != -1 {
-					if isASCII {
-						argumentList[index] = asciiString(str[item[offset]:item[offset+1]])
+					if u == nil {
+						argumentList[index] = a[item[offset]:item[offset+1]]
 					} else {
-						argumentList[index] = s.substring(item[offset], item[offset+1])
+						argumentList[index] = u.substring(item[offset], item[offset+1])
 					}
 				} else {
 					argumentList[index] = _undefined
@@ -648,10 +629,10 @@ func stringReplace(s valueString, found [][]int, newstring valueString, rcall fu
 			matchCount := len(item) / 2
 			writeSubstitution(s, item[0], matchCount, func(idx int) valueString {
 				if item[idx*2] != -1 {
-					if isASCII {
-						return asciiString(str[item[idx*2]:item[idx*2+1]])
+					if u == nil {
+						return a[item[idx*2]:item[idx*2+1]]
 					}
-					return s.substring(item[idx*2], item[idx*2+1])
+					return u.substring(item[idx*2], item[idx*2+1])
 				}
 				return stringEmpty
 			}, newstring, &buf)

+ 23 - 0
builtin_string_test.go

@@ -252,4 +252,27 @@ func TestValueStringBuilder(t *testing.T) {
 			t.Fatal(res)
 		}
 	})
+
+	t.Run("concat_ASCII_importedASCII", func(t *testing.T) {
+		t.Parallel()
+		var sb valueStringBuilder
+		sb.WriteString(asciiString("ascii"))
+		sb.WriteString(&importedString{s: " imported_ascii1234567890"})
+		s := sb.String()
+		if res, ok := s.(asciiString); !ok || res != "ascii imported_ascii1234567890" {
+			t.Fatal(s)
+		}
+	})
+
+	t.Run("concat_ASCII_importedUnicode", func(t *testing.T) {
+		t.Parallel()
+		var sb valueStringBuilder
+		sb.WriteString(asciiString("ascii"))
+		sb.WriteString(&importedString{s: " imported_юникод"})
+		s := sb.String()
+		if res, ok := s.(unicodeString); !ok || !res.SameAs(newStringValue("ascii imported_юникод")) {
+			t.Fatal(s)
+		}
+	})
+
 }

+ 19 - 24
regexp.go

@@ -95,9 +95,9 @@ func (p *regexpPattern) createRegexp2() {
 	p.regexp2Wrapper = rx
 }
 
-func buildUTF8PosMap(s valueString) (positionMap, string) {
+func buildUTF8PosMap(s unicodeString) (positionMap, string) {
 	pm := make(positionMap, 0, s.length())
-	rd := s.reader(0)
+	rd := s.reader()
 	sPos, utf8Pos := 0, 0
 	var sb strings.Builder
 	for {
@@ -136,11 +136,12 @@ func (p *regexpPattern) findAllSubmatchIndex(s valueString, start int, limit int
 		return p.regexp2Wrapper.findAllSubmatchIndex(s, start, limit, sticky, p.unicode)
 	}
 	if start == 0 {
-		if s, ok := s.(asciiString); ok {
-			return p.regexpWrapper.findAllSubmatchIndex(s.String(), limit, sticky)
+		a, u := devirtualizeString(s)
+		if u == nil {
+			return p.regexpWrapper.findAllSubmatchIndex(string(a), limit, sticky)
 		}
 		if limit == 1 {
-			result := p.regexpWrapper.findSubmatchIndexUnicode(s.(unicodeString), p.unicode)
+			result := p.regexpWrapper.findSubmatchIndexUnicode(u, p.unicode)
 			if result == nil {
 				return nil
 			}
@@ -150,7 +151,7 @@ func (p *regexpPattern) findAllSubmatchIndex(s valueString, start int, limit int
 		// input.
 		if p.unicode {
 			// Try to convert s to UTF-8. If it does not contain any invalid UTF-16 we can do the matching in UTF-8.
-			pm, str := buildUTF8PosMap(s)
+			pm, str := buildUTF8PosMap(u)
 			if pm != nil {
 				res := p.regexpWrapper.findAllSubmatchIndex(str, limit, sticky)
 				for _, result := range res {
@@ -262,7 +263,7 @@ func (r *regexp2Wrapper) findUnicodeCached(s valueString, start int, doCache boo
 		runes, posMap = cache.runes, cache.posMap
 		mappedStart, splitPair = posMapReverseLookup(posMap, start)
 	} else {
-		posMap, runes, mappedStart, splitPair = buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), start)
+		posMap, runes, mappedStart, splitPair = buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader()}, s.length(), start)
 		cache = nil
 	}
 	if splitPair {
@@ -436,17 +437,14 @@ func (r *regexp2Wrapper) findAllSubmatchIndexUnicode(s unicodeString, start, lim
 }
 
 func (r *regexp2Wrapper) findAllSubmatchIndex(s valueString, start, limit int, sticky, fullUnicode bool) [][]int {
-	switch s := s.(type) {
-	case asciiString:
-		return r.findAllSubmatchIndexUTF16(s, start, limit, sticky)
-	case unicodeString:
+	a, u := devirtualizeString(s)
+	if u != nil {
 		if fullUnicode {
-			return r.findAllSubmatchIndexUnicode(s, start, limit, sticky)
+			return r.findAllSubmatchIndexUnicode(u, start, limit, sticky)
 		}
-		return r.findAllSubmatchIndexUTF16(s, start, limit, sticky)
-	default:
-		panic("Unsupported string type")
+		return r.findAllSubmatchIndexUTF16(u, start, limit, sticky)
 	}
+	return r.findAllSubmatchIndexUTF16(a, start, limit, sticky)
 }
 
 func (r *regexp2Wrapper) clone() *regexp2Wrapper {
@@ -473,14 +471,11 @@ func (r *regexpWrapper) findAllSubmatchIndex(s string, limit int, sticky bool) (
 }
 
 func (r *regexpWrapper) findSubmatchIndex(s valueString, fullUnicode bool) []int {
-	switch s := s.(type) {
-	case asciiString:
-		return r.findSubmatchIndexASCII(string(s))
-	case unicodeString:
-		return r.findSubmatchIndexUnicode(s, fullUnicode)
-	default:
-		panic("Unsupported string type")
+	a, u := devirtualizeString(s)
+	if u != nil {
+		return r.findSubmatchIndexUnicode(u, fullUnicode)
 	}
+	return r.findSubmatchIndexASCII(string(a))
 }
 
 func (r *regexpWrapper) findSubmatchIndexASCII(s string) []int {
@@ -491,7 +486,7 @@ func (r *regexpWrapper) findSubmatchIndexASCII(s string) []int {
 func (r *regexpWrapper) findSubmatchIndexUnicode(s unicodeString, fullUnicode bool) (result []int) {
 	wrapped := (*regexp.Regexp)(r)
 	if fullUnicode {
-		posMap, runes, _, _ := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}, s.length(), 0)
+		posMap, runes, _, _ := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader()}, s.length(), 0)
 		res := wrapped.FindReaderSubmatchIndex(&arrayRuneReader{runes: runes})
 		for i, item := range res {
 			if item >= 0 {
@@ -500,7 +495,7 @@ func (r *regexpWrapper) findSubmatchIndexUnicode(s unicodeString, fullUnicode bo
 		}
 		return res
 	}
-	return wrapped.FindReaderSubmatchIndex(s.utf16Reader(0))
+	return wrapped.FindReaderSubmatchIndex(s.utf16Reader())
 }
 
 func (r *regexpWrapper) clone() *regexpWrapper {

+ 12 - 1
runtime.go

@@ -1566,6 +1566,10 @@ UTF-8) conversion from JS to Go may be lossy. In particular, code points that ca
 (0xD800-0xDFFF) cannot be represented in UTF-8 unless they form a valid surrogate pair and are replaced with
 utf8.RuneError.
 
+The string value must be valid UTF-8. If it is not, invalid characters are replaced with utf8.RuneError, but
+the behaviour of a subsequent Export() is unspecified (it may return the original value, or a value with the
+invalid characters replaced).
+
 Nil
 
 Nil is converted to null.
@@ -1728,7 +1732,14 @@ func (r *Runtime) ToValue(i interface{}) Value {
 	case Value:
 		return i
 	case string:
-		return newStringValue(i)
+		// Scan short strings right away; defer the scan for longer ones (this replaces newStringValue(i)).
+		if len(i) <= 16 {
+			if u := unistring.Scan(i); u != nil {
+				return &importedString{s: i, u: u, scanned: true}
+			}
+			return asciiString(i)
+		}
+		return &importedString{s: i}
 	case bool:
 		if i {
 			return valueTrue

+ 27 - 38
string.go

@@ -4,7 +4,6 @@ import (
 	"io"
 	"strconv"
 	"strings"
-	"unicode/utf16"
 	"unicode/utf8"
 
 	"github.com/dop251/goja/unistring"
@@ -53,8 +52,8 @@ type valueString interface {
 	concat(valueString) valueString
 	substring(start, end int) valueString
 	compareTo(valueString) int
-	reader(start int) io.RuneReader
-	utf16Reader(start int) io.RuneReader
+	reader() io.RuneReader
+	utf16Reader() io.RuneReader
 	utf16Runes() []rune
 	index(valueString, int) int
 	lastIndex(valueString, int) int
@@ -91,16 +90,10 @@ func (si *stringIterObject) next() Value {
 func stringFromRune(r rune) valueString {
 	if r < utf8.RuneSelf {
 		var sb strings.Builder
-		sb.Grow(1)
 		sb.WriteByte(byte(r))
 		return asciiString(sb.String())
 	}
 	var sb unicodeStringBuilder
-	if r <= 0xFFFF {
-		sb.Grow(1)
-	} else {
-		sb.Grow(2)
-	}
 	sb.WriteRune(r)
 	return sb.String()
 }
@@ -109,7 +102,7 @@ func (r *Runtime) createStringIterator(s valueString) Value {
 	o := &Object{runtime: r}
 
 	si := &stringIterObject{
-		reader: &lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)},
+		reader: &lenientUtf16Decoder{utf16Reader: s.utf16Reader()},
 	}
 	si.class = classStringIterator
 	si.val = o
@@ -129,35 +122,10 @@ type stringObject struct {
 }
 
 func newStringValue(s string) valueString {
-	utf16Size := 0
-	ascii := true
-	for _, chr := range s {
-		utf16Size++
-		if chr >= utf8.RuneSelf {
-			ascii = false
-			if chr > 0xFFFF {
-				utf16Size++
-			}
-		}
-	}
-	if ascii {
-		return asciiString(s)
-	}
-	buf := make([]uint16, utf16Size+1)
-	buf[0] = unistring.BOM
-	c := 1
-	for _, chr := range s {
-		if chr <= 0xFFFF {
-			buf[c] = uint16(chr)
-		} else {
-			first, second := utf16.EncodeRune(chr)
-			buf[c] = uint16(first)
-			c++
-			buf[c] = uint16(second)
-		}
-		c++
+	if u := unistring.Scan(s); u != nil {
+		return unicodeString(u)
 	}
-	return unicodeString(buf)
+	return asciiString(s)
 }
 
 func stringValueFromRaw(raw unistring.String) valueString {
@@ -338,3 +306,24 @@ func (s *stringObject) hasOwnPropertyIdx(idx valueInt) bool {
 	}
 	return s.baseObject.hasOwnPropertyStr(idx.string())
 }
+
+// devirtualizeString resolves s to one of the two concrete string
+// representations. When the returned unicodeString is nil the value is the
+// returned asciiString; otherwise the unicodeString carries the value and the
+// asciiString is empty. An *importedString is scanned first if that has not
+// happened yet. Any other dynamic type panics with unknownStringTypeErr.
+func devirtualizeString(s valueString) (asciiString, unicodeString) {
+	if imported, ok := s.(*importedString); ok {
+		imported.ensureScanned()
+		if imported.u == nil {
+			return asciiString(imported.s), nil
+		}
+		return "", imported.u
+	}
+	if ascii, ok := s.(asciiString); ok {
+		return ascii, nil
+	}
+	if uni, ok := s.(unicodeString); ok {
+		return "", uni
+	}
+	panic(unknownStringTypeErr(s))
+}
+
+// unknownStringTypeErr builds the value to panic with when v has a dynamic
+// type that the calling string code does not handle.
+func unknownStringTypeErr(v Value) interface{} {
+	const format = "Internal bug: unknown string type: %T"
+	return newTypeError(format, v)
+}

+ 27 - 25
string_ascii.go

@@ -1,7 +1,6 @@
 package goja
 
 import (
-	"fmt"
 	"hash/maphash"
 	"io"
 	"math"
@@ -30,14 +29,14 @@ func (rr *asciiRuneReader) ReadRune() (r rune, size int, err error) {
 	return
 }
 
-func (s asciiString) reader(start int) io.RuneReader {
+func (s asciiString) reader() io.RuneReader {
 	return &asciiRuneReader{
-		s: s[start:],
+		s: s,
 	}
 }
 
-func (s asciiString) utf16Reader(start int) io.RuneReader {
-	return s.reader(start)
+func (s asciiString) utf16Reader() io.RuneReader {
+	return s.reader()
 }
 
 func (s asciiString) utf16Runes() []rune {
@@ -180,15 +179,12 @@ func (s asciiString) ToObject(r *Runtime) *Object {
 }
 
 func (s asciiString) SameAs(other Value) bool {
-	if otherStr, ok := other.(asciiString); ok {
-		return s == otherStr
-	}
-	return false
+	return s.StrictEquals(other)
 }
 
 func (s asciiString) Equals(other Value) bool {
-	if o, ok := other.(asciiString); ok {
-		return s == o
+	if s.StrictEquals(other) {
+		return true
 	}
 
 	if o, ok := other.(valueInt); ok {
@@ -219,6 +215,11 @@ func (s asciiString) StrictEquals(other Value) bool {
 	if otherStr, ok := other.(asciiString); ok {
 		return s == otherStr
 	}
+	if otherStr, ok := other.(*importedString); ok {
+		if otherStr.u == nil {
+			return string(s) == otherStr.s
+		}
+	}
 	return false
 }
 
@@ -245,20 +246,17 @@ func (s asciiString) length() int {
 }
 
 func (s asciiString) concat(other valueString) valueString {
-	switch other := other.(type) {
-	case asciiString:
-		return asciiString(s + other)
-	case unicodeString:
-		b := make([]uint16, len(s)+len(other))
+	a, u := devirtualizeString(other)
+	if u != nil {
+		b := make([]uint16, len(s)+len(u))
 		b[0] = unistring.BOM
 		for i := 0; i < len(s); i++ {
 			b[i+1] = uint16(s[i])
 		}
-		copy(b[len(s)+1:], other[1:])
+		copy(b[len(s)+1:], u[1:])
 		return unicodeString(b)
-	default:
-		panic(fmt.Errorf("unknown string type: %T", other))
 	}
+	return s + a
 }
 
 func (s asciiString) substring(start, end int) valueString {
@@ -271,14 +269,17 @@ func (s asciiString) compareTo(other valueString) int {
 		return strings.Compare(string(s), string(other))
 	case unicodeString:
 		return strings.Compare(string(s), other.String())
+	case *importedString:
+		return strings.Compare(string(s), other.s)
 	default:
-		panic(fmt.Errorf("unknown string type: %T", other))
+		panic(newTypeError("Internal bug: unknown string type: %T", other))
 	}
 }
 
 func (s asciiString) index(substr valueString, start int) int {
-	if substr, ok := substr.(asciiString); ok {
-		p := strings.Index(string(s[start:]), string(substr))
+	a, u := devirtualizeString(substr)
+	if u == nil {
+		p := strings.Index(string(s[start:]), string(a))
 		if p >= 0 {
 			return p + start
 		}
@@ -287,15 +288,16 @@ func (s asciiString) index(substr valueString, start int) int {
 }
 
 func (s asciiString) lastIndex(substr valueString, pos int) int {
-	if substr, ok := substr.(asciiString); ok {
-		end := pos + len(substr)
+	a, u := devirtualizeString(substr)
+	if u == nil {
+		end := pos + len(a)
 		var ss string
 		if end > len(s) {
 			ss = string(s)
 		} else {
 			ss = string(s[:end])
 		}
-		return strings.LastIndex(ss, string(substr))
+		return strings.LastIndex(ss, string(a))
 	}
 	return -1
 }

+ 284 - 0
string_imported.go

@@ -0,0 +1,284 @@
+package goja
+
+import (
+	"hash/maphash"
+	"io"
+	"math"
+	"reflect"
+	"strings"
+	"unicode/utf16"
+	"unicode/utf8"
+
+	"github.com/dop251/goja/parser"
+	"github.com/dop251/goja/unistring"
+
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
+)
+
+// importedString is a string value that originated in Go. Scanning it for
+// unicode characters and converting it to unicodeString is put off until an
+// operation actually needs the result, so strings that are merely passed
+// through are never scanned, which saves CPU and memory.
+// At present an importedString is created in two places: Runtime.ToValue()
+// (unscanned for strings longer than 16 bytes; shorter strings are scanned
+// immediately and wrapped only when the scan yields a unicodeString), and
+// JSON.stringify() when its result may contain unicode characters. More cases
+// could be added in the future.
+type importedString struct {
+	s       string        // the Go string exactly as it was imported
+	u       unicodeString // result of unistring.Scan(s); only meaningful once scanned is true
+	scanned bool          // whether the scan has been performed
+}
+
+// scan runs unistring.Scan over the Go string, stores the result in u and
+// marks the value as scanned.
+func (i *importedString) scan() {
+	i.u, i.scanned = unistring.Scan(i.s), true
+}
+
+// ensureScanned performs the deferred scan the first time it is called and
+// does nothing once the scanned flag is set.
+func (i *importedString) ensureScanned() {
+	if i.scanned {
+		return
+	}
+	i.scan()
+}
+
+// ToInteger scans the string if needed. A value that has a unicodeString
+// representation yields 0; otherwise the conversion is delegated to
+// asciiString.
+func (i *importedString) ToInteger() int64 {
+	if i.ensureScanned(); i.u == nil {
+		return asciiString(i.s).ToInteger()
+	}
+	return 0
+}
+
+// toString returns the receiver itself as a valueString; no scan is triggered.
+func (i *importedString) toString() valueString {
+	var self valueString = i
+	return self
+}
+
+// string returns the value as a unistring.String. After scanning, it is built
+// from the UTF-16 form when one exists, and from the Go string directly
+// otherwise.
+func (i *importedString) string() unistring.String {
+	i.ensureScanned()
+	if i.u == nil {
+		return unistring.String(i.s)
+	}
+	return unistring.FromUtf16(i.u)
+}
+
+// ToString returns the receiver itself as a Value; no scan is triggered.
+func (i *importedString) ToString() Value {
+	var self Value = i
+	return self
+}
+
+// String returns the Go string exactly as it was imported, without scanning.
+func (i *importedString) String() string {
+	original := i.s
+	return original
+}
+
+// ToFloat scans the string if needed. A value that has a unicodeString
+// representation yields NaN; otherwise the conversion is delegated to
+// asciiString.
+func (i *importedString) ToFloat() float64 {
+	if i.ensureScanned(); i.u == nil {
+		return asciiString(i.s).ToFloat()
+	}
+	return math.NaN()
+}
+
+// ToNumber scans the string if needed and delegates to the ToNumber of
+// whichever representation the scan selected.
+func (i *importedString) ToNumber() Value {
+	i.ensureScanned()
+	if i.u == nil {
+		return asciiString(i.s).ToNumber()
+	}
+	return i.u.ToNumber()
+}
+
+// ToBoolean reports whether the Go string is non-empty; no scan is needed.
+func (i *importedString) ToBoolean() bool {
+	return i.s != ""
+}
+
+// ToObject wraps the string in an object created by r._newString, using the
+// runtime's global StringPrototype.
+func (i *importedString) ToObject(r *Runtime) *Object {
+	proto := r.global.StringPrototype
+	return r._newString(i, proto)
+}
+
+// SameAs is defined as StrictEquals for imported strings.
+func (i *importedString) SameAs(other Value) bool {
+	same := i.StrictEquals(other)
+	return same
+}
+
+// Equals first tries StrictEquals. If that fails, the string is scanned (if
+// needed) and the comparison is delegated to the Equals of whichever
+// representation the scan selected.
+func (i *importedString) Equals(other Value) bool {
+	if !i.StrictEquals(other) {
+		i.ensureScanned()
+		if i.u == nil {
+			return asciiString(i.s).Equals(other)
+		}
+		return i.u.Equals(other)
+	}
+	return true
+}
+
+// StrictEquals compares the receiver with another string value:
+//   - another *importedString: the Go strings are compared directly;
+//   - a unicodeString: the receiver is scanned if needed and its UTF-16 form,
+//     if any, is compared with the argument;
+//   - an asciiString: equal only if no UTF-16 form is stored and the Go
+//     strings match (no scan is triggered).
+//
+// Every other type of value compares unequal.
+func (i *importedString) StrictEquals(other Value) bool {
+	switch o := other.(type) {
+	case *importedString:
+		return i.s == o.s
+	case unicodeString:
+		i.ensureScanned()
+		return i.u != nil && i.u.equals(o)
+	case asciiString:
+		return i.u == nil && i.s == string(o)
+	default:
+		return false
+	}
+}
+
+// Export returns the Go string exactly as it was imported.
+func (i *importedString) Export() interface{} {
+	var exported interface{} = i.s
+	return exported
+}
+
+// ExportType returns reflectTypeString, the type of the value Export returns.
+func (i *importedString) ExportType() reflect.Type {
+	var t reflect.Type = reflectTypeString
+	return t
+}
+
+// baseObject delegates to asciiString's baseObject, regardless of whether the
+// string has been scanned.
+func (i *importedString) baseObject(r *Runtime) *Object {
+	ascii := asciiString(i.s)
+	return ascii.baseObject(r)
+}
+
+// hash scans the string if needed and returns the hash computed by whichever
+// representation the scan selected.
+func (i *importedString) hash(hasher *maphash.Hash) uint64 {
+	i.ensureScanned()
+	if i.u == nil {
+		return asciiString(i.s).hash(hasher)
+	}
+	return i.u.hash(hasher)
+}
+
+// charAt scans the string if needed and returns the character at idx as
+// reported by whichever representation the scan selected.
+func (i *importedString) charAt(idx int) rune {
+	if i.ensureScanned(); i.u == nil {
+		return asciiString(i.s).charAt(idx)
+	}
+	return i.u.charAt(idx)
+}
+
+// length scans the string if needed and returns the length reported by
+// whichever representation the scan selected.
+func (i *importedString) length() int {
+	if i.ensureScanned(); i.u == nil {
+		return asciiString(i.s).length()
+	}
+	return i.u.length()
+}
+
+// concat appends v to the receiver. When both operands are importedStrings
+// that have not been scanned yet, the Go strings are joined and the result
+// stays an unscanned importedString. In every other case the receiver is
+// scanned and the work is delegated to the concat of the selected
+// representation.
+func (i *importedString) concat(v valueString) valueString {
+	if !i.scanned {
+		if other, ok := v.(*importedString); ok && !other.scanned {
+			return &importedString{s: i.s + other.s}
+		}
+		// The receiver is known to be unscanned here, so scan it directly.
+		i.scan()
+	}
+	if i.u == nil {
+		return asciiString(i.s).concat(v)
+	}
+	return i.u.concat(v)
+}
+
+// substring scans the string if needed and returns the [start, end) slice
+// produced by whichever representation the scan selected.
+func (i *importedString) substring(start, end int) valueString {
+	i.ensureScanned()
+	if i.u == nil {
+		return asciiString(i.s).substring(start, end)
+	}
+	return i.u.substring(start, end)
+}
+
+// compareTo orders the receiver against v by comparing the Go string with
+// v.String() using strings.Compare; no scan is triggered.
+func (i *importedString) compareTo(v valueString) int {
+	other := v.String()
+	return strings.Compare(i.s, other)
+}
+
+// reader returns a rune reader over the string without forcing a scan. An
+// unscanned string is read straight from the Go string; a scanned one uses the
+// reader of the representation the scan selected.
+func (i *importedString) reader() io.RuneReader {
+	switch {
+	case !i.scanned:
+		return strings.NewReader(i.s)
+	case i.u != nil:
+		return i.u.reader()
+	default:
+		return asciiString(i.s).reader()
+	}
+}
+
+// stringUtf16Reader reads a Go string rune by rune, but hands out the two
+// halves of a surrogate pair separately for code points above 0xFFFF.
+type stringUtf16Reader struct {
+	s      string // input being decoded
+	pos    int    // byte offset in s of the next rune to decode
+	second rune   // pending second surrogate, or a negative value when there is none
+}
+
+// ReadRune returns the next UTF-16 code unit of the string. A code point above
+// 0xFFFF is split with utf16.EncodeRune: the first half is returned right away
+// and the second half is kept for the following call. The reported size is 1
+// for every unit returned; io.EOF is returned once the input is exhausted.
+func (s *stringUtf16Reader) ReadRune() (r rune, size int, err error) {
+	if pending := s.second; pending >= 0 {
+		s.second = -1
+		return pending, 1, nil
+	}
+	if s.pos >= len(s.s) {
+		return 0, 0, io.EOF
+	}
+	decoded, width := utf8.DecodeRuneInString(s.s[s.pos:])
+	s.pos += width
+	if decoded > 0xFFFF {
+		first, second := utf16.EncodeRune(decoded)
+		s.second = second
+		return first, 1, nil
+	}
+	return decoded, 1, nil
+}
+
+// utf16Reader returns a reader of UTF-16 code units without forcing a scan. An
+// unscanned string is decoded on the fly by a stringUtf16Reader; a scanned one
+// uses the utf16Reader of the representation the scan selected.
+func (i *importedString) utf16Reader() io.RuneReader {
+	switch {
+	case !i.scanned:
+		// second starts negative: no surrogate half is pending yet.
+		return &stringUtf16Reader{s: i.s, second: -1}
+	case i.u != nil:
+		return i.u.utf16Reader()
+	default:
+		return asciiString(i.s).utf16Reader()
+	}
+}
+
+// utf16Runes scans the string if needed and returns the rune slice produced by
+// whichever representation the scan selected.
+func (i *importedString) utf16Runes() []rune {
+	if i.ensureScanned(); i.u == nil {
+		return asciiString(i.s).utf16Runes()
+	}
+	return i.u.utf16Runes()
+}
+
+// index scans the string if needed and delegates the search for v, starting
+// at start, to whichever representation the scan selected.
+func (i *importedString) index(v valueString, start int) int {
+	i.ensureScanned()
+	if i.u == nil {
+		return asciiString(i.s).index(v, start)
+	}
+	return i.u.index(v, start)
+}
+
+// lastIndex scans the string if needed and delegates the backward search for
+// v, bounded by pos, to whichever representation the scan selected.
+func (i *importedString) lastIndex(v valueString, pos int) int {
+	i.ensureScanned()
+	if i.u == nil {
+		return asciiString(i.s).lastIndex(v, pos)
+	}
+	return i.u.lastIndex(v, pos)
+}
+
+// toLower scans the string if needed. A value with a unicodeString
+// representation is lowered by the package-level toLower applied to the Go
+// string; otherwise asciiString's toLower is used.
+func (i *importedString) toLower() valueString {
+	i.ensureScanned()
+	if i.u == nil {
+		return asciiString(i.s).toLower()
+	}
+	return toLower(i.s)
+}
+
+// toUpper scans the string if needed. A value with a unicodeString
+// representation is upper-cased with a cases.Upper(language.Und) caser applied
+// to the Go string and wrapped by newStringValue; otherwise asciiString's
+// toUpper is used.
+func (i *importedString) toUpper() valueString {
+	i.ensureScanned()
+	if i.u == nil {
+		return asciiString(i.s).toUpper()
+	}
+	upper := cases.Upper(language.Und).String(i.s)
+	return newStringValue(upper)
+}
+
+// toTrimmedUTF8 returns the Go string with every leading and trailing
+// character listed in parser.WhitespaceChars removed; no scan is triggered.
+func (i *importedString) toTrimmedUTF8() string {
+	const cutset = parser.WhitespaceChars
+	return strings.Trim(i.s, cutset)
+}

+ 136 - 1
string_test.go

@@ -1,6 +1,10 @@
 package goja
 
-import "testing"
+import (
+	"strings"
+	"testing"
+	"unicode/utf16"
+)
 
 func TestStringOOBProperties(t *testing.T) {
 	const SCRIPT = `
@@ -13,6 +17,137 @@ func TestStringOOBProperties(t *testing.T) {
 	testScript(SCRIPT, valueInt(1), t)
 }
 
+func TestImportedString(t *testing.T) { // runs a set of string operations over every pair from strs, feeding each operand both via newStringValue and via vm.ToValue
+	vm := New()
+
+	testUnaryOp := func(a, expr string, result interface{}, t *testing.T) { // compiles expr as the body of `a => ...` and checks the exported result for each form of a
+		v, err := vm.RunString("a => " + expr)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var fn func(a Value) (Value, error)
+		err = vm.ExportTo(v, &fn)
+		if err != nil {
+			t.Fatal(err)
+		}
+		for _, aa := range []Value{newStringValue(a), vm.ToValue(a)} { // same text, two ways of creating the Value (vm.ToValue presumably yields the imported form — TODO confirm)
+			res, err := fn(aa)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if res.Export() != result {
+				t.Fatalf("%s, a:%v(%T). expected: %v, actual: %v", expr, aa, aa, result, res)
+			}
+		}
+	}
+
+	testBinaryOp := func(a, b, expr string, result interface{}, t *testing.T) { // like testUnaryOp but for `(a, b) => ...`, covering all four combinations of the two forms
+		v, err := vm.RunString("(a, b) => " + expr)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var fn func(a, b Value) (Value, error)
+		err = vm.ExportTo(v, &fn)
+		if err != nil {
+			t.Fatal(err)
+		}
+		for _, aa := range []Value{newStringValue(a), vm.ToValue(a)} {
+			for _, bb := range []Value{newStringValue(b), vm.ToValue(b)} {
+				res, err := fn(aa, bb)
+				if err != nil {
+					t.Fatal(err)
+				}
+				if res.Export() != result {
+					t.Fatalf("%s, a:%v(%T), b:%v(%T). expected: %v, actual: %v", expr, aa, aa, bb, bb, result, res)
+				}
+			}
+		}
+	}
+
+	strs := []string{"shortAscii", "longlongAscii1234567890123456789", "short юникод", "long юникод 1234567890 юникод \U0001F600", "юникод", "Ascii", "long", "код"}
+	indexOfResults := [][]int{ // expected a.indexOf(b): row = index of a in strs, column = index of b in strs
+		/*
+			const strs = ["shortAscii", "longlongAscii1234567890123456789", "short юникод", "long юникод 1234567890 юникод \u{1F600}", "юникод", "Ascii", "long", "код"];
+
+			strs.forEach(a => {
+			    console.log("{", strs.map(b => a.indexOf(b)).join(", "), "},");
+			});
+		*/
+		{0, -1, -1, -1, -1, 5, -1, -1},
+		{-1, 0, -1, -1, -1, 8, 0, -1},
+		{-1, -1, 0, -1, 6, -1, -1, 9},
+		{-1, -1, -1, 0, 5, -1, 0, 8},
+		{-1, -1, -1, -1, 0, -1, -1, 3},
+		{-1, -1, -1, -1, -1, 0, -1, -1},
+		{-1, -1, -1, -1, -1, -1, 0, -1},
+		{-1, -1, -1, -1, -1, -1, -1, 0},
+	}
+
+	lastIndexOfResults := [][]int{ // expected a.lastIndexOf(b), laid out the same way as indexOfResults
+		/*
+			strs.forEach(a => {
+			    console.log("{", strs.map(b => a.lastIndexOf(b)).join(", "), "},");
+			});
+		*/
+		{0, -1, -1, -1, -1, 5, -1, -1},
+		{-1, 0, -1, -1, -1, 8, 4, -1},
+		{-1, -1, 0, -1, 6, -1, -1, 9},
+		{-1, -1, -1, 0, 23, -1, 0, 26},
+		{-1, -1, -1, -1, 0, -1, -1, 3},
+		{-1, -1, -1, -1, -1, 0, -1, -1},
+		{-1, -1, -1, -1, -1, -1, 0, -1},
+		{-1, -1, -1, -1, -1, -1, -1, 0},
+	}
+
+	pad := func(s, p string, n int, start bool) string { // Go-side reference for padStart (start == true) / padEnd, with lengths measured in UTF-16 code units
+		if n == 0 {
+			return s
+		}
+		if p == "" {
+			p = " " // an empty filler falls back to a single space
+		}
+		var b strings.Builder
+		ss := utf16.Encode([]rune(s))
+		b.Grow(n)
+		n -= len(ss) // from here on n is the number of padding code units still to be written
+		if !start {
+			b.WriteString(s)
+		}
+		if n > 0 {
+			pp := utf16.Encode([]rune(p))
+			for n > 0 {
+				if n > len(pp) {
+					b.WriteString(p)
+					n -= len(pp)
+				} else {
+					b.WriteString(string(utf16.Decode(pp[:n]))) // final chunk: cut the filler to exactly n code units
+					n = 0
+				}
+			}
+		}
+		if start {
+			b.WriteString(s)
+		}
+		return b.String()
+	}
+
+	for i, a := range strs {
+		testUnaryOp(a, "JSON.parse(JSON.stringify(a))", a, t) // JSON round trip must return the same text
+		for j, b := range strs {
+			testBinaryOp(a, b, "a === b", a == b, t)
+			testBinaryOp(a, b, "a == b", a == b, t)
+			testBinaryOp(a, b, "a + b", a+b, t)
+			testBinaryOp(a, b, "a > b", strings.Compare(a, b) > 0, t)
+			testBinaryOp(a, b, "`A${a}B${b}C`", "A"+a+"B"+b+"C", t)
+			testBinaryOp(a, b, "a.indexOf(b)", int64(indexOfResults[i][j]), t) // converted to int64 before the != comparison with res.Export()
+			testBinaryOp(a, b, "a.lastIndexOf(b)", int64(lastIndexOfResults[i][j]), t)
+			testBinaryOp(a, b, "a.padStart(32, b)", pad(a, b, 32, true), t)
+			testBinaryOp(a, b, "a.padEnd(32, b)", pad(a, b, 32, false), t)
+			testBinaryOp(a, b, "a.replace(b, '')", strings.Replace(a, b, "", 1), t) // count 1: only the first occurrence is removed in the Go reference
+		}
+	}
+}
+
 func BenchmarkASCIIConcat(b *testing.B) {
 	vm := New()
 

+ 76 - 70
string_unicode.go

@@ -2,7 +2,6 @@ package goja
 
 import (
 	"errors"
-	"fmt"
 	"hash/maphash"
 	"io"
 	"math"
@@ -119,7 +118,10 @@ func (rr *unicodeRuneReader) ReadRune() (r rune, size int, err error) {
 	return
 }
 
-func (b *unicodeStringBuilder) grow(n int) {
+func (b *unicodeStringBuilder) Grow(n int) {
+	if len(b.buf) == 0 {
+		n++
+	}
 	if cap(b.buf)-len(b.buf) < n {
 		buf := make([]uint16, len(b.buf), 2*cap(b.buf)+n)
 		copy(buf, b.buf)
@@ -127,12 +129,8 @@ func (b *unicodeStringBuilder) grow(n int) {
 	}
 }
 
-func (b *unicodeStringBuilder) Grow(n int) {
-	b.grow(n + 1)
-}
-
 func (b *unicodeStringBuilder) ensureStarted(initialSize int) {
-	b.grow(len(b.buf) + initialSize + 1)
+	b.Grow(initialSize)
 	if len(b.buf) == 0 {
 		b.buf = append(b.buf, unistring.BOM)
 	}
@@ -140,16 +138,14 @@ func (b *unicodeStringBuilder) ensureStarted(initialSize int) {
 
 func (b *unicodeStringBuilder) WriteString(s valueString) {
 	b.ensureStarted(s.length())
-	switch s := s.(type) {
-	case unicodeString:
-		b.buf = append(b.buf, s[1:]...)
+	a, u := devirtualizeString(s)
+	if u != nil {
+		b.buf = append(b.buf, u[1:]...)
 		b.unicode = true
-	case asciiString:
-		for i := 0; i < len(s); i++ {
-			b.buf = append(b.buf, uint16(s[i]))
+	} else {
+		for i := 0; i < len(a); i++ {
+			b.buf = append(b.buf, uint16(a[i]))
 		}
-	default:
-		panic(fmt.Errorf("unsupported string type: %T", s))
 	}
 }
 
@@ -189,20 +185,27 @@ func (b *unicodeStringBuilder) writeASCIIString(bytes string) {
 	}
 }
 
+func (b *unicodeStringBuilder) writeUnicodeString(str unicodeString) { // appends str to the builder's buffer and marks the builder as containing unicode
+	b.ensureStarted(str.length())
+	b.buf = append(b.buf, str[1:]...) // element 0 of str is skipped (presumably its leading BOM, as ensureStarted writes one at the start of buf — TODO confirm)
+	b.unicode = true
+}
+
 func (b *valueStringBuilder) ascii() bool {
 	return len(b.unicodeBuilder.buf) == 0
 }
 
 func (b *valueStringBuilder) WriteString(s valueString) {
-	if ascii, ok := s.(asciiString); ok {
+	a, u := devirtualizeString(s)
+	if u != nil {
+		b.switchToUnicode(u.length())
+		b.unicodeBuilder.writeUnicodeString(u)
+	} else {
 		if b.ascii() {
-			b.asciiBuilder.WriteString(string(ascii))
+			b.asciiBuilder.WriteString(string(a))
 		} else {
-			b.unicodeBuilder.writeASCIIString(string(ascii))
+			b.unicodeBuilder.writeASCIIString(string(a))
 		}
-	} else {
-		b.switchToUnicode(s.length())
-		b.unicodeBuilder.WriteString(s)
 	}
 }
 
@@ -257,15 +260,15 @@ func (b *valueStringBuilder) switchToUnicode(extraLen int) {
 }
 
 func (b *valueStringBuilder) WriteSubstring(source valueString, start int, end int) {
-	if ascii, ok := source.(asciiString); ok {
+	a, us := devirtualizeString(source)
+	if us == nil {
 		if b.ascii() {
-			b.asciiBuilder.WriteString(string(ascii[start:end]))
+			b.asciiBuilder.WriteString(string(a[start:end]))
 		} else {
-			b.unicodeBuilder.writeASCIIString(string(ascii[start:end]))
+			b.unicodeBuilder.writeASCIIString(string(a[start:end]))
 		}
 		return
 	}
-	us := source.(unicodeString)
 	if b.ascii() {
 		uc := false
 		for i := start; i < end; i++ {
@@ -288,15 +291,15 @@ func (b *valueStringBuilder) WriteSubstring(source valueString, start int, end i
 	b.unicodeBuilder.unicode = true
 }
 
-func (s unicodeString) reader(start int) io.RuneReader {
+func (s unicodeString) reader() io.RuneReader {
 	return &unicodeRuneReader{
-		s: s[start+1:],
+		s: s[1:],
 	}
 }
 
-func (s unicodeString) utf16Reader(start int) io.RuneReader {
+func (s unicodeString) utf16Reader() io.RuneReader {
 	return &utf16RuneReader{
-		s: s[start+1:],
+		s: s[1:],
 	}
 }
 
@@ -356,15 +359,11 @@ func (s unicodeString) equals(other unicodeString) bool {
 }
 
 func (s unicodeString) SameAs(other Value) bool {
-	if otherStr, ok := other.(unicodeString); ok {
-		return s.equals(otherStr)
-	}
-
-	return false
+	return s.StrictEquals(other)
 }
 
 func (s unicodeString) Equals(other Value) bool {
-	if s.SameAs(other) {
+	if s.StrictEquals(other) {
 		return true
 	}
 
@@ -375,7 +374,17 @@ func (s unicodeString) Equals(other Value) bool {
 }
 
 func (s unicodeString) StrictEquals(other Value) bool {
-	return s.SameAs(other)
+	if otherStr, ok := other.(unicodeString); ok {
+		return s.equals(otherStr)
+	}
+	if otherStr, ok := other.(*importedString); ok {
+		otherStr.ensureScanned()
+		if otherStr.u != nil {
+			return s.equals(otherStr.u)
+		}
+	}
+
+	return false
 }
 
 func (s unicodeString) baseObject(r *Runtime) *Object {
@@ -394,23 +403,20 @@ func (s unicodeString) length() int {
 }
 
 func (s unicodeString) concat(other valueString) valueString {
-	switch other := other.(type) {
-	case unicodeString:
-		b := make(unicodeString, len(s)+len(other)-1)
+	a, u := devirtualizeString(other)
+	if u != nil {
+		b := make(unicodeString, len(s)+len(u)-1)
 		copy(b, s)
-		copy(b[len(s):], other[1:])
+		copy(b[len(s):], u[1:])
 		return b
-	case asciiString:
-		b := make([]uint16, len(s)+len(other))
-		copy(b, s)
-		b1 := b[len(s):]
-		for i := 0; i < len(other); i++ {
-			b1[i] = uint16(other[i])
-		}
-		return unicodeString(b)
-	default:
-		panic(fmt.Errorf("Unknown string type: %T", other))
 	}
+	b := make([]uint16, len(s)+len(a))
+	copy(b, s)
+	b1 := b[len(s):]
+	for i := 0; i < len(a); i++ {
+		b1[i] = uint16(a[i])
+	}
+	return unicodeString(b)
 }
 
 func (s unicodeString) substring(start, end int) valueString {
@@ -441,16 +447,14 @@ func (s unicodeString) compareTo(other valueString) int {
 
 func (s unicodeString) index(substr valueString, start int) int {
 	var ss []uint16
-	switch substr := substr.(type) {
-	case unicodeString:
-		ss = substr[1:]
-	case asciiString:
-		ss = make([]uint16, len(substr))
-		for i := 0; i < len(substr); i++ {
-			ss[i] = uint16(substr[i])
+	a, u := devirtualizeString(substr)
+	if u != nil {
+		ss = u[1:]
+	} else {
+		ss = make([]uint16, len(a))
+		for i := 0; i < len(a); i++ {
+			ss[i] = uint16(a[i])
 		}
-	default:
-		panic(fmt.Errorf("unknown string type: %T", substr))
 	}
 	s1 := s[1:]
 	// TODO: optimise
@@ -471,16 +475,14 @@ func (s unicodeString) index(substr valueString, start int) int {
 
 func (s unicodeString) lastIndex(substr valueString, start int) int {
 	var ss []uint16
-	switch substr := substr.(type) {
-	case unicodeString:
-		ss = substr[1:]
-	case asciiString:
-		ss = make([]uint16, len(substr))
-		for i := 0; i < len(substr); i++ {
-			ss[i] = uint16(substr[i])
+	a, u := devirtualizeString(substr)
+	if u != nil {
+		ss = u[1:]
+	} else {
+		ss = make([]uint16, len(a))
+		for i := 0; i < len(a); i++ {
+			ss[i] = uint16(a[i])
 		}
-	default:
-		panic(fmt.Errorf("Unknown string type: %T", substr))
 	}
 
 	s1 := s[1:]
@@ -506,9 +508,9 @@ func unicodeStringFromRunes(r []rune) unicodeString {
 	return unistring.NewFromRunes(r).AsUtf16()
 }
 
-func (s unicodeString) toLower() valueString {
+func toLower(s string) valueString {
 	caser := cases.Lower(language.Und)
-	r := []rune(caser.String(s.String()))
+	r := []rune(caser.String(s))
 	// Workaround
 	ascii := true
 	for i := 0; i < len(r)-1; i++ {
@@ -529,6 +531,10 @@ func (s unicodeString) toLower() valueString {
 	return unicodeStringFromRunes(r)
 }
 
+func (s unicodeString) toLower() valueString { // method form on unicodeString; converts to a Go string and delegates to the package-level toLower
+	return toLower(s.String())
+}
+
 func (s unicodeString) toUpper() valueString {
 	caser := cases.Upper(language.Und)
 	return newStringValue(caser.String(s.String()))

+ 35 - 24
unistring/string.go

@@ -21,36 +21,47 @@ const (
 
 type String string
 
-func NewFromString(s string) String {
-	ascii := true
-	size := 0
-	for _, c := range s {
-		if c >= utf8.RuneSelf {
-			ascii = false
-			if c > 0xFFFF {
-				size++
-			}
+// Scan checks if the string contains any non-ASCII characters. If it does, converts it to a BOM-prefixed UTF-16 slice
+// suitable for creating a String using FromUtf16, otherwise returns nil.
+func Scan(s string) []uint16 {
+	utf16Size := 0
+	for ; utf16Size < len(s); utf16Size++ {
+		if s[utf16Size] >= utf8.RuneSelf {
+			goto unicode
 		}
-		size++
 	}
-	if ascii {
-		return String(s)
+	return nil
+unicode:
+	for _, chr := range s[utf16Size:] {
+		utf16Size++
+		if chr > 0xFFFF {
+			utf16Size++
+		}
 	}
-	b := make([]uint16, size+1)
-	b[0] = BOM
-	i := 1
-	for _, c := range s {
-		if c <= 0xFFFF {
-			b[i] = uint16(c)
+
+	buf := make([]uint16, utf16Size+1)
+	buf[0] = BOM
+	c := 1
+	for _, chr := range s {
+		if chr <= 0xFFFF {
+			buf[c] = uint16(chr)
 		} else {
-			first, second := utf16.EncodeRune(c)
-			b[i] = uint16(first)
-			i++
-			b[i] = uint16(second)
+			first, second := utf16.EncodeRune(chr)
+			buf[c] = uint16(first)
+			c++
+			buf[c] = uint16(second)
 		}
-		i++
+		c++
 	}
-	return FromUtf16(b)
+
+	return buf
+}
+
+func NewFromString(s string) String {
+	if buf := Scan(s); buf != nil {
+		return FromUtf16(buf)
+	}
+	return String(s)
 }
 
 func NewFromRunes(s []rune) String {

+ 17 - 4
vm.go

@@ -4676,13 +4676,26 @@ func (n concatStrings) exec(vm *vm) {
 	strs := vm.stack[vm.sp-int(n) : vm.sp]
 	length := 0
 	allAscii := true
-	for _, s := range strs {
-		if allAscii {
-			if _, ok := s.(unicodeString); ok {
+	for i, s := range strs {
+		switch s := s.(type) {
+		case asciiString:
+			length += s.length()
+		case unicodeString:
+			length += s.length()
+			allAscii = false
+		case *importedString:
+			s.ensureScanned()
+			if s.u != nil {
+				strs[i] = s.u
+				length += s.u.length()
 				allAscii = false
+			} else {
+				strs[i] = asciiString(s.s)
+				length += len(s.s)
 			}
+		default:
+			panic(unknownStringTypeErr(s))
 		}
-		length += s.(valueString).length()
 	}
 
 	vm.sp -= int(n) - 1