Browse Source

Use FindAllSubmatchIndexASCII() instead of FindAllSubmatchIndexUTF8() for non-Unicode strings. This improves performance a bit.

Dmitry Panov 8 years ago
parent
commit
2acb68835d
2 changed files with 25 additions and 1 deletions
  1. 1 1
      regexp.go
  2. 24 0
      regexp_test.go

+ 1 - 1
regexp.go

@@ -192,7 +192,7 @@ func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s unicodeString, n int) [][]i
 func (r *regexp2Wrapper) FindAllSubmatchIndex(s valueString, n int) [][]int {
 	switch s := s.(type) {
 	case asciiString:
-		return r.FindAllSubmatchIndexUTF8(string(s), n)
+		return r.FindAllSubmatchIndexASCII(string(s), n)
 	case unicodeString:
 		return r.findAllSubmatchIndexUTF16(s, n)
 	default:

+ 24 - 0
regexp_test.go

@@ -173,3 +173,27 @@ func TestRegexpDotMatchSlashRInGroup(t *testing.T) {
 
 
 	testScript1(SCRIPT, valueFalse, t)
 }
+
+func TestRegexpSplitWithBackRef(t *testing.T) {
+	const SCRIPT = `
+	"a++b+-c".split(/([+-])\1/).join(" $$ ")
+	`
+
+	testScript1(SCRIPT, asciiString("a $$ + $$ b+-c"), t)
+}
+
+func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
+	const SCRIPT = `
+	"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)
+	`
+	b.StopTimer()
+	prg, err := Compile("test.js", SCRIPT, false)
+	if err != nil {
+		b.Fatal(err)
+	}
+	vm := New()
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		vm.RunProgram(prg)
+	}
+}