Browse Source

Support lookbehind assertions in regexp

Dmitry Panov 5 years ago
parent
commit
bf18fe8c88
3 changed files with 20 additions and 8 deletions
  1. parser/regexp.go (+3 -1)
  2. parser/regexp_test.go (+4 -7)
  3. regexp_test.go (+13 -0)

+ 3 - 1
parser/regexp.go

@@ -99,7 +99,9 @@ func (self *_RegExp_parser) scanGroup() {
 			switch {
 			case ch == '=' || ch == '!':
 				self.error(-1, "re2: Invalid (%s) <lookahead>", self.str[self.chrOffset:self.chrOffset+2])
-			case ch != ':' && ch != '<':
+			case ch == '<':
+				self.error(-1, "re2: Invalid (%s) <lookbehind>", self.str[self.chrOffset:self.chrOffset+2])
+			case ch != ':':
 				self.error(-1, "Invalid group")
 				self.invalid = true
 			}

+ 4 - 7
parser/regexp_test.go

@@ -40,6 +40,10 @@ func TestRegExp(t *testing.T) {
 			test("(?U)", "", "Invalid group")
 			test("(?)|(?i)", "", "Invalid group")
 			test("(?P<w>)(?P<w>)(?P<D>)", "", "Invalid group")
+
+			test(`<%([\s\S]+?)%>`, `<%([`+WhitespaceChars+`S]+?)%>`, "S in class")
+
+			test("(?<=y)x", "(?<=y)x", "re2: Invalid (?<) <lookbehind>")
 		}
 
 		{
@@ -52,11 +56,6 @@ func TestRegExp(t *testing.T) {
 				is(err, nil)
 			}
 
-			testErr := func(input string, expectErr string) {
-				_, err := TransformRegExp(input)
-				is(err, expectErr)
-			}
-
 			test("", "")
 
 			test("abc", "abc")
@@ -109,8 +108,6 @@ func TestRegExp(t *testing.T) {
 
 			test("\\04", "\\x04")
 
-			testErr(`<%([\s\S]+?)%>`, "S in class")
-
 			test(`(.)^`, "([^\\r\\n])^")
 
 			test(`\$`, `\$`)

+ 13 - 0
regexp_test.go

@@ -498,6 +498,19 @@ func TestRegexpInvalidGroup(t *testing.T) {
 	testScript1(TESTLIB+SCRIPT, _undefined, t)
 }
 
+func TestRegexpLookbehindAssertion(t *testing.T) {
+	const SCRIPT = `
+	var re = /(?<=Jack|Tom)Sprat/;
+	assert(re.test("JackSprat"), "#1");
+	assert(!re.test("JohnSprat"), "#2");
+
+	re = /(?<!-)\d+/;
+	assert(re.test("3"), "#3");
+	assert(!re.test("-3"), "#4");
+	`
+	testScript1(TESTLIB+SCRIPT, _undefined, t)
+}
+
 func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
 	const SCRIPT = `
 	"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)