regexp_test.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. package parser
  2. import (
  3. "regexp"
  4. "testing"
  5. )
  6. func TestRegExp(t *testing.T) {
  7. tt(t, func() {
  8. {
  9. // err
  10. test := func(input string, expect interface{}) {
  11. _, err := TransformRegExp(input)
  12. _, incompat := err.(RegexpErrorIncompatible)
  13. is(incompat, false)
  14. is(err, expect)
  15. }
  16. test("[", "Unterminated character class")
  17. test("(", "Unterminated group")
  18. test("\\(?=)", "Unmatched ')'")
  19. test(")", "Unmatched ')'")
  20. test("0:(?)", "Invalid group")
  21. test("(?)", "Invalid group")
  22. test("(?U)", "Invalid group")
  23. test("(?)|(?i)", "Invalid group")
  24. test("(?P<w>)(?P<w>)(?P<D>)", "Invalid group")
  25. }
  26. {
  27. // incompatible
  28. test := func(input string, expectErr interface{}) {
  29. _, err := TransformRegExp(input)
  30. _, incompat := err.(RegexpErrorIncompatible)
  31. is(incompat, true)
  32. is(err, expectErr)
  33. }
  34. test(`<%([\s\S]+?)%>`, "S in class")
  35. test("(?<=y)x", "re2: Invalid (?<) <lookbehind>")
  36. test(`(?!test)`, "re2: Invalid (?!) <lookahead>")
  37. test(`\1`, "re2: Invalid \\1 <backreference>")
  38. test(`\8`, "re2: Invalid \\8 <backreference>")
  39. }
  40. {
  41. // err
  42. test := func(input string, expect string) {
  43. result, err := TransformRegExp(input)
  44. is(err, nil)
  45. _, incompat := err.(RegexpErrorIncompatible)
  46. is(incompat, false)
  47. is(result, expect)
  48. _, err = regexp.Compile(result)
  49. is(err, nil)
  50. }
  51. test("", "")
  52. test("abc", "abc")
  53. test(`\abc`, `abc`)
  54. test(`\a\b\c`, `a\bc`)
  55. test(`\x`, `x`)
  56. test(`\c`, `c`)
  57. test(`\cA`, `\x01`)
  58. test(`\cz`, `\x1a`)
  59. test(`\ca`, `\x01`)
  60. test(`\cj`, `\x0a`)
  61. test(`\ck`, `\x0b`)
  62. test(`\+`, `\+`)
  63. test(`[\b]`, `[\x08]`)
  64. test(`\u0z01\x\undefined`, `u0z01xundefined`)
  65. test(`\\|'|\r|\n|\t|\u2028|\u2029`, `\\|'|\r|\n|\t|\x{2028}|\x{2029}`)
  66. test("]", "]")
  67. test("}", "}")
  68. test("%", "%")
  69. test("(%)", "(%)")
  70. test("(?:[%\\s])", "(?:[%"+WhitespaceChars+"])")
  71. test("[[]", "[[]")
  72. test("\\101", "\\x41")
  73. test("\\51", "\\x29")
  74. test("\\051", "\\x29")
  75. test("\\175", "\\x7d")
  76. test("\\0", "\\0")
  77. test("\\04", "\\x04")
  78. test(`(.)^`, "("+Re2Dot+")^")
  79. test(`\$`, `\$`)
  80. test(`[G-b]`, `[G-b]`)
  81. test(`[G-b\0]`, `[G-b\0]`)
  82. test(`\k`, `k`)
  83. test(`\x20`, `\x20`)
  84. test(`😊`, `😊`)
  85. test(`^.*`, `^`+Re2Dot+`*`)
  86. test(`(\n)`, `(\n)`)
  87. test(`(a(bc))`, `(a(bc))`)
  88. test(`[]`, "[^\u0000-\U0001FFFF]")
  89. test(`[^]`, "[\u0000-\U0001FFFF]")
  90. test(`\s+`, "["+WhitespaceChars+"]+")
  91. test(`\S+`, "[^"+WhitespaceChars+"]+")
  92. }
  93. })
  94. }
  95. func TestTransformRegExp(t *testing.T) {
  96. tt(t, func() {
  97. pattern, err := TransformRegExp(`\s+abc\s+`)
  98. is(err, nil)
  99. is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`)
  100. is(regexp.MustCompile(pattern).MatchString("\t abc def"), true)
  101. })
  102. tt(t, func() {
  103. pattern, err := TransformRegExp(`\u{1d306}`)
  104. is(err, nil)
  105. is(pattern, `\x{1d306}`)
  106. })
  107. tt(t, func() {
  108. pattern, err := TransformRegExp(`\u1234`)
  109. is(err, nil)
  110. is(pattern, `\x{1234}`)
  111. })
  112. }
  113. func BenchmarkTransformRegExp(b *testing.B) {
  114. f := func(reStr string, b *testing.B) {
  115. b.ResetTimer()
  116. b.ReportAllocs()
  117. for i := 0; i < b.N; i++ {
  118. _, _ = TransformRegExp(reStr)
  119. }
  120. }
  121. b.Run("Re", func(b *testing.B) {
  122. f(`^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$`, b)
  123. })
  124. b.Run("Re2-1", func(b *testing.B) {
  125. f(`(?=)^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$`, b)
  126. })
  127. b.Run("Re2-1", func(b *testing.B) {
  128. f(`^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$(?=)`, b)
  129. })
  130. }