// File: regexp_test.go

  1. package goja
  2. import (
  3. "testing"
  4. )
  5. func TestRegexp1(t *testing.T) {
  6. const SCRIPT = `
  7. var r = new RegExp("(['\"])(.*?)\\1");
  8. var m = r.exec("'test'");
  9. m !== null && m.length == 3 && m[2] === "test";
  10. `
  11. testScript1(SCRIPT, valueTrue, t)
  12. }
  13. func TestRegexp2(t *testing.T) {
  14. const SCRIPT = `
  15. var r = new RegExp("(['\"])(.*?)['\"]");
  16. var m = r.exec("'test'");
  17. m !== null && m.length == 3 && m[2] === "test";
  18. `
  19. testScript1(SCRIPT, valueTrue, t)
  20. }
  21. func TestRegexpLiteral(t *testing.T) {
  22. const SCRIPT = `
  23. var r = /(['\"])(.*?)\1/;
  24. var m = r.exec("'test'");
  25. m !== null && m.length == 3 && m[2] === "test";
  26. `
  27. testScript1(SCRIPT, valueTrue, t)
  28. }
  29. func TestRegexpRe2Unicode(t *testing.T) {
  30. const SCRIPT = `
  31. var r = /(тест)/i;
  32. var m = r.exec("'Тест'");
  33. m !== null && m.length == 2 && m[1] === "Тест";
  34. `
  35. testScript1(SCRIPT, valueTrue, t)
  36. }
  37. func TestRegexpRe2UnicodeTarget(t *testing.T) {
  38. const SCRIPT = `
  39. var r = /(['\"])(.*?)['\"]/i;
  40. var m = r.exec("'Тест'");
  41. m !== null && m.length == 3 && m[2] === "Тест";
  42. `
  43. testScript1(SCRIPT, valueTrue, t)
  44. }
  45. func TestRegexpRegexp2Unicode(t *testing.T) {
  46. const SCRIPT = `
  47. var r = /(['\"])(тест)\1/i;
  48. var m = r.exec("'Тест'");
  49. m !== null && m.length == 3 && m[2] === "Тест";
  50. `
  51. testScript1(SCRIPT, valueTrue, t)
  52. }
  53. func TestRegexpRegexp2UnicodeTarget(t *testing.T) {
  54. const SCRIPT = `
  55. var r = /(['\"])(.*?)\1/;
  56. var m = r.exec("'Тест'");
  57. m !== null && m.length == 3 && m[2] === "Тест";
  58. `
  59. testScript1(SCRIPT, valueTrue, t)
  60. }
  61. func TestRegexpRe2Whitespace(t *testing.T) {
  62. const SCRIPT = `
  63. "\u2000\u2001\u2002\u200b".replace(/\s+/g, "") === "\u200b";
  64. `
  65. testScript1(SCRIPT, valueTrue, t)
  66. }
  67. func TestRegexpRegexp2Whitespace(t *testing.T) {
  68. const SCRIPT = `
  69. "A\u2000\u2001\u2002A\u200b".replace(/(A)\s+\1/g, "") === "\u200b"
  70. `
  71. testScript1(SCRIPT, valueTrue, t)
  72. }
  73. func TestEmptyCharClassRe2(t *testing.T) {
  74. const SCRIPT = `
  75. /[]/.test("\u0000");
  76. `
  77. testScript1(SCRIPT, valueFalse, t)
  78. }
  79. func TestNegatedEmptyCharClassRe2(t *testing.T) {
  80. const SCRIPT = `
  81. /[^]/.test("\u0000");
  82. `
  83. testScript1(SCRIPT, valueTrue, t)
  84. }
  85. func TestEmptyCharClassRegexp2(t *testing.T) {
  86. const SCRIPT = `
  87. /([])\1/.test("\u0000\u0000");
  88. `
  89. testScript1(SCRIPT, valueFalse, t)
  90. }
  91. func TestRegexp2Negate(t *testing.T) {
  92. const SCRIPT = `
  93. /([\D1])\1/.test("aa");
  94. `
  95. testScript1(SCRIPT, valueTrue, t)
  96. }
  97. func TestAlternativeRe2(t *testing.T) {
  98. const SCRIPT = `
  99. /()|/.exec("") !== null;
  100. `
  101. testScript1(SCRIPT, valueTrue, t)
  102. }
  103. func TestRegexpReplaceGlobal(t *testing.T) {
  104. const SCRIPT = `
  105. "QBZPbage\ny_cynprubyqre".replace(/^\s*|\s*$/g, '')
  106. `
  107. testScript1(SCRIPT, asciiString("QBZPbage\ny_cynprubyqre"), t)
  108. }
  109. func TestRegexpNumCaptures(t *testing.T) {
  110. const SCRIPT = `
  111. "Fubpxjnir Synfu 9.0 e115".replace(/([a-zA-Z]|\s)+/, '')
  112. `
  113. testScript1(SCRIPT, asciiString("9.0 e115"), t)
  114. }
  115. func TestRegexpNumCaptures1(t *testing.T) {
  116. const SCRIPT = `
  117. "Fubpxjnir Sy\tfu 9.0 e115".replace(/^.*\s+(\S+\s+\S+$)/, '')
  118. `
  119. testScript1(SCRIPT, asciiString(""), t)
  120. }
  121. func TestRegexpSInClass(t *testing.T) {
  122. const SCRIPT = `
  123. /[\S]/.test("\u2028");
  124. `
  125. testScript1(SCRIPT, valueFalse, t)
  126. }
  127. func TestRegexpDotMatchSlashR(t *testing.T) {
  128. const SCRIPT = `
  129. /./.test("\r");
  130. `
  131. testScript1(SCRIPT, valueFalse, t)
  132. }
  133. func TestRegexpDotMatchSlashRInGroup(t *testing.T) {
  134. const SCRIPT = `
  135. /(.)/.test("\r");
  136. `
  137. testScript1(SCRIPT, valueFalse, t)
  138. }
  139. func TestRegexpSplitWithBackRef(t *testing.T) {
  140. const SCRIPT = `
  141. "a++b+-c".split(/([+-])\1/).join(" $$ ")
  142. `
  143. testScript1(SCRIPT, asciiString("a $$ + $$ b+-c"), t)
  144. }
  145. func TestEscapeNonASCII(t *testing.T) {
  146. const SCRIPT = `
  147. /\⩓/.test("⩓")
  148. `
  149. testScript1(SCRIPT, valueTrue, t)
  150. }
  151. func TestRegexpUTF16(t *testing.T) {
  152. const SCRIPT = `
  153. var str = "\uD800\uDC00";
  154. assert(/\uD800/g.test(str), "#1");
  155. assert(/\uD800/.test(str), "#2");
  156. assert(/𐀀/.test(str), "#3");
  157. var re = /\uD800/;
  158. assert(compareArray(str.replace(re, "X"), ["X", "\uDC00"]), "#4");
  159. assert(compareArray(str.split(re), ["", "\uDC00"]), "#5");
  160. assert(compareArray("a\uD800\uDC00b".split(/\uD800/g), ["a", "\uDC00b"]), "#6");
  161. assert(compareArray("a\uD800\uDC00b".split(/(?:)/g), ["a", "\uD800", "\uDC00", "b"]), "#7");
  162. re = /(?=)a/; // a hack to use regexp2
  163. assert.sameValue(re.exec('\ud83d\ude02a').index, 2, "#8");
  164. assert.sameValue(/./.exec('\ud83d\ude02')[0], '\ud83d', "#9");
  165. assert(RegExp("\uD800").test("\uD800"), "#10");
  166. var cu = 0xD800;
  167. var xx = "a\\" + String.fromCharCode(cu);
  168. var pattern = eval("/" + xx + "/");
  169. assert.sameValue(pattern.source, "a\\\\\\ud800", "Code unit: " + cu.toString(16), "#11");
  170. assert(pattern.test("a\\\uD800"), "#12");
  171. `
  172. testScript1(TESTLIB+SCRIPT, _undefined, t)
  173. }
  174. func TestRegexpUnicode(t *testing.T) {
  175. const SCRIPT = `
  176. assert(!/\uD800/u.test("\uD800\uDC00"), "#1");
  177. assert(!/\uFFFD/u.test("\uD800\uDC00"), "#2");
  178. assert(/\uD800\uDC00/u.test("\uD800\uDC00"), "#3");
  179. assert(/\uD800/u.test("\uD800"), "#4");
  180. assert(compareArray("a\uD800\uDC00b".split(/\uD800/gu), ["a\uD800\uDC00b"]), "#5");
  181. assert(compareArray("a\uD800\uDC00b".split(/(?:)/gu), ["a", "𐀀", "b"]), "#6");
  182. var re = eval('/' + /\ud834\udf06/u.source + '/u');
  183. assert(re.test('\ud834\udf06'), "#9");
  184. /*re = RegExp("\\p{L}", "u");
  185. if (!re.test("A")) {
  186. throw new Error("Test 9 failed");
  187. }*/
  188. `
  189. testScript1(TESTLIB+SCRIPT, _undefined, t)
  190. }
  191. func TestConvertRegexpToUnicode(t *testing.T) {
  192. if s := convertRegexpToUnicode(`test\uD800\u0C00passed`); s != `test\uD800\u0C00passed` {
  193. t.Fatal(s)
  194. }
  195. if s := convertRegexpToUnicode(`test\uD800\uDC00passed`); s != `test𐀀passed` {
  196. t.Fatal(s)
  197. }
  198. if s := convertRegexpToUnicode(`test\u0023passed`); s != `test\u0023passed` {
  199. t.Fatal(s)
  200. }
  201. if s := convertRegexpToUnicode(`test\u0passed`); s != `test\u0passed` {
  202. t.Fatal(s)
  203. }
  204. if s := convertRegexpToUnicode(`test\uD800passed`); s != `test\uD800passed` {
  205. t.Fatal(s)
  206. }
  207. if s := convertRegexpToUnicode(`test\uD800`); s != `test\uD800` {
  208. t.Fatal(s)
  209. }
  210. if s := convertRegexpToUnicode(`test\uD80`); s != `test\uD80` {
  211. t.Fatal(s)
  212. }
  213. if s := convertRegexpToUnicode(`\\uD800\uDC00passed`); s != `\\uD800\uDC00passed` {
  214. t.Fatal(s)
  215. }
  216. if s := convertRegexpToUnicode(`testpassed`); s != `testpassed` {
  217. t.Fatal(s)
  218. }
  219. }
  220. func TestConvertRegexpToUtf16(t *testing.T) {
  221. if s := convertRegexpToUtf16(`𐀀`); s != `\ud800\udc00` {
  222. t.Fatal(s)
  223. }
  224. if s := convertRegexpToUtf16(`\𐀀`); s != `\\\ud800\udc00` {
  225. t.Fatal(s)
  226. }
  227. }
  228. func TestEscapeInvalidUtf16(t *testing.T) {
  229. if s := escapeInvalidUtf16(asciiString("test")); s != "test" {
  230. t.Fatal(s)
  231. }
  232. if s := escapeInvalidUtf16(newStringValue("test\U00010000")); s != "test\U00010000" {
  233. t.Fatal(s)
  234. }
  235. if s := escapeInvalidUtf16(unicodeStringFromRunes([]rune{'t', 0xD800})); s != "t\\ud800" {
  236. t.Fatal(s)
  237. }
  238. if s := escapeInvalidUtf16(unicodeStringFromRunes([]rune{'t', 0xD800, 'p'})); s != "t\\ud800p" {
  239. t.Fatal(s)
  240. }
  241. if s := escapeInvalidUtf16(unicodeStringFromRunes([]rune{0xD800, 'p'})); s != "\\ud800p" {
  242. t.Fatal(s)
  243. }
  244. if s := escapeInvalidUtf16(unicodeStringFromRunes([]rune{'t', '\\', 0xD800, 'p'})); s != `t\\\ud800p` {
  245. t.Fatal(s)
  246. }
  247. }
  248. func TestRegexpAssertion(t *testing.T) {
  249. const SCRIPT = `
  250. var res = 'aaa'.match(/^a/g);
  251. res.length === 1 || res[0] === 'a';
  252. `
  253. testScript1(SCRIPT, valueTrue, t)
  254. }
  255. func TestRegexpUnicodeAdvanceStringIndex(t *testing.T) {
  256. const SCRIPT = `
  257. // deoptimise RegExp
  258. var origExec = RegExp.prototype.exec;
  259. RegExp.prototype.exec = function(s) {
  260. return origExec.call(this, s);
  261. };
  262. var re = /(?:)/gu;
  263. var str = "a\uD800\uDC00b";
  264. assert(compareArray(str.split(re), ["a", "𐀀", "b"]), "#1");
  265. re.lastIndex = 3;
  266. assert.sameValue(re.exec(str).index, 3, "#2");
  267. re.lastIndex = 2;
  268. assert.sameValue(re.exec(str).index, 1, "#3");
  269. re.lastIndex = 4;
  270. assert.sameValue(re.exec(str).index, 4, "#4");
  271. re.lastIndex = 5;
  272. assert.sameValue(re.exec(str), null, "#5");
  273. `
  274. testScript1(TESTLIB+SCRIPT, _undefined, t)
  275. }
  276. func TestRegexpInit(t *testing.T) {
  277. const SCRIPT = `
  278. RegExp(".").lastIndex;
  279. `
  280. testScript1(SCRIPT, intToValue(0), t)
  281. }
  282. func TestRegexpToString(t *testing.T) {
  283. const SCRIPT = `
  284. RegExp.prototype.toString.call({
  285. source: 'foo',
  286. flags: 'bar'});
  287. `
  288. testScript1(SCRIPT, asciiString("/foo/bar"), t)
  289. }
  290. func TestRegexpEscapeSource(t *testing.T) {
  291. const SCRIPT = `
  292. /href="(.+?)(\/.*\/\S+?)\/"/.source;
  293. `
  294. testScript1(SCRIPT, asciiString(`href="(.+?)(\/.*\/\S+?)\/"`), t)
  295. }
  296. func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
  297. const SCRIPT = `
  298. "aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)
  299. `
  300. b.StopTimer()
  301. prg, err := Compile("test.js", SCRIPT, false)
  302. if err != nil {
  303. b.Fatal(err)
  304. }
  305. vm := New()
  306. b.StartTimer()
  307. for i := 0; i < b.N; i++ {
  308. vm.RunProgram(prg)
  309. }
  310. }
  311. func BenchmarkRegexpMatch(b *testing.B) {
  312. const SCRIPT = `
  313. "a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  314. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  315. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  316. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  317. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  318. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  319. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  320. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  321. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  322. a\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\ra\nb\r\c\nd\r\e\n\f\rg\nh\r\
  323. ".match(/[^\r\n]+/g)
  324. `
  325. b.StopTimer()
  326. prg, err := Compile("test.js", SCRIPT, false)
  327. if err != nil {
  328. b.Fatal(err)
  329. }
  330. vm := New()
  331. b.StartTimer()
  332. for i := 0; i < b.N; i++ {
  333. vm.RunProgram(prg)
  334. }
  335. }