RegExpPrototype.cs 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104
  1. using System.Diagnostics.CodeAnalysis;
  2. using System.Text;
  3. using System.Text.RegularExpressions;
  4. using Jint.Collections;
  5. using Jint.Native.Number;
  6. using Jint.Native.Object;
  7. using Jint.Native.String;
  8. using Jint.Native.Symbol;
  9. using Jint.Pooling;
  10. using Jint.Runtime;
  11. using Jint.Runtime.Descriptors;
  12. using Jint.Runtime.Interop;
  13. namespace Jint.Native.RegExp
  14. {
  15. internal sealed class RegExpPrototype : Prototype
  16. {
  // Property keys used repeatedly by the methods of this class, allocated once.
  private static readonly JsString PropertyExec = new JsString("exec");
  private static readonly JsString PropertyIndex = new JsString("index");
  private static readonly JsString PropertyInput = new JsString("input");
  private static readonly JsString PropertySticky = new JsString("sticky");
  private static readonly JsString PropertyGlobal = new JsString("global");
  private static readonly JsString PropertyGroups = new JsString("groups");
  internal static readonly JsString PropertySource = new JsString("source");
  internal static readonly JsString PropertyFlags = new JsString("flags");

  // Value the "source" getter returns when it is invoked on the prototype object itself.
  private static readonly JsString DefaultSource = new JsString("(?:)");

  // Exposed through the "constructor" property in Initialize.
  private readonly RegExpConstructor _constructor;

  // Delegate installed as the built-in "exec"; TryGetDefaultExec compares against this instance.
  private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the prototype object; the actual members are installed later by <see cref="Initialize"/>.
  /// </summary>
  /// <param name="engine">Engine forwarded to the base class.</param>
  /// <param name="realm">Realm forwarded to the base class.</param>
  /// <param name="constructor">Constructor exposed through the "constructor" property.</param>
  /// <param name="objectPrototype">Object stored as this object's own prototype.</param>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype) : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // Keep one delegate instance around so TryGetDefaultExec can recognise an untouched "exec".
      _defaultExec = Exec;
  }
  /// <summary>
  /// Installs the string-keyed methods and flag accessors, then the well-known-symbol methods.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Builds a configurable accessor that has a getter and no setter.
      GetSetPropertyDescriptor CreateGetter(string name, Func<JsValue, JsValue[], JsValue> getter)
      {
          var function = new ClrFunctionInstance(Engine, name, getter, 0, lengthFlags);
          return new GetSetPropertyDescriptor(get: function, set: Undefined, flags: PropertyFlag.Configurable);
      }

      // Builds a getter that reads a value from a JsRegExp receiver. When the receiver is this
      // prototype it yields protoValue (or undefined); any other non-JsRegExp receiver is rejected.
      GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<JsRegExp, JsValue> valueExtractor, JsValue? protoValue = null)
      {
          JsValue Getter(JsValue thisObj, JsValue[] arguments)
          {
              if (ReferenceEquals(thisObj, this))
              {
                  return protoValue ?? Undefined;
              }

              var regExp = thisObj as JsRegExp;
              if (regExp is null)
              {
                  ExceptionHelper.ThrowTypeError(_realm);
              }

              return valueExtractor(regExp);
          }

          return CreateGetter(name, Getter);
      }

      var properties = new PropertyDictionary(14, checkExistingKeys: false);
      properties["constructor"] = new PropertyDescriptor(_constructor, propertyFlags);
      properties["toString"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "toString", ToRegExpString, 0, lengthFlags), propertyFlags);
      properties["exec"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "exec", _defaultExec, 1, lengthFlags), propertyFlags);
      properties["test"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "test", Test, 1, lengthFlags), propertyFlags);
      properties["dotAll"] = CreateGetAccessorDescriptor("get dotAll", static r => r.DotAll);
      properties["flags"] = CreateGetter("get flags", Flags);
      properties["global"] = CreateGetAccessorDescriptor("get global", static r => r.Global);
      properties["hasIndices"] = CreateGetAccessorDescriptor("get hasIndices", static r => r.Indices);
      properties["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", static r => r.IgnoreCase);
      properties["multiline"] = CreateGetAccessorDescriptor("get multiline", static r => r.Multiline);
      properties["source"] = CreateGetter("get source", Source);
      properties["sticky"] = CreateGetAccessorDescriptor("get sticky", static r => r.Sticky);
      properties["unicode"] = CreateGetAccessorDescriptor("get unicode", static r => r.FullUnicode);
      properties["unicodeSets"] = CreateGetAccessorDescriptor("get unicodeSets", static r => r.UnicodeSets);
      SetProperties(properties);

      var symbols = new SymbolDictionary(5);
      symbols[GlobalSymbolRegistry.Match] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.match]", Match, 1, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.MatchAll] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.matchAll]", MatchAll, 1, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.Replace] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.replace]", Replace, 2, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.Search] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.search]", Search, 1, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.Split] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.split]", Split, 2, lengthFlags), propertyFlags);
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  /// <remarks>
  /// Yields <see cref="DefaultSource"/> when the receiver is this prototype, rejects any other receiver
  /// that is not a <see cref="JsRegExp"/>, and otherwise returns the pattern text with forward slashes
  /// and line feeds escaped.
  /// </remarks>
  private JsValue Source(JsValue thisObject, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObject, this))
      {
          return DefaultSource;
      }

      var regExp = thisObject as JsRegExp;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      var pattern = regExp.Source;
      if (string.IsNullOrEmpty(pattern))
      {
          return JsRegExp.regExpForMatchingAllCharacters;
      }

      // Undo existing slash escapes first so that the re-escaping step never doubles them.
      var withPlainSlashes = pattern.Replace("\\/", "/");
      var withEscapedSlashes = withPlainSlashes.Replace("/", "\\/");
      return withEscapedSlashes.Replace("\n", "\\n");
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <param name="thisObject">The receiver, validated through AssertThisIsObjectInstance.</param>
  /// <param name="arguments">[0] is the string to search, [1] the replacement text or a callable replacer.</param>
  /// <returns>The input string with the first match (or every match when the "g" flag is set) replaced.</returns>
  /// <remarks>
  /// A fast path delegates to <see cref="Regex.Replace(string, string, int)"/> when the pattern is not
  /// unicode or sticky, the replacement text contains no '$' and "exec" is still the built-in one.
  /// Everything else goes through the generic, spec-shaped loop built on <see cref="RegExpExec"/>.
  /// </remarks>
  private JsValue Replace(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // we need heavier logic if we have named captures
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.IndexOf('$') != -1;
      }

      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      var fullUnicode = false;
      if (global)
      {
          fullUnicode = flags.IndexOf('u') != -1;
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is JsRegExp rei && rei.TryGetDefaultRegExpExec(out _))
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              string Evaluator(Match match)
              {
                  var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
                  replacerArgs.Add(match.Value);

                  ObjectInstance? groups = null;
                  for (var i = 1; i < match.Groups.Count; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Success ? capture.Value : Undefined);

                      var groupName = GetRegexGroupName(rei.Value, i);
                      if (!string.IsNullOrWhiteSpace(groupName))
                      {
                          groups ??= OrdinaryObjectCreate(_engine, null);
                          groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
                      }
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);
                  if (groups is not null)
                  {
                      replacerArgs.Add(groups);
                  }

                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // Generic path: collect every match first, then build the output.
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // An empty match must move lastIndex forward or the loop would never end. The write
              // throws on failure (as in MatchSlow) so a non-writable lastIndex cannot silently
              // turn this into an endless loop.
              var thisIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, nextIndex, true);
          }
      }

      var accumulatedResult = new StringBuilder();
      var nextSourcePosition = 0;
      var captures = new List<string>();
      for (var i = 0; i < results.Count; i++)
      {
          var result = results[i];
          var nCaptures = System.Math.Max((int) result.Length - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;
          var position = (int) TypeConverter.ToInteger(result.Get(PropertyIndex));
          position = System.Math.Max(System.Math.Min(position, lengthS), 0);

          // Only a functional replacer needs the argument list; it is non-null exactly in that case.
          List<JsValue>? replacerArgs = null;
          if (functionalReplace)
          {
              replacerArgs = new List<JsValue>(nCaptures + 4);
              replacerArgs.Add(matched);
          }

          captures.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = result.Get(n);
              if (capN.IsUndefined())
              {
                  // Template substitution treats a non-participating group as empty text, but a
                  // replacer function must still receive undefined (same as the fast path above).
                  captures.Add("");
                  replacerArgs?.Add(Undefined);
              }
              else
              {
                  var captured = TypeConverter.ToString(capN);
                  captures.Add(captured);
                  replacerArgs?.Add(captured);
              }
          }

          var namedCaptures = result.Get(PropertyGroups);
          string replacement;
          if (replacerArgs is not null)
          {
              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // Matches reported out of order (possible with a user-supplied exec) are ignored.
          if (position >= nextSourcePosition)
          {
              accumulatedResult.Append(s, nextSourcePosition, position - nextSourcePosition);
              accumulatedResult.Append(replacement);
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition < lengthS)
      {
          accumulatedResult.Append(s, nextSourcePosition, lengthS - nextSourcePosition);
      }

      return accumulatedResult.ToString();
  }
  /// <summary>
  /// Invokes a callable replacer with an undefined receiver and converts its result to a string.
  /// </summary>
  /// <param name="replacer">The replacer; it is cast to <c>ICallable</c> without a prior check.</param>
  /// <param name="replacerArgs">Arguments handed to the replacer, in order.</param>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var callArguments = replacerArgs.ToArray();
      return TypeConverter.ToString(callable.Call(Undefined, callArguments));
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The full input string.</param>
  /// <param name="position">Index in <paramref name="str"/> where the match starts.</param>
  /// <param name="captures">Captured group texts; index 0 holds capture group 1.</param>
  /// <param name="namedCaptures">Object holding the named groups, or undefined.</param>
  /// <param name="replacement">Replacement template that may contain '$' patterns.</param>
  /// <returns>The template with every recognised '$' pattern expanded.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // If there is no pattern, replace the pattern as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      // Patterns
      // $$ Inserts a "$".
      // $& Inserts the matched substring.
      // $` Inserts the portion of the string that precedes the matched substring.
      // $' Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string, provided the first argument was a RegExp object.
      // $<name> Inserts the named capture group, when named captures are available.
      using var replacementBuilder = StringBuilderPool.Rent();
      var sb = replacementBuilder.Builder;
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c == '$' && i < replacement.Length - 1)
          {
              c = replacement[++i];
              switch (c)
              {
                  case '$':
                      sb.Append('$');
                      break;
                  case '&':
                      sb.Append(matched);
                      break;
                  case '`':
                      sb.Append(str, 0, position);
                      break;
                  case '\'':
                  {
                      // A match reported by a custom exec may extend past the end of the input;
                      // the substitution is then empty rather than an out-of-range error.
                      var tailPosition = position + matched.Length;
                      if (tailPosition < str.Length)
                      {
                          sb.Append(str, tailPosition, str.Length - tailPosition);
                      }
                      break;
                  }
                  case '<':
                  {
                      var gtPos = replacement.IndexOf('>', i + 1);
                      if (gtPos == -1 || namedCaptures.IsUndefined())
                      {
                          sb.Append('$');
                          sb.Append(c);
                      }
                      else
                      {
                          var startIndex = i + 1;
                          var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                          var capture = namedCaptures.Get(groupName);
                          if (!capture.IsUndefined())
                          {
                              sb.Append(TypeConverter.ToString(capture));
                          }

                          // Continue after the closing '>'.
                          i = gtPos;
                      }
                      break;
                  }
                  default:
                  {
                      if (IsAsciiDigit(c))
                      {
                          int matchNumber1 = c - '0';

                          // The match number can be one or two digits long.
                          int matchNumber2 = 0;
                          if (i < replacement.Length - 1 && IsAsciiDigit(replacement[i + 1]))
                          {
                              matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                          }

                          // Try the two digit capture first.
                          if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                          {
                              // Two digit capture replacement.
                              sb.Append(captures[matchNumber2 - 1]);
                              i++;
                          }
                          else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                          {
                              // Single digit capture replacement.
                              sb.Append(captures[matchNumber1 - 1]);
                          }
                          else
                          {
                              // Capture does not exist: emit the '$' and step back so the digit
                              // is copied as ordinary text by the next iteration.
                              sb.Append('$');
                              i--;
                          }
                      }
                      else
                      {
                          // Unknown replacement pattern.
                          sb.Append('$');
                          sb.Append(c);
                      }
                      break;
                  }
              }
          }
          else
          {
              sb.Append(c);
          }
      }

      return replacementBuilder.ToString();

      // Only '0'-'9' form capture references; char.IsDigit would also accept other Unicode digits.
      static bool IsAsciiDigit(char ch) => ch >= '0' && ch <= '9';
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <param name="thisObject">The receiver, validated through AssertThisIsObjectInstance.</param>
  /// <param name="arguments">[0] is the string to split, [1] the optional maximum number of entries.</param>
  /// <returns>An array holding the pieces (and captured groups) produced by the split.</returns>
  /// <remarks>
  /// Non-unicode patterns whose "exec" is still the built-in one are split directly with the
  /// underlying <see cref="Regex"/>; everything else is handled by <see cref="SplitSlow"/>.
  /// </remarks>
  private JsValue Split(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.IndexOf('u') > -1;

      // The splitter always carries the sticky flag.
      var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });
      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is JsRegExp R && R.TryGetDefaultRegExpExec(out _))
      {
          // we can take faster path
          if (R.Source == JsRegExp.regExpForMatchingAllCharacters)
          {
              // if empty string, just a string split; the caller's limit still applies
              var effectiveLimit = (uint) System.Math.Min(lim, (long) s.Length);
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, effectiveLimit);
          }

          var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);
          var match = R.Value.Match(s, 0);
          int lastIndex = 0;
          uint index = 0;
          while (match.Success)
          {
              // Empty matches at the start, at the end or directly after the previous separator
              // do not split anything.
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  match = match.NextMatch();
                  continue;
              }

              // Add the text in front of the separator.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);
              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;

              // Captured groups of the separator are spliced into the output as well.
              for (int i = 1; i < match.Groups.Count; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Success)
                  {
                      item = group.Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);
                  if (index >= lim)
                  {
                      return a;
                  }
              }

              match = match.NextMatch();
          }

          // The loop only ends here when the matches are exhausted (hitting the limit returns
          // above), so the remainder is always appended. Doing it after the loop also covers the
          // cases where the final matches were skipped empty ones, or where nothing matched.
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);
          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Generic split loop that drives <paramref name="splitter"/> through <see cref="RegExpExec"/>,
  /// trying every position of <paramref name="s"/> in turn.
  /// </summary>
  /// <param name="s">The string to split.</param>
  /// <param name="splitter">The regular expression object used for matching.</param>
  /// <param name="unicodeMatching">Whether positions advance by code point instead of code unit.</param>
  /// <param name="lengthA">Index at which the first produced entry is stored.</param>
  /// <param name="lim">Number of entries after which the result is returned immediately.</param>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var size = (ulong) s.Length;
      var pieces = _realm.Intrinsics.Array.ArrayCreate(0);

      ulong pieceStart = 0;
      ulong searchIndex = 0;
      while (searchIndex < size)
      {
          splitter.Set(JsRegExp.PropertyLastIndex, searchIndex, true);
          var execResult = RegExpExec(splitter, s);

          var separatorFound = !execResult.IsNull();
          ulong separatorEnd = 0;
          if (separatorFound)
          {
              separatorEnd = System.Math.Min(TypeConverter.ToLength(splitter.Get(JsRegExp.PropertyLastIndex)), size);
          }

          // No match here, or an empty match right at the start of the current piece: try the next position.
          if (!separatorFound || separatorEnd == pieceStart)
          {
              searchIndex = AdvanceStringIndex(s, searchIndex, unicodeMatching);
              continue;
          }

          var piece = s.Substring((int) pieceStart, (int) (searchIndex - pieceStart));
          pieces.SetIndexValue(lengthA, piece, updateLength: true);
          lengthA++;
          if (lengthA == lim)
          {
              return pieces;
          }

          pieceStart = separatorEnd;

          // Append the separator's capture groups (entries 1..length-1 of the exec result).
          var numberOfCaptures = (int) TypeConverter.ToLength(execResult.Get(CommonProperties.Length));
          numberOfCaptures = System.Math.Max(numberOfCaptures - 1, 0);
          for (var i = 1; i <= numberOfCaptures; i++)
          {
              pieces.SetIndexValue(lengthA, execResult.Get(i), updateLength: true);
              lengthA++;
              if (lengthA == lim)
              {
                  return pieces;
              }
          }

          searchIndex = pieceStart;
      }

      // Whatever follows the last separator becomes the final entry.
      var tail = s.Substring((int) pieceStart, s.Length - (int) pieceStart);
      pieces.SetIndexValue(lengthA, tail, updateLength: true);
      return pieces;
  }
  /// <summary>
  /// Getter for "flags": reads each flag property of the receiver in a fixed order and appends
  /// the matching flag character for every truthy one.
  /// </summary>
  private JsValue Flags(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.flags");
      var flags = new StringBuilder(8);

      // The property reads happen in call order, so the order of the calls below is significant.
      void AppendIfSet(JsValue property, char flag)
      {
          if (TypeConverter.ToBoolean(r.Get(property)))
          {
              flags.Append(flag);
          }
      }

      AppendIfSet("hasIndices", 'd');
      AppendIfSet(PropertyGlobal, 'g');
      AppendIfSet("ignoreCase", 'i');
      AppendIfSet("multiline", 'm');
      AppendIfSet("dotAll", 's');
      AppendIfSet("unicode", 'u');
      AppendIfSet("unicodeSets", 'v');
      AppendIfSet(PropertySticky, 'y');

      return flags.ToString();
  }
  /// <summary>
  /// Implements toString: "/" + source + "/" + flags, reading "source" before "flags" from the receiver.
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.toString");
      var source = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));
      return string.Concat("/", source, "/", flagText);
  }
  /// <summary>
  /// Implements test: reports whether the receiver matches the given string.
  /// </summary>
  /// <param name="thisObject">The receiver, validated through AssertThisIsObjectInstance.</param>
  /// <param name="arguments">[0] is the string to test.</param>
  /// <returns>A boolean value telling whether a match was found.</returns>
  /// <remarks>
  /// Non-unicode <see cref="JsRegExp"/> receivers whose "exec" is still the built-in one are answered
  /// directly from the underlying <see cref="Regex"/>; everything else goes through <see cref="RegExpExec"/>.
  /// </remarks>
  private JsValue Test(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths; a user-supplied exec must be honoured, so it disables them
      if (r is JsRegExp R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          // Compare before narrowing to int: lastIndex can be far larger than any string length.
          var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              // Past the end there is nothing to match; reset so that the next call starts over.
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          // lastIndex == s.Length is a valid start: patterns such as /$/ still match there.
          var startAt = (int) lastIndex;
          var m = R.Value.Match(s, startAt);
          if (!m.Success || (R.Sticky && m.Index != startAt))
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(JsRegExp.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  /// <remarks>
  /// Runs one exec from position zero and restores the receiver's previous "lastIndex" afterwards.
  /// Returns the "index" of the exec result, or -1 when exec reported no match.
  /// </remarks>
  private JsValue Search(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // Only write lastIndex when it actually differs from zero.
      var savedLastIndex = rx.Get(JsRegExp.PropertyLastIndex);
      var startsAtZero = SameValue(savedLastIndex, 0);
      if (!startsAtZero)
      {
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, s);

      // Put the original value back if exec changed it.
      var lastIndexAfterExec = rx.Get(JsRegExp.PropertyLastIndex);
      var unchanged = SameValue(lastIndexAfterExec, savedLastIndex);
      if (!unchanged)
      {
          rx.Set(JsRegExp.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <param name="thisObject">The receiver, validated through AssertThisIsObjectInstance.</param>
  /// <param name="arguments">[0] is the string to match against.</param>
  /// <returns>
  /// Without the "g" flag, the result of a single exec. With it, an array of all matched
  /// substrings, or null when there is none.
  /// </returns>
  private JsValue Match(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.IndexOf('u') != -1;
      rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is JsRegExp rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              // A sticky pattern only accepts matches that start exactly where the previous one
              // stopped: at position 0 first, then at the end of the previous match, or one
              // position further when that match was empty.
              var match = rei.Value.Match(s);
              uint count = 0;
              var expectedIndex = 0;
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(count++, match.Value, updateLength: false);
                  expectedIndex = match.Length > 0 ? match.Index + match.Length : match.Index + 1;
                  match = match.NextMatch();
              }

              if (count == 0)
              {
                  return Null;
              }

              a.SetLength(count);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }

              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global-match loop: calls <see cref="RegExpExec"/> until it reports null and collects
  /// the matched text of every result.
  /// </summary>
  /// <param name="rx">The regular expression object to execute.</param>
  /// <param name="s">The string to match against.</param>
  /// <param name="fullUnicode">Whether an empty match advances by code point instead of code unit.</param>
  /// <returns>The array of matched substrings, or null when the first exec already failed.</returns>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var matches = _realm.Intrinsics.Array.ArrayCreate(0);
      for (uint count = 0; ; count++)
      {
          var execResult = RegExpExec(rx, s);
          if (execResult.IsNull())
          {
              matches.SetLength(count);
              if (count == 0)
              {
                  return Null;
              }

              return matches;
          }

          var matched = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          matches.SetIndexValue(count, matched, updateLength: false);

          // An empty match leaves lastIndex where it was; move it forward by hand.
          if (matched.Length == 0)
          {
              var currentIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var advancedIndex = AdvanceStringIndex(s, currentIndex, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, advancedIndex, true);
          }
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  /// <remarks>
  /// Constructs a new regular expression object from the receiver and its flags via the species
  /// constructor, copies "lastIndex" over, and hands it to the RegExp string iterator.
  /// </remarks>
  private JsValue MatchAll(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.matchAll");
      var input = TypeConverter.ToString(arguments.At(0));
      var species = SpeciesConstructor(regExp, _realm.Intrinsics.RegExp);
      var flagText = TypeConverter.ToJsString(regExp.Get(PropertyFlags));

      var constructorArguments = new JsValue[] { regExp, flagText };
      var matcher = Construct(species, constructorArguments);

      // The iterator works on the new object, which must start where the receiver currently stands.
      var startIndex = TypeConverter.ToLength(regExp.Get(JsRegExp.PropertyLastIndex));
      matcher.Set(JsRegExp.PropertyLastIndex, startIndex, true);

      var isGlobal = flagText.IndexOf('g') != -1;
      var isFullUnicode = flagText.IndexOf('u') != -1;
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, input, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>: two code units further when
  /// <paramref name="unicode"/> is set and a high/low surrogate pair starts at
  /// <paramref name="index"/>, one code unit further otherwise.
  /// </summary>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      // The pair can only be inspected when both of its code units lie inside the string.
      var startsSurrogatePair = unicode
          && next < (ulong) s.Length
          && char.IsHighSurrogate(s[(int) index])
          && char.IsLowSurrogate(s[(int) next]);

      return startsSurrogatePair ? index + 2 : next;
  }
  /// <summary>
  /// Executes <paramref name="r"/> against <paramref name="s"/>. A callable "exec" property is
  /// invoked and must return an object or null; without one, <paramref name="r"/> has to be a
  /// <see cref="JsRegExp"/> and the built-in matching is used.
  /// </summary>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      if (r.Get(PropertyExec) is ICallable userExec)
      {
          var execResult = userExec.Call(r, new JsValue[] { s });

          // Anything other than an object or null is rejected.
          if (!(execResult.IsNull() || execResult.IsObject()))
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return execResult;
      }

      var regExp = r as JsRegExp;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return RegExpBuiltinExec(regExp, s);
  }
  /// <summary>
  /// Checks whether the "exec" property of <paramref name="o"/> is still the built-in function
  /// created by this prototype.
  /// </summary>
  /// <param name="o">Object whose "exec" property is inspected.</param>
  /// <param name="exec">Receives the built-in delegate when the method returns true.</param>
  internal bool TryGetDefaultExec(ObjectInstance o, [NotNullWhen(true)] out Func<JsValue, JsValue[], JsValue>? exec)
  {
      if (o.Get(PropertyExec) is not ClrFunctionInstance candidate || candidate._func != _defaultExec)
      {
          exec = null;
          return false;
      }

      exec = _defaultExec;
      return true;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <param name="R">The regular expression to execute.</param>
  /// <param name="s">The string to match against.</param>
  /// <returns>The match result array, or null when there is no match.</returns>
  /// <remarks>
  /// "lastIndex" is only honoured and updated for global or sticky patterns; on failure it is
  /// reset to zero for those.
  /// </remarks>
  private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));
      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          lastIndex = 0;
      }

      if (R.Source == JsRegExp.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              // lastIndex is forced to zero above unless the pattern is global or sticky, so this
              // is the stateful failure case: reset lastIndex exactly like the general path below.
              R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          array.FastSetDataProperty(PropertyIndex._value, lastIndex);
          array.FastSetDataProperty(PropertyInput._value, s);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!global && !sticky && !fullUnicode && !hasIndices)
      {
          // we can take the non-stateful fast path which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
      }

      // the stateful version
      Match match;

      if (lastIndex > length)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var startAt = (int) lastIndex;
      while (true)
      {
          match = matcher.Match(s, startAt);

          // The conversion of Unicode regex patterns to .NET Regex has some flaws:
          // when the pattern may match empty strings, the adapted Regex will return empty string matches
          // in the middle of surrogate pairs. As a best effort solution, we remove these fake positive matches.
          // (See also: https://github.com/sebastienros/esprima-dotnet/pull/364#issuecomment-1606045259)
          if (match.Success
              && fullUnicode
              && match.Length == 0
              && 0 < match.Index && match.Index < s.Length
              && char.IsHighSurrogate(s[match.Index - 1]) && char.IsLowSurrogate(s[match.Index]))
          {
              startAt++;
              continue;
          }

          break;
      }

      // A sticky pattern must match exactly at lastIndex.
      var success = match.Success && (!sticky || match.Index == (int) lastIndex);
      if (!success)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var e = match.Index + match.Length;

      // NOTE: Even in Unicode mode, we don't need to translate indices as .NET regexes always return code unit indices.
      if (global || sticky)
      {
          R.Set(JsRegExp.PropertyLastIndex, e, true);
      }

      return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
  }
  /// <summary>
  /// Builds the result array for a successful match: one element per regex group plus the
  /// <c>index</c>, <c>input</c> and <c>groups</c> properties, and <c>indices</c> when requested.
  /// </summary>
  /// <param name="engine">Engine used to create the result objects.</param>
  /// <param name="regex">The expression that produced <paramref name="match"/>; used to resolve group names.</param>
  /// <param name="match">The successful match.</param>
  /// <param name="s">The input string, stored as the <c>input</c> property.</param>
  /// <param name="fullUnicode">Not used by this method.</param>
  /// <param name="hasIndices">Whether to attach the <c>indices</c> array.</param>
  private static JsArray CreateReturnValueArray(
      Engine engine,
      Regex regex,
      Match match,
      string s,
      bool fullUnicode,
      bool hasIndices)
  {
      var groupCount = match.Groups.Count;
      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) groupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, s);

      ObjectInstance? groups = null;
      var indices = hasIndices ? new List<JsNumber[]?>(groupCount) : null;

      // Only needed for the indices array. Holds exactly one entry per capture group (group 0 excluded),
      // with an empty string for unnamed groups, so that entry [n - 1] always describes group n.
      var groupNames = hasIndices ? new List<string>(groupCount) : null;

      for (uint i = 0; i < groupCount; i++)
      {
          var capture = match.Groups[(int) i];
          var matched = capture.Success;

          var capturedValue = Undefined;
          if (matched)
          {
              capturedValue = capture.Value;
          }

          if (hasIndices)
          {
              if (matched)
              {
                  indices!.Add(new[] { JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length) });
              }
              else
              {
                  indices!.Add(null);
              }
          }

          var groupName = GetRegexGroupName(regex, (int) i);
          if (!string.IsNullOrWhiteSpace(groupName))
          {
              groups ??= OrdinaryObjectCreate(engine, null);
              groups.CreateDataPropertyOrThrow(groupName, capturedValue);
              groupNames?.Add(groupName!);
          }
          else if (i > 0)
          {
              // keep the name list positionally aligned with the capture groups
              groupNames?.Add(string.Empty);
          }

          array.SetIndexValue(i, capturedValue, updateLength: false);
      }

      array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

      if (hasIndices)
      {
          var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, groups is not null);
          array.CreateDataPropertyOrThrow("indices", indicesArray);
      }

      return array;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  /// </summary>
  /// <param name="engine">Engine used to create the result objects.</param>
  /// <param name="s">The matched input string; forwarded to <c>GetMatchIndexPair</c>.</param>
  /// <param name="indices">Start/end pair per group, <c>null</c> for groups that did not participate.</param>
  /// <param name="groupNames">Group names, looked up at position <c>captureIndex - 1</c>.</param>
  /// <param name="hasGroups">Whether a <c>groups</c> object should be created.</param>
  private static JsArray MakeMatchIndicesIndexPairArray(
      Engine engine,
      string s,
      List<JsNumber[]?> indices,
      List<string>? groupNames,
      bool hasGroups)
  {
      var total = indices.Count;
      var result = engine.Realm.Intrinsics.Array.Construct((uint) total);

      ObjectInstance? namedPairs = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
      result.CreateDataPropertyOrThrow("groups", namedPairs ?? Undefined);

      for (var position = 0; position < total; position++)
      {
          JsValue pair = Undefined;
          if (indices[position] is { } bounds)
          {
              pair = GetMatchIndexPair(engine, s, bounds);
          }

          result.Push(pair);

          // position 0 is the whole match and never carries a name
          if (position == 0)
          {
              continue;
          }

          var name = groupNames?[position - 1];
          if (string.IsNullOrWhiteSpace(name))
          {
              continue;
          }

          namedPairs!.CreateDataPropertyOrThrow(name!, pair);
      }

      return result;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getmatchindexpair
  /// </summary>
  /// <param name="engine">Engine whose array constructor creates the pair.</param>
  /// <param name="s">The matched input string; not used by this implementation.</param>
  /// <param name="match">The index values to place into the new array.</param>
  /// <returns>A new array created from the entries of <paramref name="match"/>.</returns>
  private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
      => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  /// <summary>
  /// Resolves the script-visible name of the capture group with the given number.
  /// </summary>
  /// <param name="regex">The expression that owns the group.</param>
  /// <param name="index">The group number; 0 denotes the whole match.</param>
  /// <returns>
  /// The (decoded) group name, or <c>null</c> for the whole match and for groups whose name
  /// is just their own number, i.e. groups that were not explicitly named.
  /// </returns>
  private static string? GetRegexGroupName(Regex regex, int index)
  {
      if (index == 0)
      {
          return null;
      }

      var groupNameFromNumber = regex.GroupNameFromNumber(index);
      if (IsDecimalRepresentationOf(groupNameFromNumber, index))
      {
          // regex defaults to index as group name when it's not a named group;
          // this must also hold for multi-digit group numbers (10, 11, ...)
          return null;
      }

      // The characters allowed in group names differs between the JS and .NET regex engines.
      // For example the group name "$group" is valid in JS but invalid in .NET.
      // As a workaround for this issue, the parser make an attempt to encode the problematic group names to
      // names which are valid in .NET and probably won't collide with other group names present in the pattern
      // (https://github.com/sebastienros/esprima-dotnet/blob/v3.0.0-rc-03/src/Esprima/Scanner.RegExpParser.cs#L942).
      // We need to decode such group names.
      const string encodedGroupNamePrefix = "__utf8_";
      if (groupNameFromNumber.StartsWith(encodedGroupNamePrefix, StringComparison.Ordinal))
      {
          try
          {
              var bytes = groupNameFromNumber.AsSpan(encodedGroupNamePrefix.Length).BytesFromHexString();
              groupNameFromNumber = Encoding.UTF8.GetString(bytes);
          }
          catch { /* intentional no-op */ }
      }

      return groupNameFromNumber;

      // Allocation-free check that name consists of exactly the decimal digits of number
      // (no sign, no leading zeros). Walks the digits from least to most significant.
      static bool IsDecimalRepresentationOf(string name, int number)
      {
          var remaining = number;
          for (var i = name.Length - 1; i >= 0; i--)
          {
              if (remaining == 0 || name[i] != (char) ('0' + remaining % 10))
              {
                  return false;
              }

              remaining /= 10;
          }

          return remaining == 0;
      }
  }
  /// <summary>
  /// Runs the built-in exec algorithm on the receiver.
  /// Calls the TypeError helper when <paramref name="thisObject"/> is not a <c>JsRegExp</c>;
  /// otherwise converts the first argument to a string and passes both to <c>RegExpBuiltinExec</c>.
  /// </summary>
  private JsValue Exec(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = thisObject as JsRegExp;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      // the receiver is validated before the argument is converted
      var input = TypeConverter.ToString(arguments.At(0));
      return RegExpBuiltinExec(regExp, input);
  }
  949. }
  950. }