RegExpPrototype.cs 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083
  1. #pragma warning disable CA1859 // Use concrete types when possible for improved performance -- most of prototype methods return JsValue
  2. using System.Text;
  3. using System.Text.RegularExpressions;
  4. using Jint.Native.Number;
  5. using Jint.Native.Object;
  6. using Jint.Native.String;
  7. using Jint.Native.Symbol;
  8. using Jint.Runtime;
  9. using Jint.Runtime.Descriptors;
  10. using Jint.Runtime.Interop;
  11. namespace Jint.Native.RegExp;
  12. internal sealed class RegExpPrototype : Prototype
  13. {
  // Property keys used for lookups on RegExp objects and on exec results.
  private static readonly JsString PropertyExec = new JsString("exec");
  private static readonly JsString PropertyIndex = new JsString("index");
  private static readonly JsString PropertyInput = new JsString("input");
  private static readonly JsString PropertySticky = new JsString("sticky");
  private static readonly JsString PropertyGlobal = new JsString("global");
  internal static readonly JsString PropertySource = new JsString("source");

  // Value returned by the "source" getter when it is read from the prototype object itself.
  private static readonly JsString DefaultSource = new JsString("(?:)");

  internal static readonly JsString PropertyFlags = new JsString("flags");
  private static readonly JsString PropertyGroups = new JsString("groups");
  private static readonly JsString PropertyIgnoreCase = new JsString("ignoreCase");
  private static readonly JsString PropertyMultiline = new JsString("multiline");
  private static readonly JsString PropertyDotAll = new JsString("dotAll");
  private static readonly JsString PropertyUnicode = new JsString("unicode");
  private static readonly JsString PropertyUnicodeSets = new JsString("unicodeSets");

  // Constructor exposed through the "constructor" property.
  private readonly RegExpConstructor _constructor;

  // Delegate installed as the built-in "exec"; HasDefaultExec compares against it.
  private readonly JsCallDelegate _defaultExec;
  /// <summary>
  /// Creates the RegExp prototype object.
  /// </summary>
  /// <param name="engine">Engine passed to the base prototype.</param>
  /// <param name="realm">Realm passed to the base prototype.</param>
  /// <param name="constructor">Constructor exposed through the "constructor" property.</param>
  /// <param name="objectPrototype">Object stored as this object's <c>_prototype</c>.</param>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype)
      : base(engine, realm)
  {
      _constructor = constructor;
      _prototype = objectPrototype;

      // Capture the delegate once so the same instance is installed as "exec"
      // and later compared against by HasDefaultExec.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Defines the prototype's string-keyed members (methods and getter-only accessors)
  /// and its well-known-symbol methods.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Writable, configurable data property holding a built-in function.
      PropertyDescriptor Method(string name, JsCallDelegate body, int length)
      {
          var function = new ClrFunction(Engine, name, body, length, lengthFlags);
          return new PropertyDescriptor(function, propertyFlags);
      }

      // Configurable accessor with a getter only; the setter slot is undefined.
      GetSetPropertyDescriptor Getter(string name, JsCallDelegate body)
      {
          return new GetSetPropertyDescriptor(
              get: new ClrFunction(Engine, name, body, 0, lengthFlags),
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      // Getter that reads a value from a JsRegExp receiver.
      GetSetPropertyDescriptor FlagGetter(string name, Func<JsRegExp, JsValue> valueExtractor)
      {
          return Getter(name, (thisObj, arguments) =>
          {
              // Reading the accessor from the prototype itself yields undefined rather than throwing.
              if (ReferenceEquals(thisObj, this))
              {
                  return Undefined;
              }

              var regExp = thisObj as JsRegExp;
              if (regExp is null)
              {
                  ExceptionHelper.ThrowTypeError(_realm);
              }

              return valueExtractor(regExp);
          });
      }

      var properties = new PropertyDictionary(14, checkExistingKeys: false)
      {
          ["constructor"] = new PropertyDescriptor(_constructor, propertyFlags),
          ["toString"] = Method("toString", ToRegExpString, 0),
          ["exec"] = Method("exec", _defaultExec, 1),
          ["test"] = Method("test", Test, 1),
          ["dotAll"] = FlagGetter("get dotAll", static r => r.DotAll),
          ["flags"] = Getter("get flags", Flags),
          ["global"] = FlagGetter("get global", static r => r.Global),
          ["hasIndices"] = FlagGetter("get hasIndices", static r => r.Indices),
          ["ignoreCase"] = FlagGetter("get ignoreCase", static r => r.IgnoreCase),
          ["multiline"] = FlagGetter("get multiline", static r => r.Multiline),
          ["source"] = Getter("get source", Source),
          ["sticky"] = FlagGetter("get sticky", static r => r.Sticky),
          ["unicode"] = FlagGetter("get unicode", static r => r.FullUnicode),
          ["unicodeSets"] = FlagGetter("get unicodeSets", static r => r.UnicodeSets)
      };
      SetProperties(properties);

      var symbols = new SymbolDictionary(5)
      {
          [GlobalSymbolRegistry.Match] = Method("[Symbol.match]", Match, 1),
          [GlobalSymbolRegistry.MatchAll] = Method("[Symbol.matchAll]", MatchAll, 1),
          [GlobalSymbolRegistry.Replace] = Method("[Symbol.replace]", Replace, 2),
          [GlobalSymbolRegistry.Search] = Method("[Symbol.search]", Search, 1),
          [GlobalSymbolRegistry.Split] = Method("[Symbol.split]", Split, 2)
      };
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  /// <remarks>
  /// Returns the default source when read from the prototype itself, throws a TypeError for
  /// any other non-RegExp receiver, and otherwise returns the pattern with forward slashes
  /// and line feeds escaped.
  /// </remarks>
  private JsValue Source(JsValue thisObject, JsCallArguments arguments)
  {
      if (ReferenceEquals(thisObject, this))
      {
          return DefaultSource;
      }

      var regExp = thisObject as JsRegExp;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      var pattern = regExp.Source;
      if (string.IsNullOrEmpty(pattern))
      {
          return JsRegExp.regExpForMatchingAllCharacters;
      }

      // Un-escape first so slashes that are already escaped are not escaped twice.
      var plainSlashes = pattern.Replace("\\/", "/");
      var escapedSlashes = plainSlashes.Replace("/", "\\/");
      return escapedSlashes.Replace("\n", "\\n");
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <remarks>
  /// <c>arguments[0]</c> is the subject string and <c>arguments[1]</c> is either a callable
  /// replacer or a value coerced to a replacement template. When the receiver is a
  /// <see cref="JsRegExp"/> with the default exec, is neither sticky nor global+unicode, and
  /// the template contains no '$', the work is delegated to the underlying .NET regex;
  /// otherwise matches are collected through <see cref="RegExpExec"/>.
  /// </remarks>
  private JsValue Replace(JsValue thisObject, JsCallArguments arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // Any '$' in a string template may start a substitution pattern; those are handled
      // by GetSubstitution on the slow path, so they rule out the fast path below.
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.Contains('$');
      }

      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.Contains('g');
      var fullUnicode = false;
      if (global)
      {
          fullUnicode = flags.Contains('u');
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is JsRegExp rei && rei.HasDefaultRegExpExec)
      {
          var count = global ? int.MaxValue : 1;
          string result;
          if (functionalReplace)
          {
              // Builds the replacer arguments: match, captures..., index, input[, groups].
              string Evaluator(Match match)
              {
                  var actualGroupCount = GetActualRegexGroupCount(rei, match);
                  var replacerArgs = new List<JsValue>(actualGroupCount + 2);
                  replacerArgs.Add(match.Value);

                  ObjectInstance? groups = null;
                  for (var i = 1; i < actualGroupCount; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Success ? capture.Value : Undefined);

                      var groupName = GetRegexGroupName(rei, i);
                      if (!string.IsNullOrWhiteSpace(groupName))
                      {
                          groups ??= OrdinaryObjectCreate(_engine, null);
                          groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
                      }
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);
                  if (groups is not null)
                  {
                      replacerArgs.Add(groups);
                  }

                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // Slow path, phase 1: collect every exec result (just the first when not global).
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // An empty match would be found again at the same position; step past it.
              // The write must throw on failure, as the specification requires.
              var thisIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, nextIndex, true);
          }
      }

      // Slow path, phase 2: stitch the untouched segments and the replacements together.
      var accumulatedResult = new StringBuilder();
      var nextSourcePosition = 0;
      var captures = new List<string>();
      foreach (var result in results)
      {
          var nCaptures = (int) result.GetLength();
          nCaptures = System.Math.Max(nCaptures - 1, 0);

          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;

          // The reported index comes from script-visible data, so clamp it into the string.
          var position = (int) TypeConverter.ToInteger(result.Get(PropertyIndex));
          position = System.Math.Max(System.Math.Min(position, lengthS), 0);

          captures.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = result.Get(n);
              captures.Add(!capN.IsUndefined() ? TypeConverter.ToString(capN) : "");
          }

          var namedCaptures = result.Get(PropertyGroups);

          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>(captures.Count + 4);
              replacerArgs.Add(matched);
              foreach (var capture in captures)
              {
                  replacerArgs.Add(capture);
              }

              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // Results that start before the already-consumed region are ignored.
          if (position >= nextSourcePosition)
          {
              accumulatedResult
                  .Append(s, nextSourcePosition, position - nextSourcePosition)
                  .Append(replacement);
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition < lengthS)
      {
          accumulatedResult.Append(s, nextSourcePosition, lengthS - nextSourcePosition);
      }

      return accumulatedResult.ToString();
  }
  /// <summary>
  /// Calls <paramref name="replacer"/> with an undefined receiver and the given
  /// arguments, then converts whatever it returns to a string.
  /// </summary>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var callArguments = replacerArgs.ToArray();
      var replacement = callable.Call(Undefined, callArguments);
      return TypeConverter.ToString(replacement);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The string being searched.</param>
  /// <param name="position">Index in <paramref name="str"/> where the match starts.</param>
  /// <param name="captures">Captured group values; entry 0 corresponds to <c>$1</c>.</param>
  /// <param name="namedCaptures">Object holding named groups, or undefined when there are none.</param>
  /// <param name="replacement">Replacement template that may contain <c>$</c> patterns.</param>
  /// <returns>The template with every recognised pattern expanded.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // Without a '$' there is nothing to expand; the template is the result.
      if (!replacement.Contains('$'))
      {
          return replacement;
      }

      // Only ASCII digits form capture references; other Unicode digits are literal text.
      static bool IsAsciiDigit(char ch) => ch is >= '0' and <= '9';

      // Patterns
      // $$ Inserts a "$".
      // $& Inserts the matched substring.
      // $` Inserts the portion of the string that precedes the matched substring.
      // $' Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string.
      // $<name> Inserts the named capture, when named captures are available.
      using var sb = new ValueStringBuilder(stackalloc char[128]);
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c != '$' || i >= replacement.Length - 1)
          {
              // Ordinary character, or a '$' that ends the template.
              sb.Append(c);
              continue;
          }

          c = replacement[++i];
          switch (c)
          {
              case '$':
                  sb.Append('$');
                  break;

              case '&':
                  sb.Append(matched);
                  break;

              case '`':
                  sb.Append(str.AsSpan(0, position));
                  break;

              case '\'':
              {
                  // A custom exec can report a match that runs past the end of the
                  // string; in that case nothing follows the match.
                  var tailPosition = position + matched.Length;
                  if (tailPosition < str.Length)
                  {
                      sb.Append(str.AsSpan(tailPosition));
                  }

                  break;
              }

              case '<':
              {
                  var gtPos = replacement.IndexOf('>', i + 1);
                  if (gtPos == -1 || namedCaptures.IsUndefined())
                  {
                      // Not a usable named reference; emit it literally.
                      sb.Append('$');
                      sb.Append(c);
                  }
                  else
                  {
                      var startIndex = i + 1;
                      var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                      var capture = namedCaptures.Get(groupName);
                      if (!capture.IsUndefined())
                      {
                          sb.Append(TypeConverter.ToString(capture));
                      }

                      // Skip over the whole "<name>" token.
                      i = gtPos;
                  }

                  break;
              }

              default:
              {
                  if (!IsAsciiDigit(c))
                  {
                      // Unknown replacement pattern.
                      sb.Append('$');
                      sb.Append(c);
                      break;
                  }

                  int matchNumber1 = c - '0';

                  // The match number can be one or two digits long.
                  int matchNumber2 = 0;
                  if (i < replacement.Length - 1 && IsAsciiDigit(replacement[i + 1]))
                  {
                      matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                  }

                  // Try the two digit capture first.
                  if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                  {
                      sb.Append(captures[matchNumber2 - 1]);
                      i++;
                  }
                  else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                  {
                      sb.Append(captures[matchNumber1 - 1]);
                  }
                  else
                  {
                      // Capture does not exist: keep the '$' and re-read the digit as plain text.
                      sb.Append('$');
                      i--;
                  }

                  break;
              }
          }
      }

      return sb.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <remarks>
  /// <c>arguments[0]</c> is the string to split and <c>arguments[1]</c> the optional limit.
  /// A non-unicode <see cref="JsRegExp"/> receiver with the default exec is split directly
  /// with the underlying .NET regex; everything else goes through <see cref="SplitSlow"/>.
  /// </remarks>
  private JsValue Split(JsValue thisObject, JsCallArguments arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);

      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.Contains('u');

      // The splitter is always constructed with the sticky flag present.
      var newFlags = flags.Contains('y') ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, [
          rx,
          newFlags
      ]);

      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);
      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          // An empty input yields [] when the splitter matches it and [""] otherwise.
          var emptyInputResult = _realm.Intrinsics.Array.ArrayCreate(0);
          if (RegExpExec(splitter, s).IsNull())
          {
              emptyInputResult.SetIndexValue(0, s, updateLength: true);
          }

          return emptyInputResult;
      }

      if (!unicodeMatching && rx is JsRegExp regExp && regExp.HasDefaultRegExpExec)
      {
          // we can take faster path
          if (string.Equals(regExp.Source, JsRegExp.regExpForMatchingAllCharacters, StringComparison.Ordinal))
          {
              // if empty string, just a string split
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, (uint) s.Length);
          }

          var parts = _realm.Intrinsics.Array.Construct(Arguments.Empty);
          var segmentStart = 0;
          uint nextSlot = 0;
          for (var match = regExp.Value.Match(s, 0); match.Success; match = match.NextMatch())
          {
              // Empty matches at either end of the input, or right where the previous
              // separator ended, do not separate anything.
              var emptyMatch = match.Length == 0;
              if (emptyMatch && (match.Index == 0 || match.Index == s.Length || match.Index == segmentStart))
              {
                  continue;
              }

              // Text between the previous separator and this one.
              parts.SetIndexValue(nextSlot++, s.Substring(segmentStart, match.Index - segmentStart), updateLength: true);
              if (nextSlot >= lim)
              {
                  return parts;
              }

              segmentStart = match.Index + match.Length;

              // Captured groups of the separator are spliced into the output as well.
              var actualGroupCount = GetActualRegexGroupCount(regExp, match);
              for (var groupIndex = 1; groupIndex < actualGroupCount; groupIndex++)
              {
                  var group = match.Groups[groupIndex];
                  JsValue item = group.Success ? group.Value : Undefined;
                  parts.SetIndexValue(nextSlot++, item, updateLength: true);
                  if (nextSlot >= lim)
                  {
                      return parts;
                  }
              }
          }

          // Add the last part of the split
          parts.SetIndexValue(nextSlot, s.Substring(segmentStart), updateLength: true);
          return parts;
      }

      return SplitSlow(s, splitter, unicodeMatching, 0, lim);
  }
  /// <summary>
  /// Splits <paramref name="s"/> by repeatedly running <paramref name="splitter"/> through
  /// <see cref="RegExpExec"/> at increasing positions.
  /// </summary>
  /// <param name="s">String to split.</param>
  /// <param name="splitter">RegExp-like object used to find separators.</param>
  /// <param name="unicodeMatching">Whether positions advance by code point instead of code unit.</param>
  /// <param name="lengthA">Index of the first array slot to fill.</param>
  /// <param name="lim">Array length at which splitting stops.</param>
  /// <returns>Array of substrings interleaved with the separator's captures.</returns>
  private JsArray SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var a = _realm.Intrinsics.Array.ArrayCreate(0);
      var size = (ulong) s.Length;
      ulong segmentStart = 0;
      ulong searchIndex = 0;

      while (searchIndex < size)
      {
          splitter.Set(JsRegExp.PropertyLastIndex, searchIndex, true);
          var z = RegExpExec(splitter, s);

          // No match here, or a match that ends where the current segment starts:
          // either way move on to the next position.
          ulong matchEnd = 0;
          var skip = z.IsNull();
          if (!skip)
          {
              matchEnd = System.Math.Min(TypeConverter.ToLength(splitter.Get(JsRegExp.PropertyLastIndex)), size);
              skip = matchEnd == segmentStart;
          }

          if (skip)
          {
              searchIndex = AdvanceStringIndex(s, searchIndex, unicodeMatching);
              continue;
          }

          var segment = s.Substring((int) segmentStart, (int) (searchIndex - segmentStart));
          a.SetIndexValue(lengthA, segment, updateLength: true);
          lengthA++;
          if (lengthA == lim)
          {
              return a;
          }

          segmentStart = matchEnd;

          // Append the separator's captures (everything after element 0 of the result).
          var numberOfCaptures = (int) TypeConverter.ToLength(z.Get(CommonProperties.Length));
          numberOfCaptures = System.Math.Max(numberOfCaptures - 1, 0);
          for (var i = 1; i <= numberOfCaptures; i++)
          {
              a.SetIndexValue(lengthA, z.Get(i), updateLength: true);
              lengthA++;
              if (lengthA == lim)
              {
                  return a;
              }
          }

          searchIndex = segmentStart;
      }

      // Whatever follows the last separator is the final element.
      a.SetIndexValue(lengthA, s.Substring((int) segmentStart), updateLength: true);
      return a;
  }
  /// <summary>
  /// Getter for "flags": reads each flag property from the receiver and returns the
  /// letters of the truthy ones in the fixed order d, g, i, m, s, u, v, y.
  /// </summary>
  private JsValue Flags(JsValue thisObject, JsCallArguments arguments)
  {
      JsValue target = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.flags");
      var letters = new StringBuilder(8);

      // Appends the letter when the named property on the receiver is truthy.
      void AppendIfSet(JsValue property, char flag)
      {
          if (TypeConverter.ToBoolean(target.Get(property)))
          {
              letters.Append(flag);
          }
      }

      AppendIfSet("hasIndices", 'd');
      AppendIfSet(PropertyGlobal, 'g');
      AppendIfSet(PropertyIgnoreCase, 'i');
      AppendIfSet(PropertyMultiline, 'm');
      AppendIfSet(PropertyDotAll, 's');
      AppendIfSet(PropertyUnicode, 'u');
      AppendIfSet(PropertyUnicodeSets, 'v');
      AppendIfSet(PropertySticky, 'y');

      return letters.ToString();
  }
  /// <summary>
  /// Implements toString: reads "source" and then "flags" from the receiver and
  /// returns them formatted as <c>/source/flags</c>.
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObject, JsCallArguments arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.toString");
      var source = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagLetters = TypeConverter.ToString(regExp.Get(PropertyFlags));
      return string.Concat("/", source, "/", flagLetters);
  }
  /// <summary>
  /// Implements test: reports whether the receiver matches the string in
  /// <c>arguments[0]</c>.
  /// </summary>
  /// <remarks>
  /// A non-unicode <see cref="JsRegExp"/> that still uses the default exec is evaluated
  /// directly against the underlying .NET regex. Every other receiver, including one
  /// whose exec has been replaced, goes through <see cref="RegExpExec"/>.
  /// </remarks>
  private JsValue Test(JsValue thisObject, JsCallArguments arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths; they bypass exec, so they apply only while exec is the built-in
      if (r is JsRegExp regExp && !regExp.FullUnicode && regExp.HasDefaultRegExpExec)
      {
          if (!regExp.Sticky && !regExp.Global)
          {
              regExp.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return regExp.Value.IsMatch(s);
          }

          // Compare before narrowing to int so a huge lastIndex cannot wrap around.
          var lastIndex = TypeConverter.ToLength(regExp.Get(JsRegExp.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              // Starting past the end is a failed match, which resets lastIndex.
              regExp.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          // lastIndex == s.Length is still attempted: an empty match can succeed there.
          var startAt = (int) lastIndex;
          var m = regExp.Value.Match(s, startAt);
          if (!m.Success || (regExp.Sticky && m.Index != startAt))
          {
              regExp.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          regExp.Set(JsRegExp.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  /// <remarks>
  /// Runs one exec from position 0 and returns the result's "index", or -1 when there is
  /// no match. The receiver's lastIndex is put back to its previous value afterwards.
  /// </remarks>
  private JsValue Search(JsValue thisObject, JsCallArguments arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // Start from 0, writing only when the stored value actually differs.
      var previousLastIndex = rx.Get(JsRegExp.PropertyLastIndex);
      if (!SameValue(previousLastIndex, 0))
      {
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, s);

      // Restore the previous value if exec changed it.
      var currentLastIndex = rx.Get(JsRegExp.PropertyLastIndex);
      if (!SameValue(currentLastIndex, previousLastIndex))
      {
          rx.Set(JsRegExp.PropertyLastIndex, previousLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <remarks>
  /// Without the 'g' flag this is a single <see cref="RegExpExec"/>. With it, all matches are
  /// collected into an array, or null is returned when there are none. A non-unicode
  /// <see cref="JsRegExp"/> with the default exec is enumerated directly through the .NET
  /// regex; everything else goes through <see cref="MatchSlow"/>.
  /// </remarks>
  private JsValue Match(JsValue thisObject, JsCallArguments arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.Contains('g');
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.Contains('u');
      rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is JsRegExp rei
          && rei.HasDefaultRegExpExec)
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              // A sticky match must begin exactly where the previous one ended, or one
              // position later after an empty match, since lastIndex is then advanced.
              uint matchCount = 0;
              var expectedIndex = 0;
              for (var match = rei.Value.Match(s);
                   match.Success && match.Index == expectedIndex;
                   match = match.NextMatch())
              {
                  a.SetIndexValue(matchCount++, match.Value, updateLength: false);
                  expectedIndex = match.Index + System.Math.Max(match.Length, 1);
              }

              if (matchCount == 0)
              {
                  return Null;
              }

              a.SetLength(matchCount);
              return a;
          }

          var matches = rei.Value.Matches(s);
          if (matches.Count == 0)
          {
              return Null;
          }

          a.EnsureCapacity((uint) matches.Count);
          a.SetLength((uint) matches.Count);
          for (var i = 0; i < matches.Count; i++)
          {
              a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
          }

          return a;
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Collects all matches by calling <see cref="RegExpExec"/> until it returns null.
  /// </summary>
  /// <param name="rx">RegExp-like object to execute.</param>
  /// <param name="s">String to match against.</param>
  /// <param name="fullUnicode">Whether lastIndex advances by code point after an empty match.</param>
  /// <returns>An array of the matched strings, or null when nothing matched.</returns>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var a = _realm.Intrinsics.Array.ArrayCreate(0);
      uint n = 0;

      for (var execResult = RegExpExec(rx, s); !execResult.IsNull(); execResult = RegExpExec(rx, s))
      {
          var matchStr = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          a.SetIndexValue(n, matchStr, updateLength: false);

          if (matchStr.Length == 0)
          {
              // Step past an empty match so the next exec does not find it again.
              var thisIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, nextIndex, true);
          }

          n++;
      }

      a.SetLength(n);
      return n == 0 ? Null : a;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  /// <remarks>
  /// Constructs a new matcher from the receiver and its flags via the species constructor,
  /// copies the receiver's lastIndex onto it, and returns a RegExp string iterator over
  /// the string in <c>arguments[0]</c>.
  /// </remarks>
  private JsValue MatchAll(JsValue thisObject, JsCallArguments arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.matchAll");
      var s = TypeConverter.ToString(arguments.At(0));

      var c = SpeciesConstructor(r, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(r.Get(PropertyFlags));
      var matcher = Construct(c, [
          r,
          flags
      ]);

      // The matcher starts from wherever the receiver currently is.
      var startIndex = TypeConverter.ToLength(r.Get(JsRegExp.PropertyLastIndex));
      matcher.Set(JsRegExp.PropertyLastIndex, startIndex, true);

      var isGlobal = flags.Contains('g');
      var isFullUnicode = flags.Contains('u');
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, s, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index that follows <paramref name="index"/> in <paramref name="s"/>.
  /// </summary>
  /// <param name="s">String being traversed.</param>
  /// <param name="index">Current code unit index.</param>
  /// <param name="unicode">
  /// When true, a high surrogate immediately followed by a low surrogate is stepped over
  /// as a single unit.
  /// </param>
  /// <returns><c>index + 2</c> for a surrogate pair in unicode mode; otherwise <c>index + 1</c>.</returns>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      // A pair needs two code units, so it cannot start at the last position or beyond.
      var pairAtIndex = unicode
          && next < (ulong) s.Length
          && char.IsHighSurrogate(s[(int) index])
          && char.IsLowSurrogate(s[(int) next]);

      return pairAtIndex ? index + 2 : next;
  }
  /// <summary>
  /// Executes <paramref name="r"/> against <paramref name="s"/>. A callable "exec" property is
  /// used unless <paramref name="r"/> is a <see cref="JsRegExp"/> that still has the default
  /// exec; otherwise the built-in matcher runs.
  /// </summary>
  /// <returns>The exec result: an object, or null when there is no match.</returns>
  /// <remarks>
  /// Throws a TypeError when a custom exec returns something other than an object or null,
  /// or when there is no callable exec and <paramref name="r"/> is not a <see cref="JsRegExp"/>.
  /// </remarks>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      var regExp = r as JsRegExp;
      var usesBuiltinExec = regExp is not null && regExp.HasDefaultRegExpExec;

      if (!usesBuiltinExec && r.Get(PropertyExec) is ICallable customExec)
      {
          var execResult = customExec.Call(r, s);
          if (!execResult.IsNull() && !execResult.IsObject())
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return execResult;
      }

      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return RegExpBuiltinExec(regExp, s);
  }
  /// <summary>
  /// True while this prototype's "exec" property is a <see cref="ClrFunction"/> wrapping
  /// the built-in exec delegate captured in the constructor.
  /// </summary>
  internal bool HasDefaultExec
  {
      get
      {
          if (Get(PropertyExec) is not ClrFunction execFunction)
          {
              return false;
          }

          return execFunction._func == _defaultExec;
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <param name="R">Regular expression to execute.</param>
  /// <param name="s">String to match against.</param>
  /// <returns>The match result array, or null when there is no match.</returns>
  private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));
      var global = R.Global;
      var sticky = R.Sticky;

      // Only global or sticky expressions honour and update lastIndex.
      var stateful = global || sticky;
      if (!stateful)
      {
          lastIndex = 0;
      }

      if (string.Equals(R.Source, JsRegExp.regExpForMatchingAllCharacters, StringComparison.Ordinal)) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var emptyMatch = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          emptyMatch.FastSetDataProperty(PropertyIndex._value, lastIndex);
          emptyMatch.FastSetDataProperty(PropertyInput._value, s);
          emptyMatch.SetIndexValue(0, JsString.Empty, updateLength: false);
          return emptyMatch;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!stateful && !fullUnicode && !hasIndices)
      {
          // we can take the non-stateful fast path which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          return m.Success
              ? CreateReturnValueArray(R, m, s, fullUnicode: false, hasIndices: false)
              : Null;
      }

      // the stateful version
      if (lastIndex > length)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      // The conversion of Unicode regex patterns to .NET Regex has some flaws:
      // when the pattern may match empty strings, the adapted Regex will return empty string matches
      // in the middle of surrogate pairs. As a best effort solution, we remove these fake positive matches.
      // (See also: https://github.com/sebastienros/esprima-dotnet/pull/364#issuecomment-1606045259)
      bool SplitsSurrogatePair(Match candidate)
      {
          return candidate.Success
              && fullUnicode
              && candidate.Length == 0
              && 0 < candidate.Index && candidate.Index < s.Length
              && char.IsHighSurrogate(s[candidate.Index - 1]) && char.IsLowSurrogate(s[candidate.Index]);
      }

      var startAt = (int) lastIndex;
      var match = matcher.Match(s, startAt);
      while (SplitsSurrogatePair(match))
      {
          startAt++;
          match = matcher.Match(s, startAt);
      }

      // A sticky expression must match exactly at lastIndex.
      var matchedAtRequiredPosition = match.Success && (!sticky || match.Index == (int) lastIndex);
      if (!matchedAtRequiredPosition)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      // NOTE: Even in Unicode mode, we don't need to translate indices as .NET regexes always return code unit indices.
      if (stateful)
      {
          var endIndex = match.Index + match.Length;
          R.Set(JsRegExp.PropertyLastIndex, endIndex, true);
      }

      return CreateReturnValueArray(R, match, s, fullUnicode, hasIndices);
  }
  /// <summary>
  /// Builds the result array for a successful match: one element per capture group,
  /// the index, input and groups properties, and "indices" when <paramref name="hasIndices"/> is set.
  /// </summary>
  /// <param name="rei">The regular expression that produced the match.</param>
  /// <param name="match">The successful .NET match.</param>
  /// <param name="s">The input string that was matched.</param>
  /// <param name="fullUnicode">Whether the expression is in Unicode mode (not used by this method).</param>
  /// <param name="hasIndices">Whether the "indices" property should be produced.</param>
  /// <returns>The populated result array.</returns>
  private static JsArray CreateReturnValueArray(
      JsRegExp rei,
      Match match,
      string s,
      bool fullUnicode,
      bool hasIndices)
  {
      var engine = rei.Engine;
      var actualGroupCount = GetActualRegexGroupCount(rei, match);
      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) actualGroupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, s);

      ObjectInstance? groups = null;

      // Both lists are only needed for the "indices" property. groupNames is kept aligned
      // with capture positions: entry (i - 1) belongs to capture group i and is empty for
      // unnamed groups, because MakeMatchIndicesIndexPairArray looks names up by position.
      var indices = hasIndices ? new List<JsNumber[]?>(actualGroupCount) : null;
      var groupNames = hasIndices ? new List<string>(actualGroupCount) : null;

      for (uint i = 0; i < actualGroupCount; i++)
      {
          var capture = match.Groups[(int) i];
          var capturedValue = Undefined;
          if (capture?.Success == true)
          {
              capturedValue = capture.Value;
              indices?.Add([JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length)]);
          }
          else
          {
              // group did not participate in the match
              indices?.Add(null);
          }

          var groupName = GetRegexGroupName(rei, (int) i);
          if (!string.IsNullOrWhiteSpace(groupName))
          {
              groups ??= OrdinaryObjectCreate(engine, null);
              groups.CreateDataPropertyOrThrow(groupName, capturedValue);
              groupNames?.Add(groupName!);
          }
          else if (i > 0)
          {
              // placeholder keeps later names at their capture position
              groupNames?.Add(string.Empty);
          }

          array.SetIndexValue(i, capturedValue, updateLength: false);
      }

      array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

      if (hasIndices)
      {
          // a groups object exists exactly when at least one named group was seen
          var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, groups is not null);
          array.CreateDataPropertyOrThrow("indices", indicesArray);
      }

      return array;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  /// </summary>
  /// <remarks>
  /// For every position after the first, the name at <c>groupNames[position - 1]</c> is consulted;
  /// blank names are skipped.
  /// </remarks>
  private static JsArray MakeMatchIndicesIndexPairArray(
      Engine engine,
      string s,
      List<JsNumber[]?> indices,
      List<string>? groupNames,
      bool hasGroups)
  {
      var count = indices.Count;
      var result = engine.Realm.Intrinsics.Array.Construct((uint) count);

      ObjectInstance? namedPairs = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
      result.CreateDataPropertyOrThrow("groups", namedPairs ?? Undefined);

      for (var position = 0; position < count; position++)
      {
          var bounds = indices[position];

          JsValue pair;
          if (bounds is null)
          {
              pair = Undefined;
          }
          else
          {
              pair = GetMatchIndexPair(engine, s, bounds);
          }

          result.Push(pair);

          if (position == 0)
          {
              // the first entry is the whole match and never has a name
              continue;
          }

          var name = groupNames?[position - 1];
          if (string.IsNullOrWhiteSpace(name))
          {
              continue;
          }

          namedPairs!.CreateDataPropertyOrThrow(name!, pair);
      }

      return result;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getmatchindexpair
  /// </summary>
  /// <remarks>
  /// Creates an array from the entries of <paramref name="match"/>; <paramref name="s"/> is not read here.
  /// </remarks>
  private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
      => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  /// <summary>
  /// Returns the group count reported by the parse result when parsing succeeded,
  /// otherwise the number of groups in the .NET match.
  /// </summary>
  private static int GetActualRegexGroupCount(JsRegExp rei, Match match)
  {
      if (rei.ParseResult.Success)
      {
          return rei.ParseResult.ActualRegexGroupCount;
      }

      return match.Groups.Count;
  }
  /// <summary>
  /// Resolves the name of the capture group at <paramref name="index"/>.
  /// </summary>
  /// <param name="rei">The regular expression owning the group.</param>
  /// <param name="index">The group number; 0 is the whole match.</param>
  /// <returns>The group name, or <c>null</c> for the whole match and for unnamed groups.</returns>
  private static string? GetRegexGroupName(JsRegExp rei, int index)
  {
      if (index == 0)
      {
          return null;
      }

      var regex = rei.Value;
      if (rei.ParseResult.Success)
      {
          return rei.ParseResult.GetRegexGroupName(index);
      }

      var groupNameFromNumber = regex.GroupNameFromNumber(index);

      // regex defaults to index as group name when it's not a named group; compare against the
      // full decimal form so that groups numbered 10 and above are recognised as unnamed too
      var defaultName = index.ToString(System.Globalization.CultureInfo.InvariantCulture);
      if (string.Equals(groupNameFromNumber, defaultName, StringComparison.Ordinal))
      {
          return null;
      }

      return groupNameFromNumber;
  }
  /// <summary>
  /// Runs the built-in exec against the first argument converted to a string.
  /// Raises a type error when <paramref name="thisObject"/> is not a <see cref="JsRegExp"/>.
  /// </summary>
  private JsValue Exec(JsValue thisObject, JsCallArguments arguments)
  {
      var regExp = thisObject as JsRegExp;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      // the receiver is validated before the argument is converted
      return RegExpBuiltinExec(regExp, TypeConverter.ToString(arguments.At(0)));
  }
  928. }