RegExpPrototype.cs 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086
  1. #pragma warning disable CA1859 // Use concrete types when possible for improved performance -- most of prototype methods return JsValue
  2. using System.Text;
  3. using System.Text.RegularExpressions;
  4. using Jint.Collections;
  5. using Jint.Native.Number;
  6. using Jint.Native.Object;
  7. using Jint.Native.String;
  8. using Jint.Native.Symbol;
  9. using Jint.Runtime;
  10. using Jint.Runtime.Descriptors;
  11. using Jint.Runtime.Interop;
  12. namespace Jint.Native.RegExp;
  13. internal sealed class RegExpPrototype : Prototype
  14. {
  // Property keys used by the methods of this prototype, cached as JsString instances.
  private static readonly JsString PropertyExec = new JsString("exec");
  private static readonly JsString PropertyIndex = new JsString("index");
  private static readonly JsString PropertyInput = new JsString("input");
  private static readonly JsString PropertySticky = new JsString("sticky");
  private static readonly JsString PropertyGlobal = new JsString("global");
  internal static readonly JsString PropertySource = new JsString("source");

  // Value returned by the "source" getter when it is invoked on the prototype object itself.
  private static readonly JsString DefaultSource = new JsString("(?:)");

  internal static readonly JsString PropertyFlags = new JsString("flags");
  private static readonly JsString PropertyGroups = new JsString("groups");
  private static readonly JsString PropertyIgnoreCase = new JsString("ignoreCase");
  private static readonly JsString PropertyMultiline = new JsString("multiline");
  private static readonly JsString PropertyDotAll = new JsString("dotAll");
  private static readonly JsString PropertyUnicode = new JsString("unicode");
  private static readonly JsString PropertyUnicodeSets = new JsString("unicodeSets");

  // Constructor object exposed through the "constructor" property.
  private readonly RegExpConstructor _constructor;

  // Delegate registered as the built-in "exec"; HasDefaultExec compares against it.
  private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the RegExp prototype object.
  /// </summary>
  /// <param name="engine">Engine forwarded to the base class.</param>
  /// <param name="realm">Realm forwarded to the base class.</param>
  /// <param name="constructor">Constructor object published as the "constructor" property.</param>
  /// <param name="objectPrototype">Object stored as this object's own prototype.</param>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype) : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // Keep one delegate instance around so the installed "exec" can be recognised later.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Installs the string-keyed properties (methods and flag accessors) and the
  /// well-known-symbol methods on the prototype object.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;

      // Wraps a CLR getter in a configurable accessor descriptor without a setter.
      GetSetPropertyDescriptor CreateGetter(string name, Func<JsValue, JsValue[], JsValue> getter)
      {
          return new GetSetPropertyDescriptor(
              get: new ClrFunction(Engine, name, getter, 0, lengthFlags),
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      // Accessor for a value read from a JsRegExp receiver. When the receiver is the prototype
      // itself the getter yields protoValue (or undefined); any other receiver is a TypeError.
      GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<JsRegExp, JsValue> valueExtractor, JsValue? protoValue = null)
      {
          JsValue Getter(JsValue thisObj, JsValue[] arguments)
          {
              if (!ReferenceEquals(thisObj, this))
              {
                  var regExp = thisObj as JsRegExp;
                  if (regExp is null)
                  {
                      ExceptionHelper.ThrowTypeError(_realm);
                  }

                  return valueExtractor(regExp);
              }

              return protoValue ?? Undefined;
          }

          return CreateGetter(name, Getter);
      }

      // Entries are added in a fixed order; keep it stable.
      var properties = new PropertyDictionary(14, checkExistingKeys: false);
      properties["constructor"] = new PropertyDescriptor(_constructor, propertyFlags);
      properties["toString"] = new PropertyDescriptor(new ClrFunction(Engine, "toString", ToRegExpString, 0, lengthFlags), propertyFlags);
      properties["exec"] = new PropertyDescriptor(new ClrFunction(Engine, "exec", _defaultExec, 1, lengthFlags), propertyFlags);
      properties["test"] = new PropertyDescriptor(new ClrFunction(Engine, "test", Test, 1, lengthFlags), propertyFlags);
      properties["dotAll"] = CreateGetAccessorDescriptor("get dotAll", static r => r.DotAll);
      properties["flags"] = CreateGetter("get flags", Flags);
      properties["global"] = CreateGetAccessorDescriptor("get global", static r => r.Global);
      properties["hasIndices"] = CreateGetAccessorDescriptor("get hasIndices", static r => r.Indices);
      properties["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", static r => r.IgnoreCase);
      properties["multiline"] = CreateGetAccessorDescriptor("get multiline", static r => r.Multiline);
      properties["source"] = CreateGetter("get source", Source);
      properties["sticky"] = CreateGetAccessorDescriptor("get sticky", static r => r.Sticky);
      properties["unicode"] = CreateGetAccessorDescriptor("get unicode", static r => r.FullUnicode);
      properties["unicodeSets"] = CreateGetAccessorDescriptor("get unicodeSets", static r => r.UnicodeSets);
      SetProperties(properties);

      var symbols = new SymbolDictionary(5);
      symbols[GlobalSymbolRegistry.Match] = new PropertyDescriptor(new ClrFunction(Engine, "[Symbol.match]", Match, 1, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.MatchAll] = new PropertyDescriptor(new ClrFunction(Engine, "[Symbol.matchAll]", MatchAll, 1, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.Replace] = new PropertyDescriptor(new ClrFunction(Engine, "[Symbol.replace]", Replace, 2, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.Search] = new PropertyDescriptor(new ClrFunction(Engine, "[Symbol.search]", Search, 1, lengthFlags), propertyFlags);
      symbols[GlobalSymbolRegistry.Split] = new PropertyDescriptor(new ClrFunction(Engine, "[Symbol.split]", Split, 2, lengthFlags), propertyFlags);
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// Getter for the "source" property.
  /// </summary>
  /// <param name="thisObject">Receiver: the prototype itself or a <c>JsRegExp</c>; anything else is a TypeError.</param>
  /// <param name="arguments">Unused.</param>
  /// <returns>The pattern text with forward slashes and line feeds escaped.</returns>
  private JsValue Source(JsValue thisObject, JsValue[] arguments)
  {
      // Called on the prototype object itself.
      if (ReferenceEquals(thisObject, this))
      {
          return DefaultSource;
      }

      var regExp = thisObject as JsRegExp;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      var pattern = regExp.Source;
      if (string.IsNullOrEmpty(pattern))
      {
          return JsRegExp.regExpForMatchingAllCharacters;
      }

      // Un-escape already escaped slashes first so that they are not escaped twice.
      var withPlainSlashes = pattern.Replace("\\/", "/");
      var withEscapedSlashes = withPlainSlashes.Replace("/", "\\/");
      return withEscapedSlashes.Replace("\n", "\\n");
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to search, [1] a replacement template or a replacer function.</param>
  /// <returns>The input string with the first match (or all matches when the "g" flag is set) replaced.</returns>
  private JsValue Replace(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // we need heavier logic if we have named captures
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.Contains('$');
      }

      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.Contains('g');
      var fullUnicode = false;

      if (global)
      {
          fullUnicode = flags.Contains('u');
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is JsRegExp rei && rei.HasDefaultRegExpExec)
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              string Evaluator(Match match)
              {
                  var actualGroupCount = GetActualRegexGroupCount(rei, match);
                  var replacerArgs = new List<JsValue>(actualGroupCount + 2);
                  replacerArgs.Add(match.Value);

                  ObjectInstance? groups = null;
                  for (var i = 1; i < actualGroupCount; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Success ? capture.Value : Undefined);

                      var groupName = GetRegexGroupName(rei, i);
                      if (!string.IsNullOrWhiteSpace(groupName))
                      {
                          groups ??= OrdinaryObjectCreate(_engine, null);
                          groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
                      }
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);
                  if (groups is not null)
                  {
                      replacerArgs.Add(groups);
                  }

                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // Generic path: collect every exec result first, then build the output.
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // An empty match would otherwise be found again at the same position.
              var thisIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);

              // The specification performs this Set with Throw = true.
              rx.Set(JsRegExp.PropertyLastIndex, nextIndex, true);
          }
      }

      var accumulatedResult = new StringBuilder();
      var nextSourcePosition = 0;
      var captures = new List<string>();
      var captureValues = new List<JsValue>();

      for (var i = 0; i < results.Count; i++)
      {
          var result = results[i];
          var nCaptures = (int) result.GetLength();
          nCaptures = System.Math.Max(nCaptures - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;

          // Clamp to [0, lengthS] before narrowing to int, so that very large or infinite
          // "index" values end up at the end of the string instead of wrapping around.
          var position = (int) System.Math.Max(System.Math.Min(TypeConverter.ToInteger(result.Get(PropertyIndex)), lengthS), 0);

          captures.Clear();
          captureValues.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = result.Get(n);
              if (capN.IsUndefined())
              {
                  // Template substitution treats a missing capture as the empty string,
                  // a replacer function receives undefined.
                  captures.Add("");
                  captureValues.Add(Undefined);
              }
              else
              {
                  var value = TypeConverter.ToString(capN);
                  captures.Add(value);
                  captureValues.Add(value);
              }
          }

          var namedCaptures = result.Get(PropertyGroups);
          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>(captureValues.Count + 4);
              replacerArgs.Add(matched);
              replacerArgs.AddRange(captureValues);
              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // Results positioned before the text consumed so far are ignored.
          if (position >= nextSourcePosition)
          {
              accumulatedResult.Append(s, nextSourcePosition, position - nextSourcePosition);
              accumulatedResult.Append(replacement);
              nextSourcePosition = position + matchLength;
          }
      }

      // Copy whatever follows the last replaced match.
      if (nextSourcePosition < lengthS)
      {
          accumulatedResult.Append(s, nextSourcePosition, lengthS - nextSourcePosition);
      }

      return accumulatedResult.ToString();
  }
  /// <summary>
  /// Invokes a replacer function with an undefined receiver and converts its result to a string.
  /// </summary>
  /// <param name="replacer">Value that is cast to <c>ICallable</c>.</param>
  /// <param name="replacerArgs">Arguments handed to the function; copied to an array for the call.</param>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var callArguments = replacerArgs.ToArray();
      return TypeConverter.ToString(callable.Call(Undefined, callArguments));
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// Expands the <c>$</c> patterns of a replacement template.
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The string that was searched.</param>
  /// <param name="position">Index of the match inside <paramref name="str"/>.</param>
  /// <param name="captures">Values of the numbered capture groups (group 1 is at index 0).</param>
  /// <param name="namedCaptures">Object holding named captures, or undefined.</param>
  /// <param name="replacement">The replacement template.</param>
  /// <returns>The template with all recognised patterns substituted.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // If there is no pattern, replace the pattern as is.
      if (!replacement.Contains('$'))
      {
          return replacement;
      }

      // Only the ASCII digits 0-9 take part in $n / $nn; char.IsDigit would also accept
      // other Unicode decimal digits for which "c - '0'" is meaningless.
      static bool IsDecimalDigit(char ch) => ch >= '0' && ch <= '9';

      // Patterns
      // $$ Inserts a "$".
      // $& Inserts the matched substring.
      // $` Inserts the portion of the string that precedes the matched substring.
      // $' Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string, provided the first argument was a RegExp object.
      // $<name> Inserts the named capture, provided named captures are available.
      using var sb = new ValueStringBuilder(stackalloc char[128]);
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c == '$' && i < replacement.Length - 1)
          {
              c = replacement[++i];
              switch (c)
              {
                  case '$':
                      sb.Append('$');
                      break;
                  case '&':
                      sb.Append(matched);
                      break;
                  case '`':
                      sb.Append(str.AsSpan(0, position));
                      break;
                  case '\'':
                  {
                      // When the match reaches (or claims to extend past) the end of the
                      // string there is no tail; the replacement is the empty string.
                      var tailPos = position + matched.Length;
                      if (tailPos < str.Length)
                      {
                          sb.Append(str.AsSpan(tailPos));
                      }
                      break;
                  }
                  case '<':
                  {
                      var gtPos = replacement.IndexOf('>', i + 1);
                      if (gtPos == -1 || namedCaptures.IsUndefined())
                      {
                          // Not a usable group reference: emit the two characters literally.
                          sb.Append('$');
                          sb.Append(c);
                      }
                      else
                      {
                          var startIndex = i + 1;
                          var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                          var capture = namedCaptures.Get(groupName);
                          if (!capture.IsUndefined())
                          {
                              sb.Append(TypeConverter.ToString(capture));
                          }
                          i = gtPos;
                      }
                      break;
                  }
                  default:
                  {
                      if (IsDecimalDigit(c))
                      {
                          int matchNumber1 = c - '0';

                          // The match number can be one or two digits long.
                          int matchNumber2 = 0;
                          if (i < replacement.Length - 1 && IsDecimalDigit(replacement[i + 1]))
                          {
                              matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                          }

                          // Try the two digit capture first.
                          if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                          {
                              // Two digit capture replacement.
                              sb.Append(TypeConverter.ToString(captures[matchNumber2 - 1]));
                              i++;
                          }
                          else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                          {
                              // Single digit capture replacement.
                              sb.Append(TypeConverter.ToString(captures[matchNumber1 - 1]));
                          }
                          else
                          {
                              // Capture does not exist: emit '$' and step back so the digit
                              // is copied as an ordinary character by the next iteration.
                              sb.Append('$');
                              i--;
                          }
                      }
                      else
                      {
                          // Unknown replacement pattern.
                          sb.Append('$');
                          sb.Append(c);
                      }
                      break;
                  }
              }
          }
          else
          {
              sb.Append(c);
          }
      }

      return sb.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to split, [1] optional maximum number of items.</param>
  /// <returns>An array holding the pieces of the string (and captured groups).</returns>
  private JsValue Split(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.Contains('u');

      // The splitter always works in sticky mode.
      var newFlags = flags.Contains('y') ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });

      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          // An empty input yields [] when the splitter matches it and [""] otherwise.
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is JsRegExp R && R.HasDefaultRegExpExec)
      {
          // we can take faster path

          if (string.Equals(R.Source, JsRegExp.regExpForMatchingAllCharacters, StringComparison.Ordinal))
          {
              // if empty string, just a string split; it produces at most s.Length items,
              // so the effective limit is the smaller of the requested limit and s.Length
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, (uint) System.Math.Min(lim, s.Length));
          }

          var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);

          int lastIndex = 0;
          uint index = 0;
          for (var match = R.Value.Match(s, 0); match.Success; match = match.NextMatch())
          {
              // Empty matches at either end of the input or directly after the previous
              // separator do not split anything.
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  continue;
              }

              // Add the match results to the array.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);

              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;
              var actualGroupCount = GetActualRegexGroupCount(R, match);
              for (int i = 1; i < actualGroupCount; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = match.Groups[i].Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);

                  if (index >= lim)
                  {
                      return a;
                  }
              }
          }

          // Add the last part of the split
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);

          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Generic split loop that drives the splitter through <c>RegExpExec</c> and its "lastIndex" property.
  /// </summary>
  /// <param name="s">The string to split.</param>
  /// <param name="splitter">The object used for matching.</param>
  /// <param name="unicodeMatching">Whether positions advance by code point instead of code unit.</param>
  /// <param name="lengthA">Index of the next array slot to fill.</param>
  /// <param name="lim">Number of items after which the array is returned.</param>
  private JsArray SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var a = _realm.Intrinsics.Array.ArrayCreate(0);
      var size = (ulong) s.Length;

      // p: start of the piece being collected; q: position currently probed.
      ulong p = 0;
      ulong q = 0;
      while (q < size)
      {
          splitter.Set(JsRegExp.PropertyLastIndex, q, true);
          var z = RegExpExec(splitter, s);

          ulong e = 0;
          var separatorFound = !z.IsNull();
          if (separatorFound)
          {
              e = System.Math.Min(TypeConverter.ToLength(splitter.Get(JsRegExp.PropertyLastIndex)), size);
              // A match ending where the current piece starts does not separate anything.
              separatorFound = e != p;
          }

          if (!separatorFound)
          {
              q = AdvanceStringIndex(s, q, unicodeMatching);
              continue;
          }

          var piece = s.Substring((int) p, (int) (q - p));
          a.SetIndexValue(lengthA, piece, updateLength: true);
          lengthA++;
          if (lengthA == lim)
          {
              return a;
          }

          p = e;

          // Captured groups of the separator are spliced into the output as well.
          var numberOfCaptures = System.Math.Max((int) TypeConverter.ToLength(z.Get(CommonProperties.Length)) - 1, 0);
          for (var i = 1; i <= numberOfCaptures; i++)
          {
              a.SetIndexValue(lengthA, z.Get(i), updateLength: true);
              lengthA++;
              if (lengthA == lim)
              {
                  return a;
              }
          }

          q = p;
      }

      // Whatever follows the last separator becomes the final item.
      a.SetIndexValue(lengthA, s.Substring((int) p), updateLength: true);
      return a;
  }
  /// <summary>
  /// Getter for the "flags" property: reads each flag property of the receiver in a fixed
  /// order and concatenates the letters of those that are truthy.
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Unused.</param>
  private JsValue Flags(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.flags");
      var letters = new StringBuilder();

      void AppendIfSet(JsValue o, JsValue p, char flag)
      {
          if (TypeConverter.ToBoolean(o.Get(p)))
          {
              letters.Append(flag);
          }
      }

      // The order of these reads determines the order of the letters.
      AppendIfSet(r, "hasIndices", 'd');
      AppendIfSet(r, PropertyGlobal, 'g');
      AppendIfSet(r, PropertyIgnoreCase, 'i');
      AppendIfSet(r, PropertyMultiline, 'm');
      AppendIfSet(r, PropertyDotAll, 's');
      AppendIfSet(r, PropertyUnicode, 'u');
      AppendIfSet(r, PropertyUnicodeSets, 'v');
      AppendIfSet(r, PropertySticky, 'y');

      return letters.ToString();
  }
  /// <summary>
  /// Builds the "/source/flags" text from the receiver's "source" and "flags" properties.
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Unused.</param>
  private JsValue ToRegExpString(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.toString");

      // "source" is read before "flags".
      var sourceText = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));

      return string.Concat("/", sourceText, "/", flagText);
  }
  /// <summary>
  /// Implements <c>RegExp.prototype.test</c>: reports whether the receiver matches the given string.
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the value to test, converted to a string.</param>
  /// <returns>A boolean value.</returns>
  private JsValue Test(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths; they bypass "exec", so they are only taken while the
      // default exec is in place (same guard as the other fast paths in this class)
      if (r is JsRegExp R && !R.FullUnicode && R.HasDefaultRegExpExec)
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          // Compare as ulong before narrowing so an oversized lastIndex cannot wrap.
          var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              // Past the end of the input: fail and rewind so the next call starts over.
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          // lastIndex == s.Length is still a valid start: an empty match may succeed there.
          var startAt = (int) lastIndex;
          var m = R.Value.Match(s, startAt);
          if (!m.Success || (R.Sticky && m.Index != startAt))
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(JsRegExp.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to search.</param>
  /// <returns>The "index" property of the exec result, or -1 when there is no match.</returns>
  private JsValue Search(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // Search from the start, remembering lastIndex so it can be put back afterwards.
      var savedLastIndex = rx.Get(JsRegExp.PropertyLastIndex);
      var startsAtZero = SameValue(savedLastIndex, 0);
      if (!startsAtZero)
      {
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, s);

      var lastIndexAfterExec = rx.Get(JsRegExp.PropertyLastIndex);
      var lastIndexUnchanged = SameValue(lastIndexAfterExec, savedLastIndex);
      if (!lastIndexUnchanged)
      {
          rx.Set(JsRegExp.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to match against.</param>
  /// <returns>
  /// Without the "g" flag the result of a single exec; with it an array of all matched
  /// substrings, or null when nothing matched.
  /// </returns>
  private JsValue Match(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.Contains('g');
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.Contains('u');
      rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is JsRegExp rei
          && rei.HasDefaultRegExpExec)
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              // Sticky + global: every match has to begin exactly where the search would
              // resume, i.e. at the end of the previous match, or one position further
              // after an empty match. Collection stops at the first gap.
              uint matchCount = 0;
              var expectedIndex = 0;
              var match = rei.Value.Match(s);
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(matchCount++, match.Value, updateLength: false);
                  expectedIndex = match.Length == 0 ? match.Index + 1 : match.Index + match.Length;
                  match = match.NextMatch();
              }

              if (matchCount == 0)
              {
                  return Null;
              }

              a.SetLength(matchCount);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }
              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global-match loop: calls <c>RegExpExec</c> until it yields null and collects
  /// the matched substrings.
  /// </summary>
  /// <param name="rx">The object used for matching.</param>
  /// <param name="s">The string to match against.</param>
  /// <param name="fullUnicode">Whether positions advance by code point after an empty match.</param>
  /// <returns>The array of matched substrings, or null when there was no match at all.</returns>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var collected = _realm.Intrinsics.Array.ArrayCreate(0);
      for (uint found = 0; ; found++)
      {
          var execResult = RegExpExec(rx, s);
          if (execResult.IsNull())
          {
              collected.SetLength(found);
              if (found == 0)
              {
                  return Null;
              }

              return collected;
          }

          var matchedText = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          collected.SetIndexValue(found, matchedText, updateLength: false);

          if (matchedText.Length == 0)
          {
              // Step over an empty match so the same position is not matched again.
              var current = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              rx.Set(JsRegExp.PropertyLastIndex, AdvanceStringIndex(s, current, fullUnicode), true);
          }
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to match against.</param>
  /// <returns>An iterator object constructed over a copy of the receiver.</returns>
  private JsValue MatchAll(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.matchAll");
      var s = TypeConverter.ToString(arguments.At(0));
      var c = SpeciesConstructor(r, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(r.Get(PropertyFlags));

      // The iterator works on its own matcher built from the receiver and its flags.
      JsValue[] constructorArguments = { r, flags };
      var matcher = Construct(c, constructorArguments);

      // The matcher starts from the receiver's current lastIndex.
      var startIndex = TypeConverter.ToLength(r.Get(JsRegExp.PropertyLastIndex));
      matcher.Set(JsRegExp.PropertyLastIndex, startIndex, true);

      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(
          matcher,
          s,
          flags.Contains('g'),
          flags.Contains('u'));
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>: two code units further when
  /// <paramref name="unicode"/> is set and a surrogate pair starts at <paramref name="index"/>,
  /// otherwise one code unit further.
  /// </summary>
  /// <param name="s">The string being scanned.</param>
  /// <param name="index">Current position, in code units.</param>
  /// <param name="unicode">Whether surrogate pairs count as a single step.</param>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      // A pair needs two code units, so it can only start before the last position.
      if (unicode && next < (ulong) s.Length)
      {
          var lead = s[(int) index];
          if (lead >= 0xD800 && lead <= 0xDBFF)
          {
              var trail = s[(int) next];
              if (trail >= 0xDC00 && trail <= 0xDFFF)
              {
                  return index + 2;
              }
          }
      }

      return next;
  }
  /// <summary>
  /// Runs a match of <paramref name="r"/> against <paramref name="s"/>: through the object's
  /// callable "exec" property when the default exec is not in use, otherwise through
  /// <c>RegExpBuiltinExec</c>.
  /// </summary>
  /// <param name="r">The object to match with.</param>
  /// <param name="s">The string to match against.</param>
  /// <returns>The result of exec, which is either null or an object.</returns>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      var regExp = r as JsRegExp;
      var usesDefaultExec = regExp is not null && regExp.HasDefaultRegExpExec;

      // "exec" is only looked up when the built-in cannot be called directly.
      if (!usesDefaultExec && r.Get(PropertyExec) is ICallable exec)
      {
          var execResult = exec.Call(r, new JsValue[] { s });

          // A user supplied exec has to return null or an object.
          if (!(execResult.IsNull() || execResult.IsObject()))
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return execResult;
      }

      // No callable exec: only a real regular expression object can use the built-in.
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return RegExpBuiltinExec(regExp, s);
  }
  /// <summary>
  /// True while the "exec" property resolves to a <c>ClrFunction</c> whose delegate equals
  /// the one stored in <c>_defaultExec</c>.
  /// </summary>
  internal bool HasDefaultExec
  {
      get
      {
          var exec = Get(PropertyExec) as ClrFunction;
          return exec is not null && exec._func == _defaultExec;
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <param name="R">The regular expression object.</param>
  /// <param name="s">The string to match against.</param>
  /// <returns>The match result array, or null when there is no match.</returns>
  private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          // lastIndex is only honoured by global and sticky expressions
          lastIndex = 0;
      }

      if (string.Equals(R.Source, JsRegExp.regExpForMatchingAllCharacters, StringComparison.Ordinal))  // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              // Only reachable for global/sticky expressions (lastIndex is 0 otherwise);
              // rewind lastIndex on failure, exactly like the stateful path below.
              R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          array.FastSetDataProperty(PropertyIndex._value, lastIndex);
          array.FastSetDataProperty(PropertyInput._value, s);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!global && !sticky && !fullUnicode && !hasIndices)
      {
          // we can the non-stateful fast path which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R, m, s, fullUnicode: false, hasIndices: false);
      }

      // the stateful version
      Match match;

      if (lastIndex > length)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var startAt = (int) lastIndex;
      while (true)
      {
          match = R.Value.Match(s, startAt);

          // The conversion of Unicode regex patterns to .NET Regex has some flaws:
          // when the pattern may match empty strings, the adapted Regex will return empty string matches
          // in the middle of surrogate pairs. As a best effort solution, we remove these fake positive matches.
          // (See also: https://github.com/sebastienros/esprima-dotnet/pull/364#issuecomment-1606045259)
          if (match.Success
              && fullUnicode
              && match.Length == 0
              && 0 < match.Index && match.Index < s.Length
              && char.IsHighSurrogate(s[match.Index - 1]) && char.IsLowSurrogate(s[match.Index]))
          {
              startAt++;
              continue;
          }

          break;
      }

      // a sticky expression only matches exactly at lastIndex
      var success = match.Success && (!sticky || match.Index == (int) lastIndex);
      if (!success)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var e = match.Index + match.Length;

      // NOTE: Even in Unicode mode, we don't need to translate indices as .NET regexes always return code unit indices.

      if (global || sticky)
      {
          R.Set(JsRegExp.PropertyLastIndex, e, true);
      }

      return CreateReturnValueArray(R, match, s, fullUnicode, hasIndices);
  }
  /// <summary>
  /// Builds the result array for a successful match: one element per capture group
  /// (element 0 being the whole match) plus the index, input and groups properties,
  /// and an indices property when <paramref name="hasIndices"/> is set.
  /// </summary>
  /// <param name="rei">The regular expression that produced <paramref name="match"/>.</param>
  /// <param name="match">The successful .NET match.</param>
  /// <param name="s">The input string that was matched.</param>
  /// <param name="fullUnicode">Whether the expression is in Unicode mode (not used by this method).</param>
  /// <param name="hasIndices">Whether the match indices array should be attached.</param>
  /// <returns>The populated result array.</returns>
  private static JsArray CreateReturnValueArray(
      JsRegExp rei,
      Match match,
      string s,
      bool fullUnicode,
      bool hasIndices)
  {
      var engine = rei.Engine;
      var actualGroupCount = GetActualRegexGroupCount(rei, match);
      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) actualGroupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, s);

      ObjectInstance? groups = null;

      // Both lists are only needed for the indices array. groupNames holds one entry per
      // capture group 1..n-1, in group order, with an empty string standing in for unnamed
      // groups: MakeMatchIndicesIndexPairArray looks names up by group position, so the list
      // must stay aligned with the groups even when named and unnamed groups are mixed.
      var indices = hasIndices ? new List<JsNumber[]?>(actualGroupCount) : null;
      var groupNames = hasIndices ? new List<string>(actualGroupCount) : null;

      for (uint i = 0; i < actualGroupCount; i++)
      {
          var capture = match.Groups[(int) i];
          var capturedValue = Undefined;
          JsNumber[]? captureIndices = null;
          if (capture is { Success: true })
          {
              capturedValue = capture.Value;
              if (hasIndices)
              {
                  // [start, end) in code units
                  captureIndices = new[] { JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length) };
              }
          }

          // groups that did not participate in the match are recorded as null
          indices?.Add(captureIndices);

          var groupName = GetRegexGroupName(rei, (int) i);
          if (!string.IsNullOrWhiteSpace(groupName))
          {
              groups ??= OrdinaryObjectCreate(engine, null);
              groups.CreateDataPropertyOrThrow(groupName, capturedValue);
              groupNames?.Add(groupName!);
          }
          else if (i > 0)
          {
              // unnamed capture group: keep the name list position-aligned
              groupNames?.Add(string.Empty);
          }

          array.SetIndexValue(i, capturedValue, updateLength: false);
      }

      array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

      if (hasIndices)
      {
          // a groups object exists exactly when at least one named group was seen
          var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, groups is not null);
          array.CreateDataPropertyOrThrow("indices", indicesArray);
      }

      return array;
  }
  /// <summary>
  /// Creates the array of match index pairs, together with its "groups" property
  /// that exposes the pairs of named groups by name.
  /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  /// </summary>
  /// <param name="engine">Engine used to create the result objects.</param>
  /// <param name="s">The matched input string; only forwarded to <c>GetMatchIndexPair</c>.</param>
  /// <param name="indices">One entry per group, null for groups without a capture.</param>
  /// <param name="groupNames">Group names, looked up at position (group index - 1).</param>
  /// <param name="hasGroups">Whether a groups object should be created.</param>
  /// <returns>The array of index pairs.</returns>
  private static JsArray MakeMatchIndicesIndexPairArray(
      Engine engine,
      string s,
      List<JsNumber[]?> indices,
      List<string>? groupNames,
      bool hasGroups)
  {
      var count = indices.Count;
      var result = engine.Realm.Intrinsics.Array.Construct((uint) count);

      ObjectInstance? groups = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
      result.CreateDataPropertyOrThrow("groups", groups ?? Undefined);

      for (var position = 0; position < count; position++)
      {
          var pair = indices[position];

          JsValue entry;
          if (pair is null)
          {
              entry = Undefined;
          }
          else
          {
              entry = GetMatchIndexPair(engine, s, pair);
          }

          result.Push(entry);

          if (position == 0)
          {
              // position 0 is the whole match and never carries a name
              continue;
          }

          var name = groupNames?[position - 1];
          if (string.IsNullOrWhiteSpace(name))
          {
              continue;
          }

          groups!.CreateDataPropertyOrThrow(name!, entry);
      }

      return result;
  }
  /// <summary>
  /// Wraps the given start/end numbers into a new array.
  /// https://tc39.es/ecma262/#sec-getmatchindexpair
  /// </summary>
  /// <param name="engine">Engine whose array intrinsic creates the result.</param>
  /// <param name="s">The matched input string (not used here).</param>
  /// <param name="match">The index values to put into the array.</param>
  private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
      => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  /// <summary>
  /// Returns the group count reported by the parse result when parsing succeeded,
  /// otherwise the number of groups in the .NET <paramref name="match"/>.
  /// </summary>
  private static int GetActualRegexGroupCount(JsRegExp rei, Match match)
  {
      var parseResult = rei.ParseResult;
      if (parseResult.Success)
      {
          return parseResult.ActualRegexGroupCount;
      }

      return match.Groups.Count;
  }
  /// <summary>
  /// Resolves the name of the capture group at <paramref name="index"/>.
  /// </summary>
  /// <param name="rei">The regular expression owning the group.</param>
  /// <param name="index">The group number; 0 denotes the whole match.</param>
  /// <returns>
  /// <c>null</c> for group 0 and for groups that .NET reports under their own number;
  /// otherwise the name from the parse result when parsing succeeded, else the name
  /// reported by the .NET regex.
  /// </returns>
  private static string? GetRegexGroupName(JsRegExp rei, int index)
  {
      if (index == 0)
      {
          return null;
      }

      if (rei.ParseResult.Success)
      {
          return rei.ParseResult.GetRegexGroupName(index);
      }

      var groupNameFromNumber = rei.Value.GroupNameFromNumber(index);

      // regex defaults to index as group name when it's not a named group; compare against
      // the full decimal rendering so that groups numbered 10 and above are recognized too
      var defaultGroupName = index.ToString(System.Globalization.CultureInfo.InvariantCulture);
      if (string.Equals(groupNameFromNumber, defaultGroupName, StringComparison.Ordinal))
      {
          return null;
      }

      return groupNameFromNumber;
  }
  /// <summary>
  /// Entry point of exec: requires the receiver to be a <see cref="JsRegExp"/>
  /// (a TypeError is raised otherwise), converts the first argument to a string
  /// and runs the built-in matcher on it.
  /// </summary>
  private JsValue Exec(JsValue thisObject, JsValue[] arguments)
  {
      if (thisObject is not JsRegExp)
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      var input = TypeConverter.ToString(arguments.At(0));
      return RegExpBuiltinExec((JsRegExp) thisObject, input);
  }
  931. }