RegExpPrototype.cs 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087
  1. #pragma warning disable CA1859 // Use concrete types when possible for improved performance -- most of prototype methods return JsValue
  2. using System.Text;
  3. using System.Text.RegularExpressions;
  4. using Jint.Collections;
  5. using Jint.Native.Number;
  6. using Jint.Native.Object;
  7. using Jint.Native.String;
  8. using Jint.Native.Symbol;
  9. using Jint.Runtime;
  10. using Jint.Runtime.Descriptors;
  11. using Jint.Runtime.Interop;
  12. namespace Jint.Native.RegExp
  13. {
  14. internal sealed class RegExpPrototype : Prototype
  15. {
  // Property keys looked up on RegExp objects and on match result arrays.
  private static readonly JsString PropertyExec = new JsString("exec");
  private static readonly JsString PropertyIndex = new JsString("index");
  private static readonly JsString PropertyInput = new JsString("input");
  private static readonly JsString PropertySticky = new JsString("sticky");
  private static readonly JsString PropertyGlobal = new JsString("global");
  internal static readonly JsString PropertySource = new JsString("source");

  // Value returned by the "source" getter when it is invoked on the prototype object itself.
  private static readonly JsString DefaultSource = new JsString("(?:)");

  internal static readonly JsString PropertyFlags = new JsString("flags");
  private static readonly JsString PropertyGroups = new JsString("groups");
  private static readonly JsString PropertyIgnoreCase = new JsString("ignoreCase");
  private static readonly JsString PropertyMultiline = new JsString("multiline");
  private static readonly JsString PropertyDotAll = new JsString("dotAll");
  private static readonly JsString PropertyUnicode = new JsString("unicode");
  private static readonly JsString PropertyUnicodeSets = new JsString("unicodeSets");

  // Constructor object exposed through the "constructor" property.
  private readonly RegExpConstructor _constructor;

  // Delegate registered as the built-in "exec"; HasDefaultExec compares against it.
  private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the RegExp prototype object for the given engine and realm.
  /// </summary>
  /// <param name="engine">Engine passed to the base prototype.</param>
  /// <param name="realm">Realm passed to the base prototype.</param>
  /// <param name="constructor">Constructor exposed through the "constructor" property.</param>
  /// <param name="objectPrototype">Object assigned as this object's own prototype.</param>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype)
      : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // Keep a single delegate instance so that HasDefaultExec can detect
      // whether "exec" still points at the built-in implementation.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Registers the string-keyed members (methods and flag getters) and the
  /// well-known-symbol methods on the prototype.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Wraps a CLR callback as a configurable, writable method property.
      PropertyDescriptor Method(string name, Func<JsValue, JsValue[], JsValue> implementation, int length)
      {
          return new PropertyDescriptor(new ClrFunction(Engine, name, implementation, length, lengthFlags), propertyFlags);
      }

      // Wraps a CLR callback as a getter-only, configurable accessor property.
      GetSetPropertyDescriptor Getter(string name, Func<JsValue, JsValue[], JsValue> getter)
      {
          return new GetSetPropertyDescriptor(
              get: new ClrFunction(Engine, name, getter, 0, lengthFlags),
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      // Getter that reads a value from a JsRegExp receiver. When the receiver is this
      // prototype object the getter yields protoValue (or undefined); any other
      // non-RegExp receiver results in a TypeError.
      GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<JsRegExp, JsValue> valueExtractor, JsValue? protoValue = null)
      {
          return Getter(name, (thisObj, arguments) =>
          {
              if (ReferenceEquals(thisObj, this))
              {
                  return protoValue ?? Undefined;
              }

              var r = thisObj as JsRegExp;
              if (r is null)
              {
                  ExceptionHelper.ThrowTypeError(_realm);
              }

              return valueExtractor(r);
          });
      }

      var properties = new PropertyDictionary(14, checkExistingKeys: false);
      properties["constructor"] = new PropertyDescriptor(_constructor, propertyFlags);
      properties["toString"] = Method("toString", ToRegExpString, 0);
      properties["exec"] = Method("exec", _defaultExec, 1);
      properties["test"] = Method("test", Test, 1);
      properties["dotAll"] = CreateGetAccessorDescriptor("get dotAll", static r => r.DotAll);
      properties["flags"] = Getter("get flags", Flags);
      properties["global"] = CreateGetAccessorDescriptor("get global", static r => r.Global);
      properties["hasIndices"] = CreateGetAccessorDescriptor("get hasIndices", static r => r.Indices);
      properties["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", static r => r.IgnoreCase);
      properties["multiline"] = CreateGetAccessorDescriptor("get multiline", static r => r.Multiline);
      properties["source"] = Getter("get source", Source);
      properties["sticky"] = CreateGetAccessorDescriptor("get sticky", static r => r.Sticky);
      properties["unicode"] = CreateGetAccessorDescriptor("get unicode", static r => r.FullUnicode);
      properties["unicodeSets"] = CreateGetAccessorDescriptor("get unicodeSets", static r => r.UnicodeSets);
      SetProperties(properties);

      var symbols = new SymbolDictionary(5);
      symbols[GlobalSymbolRegistry.Match] = Method("[Symbol.match]", Match, 1);
      symbols[GlobalSymbolRegistry.MatchAll] = Method("[Symbol.matchAll]", MatchAll, 1);
      symbols[GlobalSymbolRegistry.Replace] = Method("[Symbol.replace]", Replace, 2);
      symbols[GlobalSymbolRegistry.Search] = Method("[Symbol.search]", Search, 1);
      symbols[GlobalSymbolRegistry.Split] = Method("[Symbol.split]", Split, 2);
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  /// <param name="thisObject">Receiver; must be a RegExp instance or this prototype object.</param>
  /// <param name="arguments">Unused.</param>
  /// <returns>
  /// "(?:)" for the prototype object or an empty pattern; otherwise the pattern text with
  /// forward slashes and line terminators escaped.
  /// </returns>
  private JsValue Source(JsValue thisObject, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObject, this))
      {
          return DefaultSource;
      }

      var r = thisObject as JsRegExp;
      if (r is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      if (string.IsNullOrEmpty(r.Source))
      {
          return JsRegExp.regExpForMatchingAllCharacters;
      }

      // Every "/" and every line terminator has to be escaped so that "/" + source + "/"
      // can be read back as a regular expression literal. Besides LF this also covers
      // CR, LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029).
      return r.Source
          .Replace("\\/", "/") // normalize already escaped slashes first
          .Replace("/", "\\/") // then escape every slash exactly once
          .Replace("\n", "\\n")
          .Replace("\r", "\\r")
          .Replace("\u2028", "\\u2028")
          .Replace("\u2029", "\\u2029");
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">
  /// Index 0 is the subject string, index 1 is the replacement: either a callable or a value
  /// converted to a replacement template string.
  /// </param>
  /// <returns>The subject string with the match (or all matches when global) replaced.</returns>
  private JsValue Replace(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // we need heavier logic if the template may contain "$" substitutions
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.Contains('$');
      }

      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.Contains('g');
      var fullUnicode = false;
      if (global)
      {
          fullUnicode = flags.Contains('u');
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      // Fast path: delegate to the .NET regex directly when nothing observable
      // (custom exec, sticky matching, unicode index handling, "$" templates) is involved.
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is JsRegExp rei && rei.HasDefaultRegExpExec)
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              string Evaluator(Match match)
              {
                  var actualGroupCount = GetActualRegexGroupCount(rei, match);
                  var replacerArgs = new List<JsValue>(actualGroupCount + 2);
                  replacerArgs.Add(match.Value);

                  ObjectInstance? groups = null;
                  for (var i = 1; i < actualGroupCount; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Success ? capture.Value : Undefined);

                      var groupName = GetRegexGroupName(rei, i);
                      if (!string.IsNullOrWhiteSpace(groupName))
                      {
                          groups ??= OrdinaryObjectCreate(_engine, null);
                          groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
                      }
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);
                  if (groups is not null)
                  {
                      replacerArgs.Add(groups);
                  }

                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // Generic path: collect every match result through RegExpExec first.
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // An empty match would never advance on its own; step lastIndex forward.
              // The write is a throwing Set, same as in MatchSlow.
              var thisIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, nextIndex, true);
          }
      }

      var accumulatedResult = new StringBuilder();
      var nextSourcePosition = 0;

      // captures: string form used by GetSubstitution (unmatched groups become "").
      // captureValues: values handed to a replacer function (unmatched groups stay undefined).
      var captures = new List<string>();
      var captureValues = new List<JsValue>();
      for (var i = 0; i < results.Count; i++)
      {
          var result = results[i];
          var nCaptures = (int) result.GetLength();
          nCaptures = System.Math.Max(nCaptures - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;
          var position = (int) TypeConverter.ToInteger(result.Get(PropertyIndex));
          position = System.Math.Max(System.Math.Min(position, lengthS), 0);

          captures.Clear();
          captureValues.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = result.Get(n);
              if (capN.IsUndefined())
              {
                  captures.Add("");
                  captureValues.Add(Undefined);
              }
              else
              {
                  var value = TypeConverter.ToString(capN);
                  captures.Add(value);
                  captureValues.Add(value);
              }
          }

          var namedCaptures = result.Get(PropertyGroups);
          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>(captureValues.Count + 4);
              replacerArgs.Add(matched);
              replacerArgs.AddRange(captureValues);
              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // Results positioned before what has already been emitted are ignored.
          if (position >= nextSourcePosition)
          {
              accumulatedResult.Append(s, nextSourcePosition, position - nextSourcePosition);
              accumulatedResult.Append(replacement);
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition < lengthS)
      {
          accumulatedResult.Append(s, nextSourcePosition, lengthS - nextSourcePosition);
      }

      return accumulatedResult.ToString();
  }
  /// <summary>
  /// Invokes the replacer function with an undefined receiver and converts its result to a string.
  /// </summary>
  /// <param name="replacer">Replacement value; cast to <c>ICallable</c>.</param>
  /// <param name="replacerArgs">Arguments passed to the replacer, in order.</param>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var callArguments = replacerArgs.ToArray();
      var replacement = callable.Call(Undefined, callArguments);
      return TypeConverter.ToString(replacement);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The subject string.</param>
  /// <param name="position">Index in <paramref name="str"/> where the match starts.</param>
  /// <param name="captures">Capture strings; element 0 corresponds to <c>$1</c>.</param>
  /// <param name="namedCaptures">Object holding named groups, or undefined.</param>
  /// <param name="replacement">The replacement template.</param>
  /// <returns>The template with all recognized <c>$</c> patterns expanded.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // Without any "$" the template is used as is.
      if (!replacement.Contains('$'))
      {
          return replacement;
      }

      // Patterns
      // $$         Inserts a "$".
      // $&         Inserts the matched substring.
      // $`         Inserts the portion of the string that precedes the matched substring.
      // $'         Inserts the portion of the string that follows the matched substring.
      // $n or $nn  Where n or nn are decimal digits, inserts the nth parenthesized submatch string.
      // $<name>    Inserts the named capture, when named captures are available.
      using var sb = new ValueStringBuilder(stackalloc char[128]);
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c != '$' || i >= replacement.Length - 1)
          {
              sb.Append(c);
              continue;
          }

          c = replacement[++i];
          switch (c)
          {
              case '$':
                  sb.Append('$');
                  break;
              case '&':
                  sb.Append(matched);
                  break;
              case '`':
                  sb.Append(str.AsSpan(0, position));
                  break;
              case '\'':
                  // The match text need not come from the subject string (the caller converts
                  // whatever exec returned), so the tail may start past the end; in that case
                  // nothing is inserted.
                  var tailPos = position + matched.Length;
                  if (tailPos < str.Length)
                  {
                      sb.Append(str.AsSpan(tailPos));
                  }
                  break;
              case '<':
                  var gtPos = replacement.IndexOf('>', i + 1);
                  if (gtPos == -1 || namedCaptures.IsUndefined())
                  {
                      sb.Append('$');
                      sb.Append(c);
                  }
                  else
                  {
                      var startIndex = i + 1;
                      var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                      var capture = namedCaptures.Get(groupName);
                      if (!capture.IsUndefined())
                      {
                          sb.Append(TypeConverter.ToString(capture));
                      }

                      i = gtPos;
                  }
                  break;
              default:
              {
                  // Only ASCII digits form a capture reference.
                  if (c is >= '0' and <= '9')
                  {
                      int matchNumber1 = c - '0';

                      // The match number can be one or two digits long.
                      int matchNumber2 = 0;
                      if (i < replacement.Length - 1 && replacement[i + 1] is >= '0' and <= '9')
                      {
                          matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                      }

                      // Try the two digit capture first.
                      if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                      {
                          sb.Append(captures[matchNumber2 - 1]);
                          i++;
                      }
                      else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                      {
                          sb.Append(captures[matchNumber1 - 1]);
                      }
                      else
                      {
                          // Capture does not exist: keep "$" and the digit literally.
                          sb.Append('$');
                          sb.Append(c);
                      }
                  }
                  else
                  {
                      // Unknown replacement pattern.
                      sb.Append('$');
                      sb.Append(c);
                  }
                  break;
              }
          }
      }

      return sb.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Index 0 is the subject string, index 1 the optional limit.</param>
  /// <returns>An array with the pieces of the subject string and any captured groups.</returns>
  private JsValue Split(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.Contains('u');

      // The splitter always matches sticky so that every position is probed individually.
      var newFlags = flags.Contains('y') ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });
      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is JsRegExp R && R.HasDefaultRegExpExec)
      {
          // we can take faster path

          if (string.Equals(R.Source, JsRegExp.regExpForMatchingAllCharacters, StringComparison.Ordinal))
          {
              // Empty pattern: plain per-character split. Honor the requested limit instead of
              // always allowing s.Length elements.
              // NOTE(review): assumes the last argument of SplitWithStringSeparator is the
              // maximum number of elements - confirm against StringPrototype.
              var characterLimit = (uint) System.Math.Min(lim, s.Length);
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, characterLimit);
          }

          var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);

          int lastIndex = 0;
          uint index = 0;
          for (var match = R.Value.Match(s, 0); match.Success; match = match.NextMatch())
          {
              // Empty matches at the start, at the end or directly after the previous
              // separator do not split anything.
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  continue;
              }

              // Add the text between the previous separator and this one.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);
              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;

              // Captured groups of the separator are spliced into the result as well.
              var actualGroupCount = GetActualRegexGroupCount(R, match);
              for (int i = 1; i < actualGroupCount; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = group.Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);
                  if (index >= lim)
                  {
                      return a;
                  }
              }
          }

          // Add the last part of the split
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);
          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Generic split loop that probes every position of <paramref name="s"/> through
  /// <c>RegExpExec</c> on the splitter object.
  /// </summary>
  /// <param name="s">The subject string.</param>
  /// <param name="splitter">RegExp object used for matching; its lastIndex is written before every probe.</param>
  /// <param name="unicodeMatching">Whether positions advance by code point instead of code unit.</param>
  /// <param name="lengthA">Index of the first element to write into the result array.</param>
  /// <param name="lim">Element count at which the result is returned early.</param>
  private JsArray SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var parts = _realm.Intrinsics.Array.ArrayCreate(0);
      var size = (ulong) s.Length;

      // p: end of the previous separator, q: position currently being probed.
      ulong p = 0;
      ulong q = 0;
      while (q < size)
      {
          splitter.Set(JsRegExp.PropertyLastIndex, q, true);
          var z = RegExpExec(splitter, s);
          if (z.IsNull())
          {
              q = AdvanceStringIndex(s, q, unicodeMatching);
              continue;
          }

          var e = System.Math.Min(TypeConverter.ToLength(splitter.Get(JsRegExp.PropertyLastIndex)), size);
          if (e == p)
          {
              // The match ends where the previous separator ended: move on without splitting.
              q = AdvanceStringIndex(s, q, unicodeMatching);
              continue;
          }

          parts.SetIndexValue(lengthA++, s.Substring((int) p, (int) (q - p)), updateLength: true);
          if (lengthA == lim)
          {
              return parts;
          }

          p = e;

          // Element 0 of the match result is the separator itself; the rest are captures.
          var captureCount = System.Math.Max((int) TypeConverter.ToLength(z.Get(CommonProperties.Length)) - 1, 0);
          for (var i = 1; i <= captureCount; i++)
          {
              parts.SetIndexValue(lengthA++, z.Get(i), updateLength: true);
              if (lengthA == lim)
              {
                  return parts;
              }
          }

          q = p;
      }

      // Whatever follows the last separator forms the final element.
      parts.SetIndexValue(lengthA, s.Substring((int) p), updateLength: true);
      return parts;
  }
  /// <summary>
  /// Getter for "flags": reads the individual flag properties of the receiver in a fixed
  /// order and concatenates the letter of every flag whose property is truthy.
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Unused.</param>
  private JsValue Flags(JsValue thisObject, JsValue[] arguments)
  {
      JsValue target = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.flags");
      var letters = new StringBuilder(8);

      void AppendIfSet(JsValue property, char letter)
      {
          if (TypeConverter.ToBoolean(target.Get(property)))
          {
              letters.Append(letter);
          }
      }

      // The property reads are observable, so their order must stay as is.
      AppendIfSet("hasIndices", 'd');
      AppendIfSet(PropertyGlobal, 'g');
      AppendIfSet(PropertyIgnoreCase, 'i');
      AppendIfSet(PropertyMultiline, 'm');
      AppendIfSet(PropertyDotAll, 's');
      AppendIfSet(PropertyUnicode, 'u');
      AppendIfSet(PropertyUnicodeSets, 'v');
      AppendIfSet(PropertySticky, 'y');

      return letters.ToString();
  }
  /// <summary>
  /// Implements toString: "/" + source + "/" + flags, with both parts read from the receiver.
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Unused.</param>
  private JsValue ToRegExpString(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.toString");

      // "source" is read before "flags".
      var sourceText = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));

      return string.Concat("/", sourceText, "/", flagText);
  }
  /// <summary>
  /// Implements test: reports whether the receiver matches the given string.
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Index 0 is converted to the subject string.</param>
  /// <returns>A boolean value; true when a match was found.</returns>
  private JsValue Test(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // Fast paths talk to the .NET regex directly. They are only valid while "exec" is
      // still the built-in one, the same guard the match/replace/split fast paths use.
      if (r is JsRegExp R && !R.FullUnicode && R.HasDefaultRegExpExec)
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          // Compare before narrowing so that a huge lastIndex cannot overflow the int cast.
          var lastIndexValue = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));
          if (lastIndexValue > (ulong) s.Length)
          {
              // Past the end: no match is possible and lastIndex starts over.
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          // lastIndex == s.Length is still attempted: a pattern may match the empty string there.
          var lastIndex = (int) lastIndexValue;
          var m = R.Value.Match(s, lastIndex);
          if (!m.Success || (R.Sticky && m.Index != lastIndex))
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(JsRegExp.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Index 0 is converted to the subject string.</param>
  /// <returns>The "index" of the match result, or -1 when there is no match.</returns>
  private JsValue Search(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // Searching always starts at 0; lastIndex is only written when it actually differs.
      var savedLastIndex = rx.Get(JsRegExp.PropertyLastIndex);
      var startsAtZero = SameValue(savedLastIndex, 0);
      if (!startsAtZero)
      {
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, s);

      // Put the caller's lastIndex back if the search moved it.
      var lastIndexAfterExec = rx.Get(JsRegExp.PropertyLastIndex);
      var unchanged = SameValue(lastIndexAfterExec, savedLastIndex);
      if (!unchanged)
      {
          rx.Set(JsRegExp.PropertyLastIndex, savedLastIndex, true);
      }

      return execResult.IsNull() ? -1 : execResult.Get(PropertyIndex);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Index 0 is converted to the subject string.</param>
  /// <returns>
  /// Without the "g" flag the result of a single exec; with it an array of all matched
  /// substrings, or null when nothing matched.
  /// </returns>
  private JsValue Match(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.Contains('g');
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.Contains('u');
      rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is JsRegExp rei
          && rei.HasDefaultRegExpExec)
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              var match = rei.Value.Match(s);
              if (!match.Success || match.Index != 0)
              {
                  return Null;
              }

              // A sticky global match only continues while every match begins exactly where
              // the previous one ended (one further along after an empty match).
              uint count = 0;
              int expectedIndex;
              do
              {
                  a.SetIndexValue(count++, match.Value, updateLength: false);
                  expectedIndex = match.Length == 0 ? match.Index + 1 : match.Index + match.Length;
                  match = match.NextMatch();
              } while (match.Success && match.Index == expectedIndex);

              a.SetLength(count);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }

              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global match loop: calls <c>RegExpExec</c> until it yields null and collects
  /// element 0 of every result.
  /// </summary>
  /// <param name="rx">The RegExp object being matched.</param>
  /// <param name="s">The subject string.</param>
  /// <param name="fullUnicode">Whether lastIndex advances by code point after an empty match.</param>
  /// <returns>The array of matched substrings, or null when there was no match at all.</returns>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var collected = _realm.Intrinsics.Array.ArrayCreate(0);
      uint count = 0;

      for (var execResult = RegExpExec(rx, s); !execResult.IsNull(); execResult = RegExpExec(rx, s))
      {
          var matchedText = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          collected.SetIndexValue(count, matchedText, updateLength: false);

          if (matchedText.Length == 0)
          {
              // An empty match does not move lastIndex by itself; step it forward.
              var current = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var advanced = AdvanceStringIndex(s, current, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, advanced, true);
          }

          count++;
      }

      collected.SetLength(count);
      return count == 0 ? Null : collected;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  /// <param name="thisObject">Receiver; must be an object.</param>
  /// <param name="arguments">Index 0 is converted to the subject string.</param>
  /// <returns>An iterator over the matches, produced by a fresh matcher object.</returns>
  private JsValue MatchAll(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.matchAll");
      var subject = TypeConverter.ToString(arguments.At(0));
      var species = SpeciesConstructor(regExp, _realm.Intrinsics.RegExp);
      var flagString = TypeConverter.ToJsString(regExp.Get(PropertyFlags));

      // The iterator works on its own matcher so the receiver's lastIndex is not advanced by it.
      var constructorArguments = new JsValue[] { regExp, flagString };
      var matcher = Construct(species, constructorArguments);

      // The matcher starts from the receiver's current lastIndex.
      var startIndex = TypeConverter.ToLength(regExp.Get(JsRegExp.PropertyLastIndex));
      matcher.Set(JsRegExp.PropertyLastIndex, startIndex, true);

      var isGlobal = flagString.Contains('g');
      var isFullUnicode = flagString.Contains('u');
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, subject, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>. In unicode mode a surrogate pair
  /// starting at <paramref name="index"/> is stepped over as a whole (index + 2); in every
  /// other case the result is index + 1.
  /// </summary>
  /// <param name="s">The subject string.</param>
  /// <param name="index">The current code unit index.</param>
  /// <param name="unicode">Whether surrogate pairs count as a single step.</param>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      var startsSurrogatePair = unicode
          && next < (ulong) s.Length
          && char.IsHighSurrogate(s[(int) index])
          && char.IsLowSurrogate(s[(int) next]);

      return startsSurrogatePair ? index + 2 : next;
  }
  /// <summary>
  /// Runs a match of <paramref name="r"/> against <paramref name="s"/>. A callable "exec"
  /// property is used when the object is not a JsRegExp or no longer has the default exec;
  /// otherwise the built-in algorithm runs.
  /// </summary>
  /// <param name="r">The object to match with.</param>
  /// <param name="s">The subject string.</param>
  /// <returns>A match result object or null.</returns>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      var regExp = r as JsRegExp;

      // The "exec" property is only looked up when it might not be the built-in one.
      var mayHaveCustomExec = regExp is null || !regExp.HasDefaultRegExpExec;
      if (mayHaveCustomExec && r.Get(PropertyExec) is ICallable exec)
      {
          var execResult = exec.Call(r, new JsValue[] { s });

          // A user-supplied exec has to return an object or null.
          if (!(execResult.IsNull() || execResult.IsObject()))
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return execResult;
      }

      // Without a callable exec the receiver itself has to be a RegExp.
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return RegExpBuiltinExec(regExp, s);
  }
  /// <summary>
  /// True while the "exec" property of this prototype is a ClrFunction wrapping the
  /// delegate that was registered as the built-in implementation.
  /// </summary>
  internal bool HasDefaultExec
  {
      get
      {
          var exec = Get(PropertyExec);
          return exec is ClrFunction clrFunction && clrFunction._func == _defaultExec;
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <param name="R">The RegExp instance to match with.</param>
  /// <param name="s">The subject string.</param>
  /// <returns>The match result array, or null when there is no match.</returns>
  private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          // lastIndex is only honored for global and sticky expressions
          lastIndex = 0;
      }

      if (string.Equals(R.Source, JsRegExp.regExpForMatchingAllCharacters, StringComparison.Ordinal)) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              // Only reachable for global/sticky expressions (lastIndex is 0 otherwise):
              // a failed match starts over, exactly like the stateful branch below.
              R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          array.FastSetDataProperty(PropertyIndex._value, lastIndex);
          array.FastSetDataProperty(PropertyInput._value, s);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!global && !sticky && !fullUnicode && !hasIndices)
      {
          // we can take the non-stateful fast path which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R, m, s, fullUnicode: false, hasIndices: false);
      }

      // the stateful version
      Match match;

      if (lastIndex > length)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var startAt = (int) lastIndex;
      while (true)
      {
          match = matcher.Match(s, startAt);

          // The conversion of Unicode regex patterns to .NET Regex has some flaws:
          // when the pattern may match empty strings, the adapted Regex will return empty string matches
          // in the middle of surrogate pairs. As a best effort solution, we remove these fake positive matches.
          // (See also: https://github.com/sebastienros/esprima-dotnet/pull/364#issuecomment-1606045259)
          if (match.Success
              && fullUnicode
              && match.Length == 0
              && 0 < match.Index && match.Index < s.Length
              && char.IsHighSurrogate(s[match.Index - 1]) && char.IsLowSurrogate(s[match.Index]))
          {
              startAt++;
              continue;
          }

          break;
      }

      // A sticky expression only matches exactly at lastIndex.
      var success = match.Success && (!sticky || match.Index == (int) lastIndex);
      if (!success)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var e = match.Index + match.Length;

      // NOTE: Even in Unicode mode, we don't need to translate indices as .NET regexes always return code unit indices.
      if (global || sticky)
      {
          R.Set(JsRegExp.PropertyLastIndex, e, true);
      }

      return CreateReturnValueArray(R, match, s, fullUnicode, hasIndices);
  }
  808. private static JsArray CreateReturnValueArray(
  809. JsRegExp rei,
  810. Match match,
  811. string s,
  812. bool fullUnicode,
  813. bool hasIndices)
  814. {
  815. var engine = rei.Engine;
  816. var actualGroupCount = GetActualRegexGroupCount(rei, match);
  817. var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) actualGroupCount);
  818. array.CreateDataProperty(PropertyIndex, match.Index);
  819. array.CreateDataProperty(PropertyInput, s);
  820. ObjectInstance? groups = null;
  821. List<string>? groupNames = null;
  822. var indices = hasIndices ? new List<JsNumber[]?>(actualGroupCount) : null;
  823. for (uint i = 0; i < actualGroupCount; i++)
  824. {
  825. var capture = match.Groups[(int) i];
  826. var capturedValue = Undefined;
  827. if (capture?.Success == true)
  828. {
  829. capturedValue = capture.Value;
  830. }
  831. if (hasIndices)
  832. {
  833. if (capture?.Success == true)
  834. {
  835. indices!.Add(new[] { JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length) });
  836. }
  837. else
  838. {
  839. indices!.Add(null);
  840. }
  841. }
  842. var groupName = GetRegexGroupName(rei, (int) i);
  843. if (!string.IsNullOrWhiteSpace(groupName))
  844. {
  845. groups ??= OrdinaryObjectCreate(engine, null);
  846. groups.CreateDataPropertyOrThrow(groupName, capturedValue);
  847. groupNames ??= new List<string>();
  848. groupNames.Add(groupName!);
  849. }
  850. array.SetIndexValue(i, capturedValue, updateLength: false);
  851. }
  852. array.CreateDataProperty(PropertyGroups, groups ?? Undefined);
  853. if (hasIndices)
  854. {
  855. var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, groupNames?.Count > 0);
  856. array.CreateDataPropertyOrThrow("indices", indicesArray);
  857. }
  858. return array;
  859. }
  860. /// <summary>
  861. /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  862. /// </summary>
  863. private static JsArray MakeMatchIndicesIndexPairArray(
  864. Engine engine,
  865. string s,
  866. List<JsNumber[]?> indices,
  867. List<string>? groupNames,
  868. bool hasGroups)
  869. {
  870. var n = indices.Count;
  871. var a = engine.Realm.Intrinsics.Array.Construct((uint) n);
  872. ObjectInstance? groups = null;
  873. if (hasGroups)
  874. {
  875. groups = OrdinaryObjectCreate(engine, null);
  876. }
  877. a.CreateDataPropertyOrThrow("groups", groups ?? Undefined);
  878. for (var i = 0; i < n; ++i)
  879. {
  880. var matchIndices = indices[i];
  881. var matchIndexPair = matchIndices is not null
  882. ? GetMatchIndexPair(engine, s, matchIndices)
  883. : Undefined;
  884. a.Push(matchIndexPair);
  885. if (i > 0 && !string.IsNullOrWhiteSpace(groupNames?[i - 1]))
  886. {
  887. groups!.CreateDataPropertyOrThrow(groupNames![i - 1], matchIndexPair);
  888. }
  889. }
  890. return a;
  891. }
  892. /// <summary>
  893. /// https://tc39.es/ecma262/#sec-getmatchindexpair
  894. /// </summary>
  895. private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
  896. {
  897. return engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  898. }
  899. private static int GetActualRegexGroupCount(JsRegExp rei, Match match)
  900. {
  901. return rei.ParseResult.Success ? rei.ParseResult.ActualRegexGroupCount : match.Groups.Count;
  902. }
  903. private static string? GetRegexGroupName(JsRegExp rei, int index)
  904. {
  905. if (index == 0)
  906. {
  907. return null;
  908. }
  909. var regex = rei.Value;
  910. if (rei.ParseResult.Success)
  911. {
  912. return rei.ParseResult.GetRegexGroupName(index);
  913. }
  914. var groupNameFromNumber = regex.GroupNameFromNumber(index);
  915. if (groupNameFromNumber.Length == 1 && groupNameFromNumber[0] == 48 + index)
  916. {
  917. // regex defaults to index as group name when it's not a named group
  918. return null;
  919. }
  920. return groupNameFromNumber;
  921. }
  922. private JsValue Exec(JsValue thisObject, JsValue[] arguments)
  923. {
  924. var r = thisObject as JsRegExp;
  925. if (r is null)
  926. {
  927. ExceptionHelper.ThrowTypeError(_engine.Realm);
  928. }
  929. var s = TypeConverter.ToString(arguments.At(0));
  930. return RegExpBuiltinExec(r, s);
  931. }
  932. }
  933. }