RegExpPrototype.cs 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095
  1. using System.Diagnostics.CodeAnalysis;
  2. using System.Text.RegularExpressions;
  3. using Jint.Collections;
  4. using Jint.Native.Array;
  5. using Jint.Native.Number;
  6. using Jint.Native.Object;
  7. using Jint.Native.String;
  8. using Jint.Native.Symbol;
  9. using Jint.Pooling;
  10. using Jint.Runtime;
  11. using Jint.Runtime.Descriptors;
  12. using Jint.Runtime.Interop;
  13. namespace Jint.Native.RegExp
  14. {
  15. public sealed class RegExpPrototype : Prototype
  16. {
  17. private static readonly JsString PropertyExec = new("exec");
  18. private static readonly JsString PropertyIndex = new("index");
  19. private static readonly JsString PropertyInput = new("input");
  20. private static readonly JsString PropertySticky = new("sticky");
  21. private static readonly JsString PropertyGlobal = new("global");
  22. internal static readonly JsString PropertySource = new("source");
  23. private static readonly JsString DefaultSource = new("(?:)");
  24. internal static readonly JsString PropertyFlags = new("flags");
  25. private static readonly JsString PropertyGroups = new("groups");
  26. private readonly RegExpConstructor _constructor;
  27. private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the RegExp prototype object; properties are installed later by <see cref="Initialize"/>.
  /// </summary>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype)
      : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // Keep a single delegate instance: Initialize registers it as "exec" and
      // TryGetDefaultExec compares against it to detect an unmodified exec.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Installs the string-keyed and symbol-keyed members of the RegExp prototype.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Builds a configurable accessor that has a getter and no setter.
      GetSetPropertyDescriptor Getter(string name, Func<JsValue, JsValue[], JsValue> getter)
      {
          return new GetSetPropertyDescriptor(
              get: new ClrFunctionInstance(Engine, name, getter, 0, lengthFlags),
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      // Builds a getter that reads a value off a RegExpInstance receiver. When the receiver
      // is this prototype object itself, protoValue (or undefined) is returned instead;
      // any other receiver results in a TypeError.
      GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<RegExpInstance, JsValue> valueExtractor, JsValue? protoValue = null)
      {
          JsValue Read(JsValue thisObj, JsValue[] arguments)
          {
              if (ReferenceEquals(thisObj, this))
              {
                  return protoValue ?? Undefined;
              }

              var regExp = thisObj as RegExpInstance;
              if (regExp is null)
              {
                  ExceptionHelper.ThrowTypeError(_realm);
              }

              return valueExtractor(regExp);
          }

          return Getter(name, Read);
      }

      // The insertion order below is kept exactly as it was.
      var properties = new PropertyDictionary(14, checkExistingKeys: false)
      {
          ["constructor"] = new PropertyDescriptor(_constructor, propertyFlags),
          ["toString"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "toString", ToRegExpString, 0, lengthFlags), propertyFlags),
          ["exec"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "exec", _defaultExec, 1, lengthFlags), propertyFlags),
          ["test"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "test", Test, 1, lengthFlags), propertyFlags),
          ["dotAll"] = CreateGetAccessorDescriptor("get dotAll", static r => r.DotAll),
          ["flags"] = Getter("get flags", Flags),
          ["global"] = CreateGetAccessorDescriptor("get global", static r => r.Global),
          ["hasIndices"] = CreateGetAccessorDescriptor("get hasIndices", static r => r.Indices),
          ["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", static r => r.IgnoreCase),
          ["multiline"] = CreateGetAccessorDescriptor("get multiline", static r => r.Multiline),
          ["source"] = Getter("get source", Source),
          ["sticky"] = CreateGetAccessorDescriptor("get sticky", static r => r.Sticky),
          ["unicode"] = CreateGetAccessorDescriptor("get unicode", static r => r.FullUnicode),
          ["unicodeSets"] = CreateGetAccessorDescriptor("get unicodeSets", static r => r.UnicodeSets)
      };
      SetProperties(properties);

      var symbols = new SymbolDictionary(5)
      {
          [GlobalSymbolRegistry.Match] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.match]", Match, 1, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.MatchAll] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.matchAll]", MatchAll, 1, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.Replace] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.replace]", Replace, 2, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.Search] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.search]", Search, 1, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.Split] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.split]", Split, 2, lengthFlags), propertyFlags)
      };
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  private JsValue Source(JsValue thisObj, JsValue[] arguments)
  {
      // Reading "source" off the prototype object itself yields the default pattern.
      if (ReferenceEquals(thisObj, this))
      {
          return DefaultSource;
      }

      var regExp = thisObj as RegExpInstance;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      var pattern = regExp.Source;
      if (pattern is null)
      {
          return JsString.Empty;
      }

      // Escape forward slashes and line feeds in the pattern text before returning it.
      var escaped = pattern.Replace("/", "\\/");
      escaped = escaped.Replace("\n", "\\n");
      return escaped;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <remarks>
  /// arguments[0] is the subject string; arguments[1] is either a callable replacer or a
  /// value that is converted to a replacement template string. Returns the subject string
  /// with the match (or, for global expressions, every match) replaced.
  /// </remarks>
  private JsValue Replace(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // we need heavier logic if we have named captures
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.IndexOf('$') != -1;
      }

      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      var fullUnicode = false;
      if (global)
      {
          fullUnicode = flags.IndexOf('u') != -1;
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is RegExpInstance rei && rei.TryGetDefaultRegExpExec(out _))
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              // Adapts a .NET match into the argument list for the script replacer:
              // (matched, capture1..captureN, index, input[, groups]).
              string Evaluator(Match match)
              {
                  var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
                  replacerArgs.Add(match.Value);

                  ObjectInstance? groups = null;
                  for (var i = 1; i < match.Groups.Count; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Success ? capture.Value : Undefined);

                      var groupName = GetRegexGroupName(rei.Value, i);
                      if (!string.IsNullOrWhiteSpace(groupName))
                      {
                          groups ??= OrdinaryObjectCreate(_engine, null);
                          groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
                      }
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);
                  if (groups is not null)
                  {
                      replacerArgs.Add(groups);
                  }

                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              // The template contains no '$' here (mayHaveNamedCaptures is false).
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // Slow path: collect every exec result first, then build the output string.
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // An empty match must advance lastIndex manually or the loop would never progress.
              var thisIndex = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              // Throw on a failed write, the same way MatchSlow does for this step.
              rx.Set(RegExpInstance.PropertyLastIndex, nextIndex, true);
          }
      }

      var accumulatedResult = "";
      var nextSourcePosition = 0;

      var captures = new List<string>();
      for (var i = 0; i < results.Count; i++)
      {
          var result = results[i];
          var nCaptures = (int) result.Length;
          nCaptures = System.Math.Max(nCaptures - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;

          // Clamp to [0, lengthS] BEFORE narrowing to int: "index" comes from script-visible
          // data and may be +/-Infinity or far outside the int range, and narrowing such a
          // value first would produce an unspecified result.
          var position = (int) System.Math.Max(
              System.Math.Min(TypeConverter.ToInteger(result.Get(PropertyIndex)), lengthS),
              0);

          uint n = 1;
          captures.Clear();
          while (n <= nCaptures)
          {
              var capN = result.Get(n);
              var value = !capN.IsUndefined() ? TypeConverter.ToString(capN) : "";
              captures.Add(value);
              n++;
          }

          var namedCaptures = result.Get(PropertyGroups);
          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>();
              replacerArgs.Add(matched);
              foreach (var capture in captures)
              {
                  replacerArgs.Add(capture);
              }

              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // Results positioned before text that was already consumed are ignored.
          if (position >= nextSourcePosition)
          {
              accumulatedResult = accumulatedResult +
                                  s.Substring(nextSourcePosition, position - nextSourcePosition) +
                                  replacement;

              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition >= lengthS)
      {
          return accumulatedResult;
      }

      return accumulatedResult + s.Substring(nextSourcePosition);
  }
  /// <summary>
  /// Invokes a callable replacer with an undefined receiver and converts its result to a string.
  /// </summary>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var callArguments = replacerArgs.ToArray();
      return TypeConverter.ToString(callable.Call(Undefined, callArguments));
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The full subject string.</param>
  /// <param name="position">Index in <paramref name="str"/> at which the match starts.</param>
  /// <param name="captures">Captured group values; element 0 is capture group 1.</param>
  /// <param name="namedCaptures">Object holding named groups, or undefined when there are none.</param>
  /// <param name="replacement">The replacement template, possibly containing <c>$</c> patterns.</param>
  /// <returns>The template with every recognised <c>$</c> pattern expanded.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // If there is no pattern, replace the pattern as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      // Only ASCII 0-9 are capture-reference digits; char.IsDigit would also accept
      // other Unicode decimal digits, for which (c - '0') is not a digit value.
      static bool IsDecimalDigit(char ch) => ch >= '0' && ch <= '9';

      // Patterns
      // $$ Inserts a "$".
      // $& Inserts the matched substring.
      // $` Inserts the portion of the string that precedes the matched substring.
      // $' Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string, provided the first argument was a RegExp object.
      // $<name> Inserts the named capture group, when named captures are available.
      using var replacementBuilder = StringBuilderPool.Rent();
      var sb = replacementBuilder.Builder;
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c == '$' && i < replacement.Length - 1)
          {
              c = replacement[++i];
              switch (c)
              {
                  case '$':
                      sb.Append('$');
                      break;
                  case '&':
                      sb.Append(matched);
                      break;
                  case '`':
                      sb.Append(str.Substring(0, position));
                      break;
                  case '\'':
                  {
                      // The tail is empty when the match reaches (or claims to reach past) the
                      // end of the string; a user-supplied exec can report a match longer than
                      // the remaining input, which previously made Substring throw.
                      var tailPosition = position + matched.Length;
                      if (tailPosition < str.Length)
                      {
                          sb.Append(str.Substring(tailPosition));
                      }

                      break;
                  }
                  case '<':
                      var gtPos = replacement.IndexOf('>', i + 1);
                      if (gtPos == -1 || namedCaptures.IsUndefined())
                      {
                          // Not a usable named reference: emit "$<" literally.
                          sb.Append('$');
                          sb.Append(c);
                      }
                      else
                      {
                          var startIndex = i + 1;
                          var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                          var capture = namedCaptures.Get(groupName);
                          if (!capture.IsUndefined())
                          {
                              sb.Append(TypeConverter.ToString(capture));
                          }

                          // Skip past the closing '>'.
                          i = gtPos;
                      }

                      break;
                  default:
                  {
                      if (IsDecimalDigit(c))
                      {
                          int matchNumber1 = c - '0';

                          // The match number can be one or two digits long.
                          int matchNumber2 = 0;
                          if (i < replacement.Length - 1 && IsDecimalDigit(replacement[i + 1]))
                          {
                              matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                          }

                          // Try the two digit capture first.
                          if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                          {
                              // Two digit capture replacement.
                              sb.Append(TypeConverter.ToString(captures[matchNumber2 - 1]));
                              i++;
                          }
                          else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                          {
                              // Single digit capture replacement.
                              sb.Append(TypeConverter.ToString(captures[matchNumber1 - 1]));
                          }
                          else
                          {
                              // Capture does not exist: keep the '$' and step back so the
                              // digit is re-read and copied as an ordinary character.
                              sb.Append('$');
                              i--;
                          }
                      }
                      else
                      {
                          // Unknown replacement pattern.
                          sb.Append('$');
                          sb.Append(c);
                      }

                      break;
                  }
              }
          }
          else
          {
              // Ordinary character, or a '$' that is the last character of the template.
              sb.Append(c);
          }
      }

      return replacementBuilder.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <remarks>
  /// arguments[0] is the string to split and arguments[1] the optional limit on the number
  /// of produced elements. Returns an array of the pieces, with capture group values
  /// interleaved after each separator match.
  /// </remarks>
  private JsValue Split(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.IndexOf('u') > -1;

      // The splitter is constructed with the sticky flag added when it is not already present.
      var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });

      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          // Empty input: the result is [] when the pattern matches it, otherwise [""].
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is RegExpInstance R && R.TryGetDefaultRegExpExec(out _))
      {
          // we can take faster path
          if (R.Source == RegExpInstance.regExpForMatchingAllCharacters)
          {
              // if empty string, just a string split
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, (uint) s.Length);
          }

          var a = (ArrayInstance) _realm.Intrinsics.Array.Construct(Arguments.Empty);

          var lastIndex = 0;
          uint index = 0;

          // Invariant: index < lim at the top of every iteration, because lim is non-zero
          // here and every write below is followed by an "index >= lim" early return.
          var match = R.Value.Match(s, 0);
          while (match.Success)
          {
              // Empty matches at the start, at the end, or directly at the previous split
              // point do not split anything.
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  match = match.NextMatch();
                  continue;
              }

              // Add the text between the previous split point and this match.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);
              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;

              // Capture groups are spliced into the output; unmatched groups become undefined.
              for (int i = 1; i < match.Groups.Count; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = match.Groups[i].Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);
                  if (index >= lim)
                  {
                      return a;
                  }
              }

              match = match.NextMatch();
          }

          // Always add the remainder once the matches are exhausted. Doing this after the
          // loop (rather than only right after a consumed match) keeps the tail when the
          // final matches were skipped empty ones, e.g. "ab".split(/x*/) => ["a", "b"],
          // and also covers the "no match at all" case, which yields [s].
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);
          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Generic split loop that drives the splitter through <see cref="RegExpExec"/>, probing
  /// one position at a time via the splitter's lastIndex.
  /// </summary>
  /// <param name="s">The string being split.</param>
  /// <param name="splitter">The object whose exec and lastIndex are used for matching.</param>
  /// <param name="unicodeMatching">Whether positions advance by surrogate pairs.</param>
  /// <param name="lengthA">Index at which the first produced element is written.</param>
  /// <param name="lim">Stop and return as soon as this many elements were written.</param>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var output = _realm.Intrinsics.Array.ArrayCreate(0);
      var size = (ulong) s.Length;
      ulong p = 0; // start of the piece that has not been emitted yet
      ulong q = 0; // position currently being probed

      while (q < size)
      {
          splitter.Set(RegExpInstance.PropertyLastIndex, q, true);
          var z = RegExpExec(splitter, s);
          if (z.IsNull())
          {
              q = AdvanceStringIndex(s, q, unicodeMatching);
              continue;
          }

          var e = System.Math.Min(TypeConverter.ToLength(splitter.Get(RegExpInstance.PropertyLastIndex)), size);
          if (e == p)
          {
              // The match ended where the pending piece starts: it splits nothing.
              q = AdvanceStringIndex(s, q, unicodeMatching);
              continue;
          }

          var piece = s.Substring((int) p, (int) (q - p));
          output.SetIndexValue(lengthA, piece, updateLength: true);
          lengthA++;
          if (lengthA == lim)
          {
              return output;
          }

          p = e;

          // Append the capture groups (indices 1..length-1 of the exec result).
          var captureCount = (int) TypeConverter.ToLength(z.Get(CommonProperties.Length));
          captureCount = System.Math.Max(captureCount - 1, 0);
          for (var i = 1; i <= captureCount; i++)
          {
              output.SetIndexValue(lengthA, z.Get(i), updateLength: true);
              lengthA++;
              if (lengthA == lim)
              {
                  return output;
              }
          }

          q = p;
      }

      // Whatever remains after the last split point becomes the final element.
      output.SetIndexValue(lengthA, s.Substring((int) p, s.Length - (int) p), updateLength: true);
      return output;
  }
  /// <summary>
  /// Getter for "flags": reads each flag property off the receiver, in a fixed order, and
  /// concatenates the letters of those that are truthy.
  /// </summary>
  private JsValue Flags(JsValue thisObj, JsValue[] arguments)
  {
      JsValue target = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.flags");
      var letters = "";

      // Appends the letter when the named property on the receiver is truthy.
      void Probe(JsValue property, char letter)
      {
          if (TypeConverter.ToBoolean(target.Get(property)))
          {
              letters += letter;
          }
      }

      Probe("hasIndices", 'd');
      Probe(PropertyGlobal, 'g');
      Probe("ignoreCase", 'i');
      Probe("multiline", 'm');
      Probe("dotAll", 's');
      Probe("unicode", 'u');
      Probe("unicodeSets", 'v');
      Probe(PropertySticky, 'y');

      return letters;
  }
  /// <summary>
  /// Implements toString: formats the receiver as "/source/flags", reading both
  /// properties off the receiver (source first, then flags).
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.toString");
      var source = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));
      return $"/{source}/{flagText}";
  }
  /// <summary>
  /// Implements test: returns true when the receiver matches the string in arguments[0].
  /// </summary>
  /// <remarks>
  /// Falls back to <see cref="RegExpExec"/> unless the receiver is a non-unicode
  /// RegExpInstance whose exec is still the built-in one.
  /// </remarks>
  private JsValue Test(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths
      // The fast paths bypass "exec", so they are only valid while exec has not been
      // replaced by script code (same guard as the replace/split/match fast paths).
      if (r is RegExpInstance R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          // Range-check before narrowing to int. Only a start position strictly beyond the
          // end is a guaranteed failure (an empty match at the very end is still possible),
          // and a failure must reset lastIndex so that the next call starts over.
          var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          var start = (int) lastIndex;
          var m = R.Value.Match(s, start);
          if (!m.Success || (R.Sticky && m.Index != start))
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(RegExpInstance.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  private JsValue Search(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // Search always starts at 0; remember lastIndex so it can be put back afterwards.
      var savedLastIndex = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(savedLastIndex, 0))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, s);

      // Only write lastIndex back when exec actually changed it.
      var lastIndexAfterExec = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(lastIndexAfterExec, savedLastIndex))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <remarks>
  /// For non-global expressions this is a single exec. For global expressions it returns
  /// an array of all matched substrings, or null when there is no match.
  /// </remarks>
  private JsValue Match(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.IndexOf('u') != -1;
      rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is RegExpInstance rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              // Sticky + global: collect matches only while each one starts exactly where
              // the previous one ended. The number of collected matches and the expected
              // start position are tracked separately, since matches may be longer than
              // one character.
              uint matchCount = 0;
              var expectedIndex = 0;
              var match = rei.Value.Match(s);
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(matchCount++, match.Value, updateLength: false);

                  // After an empty match the next attempt starts one character further on.
                  expectedIndex = match.Index + System.Math.Max(match.Length, 1);
                  match = match.NextMatch();
              }

              if (matchCount == 0)
              {
                  return Null;
              }

              a.SetLength(matchCount);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }

              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global-match loop: calls <see cref="RegExpExec"/> until it returns null and
  /// collects element 0 of every result. Returns null when nothing matched.
  /// </summary>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var matches = _realm.Intrinsics.Array.ArrayCreate(0);
      for (uint count = 0; ; count++)
      {
          var execResult = RegExpExec(rx, s);
          if (execResult.IsNull())
          {
              matches.SetLength(count);
              if (count == 0)
              {
                  return Null;
              }

              return matches;
          }

          var text = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          matches.SetIndexValue(count, text, updateLength: false);

          if (text == "")
          {
              // Step past an empty match so the loop makes progress.
              var current = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              rx.Set(RegExpInstance.PropertyLastIndex, AdvanceStringIndex(s, current, fullUnicode), true);
          }
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  private JsValue MatchAll(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.matchAll");
      var input = TypeConverter.ToString(arguments.At(0));
      var species = SpeciesConstructor(regExp, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(regExp.Get(PropertyFlags));

      // Iterate over a fresh matcher built from the receiver and its flags.
      var constructArguments = new JsValue[] { regExp, flags };
      var matcher = Construct(species, constructArguments);

      // The matcher starts from the receiver's current lastIndex.
      var startIndex = TypeConverter.ToLength(regExp.Get(RegExpInstance.PropertyLastIndex));
      matcher.Set(RegExpInstance.PropertyLastIndex, startIndex, true);

      var isGlobal = flags.IndexOf('g') != -1;
      var isFullUnicode = flags.IndexOf('u') != -1;
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, input, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>. In unicode mode a high
  /// surrogate immediately followed by a low surrogate is stepped over as one unit (+2);
  /// in every other case the index advances by one.
  /// </summary>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      var startsSurrogatePair = unicode
                                && next < (ulong) s.Length
                                && char.IsHighSurrogate(s[(int) index])
                                && char.IsLowSurrogate(s[(int) next]);

      return startsSurrogatePair ? index + 2 : next;
  }
  /// <summary>
  /// Runs a match through the object's "exec" property when it is callable; otherwise
  /// falls back to the built-in exec, which requires a RegExpInstance.
  /// </summary>
  /// <returns>The exec result: an object, or null when there is no match.</returns>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      var exec = r.Get(PropertyExec);
      if (exec is not ICallable callable)
      {
          // No callable exec: only genuine RegExp instances can use the built-in matcher.
          var ri = r as RegExpInstance;
          if (ri is null)
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return RegExpBuiltinExec(ri, s);
      }

      var result = callable.Call(r, new JsValue[] { s });

      // A user-supplied exec must return either an object or null.
      if (!result.IsNull() && !result.IsObject())
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return result;
  }
  /// <summary>
  /// Checks whether the object's "exec" property is still the built-in exec function
  /// registered by this prototype, and hands the delegate out when it is.
  /// </summary>
  internal bool TryGetDefaultExec(ObjectInstance o, [NotNullWhen(true)] out Func<JsValue, JsValue[], JsValue>? exec)
  {
      if (o.Get(PropertyExec) is not ClrFunctionInstance functionInstance
          || functionInstance._func != _defaultExec)
      {
          exec = default;
          return false;
      }

      exec = _defaultExec;
      return true;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <param name="R">The regular expression instance to execute.</param>
  /// <param name="s">The string to match against.</param>
  /// <returns>The match result array, or null when there is no match.</returns>
  private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          // lastIndex is only honoured for global and sticky expressions.
          lastIndex = 0;
      }

      if (R.Source == RegExpInstance.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              // Failure resets lastIndex, exactly like the stateful path further down.
              // lastIndex can only be non-zero here for global or sticky expressions.
              R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          array.FastSetDataProperty(PropertyIndex._value, lastIndex);
          array.FastSetDataProperty(PropertyInput._value, s);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!global && !sticky && !fullUnicode && !hasIndices)
      {
          // we can use the non-stateful fast path which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
      }

      // the stateful version
      if (lastIndex > length)
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      // The range check above guarantees lastIndex fits into an int.
      var match = matcher.Match(s, (int) lastIndex);
      var success = match.Success && (!sticky || match.Index == (int) lastIndex);
      if (!success)
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var e = match.Index + match.Length;
      if (fullUnicode)
      {
          e = GetStringIndex(s, e);
      }

      if (global || sticky)
      {
          R.Set(RegExpInstance.PropertyLastIndex, e, true);
      }

      return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getstringindex
  /// </summary>
  /// <remarks>
  /// Translates a code point index into the matching code unit (char) index of
  /// <paramref name="s"/>, counting a surrogate pair as a single code point.
  /// </remarks>
  /// <param name="s">The string to walk.</param>
  /// <param name="codePointIndex">Zero-based index counted in code points.</param>
  /// <returns>
  /// The code unit index at which the requested code point starts, or the length of
  /// <paramref name="s"/> when the string holds no code point at that index
  /// (which also yields 0 for an empty string).
  /// </returns>
  private static int GetStringIndex(string s, int codePointIndex)
  {
      var totalUnits = s.Length;
      var unitOffset = 0;

      for (var seenCodePoints = 0; unitOffset < totalUnits; seenCodePoints++)
      {
          if (seenCodePoints == codePointIndex)
          {
              return unitOffset;
          }

          // a surrogate pair occupies two chars but is one code point
          unitOffset += char.IsSurrogatePair(s, unitOffset) ? 2 : 1;
      }

      return totalUnits;
  }
  /// <summary>
  /// Builds the array returned for a successful match: one element per regex group (Undefined for
  /// groups that did not succeed), the PropertyIndex, PropertyInput and PropertyGroups data
  /// properties and, when <paramref name="hasIndices"/> is set, an "indices" property.
  /// </summary>
  /// <param name="engine">Engine whose realm supplies the array intrinsics.</param>
  /// <param name="regex">The .NET regex that produced <paramref name="match"/>; used to resolve group names.</param>
  /// <param name="match">The match to convert.</param>
  /// <param name="s">The input string the match was run against.</param>
  /// <param name="fullUnicode">Not used by this method.</param>
  /// <param name="hasIndices">Whether to also build and attach the match indices array.</param>
  /// <returns>The populated result array.</returns>
  private static ArrayInstance CreateReturnValueArray(
      Engine engine,
      Regex regex,
      Match match,
      string s,
      bool fullUnicode,
      bool hasIndices)
  {
      var groupCount = match.Groups.Count;
      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) groupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, s);

      ObjectInstance? groups = null;

      // Only needed for the indices array. It holds exactly one entry per capture group
      // (group 1 at position 0) with an empty string for unnamed groups, because
      // MakeMatchIndicesIndexPairArray looks names up by position (groupNames[i - 1]).
      // Storing only the named groups would attach names to the wrong group and could
      // index past the end of the list when named and unnamed groups are mixed.
      List<string>? groupNames = null;
      var indices = hasIndices ? new List<JsNumber[]?>(groupCount) : null;

      for (uint i = 0; i < groupCount; i++)
      {
          var capture = match.Groups[(int) i];
          var participated = capture?.Success == true;

          var capturedValue = Undefined;
          if (participated)
          {
              capturedValue = capture!.Value;
          }

          if (hasIndices)
          {
              // [start, end) of the capture, or null when the group did not succeed
              indices!.Add(participated
                  ? new[] { JsNumber.Create(capture!.Index), JsNumber.Create(capture.Index + capture.Length) }
                  : null);
          }

          var groupName = GetRegexGroupName(regex, (int) i);
          var isNamed = !string.IsNullOrWhiteSpace(groupName);
          if (isNamed)
          {
              groups ??= OrdinaryObjectCreate(engine, null);
              groups.CreateDataPropertyOrThrow(groupName!, capturedValue);
          }

          if (hasIndices && i > 0)
          {
              groupNames ??= new List<string>(groupCount - 1);
              groupNames.Add(isNamed ? groupName! : string.Empty);
          }

          array.SetIndexValue(i, capturedValue, updateLength: false);
      }

      array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

      if (hasIndices)
      {
          var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, hasGroups: groups is not null);
          array.CreateDataPropertyOrThrow("indices", indicesArray);
      }

      return array;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  /// </summary>
  /// <remarks>
  /// Produces an array with one entry per element of <paramref name="indices"/> (an index pair
  /// array, or Undefined for a null entry) and a "groups" property that is either Undefined or
  /// an object receiving the index pairs of the named groups.
  /// </remarks>
  /// <param name="engine">Engine whose realm supplies the array intrinsics.</param>
  /// <param name="s">The matched input string; forwarded to <see cref="GetMatchIndexPair"/>.</param>
  /// <param name="indices">Start/end pairs per group; null for a group without a pair.</param>
  /// <param name="groupNames">Group names, looked up at position i - 1 for the entry at position i.</param>
  /// <param name="hasGroups">Whether a groups object should be created.</param>
  private static ArrayInstance MakeMatchIndicesIndexPairArray(
      Engine engine,
      string s,
      List<JsNumber[]?> indices,
      List<string>? groupNames,
      bool hasGroups)
  {
      var pairCount = indices.Count;
      var result = engine.Realm.Intrinsics.Array.Construct((uint) pairCount);

      ObjectInstance? namedGroups = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
      result.CreateDataPropertyOrThrow("groups", namedGroups ?? Undefined);

      for (var position = 0; position < pairCount; position++)
      {
          var bounds = indices[position];

          var pair = Undefined;
          if (bounds is not null)
          {
              pair = GetMatchIndexPair(engine, s, bounds);
          }

          result.Push(pair);

          // position 0 is the whole match and never carries a name
          if (position == 0)
          {
              continue;
          }

          var name = groupNames?[position - 1];
          if (!string.IsNullOrWhiteSpace(name))
          {
              namedGroups!.CreateDataPropertyOrThrow(name!, pair);
          }
      }

      return result;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getmatchindexpair
  /// </summary>
  /// <remarks>
  /// Wraps the given numbers in a new array created through the realm's Array intrinsic.
  /// <paramref name="s"/> is not used by this implementation.
  /// </remarks>
  private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
      => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  /// <summary>
  /// Resolves the name of the capture group at <paramref name="index"/>.
  /// </summary>
  /// <param name="regex">The .NET regex to query.</param>
  /// <param name="index">Group number; 0 denotes the whole match.</param>
  /// <returns>
  /// Null for group 0 and for groups whose name is just their own number (the name .NET reports
  /// for unnamed groups); otherwise whatever <see cref="Regex.GroupNameFromNumber"/> returns.
  /// </returns>
  private static string? GetRegexGroupName(Regex regex, int index)
  {
      if (index == 0)
      {
          return null;
      }

      var groupNameFromNumber = regex.GroupNameFromNumber(index);

      // regex defaults to index as group name when it's not a named group; compare the whole
      // name (digits only, culture-invariant) so that groups 10 and above are recognised too
      var isDefaultName = int.TryParse(
          groupNameFromNumber,
          System.Globalization.NumberStyles.None,
          System.Globalization.CultureInfo.InvariantCulture,
          out var parsedNumber) && parsedNumber == index;

      if (isDefaultName)
      {
          return null;
      }

      return groupNameFromNumber;
  }
  /// <summary>
  /// Entry point for exec: requires <paramref name="thisObj"/> to be a <c>RegExpInstance</c>
  /// (otherwise <c>ExceptionHelper.ThrowTypeError</c> is invoked), converts the first argument
  /// to a string and delegates to <see cref="RegExpBuiltinExec"/>.
  /// </summary>
  private JsValue Exec(JsValue thisObj, JsValue[] arguments)
  {
      // 'as' + null check is kept deliberately: the variable must stay definitely assigned
      // after the throwing helper call
      var regExp = thisObj as RegExpInstance;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      var input = TypeConverter.ToString(arguments.At(0));
      return RegExpBuiltinExec(regExp, input);
  }
  943. }
  944. }