2
0

RegExpPrototype.cs 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098
  1. using System.Diagnostics.CodeAnalysis;
  2. using System.Text.RegularExpressions;
  3. using Jint.Collections;
  4. using Jint.Native.Number;
  5. using Jint.Native.Object;
  6. using Jint.Native.String;
  7. using Jint.Native.Symbol;
  8. using Jint.Pooling;
  9. using Jint.Runtime;
  10. using Jint.Runtime.Descriptors;
  11. using Jint.Runtime.Interop;
  12. namespace Jint.Native.RegExp
  13. {
  14. internal sealed class RegExpPrototype : Prototype
  15. {
  // Property names used by the methods below, cached as JsString instances.
  private static readonly JsString PropertyExec = new("exec");
  private static readonly JsString PropertyIndex = new("index");
  private static readonly JsString PropertyInput = new("input");
  private static readonly JsString PropertySticky = new("sticky");
  private static readonly JsString PropertyGlobal = new("global");
  internal static readonly JsString PropertySource = new("source");
  // Value the "source" getter returns when it is invoked on the prototype object itself.
  private static readonly JsString DefaultSource = new("(?:)");
  internal static readonly JsString PropertyFlags = new("flags");
  private static readonly JsString PropertyGroups = new("groups");
  // Constructor object published through the prototype's "constructor" property.
  private readonly RegExpConstructor _constructor;
  // Delegate installed as the built-in "exec"; TryGetDefaultExec compares against it
  // to detect whether an object's "exec" is still the built-in one.
  private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the RegExp prototype object, remembering the constructor it belongs to
  /// and the object prototype it inherits from.
  /// </summary>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype)
      : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // Keep a single delegate instance so the built-in exec can later be recognised by reference.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Registers the string-keyed members (methods and flag getters) and the
  /// symbol-keyed members (match, matchAll, replace, search, split) on the prototype.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag functionLengthFlags = PropertyFlag.Configurable;
      const PropertyFlag dataPropertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Wraps a CLR delegate as a writable, configurable method property.
      PropertyDescriptor Method(string name, Func<JsValue, JsValue[], JsValue> implementation, int length)
      {
          var function = new ClrFunctionInstance(Engine, name, implementation, length, functionLengthFlags);
          return new PropertyDescriptor(function, dataPropertyFlags);
      }

      // Wraps a CLR delegate as a configurable getter without a setter.
      GetSetPropertyDescriptor Getter(string name, Func<JsValue, JsValue[], JsValue> implementation)
      {
          return new GetSetPropertyDescriptor(
              get: new ClrFunctionInstance(Engine, name, implementation, 0, functionLengthFlags),
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      // Getter for a value read off a RegExpInstance: yields protoValue (or undefined) when
      // called on this prototype itself and throws a TypeError for any other non-RegExp receiver.
      GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<RegExpInstance, JsValue> valueExtractor, JsValue? protoValue = null)
      {
          JsValue Read(JsValue thisObj, JsValue[] arguments)
          {
              if (ReferenceEquals(thisObj, this))
              {
                  return protoValue ?? Undefined;
              }

              var r = thisObj as RegExpInstance;
              if (r is null)
              {
                  ExceptionHelper.ThrowTypeError(_realm);
              }

              return valueExtractor(r);
          }

          return Getter(name, Read);
      }

      var properties = new PropertyDictionary(14, checkExistingKeys: false);
      properties["constructor"] = new PropertyDescriptor(_constructor, dataPropertyFlags);
      properties["toString"] = Method("toString", ToRegExpString, 0);
      properties["exec"] = Method("exec", _defaultExec, 1);
      properties["test"] = Method("test", Test, 1);
      properties["dotAll"] = CreateGetAccessorDescriptor("get dotAll", static r => r.DotAll);
      properties["flags"] = Getter("get flags", Flags);
      properties["global"] = CreateGetAccessorDescriptor("get global", static r => r.Global);
      properties["hasIndices"] = CreateGetAccessorDescriptor("get hasIndices", static r => r.Indices);
      properties["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", static r => r.IgnoreCase);
      properties["multiline"] = CreateGetAccessorDescriptor("get multiline", static r => r.Multiline);
      properties["source"] = Getter("get source", Source);
      properties["sticky"] = CreateGetAccessorDescriptor("get sticky", static r => r.Sticky);
      properties["unicode"] = CreateGetAccessorDescriptor("get unicode", static r => r.FullUnicode);
      properties["unicodeSets"] = CreateGetAccessorDescriptor("get unicodeSets", static r => r.UnicodeSets);
      SetProperties(properties);

      var symbols = new SymbolDictionary(5);
      symbols[GlobalSymbolRegistry.Match] = Method("[Symbol.match]", Match, 1);
      symbols[GlobalSymbolRegistry.MatchAll] = Method("[Symbol.matchAll]", MatchAll, 1);
      symbols[GlobalSymbolRegistry.Replace] = Method("[Symbol.replace]", Replace, 2);
      symbols[GlobalSymbolRegistry.Search] = Method("[Symbol.search]", Search, 1);
      symbols[GlobalSymbolRegistry.Split] = Method("[Symbol.split]", Split, 2);
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  /// <remarks>
  /// Returns the default source when invoked on the prototype itself, throws a TypeError for
  /// receivers that are not RegExp instances, and otherwise returns the instance's source with
  /// forward slashes and line feeds escaped.
  /// </remarks>
  private JsValue Source(JsValue thisObj, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObj, this))
      {
          return DefaultSource;
      }

      var regExp = thisObj as RegExpInstance;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      var pattern = regExp.Source;
      if (string.IsNullOrEmpty(pattern))
      {
          return RegExpInstance.regExpForMatchingAllCharacters;
      }

      // Normalise already-escaped slashes first so that every slash ends up escaped exactly once.
      var withPlainSlashes = pattern.Replace("\\/", "/");
      var withEscapedSlashes = withPlainSlashes.Replace("/", "\\/");
      return withEscapedSlashes.Replace("\n", "\\n");
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <remarks>
  /// <paramref name="arguments"/>[0] is the input string and [1] the replacement, which is
  /// either a callable or a value converted to a replacement template string.
  /// A fast path delegates to <see cref="Regex.Replace(string, string, int)"/> (or the evaluator
  /// overload) when the regexp is a non-unicode, non-sticky instance with the built-in exec
  /// and the template holds no '$'. Otherwise the generic algorithm runs through
  /// <see cref="RegExpExec"/> and <see cref="GetSubstitution"/>.
  /// </remarks>
  private JsValue Replace(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // A '$' in the template may introduce a substitution pattern; the fast path hands the
      // template straight to Regex.Replace, so such templates must take the generic path.
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.IndexOf('$') != -1;
      }

      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      var fullUnicode = false;
      if (global)
      {
          fullUnicode = flags.IndexOf('u') != -1;
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is RegExpInstance rei && rei.TryGetDefaultRegExpExec(out _))
      {
          var count = global ? int.MaxValue : 1;
          string result;
          if (functionalReplace)
          {
              // Builds the replacer arguments: match, captures..., index, input[, groups].
              string Evaluator(Match match)
              {
                  var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
                  replacerArgs.Add(match.Value);

                  ObjectInstance? groups = null;
                  for (var i = 1; i < match.Groups.Count; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Success ? capture.Value : Undefined);

                      var groupName = GetRegexGroupName(rei.Value, i);
                      if (!string.IsNullOrWhiteSpace(groupName))
                      {
                          groups ??= OrdinaryObjectCreate(_engine, null);
                          groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
                      }
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);
                  if (groups is not null)
                  {
                      replacerArgs.Add(groups);
                  }

                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // Generic path, phase 1: collect every match result object.
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // An empty match would not move lastIndex; advance it manually. The write must
              // throw on failure (e.g. frozen regexp), as the same step in MatchSlow does.
              var thisIndex = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(RegExpInstance.PropertyLastIndex, nextIndex, true);
          }
      }

      // Generic path, phase 2: stitch the output together from the collected results.
      var accumulatedResult = "";
      var nextSourcePosition = 0;
      var captures = new List<JsValue>();
      for (var i = 0; i < results.Count; i++)
      {
          var result = results[i];
          var nCaptures = (int) result.Length;
          nCaptures = System.Math.Max(nCaptures - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;

          // Clamp to [0, lengthS] before narrowing to int so that huge or infinite
          // "index" values cannot wrap around during the cast.
          var position = (int) System.Math.Max(System.Math.Min(TypeConverter.ToInteger(result.Get(PropertyIndex)), lengthS), 0);

          // Each capture is either undefined (group did not participate) or a string.
          captures.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = result.Get(n);
              captures.Add(capN.IsUndefined() ? Undefined : (JsValue) TypeConverter.ToString(capN));
          }

          var namedCaptures = result.Get(PropertyGroups);
          string replacement;
          if (functionalReplace)
          {
              // Unmatched groups are passed as undefined, matching the fast path above.
              var replacerArgs = new List<JsValue>(captures.Count + 4);
              replacerArgs.Add(matched);
              replacerArgs.AddRange(captures);
              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              // GetSubstitution works on strings; an undefined capture substitutes as "".
              var captureStrings = new string[captures.Count];
              for (var k = 0; k < captureStrings.Length; k++)
              {
                  var capture = captures[k];
                  captureStrings[k] = capture.IsUndefined() ? "" : TypeConverter.ToString(capture);
              }

              replacement = GetSubstitution(matched, s, position, captureStrings, namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // Results that start before the already-consumed part of the input are ignored.
          if (position >= nextSourcePosition)
          {
              accumulatedResult = accumulatedResult +
                                  s.Substring(nextSourcePosition, position - nextSourcePosition) +
                                  replacement;
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition >= lengthS)
      {
          return accumulatedResult;
      }

      return accumulatedResult + s.Substring(nextSourcePosition);
  }
  /// <summary>
  /// Invokes the replacer function with an undefined receiver and the collected
  /// arguments, and converts whatever it returns to a string.
  /// </summary>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var callArguments = replacerArgs.ToArray();
      return TypeConverter.ToString(callable.Call(Undefined, callArguments));
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The full input string.</param>
  /// <param name="position">Index in <paramref name="str"/> at which the match starts.</param>
  /// <param name="captures">Captured group values; element 0 is group 1.</param>
  /// <param name="namedCaptures">Object holding named groups, or undefined when there are none.</param>
  /// <param name="replacement">The replacement template that may contain '$' patterns.</param>
  /// <returns>The template with all recognised '$' patterns expanded.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // If there is no pattern, replace the pattern as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      // Only ASCII digits form $n / $nn references; char.IsDigit would also accept
      // other Unicode decimal digits, for which "c - '0'" is meaningless.
      static bool IsAsciiDigit(char ch) => ch is >= '0' and <= '9';

      // Patterns
      // $$ Inserts a "$".
      // $& Inserts the matched substring.
      // $` Inserts the portion of the string that precedes the matched substring.
      // $' Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string, provided the first argument was a RegExp object.
      // $<name> Inserts the named capture, when named captures are available.
      using var replacementBuilder = StringBuilderPool.Rent();
      var sb = replacementBuilder.Builder;
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c == '$' && i < replacement.Length - 1)
          {
              c = replacement[++i];
              switch (c)
              {
                  case '$':
                      sb.Append('$');
                      break;
                  case '&':
                      sb.Append(matched);
                      break;
                  case '`':
                      sb.Append(str.Substring(0, position));
                      break;
                  case '\'':
                  {
                      // The match text can come from a user-defined exec and be longer than
                      // what remains of the input; in that case the tail is empty.
                      var tailPos = position + matched.Length;
                      if (tailPos < str.Length)
                      {
                          sb.Append(str, tailPos, str.Length - tailPos);
                      }
                      break;
                  }
                  case '<':
                  {
                      var gtPos = replacement.IndexOf('>', i + 1);
                      if (gtPos == -1 || namedCaptures.IsUndefined())
                      {
                          // Not a usable group reference: emit the two characters literally.
                          sb.Append('$');
                          sb.Append(c);
                      }
                      else
                      {
                          var startIndex = i + 1;
                          var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                          var capture = namedCaptures.Get(groupName);
                          if (!capture.IsUndefined())
                          {
                              sb.Append(TypeConverter.ToString(capture));
                          }

                          // Skip past the closing '>'.
                          i = gtPos;
                      }
                      break;
                  }
                  default:
                  {
                      if (IsAsciiDigit(c))
                      {
                          int matchNumber1 = c - '0';

                          // The match number can be one or two digits long.
                          int matchNumber2 = 0;
                          if (i < replacement.Length - 1 && IsAsciiDigit(replacement[i + 1]))
                          {
                              matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                          }

                          // Try the two digit capture first.
                          if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                          {
                              // Two digit capture replacement.
                              sb.Append(TypeConverter.ToString(captures[matchNumber2 - 1]));
                              i++;
                          }
                          else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                          {
                              // Single digit capture replacement.
                              sb.Append(TypeConverter.ToString(captures[matchNumber1 - 1]));
                          }
                          else
                          {
                              // Capture does not exist: emit '$' and step back so the digit
                              // is copied as an ordinary character on the next iteration.
                              sb.Append('$');
                              i--;
                          }
                      }
                      else
                      {
                          // Unknown replacement pattern.
                          sb.Append('$');
                          sb.Append(c);
                      }
                      break;
                  }
              }
          }
          else
          {
              sb.Append(c);
          }
      }

      return replacementBuilder.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <remarks>
  /// <paramref name="arguments"/>[0] is the string to split and [1] the optional limit.
  /// A sticky copy of the regexp is always constructed through the species constructor.
  /// Non-unicode instances with the built-in exec are then split directly with the
  /// underlying <see cref="Regex"/>; everything else goes through <see cref="SplitSlow"/>.
  /// </remarks>
  private JsValue Split(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.IndexOf('u') > -1;

      // The splitter must be sticky; append 'y' unless it is already present.
      var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });
      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          // Empty input: the result is [] when the splitter matches it, [""] otherwise.
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is RegExpInstance R && R.TryGetDefaultRegExpExec(out _))
      {
          // we can take faster path
          if (R.Source == RegExpInstance.regExpForMatchingAllCharacters)
          {
              // Empty pattern: plain per-character split, capped by the caller-supplied
              // limit instead of always producing s.Length elements.
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, (uint) System.Math.Min(s.Length, lim));
          }

          var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);
          var match = R.Value.Match(s, 0);

          if (!match.Success) // No match at all return the string in an array
          {
              a.SetIndexValue(0, s, updateLength: true);
              return a;
          }

          int lastIndex = 0;
          uint index = 0;
          while (match.Success && index < lim)
          {
              // Empty matches at the start, at the end, or directly after the previous
              // match do not produce a split point.
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  match = match.NextMatch();
                  continue;
              }

              // Add the match results to the array.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);

              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;

              // Captured groups are spliced into the output after each piece.
              for (int i = 1; i < match.Groups.Count; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = match.Groups[i].Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);

                  if (index >= lim)
                  {
                      return a;
                  }
              }

              match = match.NextMatch();
              if (!match.Success) // Add the last part of the split
              {
                  a.SetIndexValue(index++, s.Substring(lastIndex), updateLength: true);
              }
          }

          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Generic split loop: probes the sticky <paramref name="splitter"/> at successive
  /// positions of <paramref name="s"/> through <see cref="RegExpExec"/>, emitting the text
  /// between matches followed by each match's captures until <paramref name="lim"/>
  /// elements have been produced or the input is exhausted.
  /// </summary>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var pieces = _realm.Intrinsics.Array.ArrayCreate(0);
      var size = (ulong) s.Length;
      ulong pieceStart = 0;
      ulong probe = 0;

      while (probe < size)
      {
          splitter.Set(RegExpInstance.PropertyLastIndex, probe, true);
          var z = RegExpExec(splitter, s);

          // A probe only counts when it matched and the match ends past the current piece start.
          var usable = false;
          ulong matchEnd = 0;
          if (!z.IsNull())
          {
              matchEnd = System.Math.Min(TypeConverter.ToLength(splitter.Get(RegExpInstance.PropertyLastIndex)), size);
              usable = matchEnd != pieceStart;
          }

          if (!usable)
          {
              probe = AdvanceStringIndex(s, probe, unicodeMatching);
              continue;
          }

          var piece = s.Substring((int) pieceStart, (int) (probe - pieceStart));
          pieces.SetIndexValue(lengthA, piece, updateLength: true);
          lengthA++;
          if (lengthA == lim)
          {
              return pieces;
          }

          pieceStart = matchEnd;

          // Index 0 of the match result is the match itself; the rest are captures.
          var numberOfCaptures = System.Math.Max((int) TypeConverter.ToLength(z.Get(CommonProperties.Length)) - 1, 0);
          for (var i = 1; i <= numberOfCaptures; i++)
          {
              pieces.SetIndexValue(lengthA, z.Get(i), updateLength: true);
              lengthA++;
              if (lengthA == lim)
              {
                  return pieces;
              }
          }

          probe = pieceStart;
      }

      // Whatever follows the last match becomes the final element.
      pieces.SetIndexValue(lengthA, s.Substring((int) pieceStart), updateLength: true);
      return pieces;
  }
  /// <summary>
  /// Builds the flags string by reading each flag property from the receiver in the
  /// order d, g, i, m, s, u, v, y and appending the letter for every truthy one.
  /// </summary>
  private JsValue Flags(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.flags");

      // Returns soFar extended by the flag letter when the property is truthy on the target.
      static string WithFlag(JsValue target, JsValue property, char letter, string soFar)
      {
          if (TypeConverter.ToBoolean(target.Get(property)))
          {
              return soFar + letter;
          }

          return soFar;
      }

      var flags = WithFlag(r, "hasIndices", 'd', "");
      flags = WithFlag(r, PropertyGlobal, 'g', flags);
      flags = WithFlag(r, "ignoreCase", 'i', flags);
      flags = WithFlag(r, "multiline", 'm', flags);
      flags = WithFlag(r, "dotAll", 's', flags);
      flags = WithFlag(r, "unicode", 'u', flags);
      flags = WithFlag(r, "unicodeSets", 'v', flags);
      flags = WithFlag(r, PropertySticky, 'y', flags);
      return flags;
  }
  /// <summary>
  /// Produces "/source/flags" from the receiver's "source" and "flags" properties,
  /// read in that order.
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.toString");
      var source = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));
      return string.Concat("/", source, "/", flagText);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype.test
  /// </summary>
  /// <remarks>
  /// Returns whether the receiver matches <paramref name="arguments"/>[0] converted to a
  /// string. Non-unicode instances that still use the built-in exec are answered directly
  /// from the underlying <see cref="Regex"/>; every other receiver goes through
  /// <see cref="RegExpExec"/> so that a user-defined exec is honoured.
  /// </remarks>
  private JsValue Test(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths; they are only valid while "exec" is the built-in one
      if (r is RegExpInstance R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          // Same bounds rule as RegExpBuiltinExec: a lastIndex past the end of the input
          // fails and resets lastIndex. This also keeps Regex.Match from being called with
          // an out-of-range start (previously possible for empty input).
          var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          var start = (int) lastIndex;
          var m = R.Value.Match(s, start);
          if (!m.Success || (R.Sticky && m.Index != start))
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(RegExpInstance.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  /// <remarks>
  /// Runs a single exec from position 0 and restores the receiver's previous lastIndex
  /// afterwards. Returns the match's "index" property, or -1 when nothing matched.
  /// </remarks>
  private JsValue Search(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.search");
      var input = TypeConverter.ToString(arguments.At(0));

      // Start from 0, writing only when the stored value actually differs.
      var savedLastIndex = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(savedLastIndex, 0))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, input);

      // Put the original lastIndex back if exec changed it.
      var lastIndexAfterExec = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(lastIndexAfterExec, savedLastIndex))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <remarks>
  /// Without the 'g' flag this is a single <see cref="RegExpExec"/>. With it, lastIndex is
  /// reset to 0 and all match strings are collected into an array (null when there are
  /// none). Non-unicode instances with the built-in exec are enumerated directly from the
  /// underlying <see cref="Regex"/>; everything else goes through <see cref="MatchSlow"/>.
  /// </remarks>
  private JsValue Match(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.IndexOf('u') != -1;
      rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is RegExpInstance rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              // Sticky + global: every match has to begin exactly where the previous one
              // ended (one further on after an empty match). The number of stored matches
              // and the expected start position are tracked separately, so matches longer
              // than one character are handled and the final length equals the match count.
              uint count = 0;
              var expectedIndex = 0;
              var match = rei.Value.Match(s);
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(count, match.Value, updateLength: false);
                  count++;
                  expectedIndex = match.Length == 0
                      ? match.Index + 1
                      : match.Index + match.Length;
                  match = match.NextMatch();
              }

              if (count == 0)
              {
                  return Null;
              }

              a.SetLength(count);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }

              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global-match loop: calls <see cref="RegExpExec"/> until it yields null,
  /// collecting each match string. Returns null when nothing matched at all.
  /// </summary>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var collected = _realm.Intrinsics.Array.ArrayCreate(0);
      for (uint count = 0; ; count++)
      {
          var execResult = RegExpExec(rx, s);
          if (execResult.IsNull())
          {
              collected.SetLength(count);
              if (count == 0)
              {
                  return Null;
              }

              return collected;
          }

          var text = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          collected.SetIndexValue(count, text, updateLength: false);

          if (text.Length == 0)
          {
              // An empty match leaves lastIndex in place; move it on by hand.
              var current = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              rx.Set(RegExpInstance.PropertyLastIndex, AdvanceStringIndex(s, current, fullUnicode), true);
          }
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  /// <remarks>
  /// Clones the receiver through its species constructor, copies lastIndex onto the clone
  /// and returns a RegExp string iterator over it.
  /// </remarks>
  private JsValue MatchAll(JsValue thisObj, JsValue[] arguments)
  {
      var source = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.matchAll");
      var input = TypeConverter.ToString(arguments.At(0));
      var species = SpeciesConstructor(source, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(source.Get(PropertyFlags));

      var constructorArguments = new JsValue[] { source, flags };
      var matcher = Construct(species, constructorArguments);

      // The clone continues from wherever the original currently stands.
      matcher.Set(
          RegExpInstance.PropertyLastIndex,
          TypeConverter.ToLength(source.Get(RegExpInstance.PropertyLastIndex)),
          true);

      var isGlobal = flags.IndexOf('g') != -1;
      var isFullUnicode = flags.IndexOf('u') != -1;
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, input, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>: two code units further when
  /// <paramref name="unicode"/> is set and a surrogate pair starts at
  /// <paramref name="index"/>, one code unit further in every other case.
  /// </summary>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      // A pair needs both code units to lie inside the string.
      if (unicode
          && next < (ulong) s.Length
          && char.IsSurrogatePair(s[(int) index], s[(int) next]))
      {
          return index + 2;
      }

      return next;
  }
  /// <summary>
  /// Executes a match on <paramref name="r"/>: calls the object's own callable "exec" when
  /// it has one (the result must be null or an object, otherwise a TypeError is thrown),
  /// and falls back to the built-in algorithm for RegExp instances. Non-RegExp objects
  /// without a callable "exec" raise a TypeError.
  /// </summary>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      if (r.Get(PropertyExec) is ICallable callable)
      {
          var result = callable.Call(r, new JsValue[] { s });
          var isValidResult = result.IsNull() || result.IsObject();
          if (!isValidResult)
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return result;
      }

      var regExp = r as RegExpInstance;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return RegExpBuiltinExec(regExp, s);
  }
  /// <summary>
  /// Reports whether the "exec" property of <paramref name="o"/> is a CLR function backed
  /// by this prototype's built-in exec delegate, handing that delegate out on success.
  /// </summary>
  internal bool TryGetDefaultExec(ObjectInstance o, [NotNullWhen((true))] out Func<JsValue, JsValue[], JsValue>? exec)
  {
      if (o.Get(PropertyExec) is not ClrFunctionInstance candidate || candidate._func != _defaultExec)
      {
          exec = default;
          return false;
      }

      exec = _defaultExec;
      return true;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <param name="R">The RegExp instance to execute.</param>
  /// <param name="s">The input string.</param>
  /// <returns>The match result array, or null when there is no match.</returns>
  private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          // lastIndex only takes part in matching for global or sticky expressions
          lastIndex = 0;
      }

      if (R.Source == RegExpInstance.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              // Only reachable for global/sticky expressions (lastIndex is 0 otherwise);
              // reset lastIndex on failure exactly like the stateful branch further down.
              R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          array.FastSetDataProperty(PropertyIndex._value, lastIndex);
          array.FastSetDataProperty(PropertyInput._value, s);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!global && !sticky && !fullUnicode && !hasIndices)
      {
          // we can use the non-stateful fast path, which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
      }

      // the stateful version
      if (lastIndex > length)
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var match = matcher.Match(s, (int) lastIndex);

      // A sticky expression must match exactly at lastIndex.
      if (!match.Success || (sticky && match.Index != (int) lastIndex))
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var e = match.Index + match.Length;
      if (fullUnicode)
      {
          e = GetStringIndex(s, e);
      }

      if (global || sticky)
      {
          R.Set(RegExpInstance.PropertyLastIndex, e, true);
      }

      return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getstringindex
  /// </summary>
  /// <remarks>
  /// Translates a code point index into the corresponding UTF-16 code unit index of
  /// <paramref name="s"/>. A surrogate pair counts as one code point spanning two code units.
  /// Returns the length of <paramref name="s"/> when the code point index is never reached.
  /// </remarks>
  private static int GetStringIndex(string s, int codePointIndex)
  {
      var totalUnits = s.Length;
      var unitOffset = 0;

      // an empty string never enters the loop and yields 0
      for (var codePointsSeen = 0; unitOffset < totalUnits; codePointsSeen++)
      {
          if (codePointsSeen == codePointIndex)
          {
              return unitOffset;
          }

          unitOffset += char.IsSurrogatePair(s, unitOffset) ? 2 : 1;
      }

      return totalUnits;
  }
  /// <summary>
  /// Builds the array returned from a successful exec: one element per capture group
  /// (element 0 is the whole match) plus the <c>index</c>, <c>input</c>, <c>groups</c> and,
  /// when <paramref name="hasIndices"/> is set, <c>indices</c> properties.
  /// </summary>
  /// <param name="engine">Engine whose realm supplies the array and object intrinsics.</param>
  /// <param name="regex">The regex that produced <paramref name="match"/>; used to resolve group names.</param>
  /// <param name="match">The successful match.</param>
  /// <param name="s">The input string that was matched.</param>
  /// <param name="fullUnicode">Currently not used by this method.</param>
  /// <param name="hasIndices">Whether the <c>indices</c> property should be produced.</param>
  private static JsArray CreateReturnValueArray(
      Engine engine,
      Regex regex,
      Match match,
      string s,
      bool fullUnicode,
      bool hasIndices)
  {
      var groupCount = match.Groups.Count;
      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) groupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, s);

      ObjectInstance? groups = null;

      // Both lists are only needed for the "indices" property.
      // groupNames holds exactly one entry per capture 1..n-1 (empty string for unnamed captures)
      // because MakeMatchIndicesIndexPairArray looks names up positionally as groupNames[i - 1].
      // Collecting only the named groups would misalign names and captures as soon as named and
      // unnamed groups are mixed, and would make the positional lookup run past the end of the list.
      var indices = hasIndices ? new List<JsNumber[]?>(groupCount) : null;
      var groupNames = hasIndices ? new List<string>(groupCount) : null;

      for (uint i = 0; i < groupCount; i++)
      {
          var capture = match.Groups[(int) i];
          var captured = capture.Success;

          // groups that did not participate in the match are exposed as undefined
          var capturedValue = Undefined;
          if (captured)
          {
              capturedValue = capture.Value;
          }

          if (hasIndices)
          {
              if (captured)
              {
                  indices!.Add(new[] { JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length) });
              }
              else
              {
                  indices!.Add(null);
              }
          }

          var groupName = GetRegexGroupName(regex, (int) i);
          if (!string.IsNullOrWhiteSpace(groupName))
          {
              // the groups object is only created once the first named group is seen
              groups ??= OrdinaryObjectCreate(engine, null);
              groups.CreateDataPropertyOrThrow(groupName, capturedValue);
              groupNames?.Add(groupName!);
          }
          else if (i > 0)
          {
              // placeholder keeps groupNames aligned with the capture numbering
              groupNames?.Add(string.Empty);
          }

          array.SetIndexValue(i, capturedValue, updateLength: false);
      }

      array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

      if (hasIndices)
      {
          // a groups object exists exactly when at least one named group was found
          var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, hasGroups: groups is not null);
          array.CreateDataPropertyOrThrow("indices", indicesArray);
      }

      return array;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  /// </summary>
  /// <remarks>
  /// Produces the value of the <c>indices</c> property: one entry per element of
  /// <paramref name="indices"/> (an index pair array, or undefined for a null entry) and a
  /// <c>groups</c> property that is an object when <paramref name="hasGroups"/> is true and
  /// undefined otherwise. <paramref name="groupNames"/> is read positionally: entry
  /// <c>i - 1</c> is taken as the name of capture <c>i</c>, and null/blank entries are skipped.
  /// </remarks>
  private static JsArray MakeMatchIndicesIndexPairArray(
      Engine engine,
      string s,
      List<JsNumber[]?> indices,
      List<string>? groupNames,
      bool hasGroups)
  {
      var pairCount = indices.Count;
      var result = engine.Realm.Intrinsics.Array.Construct((uint) pairCount);

      ObjectInstance? namedPairs = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
      result.CreateDataPropertyOrThrow("groups", namedPairs ?? Undefined);

      for (var captureNumber = 0; captureNumber < pairCount; captureNumber++)
      {
          var bounds = indices[captureNumber];

          var pair = Undefined;
          if (bounds is not null)
          {
              pair = GetMatchIndexPair(engine, s, bounds);
          }

          result.Push(pair);

          if (captureNumber == 0)
          {
              // capture 0 is the whole match and has no name
              continue;
          }

          var captureName = groupNames?[captureNumber - 1];
          if (!string.IsNullOrWhiteSpace(captureName))
          {
              namedPairs!.CreateDataPropertyOrThrow(captureName!, pair);
          }
      }

      return result;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getmatchindexpair
  /// </summary>
  /// <remarks>
  /// Wraps the given numbers in a new array created from the list.
  /// The <paramref name="s"/> argument is not used here.
  /// </remarks>
  private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
      => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  /// <summary>
  /// Resolves the name of the capture group with the given number.
  /// </summary>
  /// <param name="regex">The regex that owns the group.</param>
  /// <param name="index">The group number; 0 denotes the whole match.</param>
  /// <returns>
  /// The group's name, or <c>null</c> when the group is the whole match or is unnamed.
  /// </returns>
  private static string? GetRegexGroupName(Regex regex, int index)
  {
      if (index == 0)
      {
          // group 0 is the whole match and never carries a name
          return null;
      }

      var groupNameFromNumber = regex.GroupNameFromNumber(index);

      // regex defaults to the group number as group name when it's not a named group;
      // compare numerically so that multi-digit numbers (group 10 and above) are recognised too,
      // not only the single-digit ones
      if (int.TryParse(
              groupNameFromNumber,
              System.Globalization.NumberStyles.None,
              System.Globalization.CultureInfo.InvariantCulture,
              out var groupNumber)
          && groupNumber == index)
      {
          return null;
      }

      return groupNameFromNumber;
  }
  /// <summary>
  /// Entry point for exec: requires the receiver to be a <c>RegExpInstance</c>
  /// (otherwise a TypeError is raised through <c>ExceptionHelper.ThrowTypeError</c>),
  /// converts the first argument to a string and runs <c>RegExpBuiltinExec</c> on it.
  /// </summary>
  private JsValue Exec(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = thisObj as RegExpInstance;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      // the receiver check above deliberately happens before the argument is converted
      return RegExpBuiltinExec(regExp, TypeConverter.ToString(arguments.At(0)));
  }
  945. }
  946. }