2
0

RegExpPrototype.cs 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088
  1. using System.Diagnostics.CodeAnalysis;
  2. using System.Text.RegularExpressions;
  3. using Jint.Collections;
  4. using Jint.Native.Number;
  5. using Jint.Native.Object;
  6. using Jint.Native.String;
  7. using Jint.Native.Symbol;
  8. using Jint.Pooling;
  9. using Jint.Runtime;
  10. using Jint.Runtime.Descriptors;
  11. using Jint.Runtime.Interop;
  12. namespace Jint.Native.RegExp
  13. {
  14. internal sealed class RegExpPrototype : Prototype
  15. {
  // Shared JsString instances used as property keys by the methods of this class.
  16. private static readonly JsString PropertyExec = new("exec");
  17. private static readonly JsString PropertyIndex = new("index");
  18. private static readonly JsString PropertyInput = new("input");
  19. private static readonly JsString PropertySticky = new("sticky");
  20. private static readonly JsString PropertyGlobal = new("global");
  21. internal static readonly JsString PropertySource = new("source");
  // Value returned by the "source" getter when it is invoked on the prototype object itself.
  22. private static readonly JsString DefaultSource = new("(?:)");
  23. internal static readonly JsString PropertyFlags = new("flags");
  24. private static readonly JsString PropertyGroups = new("groups");
  // Constructor exposed through the prototype's "constructor" property.
  25. private readonly RegExpConstructor _constructor;
  // Delegate registered as the "exec" method; TryGetDefaultExec compares against it to detect an unmodified exec.
  26. private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the prototype object, remembering the owning constructor and the object prototype it inherits from.
  /// </summary>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype)
      : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // Keep a single delegate instance so TryGetDefaultExec can recognise the built-in exec later.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Registers the string-keyed members (constructor, toString, exec, test and the flag/source accessors)
  /// and the well-known-symbol methods (match, matchAll, replace, search, split) on this prototype.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Data descriptor for a built-in method: configurable and writable.
      PropertyDescriptor Method(ClrFunctionInstance function)
      {
          return new PropertyDescriptor(function, propertyFlags);
      }

      // Accessor descriptor that only has a getter; the setter slot is undefined.
      GetSetPropertyDescriptor GetterOnly(ClrFunctionInstance getter)
      {
          return new GetSetPropertyDescriptor(get: getter, set: Undefined, flags: PropertyFlag.Configurable);
      }

      GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<JsRegExp, JsValue> valueExtractor, JsValue? protoValue = null)
      {
          JsValue ReadValue(JsValue thisObj, JsValue[] arguments)
          {
              if (!ReferenceEquals(thisObj, this))
              {
                  var regExp = thisObj as JsRegExp;
                  if (regExp is null)
                  {
                      ExceptionHelper.ThrowTypeError(_realm);
                  }

                  return valueExtractor(regExp);
              }

              // Invoked on the prototype object itself.
              return protoValue ?? Undefined;
          }

          return GetterOnly(new ClrFunctionInstance(Engine, name, ReadValue, 0, lengthFlags));
      }

      var properties = new PropertyDictionary(14, checkExistingKeys: false);
      properties["constructor"] = new PropertyDescriptor(_constructor, propertyFlags);
      properties["toString"] = Method(new ClrFunctionInstance(Engine, "toString", ToRegExpString, 0, lengthFlags));
      properties["exec"] = Method(new ClrFunctionInstance(Engine, "exec", _defaultExec, 1, lengthFlags));
      properties["test"] = Method(new ClrFunctionInstance(Engine, "test", Test, 1, lengthFlags));
      properties["dotAll"] = CreateGetAccessorDescriptor("get dotAll", static regExp => regExp.DotAll);
      properties["flags"] = GetterOnly(new ClrFunctionInstance(Engine, "get flags", Flags, 0, lengthFlags));
      properties["global"] = CreateGetAccessorDescriptor("get global", static regExp => regExp.Global);
      properties["hasIndices"] = CreateGetAccessorDescriptor("get hasIndices", static regExp => regExp.Indices);
      properties["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", static regExp => regExp.IgnoreCase);
      properties["multiline"] = CreateGetAccessorDescriptor("get multiline", static regExp => regExp.Multiline);
      properties["source"] = GetterOnly(new ClrFunctionInstance(Engine, "get source", Source, 0, lengthFlags));
      properties["sticky"] = CreateGetAccessorDescriptor("get sticky", static regExp => regExp.Sticky);
      properties["unicode"] = CreateGetAccessorDescriptor("get unicode", static regExp => regExp.FullUnicode);
      properties["unicodeSets"] = CreateGetAccessorDescriptor("get unicodeSets", static regExp => regExp.UnicodeSets);
      SetProperties(properties);

      var symbols = new SymbolDictionary(5);
      symbols[GlobalSymbolRegistry.Match] = Method(new ClrFunctionInstance(Engine, "[Symbol.match]", Match, 1, lengthFlags));
      symbols[GlobalSymbolRegistry.MatchAll] = Method(new ClrFunctionInstance(Engine, "[Symbol.matchAll]", MatchAll, 1, lengthFlags));
      symbols[GlobalSymbolRegistry.Replace] = Method(new ClrFunctionInstance(Engine, "[Symbol.replace]", Replace, 2, lengthFlags));
      symbols[GlobalSymbolRegistry.Search] = Method(new ClrFunctionInstance(Engine, "[Symbol.search]", Search, 1, lengthFlags));
      symbols[GlobalSymbolRegistry.Split] = Method(new ClrFunctionInstance(Engine, "[Symbol.split]", Split, 2, lengthFlags));
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  /// <remarks>
  /// Returns "(?:)" when called on the prototype itself or for an empty pattern. Otherwise returns the
  /// pattern text with forward slashes and line terminators escaped, so that it can be placed between
  /// two '/' characters and still form a valid regular expression literal.
  /// Throws a TypeError when <paramref name="thisObject"/> is not a JsRegExp.
  /// </remarks>
  private JsValue Source(JsValue thisObject, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObject, this))
      {
          return DefaultSource;
      }

      var r = thisObject as JsRegExp;
      if (r is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      if (string.IsNullOrEmpty(r.Source))
      {
          return JsRegExp.regExpForMatchingAllCharacters;
      }

      return r.Source
          .Replace("\\/", "/") // ensure forward-slashes
          .Replace("/", "\\/") // then escape again
          // every ECMAScript LineTerminator has to be escaped, not only LF
          .Replace("\n", "\\n")
          .Replace("\r", "\\r")
          .Replace("\u2028", "\\u2028")
          .Replace("\u2029", "\\u2029");
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <remarks>
  /// arguments[0] is the input string and arguments[1] the replacement, either a callable or a value that
  /// is converted to a replacement template. A fast path delegates to the .NET regex when the pattern is
  /// not unicode or sticky, the template contains no '$' and exec has not been overridden. Otherwise
  /// the generic algorithm driven by RegExpExec is used.
  /// </remarks>
  private JsValue Replace(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // we need heavier logic if we have named captures
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.IndexOf('$') != -1;
      }

      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      var fullUnicode = false;
      if (global)
      {
          fullUnicode = flags.IndexOf('u') != -1;
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is JsRegExp rei && rei.TryGetDefaultRegExpExec(out _))
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              string Evaluator(Match match)
              {
                  var actualGroupCount = GetActualRegexGroupCount(rei, match);
                  var replacerArgs = new List<JsValue>(actualGroupCount + 2);
                  replacerArgs.Add(match.Value);

                  ObjectInstance? groups = null;
                  for (var i = 1; i < actualGroupCount; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Success ? capture.Value : Undefined);

                      var groupName = GetRegexGroupName(rei, i);
                      if (!string.IsNullOrWhiteSpace(groupName))
                      {
                          groups ??= OrdinaryObjectCreate(_engine, null);
                          groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
                      }
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);
                  if (groups is not null)
                  {
                      replacerArgs.Add(groups);
                  }

                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // Generic path: first collect every match result, then build the output string.
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // An empty match would never advance on its own; step lastIndex forward manually.
              // The spec performs this Set with the throw flag, as MatchSlow already does.
              var thisIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, nextIndex, true);
          }
      }

      using var accumulated = StringBuilderPool.Rent();
      var output = accumulated.Builder;
      var nextSourcePosition = 0;

      // Captures keep undefined for groups that did not participate, as a replacer function must see them.
      var captures = new List<JsValue>();
      for (var i = 0; i < results.Count; i++)
      {
          var result = results[i];
          var nCaptures = (int) result.Length;
          nCaptures = System.Math.Max(nCaptures - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;
          var position = (int) TypeConverter.ToInteger(result.Get(PropertyIndex));
          position = System.Math.Max(System.Math.Min(position, lengthS), 0);

          captures.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = result.Get(n);
              captures.Add(!capN.IsUndefined() ? TypeConverter.ToString(capN) : Undefined);
          }

          var namedCaptures = result.Get(PropertyGroups);
          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>(captures.Count + 4);
              replacerArgs.Add(matched);
              replacerArgs.AddRange(captures);
              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              // The template expansion substitutes an empty string for an undefined capture.
              var captureStrings = new string[captures.Count];
              for (var j = 0; j < captureStrings.Length; j++)
              {
                  var capture = captures[j];
                  captureStrings[j] = capture.IsUndefined() ? "" : TypeConverter.ToString(capture);
              }

              replacement = GetSubstitution(matched, s, position, captureStrings, namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // Results reported out of order (possible with a custom exec) are ignored.
          if (position >= nextSourcePosition)
          {
              output.Append(s, nextSourcePosition, position - nextSourcePosition);
              output.Append(replacement);
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition < lengthS)
      {
          output.Append(s, nextSourcePosition, lengthS - nextSourcePosition);
      }

      return accumulated.ToString();
  }
  /// <summary>
  /// Calls <paramref name="replacer"/> (which must be callable) with an undefined this-value and the
  /// given arguments, and converts whatever it returns to a string.
  /// </summary>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var callArguments = replacerArgs.ToArray();
      var returned = callable.Call(Undefined, callArguments);
      return TypeConverter.ToString(returned);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// </summary>
  /// <remarks>
  /// Expands the '$' patterns of <paramref name="replacement"/>:
  /// $$ inserts a "$", $&amp; the matched substring, $` the text before the match, $' the text after the match,
  /// $n / $nn the n-th capture (1-based) and $&lt;name&gt; the named capture read from
  /// <paramref name="namedCaptures"/>. Unknown patterns are copied verbatim.
  /// </remarks>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // If there is no pattern, replace the pattern as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      using var replacementBuilder = StringBuilderPool.Rent();
      var sb = replacementBuilder.Builder;
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c != '$' || i >= replacement.Length - 1)
          {
              // Ordinary character, or a '$' that is the very last character.
              sb.Append(c);
              continue;
          }

          c = replacement[++i];
          switch (c)
          {
              case '$':
                  sb.Append('$');
                  break;
              case '&':
                  sb.Append(matched);
                  break;
              case '`':
                  sb.Append(str, 0, position);
                  break;
              case '\'':
              {
                  // The matched text can be longer than what is left of the string (user-defined exec);
                  // in that case nothing follows the match.
                  var tailPosition = position + matched.Length;
                  if (tailPosition < str.Length)
                  {
                      sb.Append(str, tailPosition, str.Length - tailPosition);
                  }

                  break;
              }
              case '<':
              {
                  var gtPos = replacement.IndexOf('>', i + 1);
                  if (gtPos == -1 || namedCaptures.IsUndefined())
                  {
                      sb.Append('$');
                      sb.Append(c);
                  }
                  else
                  {
                      var startIndex = i + 1;
                      var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                      var capture = namedCaptures.Get(groupName);
                      if (!capture.IsUndefined())
                      {
                          sb.Append(TypeConverter.ToString(capture));
                      }

                      // Skip past the closing '>'.
                      i = gtPos;
                  }

                  break;
              }
              default:
              {
                  if (c is >= '0' and <= '9')
                  {
                      int matchNumber1 = c - '0';

                      // The match number can be one or two digits long.
                      int matchNumber2 = 0;
                      if (i < replacement.Length - 1 && replacement[i + 1] is >= '0' and <= '9')
                      {
                          matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                      }

                      // Try the two digit capture first.
                      if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                      {
                          // Two digit capture replacement.
                          sb.Append(captures[matchNumber2 - 1]);
                          i++;
                      }
                      else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                      {
                          // Single digit capture replacement.
                          sb.Append(captures[matchNumber1 - 1]);
                      }
                      else
                      {
                          // Capture does not exist: emit the '$' and re-read the digit as a literal.
                          sb.Append('$');
                          i--;
                      }
                  }
                  else
                  {
                      // Unknown replacement pattern.
                      sb.Append('$');
                      sb.Append(c);
                  }

                  break;
              }
          }
      }

      return replacementBuilder.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <remarks>
  /// arguments[0] is the string to split and arguments[1] the optional limit on the number of entries.
  /// A sticky copy of the receiver is built through the species constructor. Non-unicode patterns
  /// whose exec has not been overridden are then split directly with the .NET regex, and everything
  /// else goes through <see cref="SplitSlow"/>.
  /// </remarks>
  private JsValue Split(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.IndexOf('u') > -1;
      var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });

      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          // Empty input: the result is [] when the pattern matches the empty string, otherwise [""].
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is JsRegExp R && R.TryGetDefaultRegExpExec(out _))
      {
          // we can take faster path

          if (R.Source == JsRegExp.regExpForMatchingAllCharacters)
          {
              // if empty string, just a string split
              // NOTE(review): the last argument is assumed to be the maximum number of entries - confirm
              // against StringPrototype.SplitWithStringSeparator. Passing s.Length alone ignored the limit.
              var entryLimit = (uint) System.Math.Min(lim, (long) s.Length);
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, entryLimit);
          }

          var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);

          int lastIndex = 0;
          uint index = 0;
          for (var match = R.Value.Match(s, 0); match.Success; match = match.NextMatch())
          {
              // Empty matches at the start, at the end, or right after the previous separator do not split.
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  continue;
              }

              // Add the match results to the array.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);

              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;

              // Capture groups of the separator are spliced into the output as well.
              var actualGroupCount = GetActualRegexGroupCount(R, match);
              for (int i = 1; i < actualGroupCount; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = match.Groups[i].Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);

                  if (index >= lim)
                  {
                      return a;
                  }
              }
          }

          // Add the last part of the split
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);

          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Generic split loop: probes <paramref name="splitter"/> at every candidate index through RegExpExec
  /// and appends the text between separators, plus the separator's captures, to a new array until the
  /// input is exhausted or <paramref name="lim"/> entries have been produced.
  /// </summary>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var parts = _realm.Intrinsics.Array.ArrayCreate(0);
      var size = (ulong) s.Length;
      ulong segmentStart = 0;
      ulong probeIndex = 0;

      while (probeIndex < size)
      {
          splitter.Set(JsRegExp.PropertyLastIndex, probeIndex, true);
          var execResult = RegExpExec(splitter, s);
          if (!execResult.IsNull())
          {
              var matchEnd = TypeConverter.ToLength(splitter.Get(JsRegExp.PropertyLastIndex));
              matchEnd = System.Math.Min(matchEnd, size);
              if (matchEnd != segmentStart)
              {
                  // Text between the previous separator and this one.
                  var segment = s.Substring((int) segmentStart, (int) (probeIndex - segmentStart));
                  parts.SetIndexValue(lengthA, segment, updateLength: true);
                  lengthA++;
                  if (lengthA == lim)
                  {
                      return parts;
                  }

                  segmentStart = matchEnd;

                  var captureCount = (int) TypeConverter.ToLength(execResult.Get(CommonProperties.Length));
                  captureCount = System.Math.Max(captureCount - 1, 0);
                  for (var i = 1; i <= captureCount; i++)
                  {
                      parts.SetIndexValue(lengthA, execResult.Get(i), updateLength: true);
                      lengthA++;
                      if (lengthA == lim)
                      {
                          return parts;
                      }
                  }

                  probeIndex = segmentStart;
                  continue;
              }
          }

          // No match here, or a match that ends where the current segment starts: try the next index.
          probeIndex = AdvanceStringIndex(s, probeIndex, unicodeMatching);
      }

      // Whatever follows the last separator.
      parts.SetIndexValue(lengthA, s.Substring((int) segmentStart), updateLength: true);
      return parts;
  }
  /// <summary>
  /// Getter for "flags": reads the individual flag properties from the receiver and returns a string
  /// with one letter for every property that is truthy, in the order d, g, i, m, s, u, v, y.
  /// </summary>
  private JsValue Flags(JsValue thisObject, JsValue[] arguments)
  {
      JsValue target = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.flags");

      // Property to read and the letter it contributes; the order of the reads is observable.
      var flagProperties = new (JsValue Name, char Letter)[]
      {
          ("hasIndices", 'd'),
          (PropertyGlobal, 'g'),
          ("ignoreCase", 'i'),
          ("multiline", 'm'),
          ("dotAll", 's'),
          ("unicode", 'u'),
          ("unicodeSets", 'v'),
          (PropertySticky, 'y'),
      };

      var text = "";
      foreach (var (name, letter) in flagProperties)
      {
          if (TypeConverter.ToBoolean(target.Get(name)))
          {
              text += letter;
          }
      }

      return text;
  }
  /// <summary>
  /// Implements toString: reads "source" and then "flags" from the receiver and returns "/source/flags".
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.toString");

      var sourceText = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));

      return string.Concat("/", sourceText, "/", flagText);
  }
  /// <summary>
  /// Implements test: returns whether the receiver matches the string in arguments[0].
  /// </summary>
  /// <remarks>
  /// The fast paths call the .NET regex directly. They are only taken for non-unicode JsRegExp
  /// instances whose exec has not been replaced, because a user-supplied exec must be honoured.
  /// Everything else goes through RegExpExec.
  /// </remarks>
  private JsValue Test(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths
      if (r is JsRegExp R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          // Compare before narrowing to int so an oversized lastIndex cannot wrap around.
          var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              // Same handling as RegExpBuiltinExec: a failed attempt resets lastIndex.
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          // lastIndex == s.Length is still a valid start position (an empty match at the end).
          var startAt = (int) lastIndex;
          var m = R.Value.Match(s, startAt);
          if (!m.Success || (R.Sticky && m.Index != startAt))
          {
              R.Set(JsRegExp.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(JsRegExp.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  /// <remarks>
  /// Runs a single exec from index 0 and returns the "index" of the result, or -1 when there is no match.
  /// The receiver's lastIndex is put back to the value it had on entry.
  /// </remarks>
  private JsValue Search(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // Always search from the start, writing lastIndex only when it is not already 0.
      var savedLastIndex = rx.Get(JsRegExp.PropertyLastIndex);
      if (!SameValue(savedLastIndex, 0))
      {
          rx.Set(JsRegExp.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, s);

      // Restore lastIndex if exec moved it.
      var lastIndexAfterExec = rx.Get(JsRegExp.PropertyLastIndex);
      if (!SameValue(lastIndexAfterExec, savedLastIndex))
      {
          rx.Set(JsRegExp.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <remarks>
  /// Without the 'g' flag this is a single RegExpExec. With it, all matched substrings are returned in an
  /// array, or null when nothing matches. Non-unicode patterns whose exec has not been overridden are
  /// enumerated directly with the .NET regex; everything else goes through <see cref="MatchSlow"/>.
  /// </remarks>
  private JsValue Match(JsValue thisObject, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.IndexOf('u') != -1;
      rx.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is JsRegExp rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              var match = rei.Value.Match(s);
              if (!match.Success || match.Index != 0)
              {
                  return Null;
              }

              // A sticky match has to begin exactly where the previous one ended. After an empty
              // match the next attempt starts one position later, and NextMatch does the same.
              uint count = 0;
              int expectedIndex;
              do
              {
                  a.SetIndexValue(count++, match.Value, updateLength: false);
                  expectedIndex = match.Index + System.Math.Max(match.Length, 1);
                  match = match.NextMatch();
              } while (match.Success && match.Index == expectedIndex);

              a.SetLength(count);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }

              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global match: calls RegExpExec repeatedly, collecting element 0 of every result, and
  /// returns the collected array, or null when the very first exec finds nothing.
  /// </summary>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var collected = _realm.Intrinsics.Array.ArrayCreate(0);

      for (uint count = 0; ; count++)
      {
          var execResult = RegExpExec(rx, s);
          if (execResult.IsNull())
          {
              collected.SetLength(count);
              if (count == 0)
              {
                  return Null;
              }

              return collected;
          }

          var matchedText = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          collected.SetIndexValue(count, matchedText, updateLength: false);

          if (matchedText == "")
          {
              // Empty match: move lastIndex forward by hand so the loop makes progress.
              var currentLastIndex = TypeConverter.ToLength(rx.Get(JsRegExp.PropertyLastIndex));
              var advancedIndex = AdvanceStringIndex(s, currentLastIndex, fullUnicode);
              rx.Set(JsRegExp.PropertyLastIndex, advancedIndex, true);
          }
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  /// <remarks>
  /// Builds a copy of the receiver through its species constructor, copies lastIndex onto the copy and
  /// returns a RegExp string iterator over the string in arguments[0].
  /// </remarks>
  private JsValue MatchAll(JsValue thisObject, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObject, "RegExp.prototype.matchAll");
      var s = TypeConverter.ToString(arguments.At(0));

      var speciesConstructor = SpeciesConstructor(r, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(r.Get(PropertyFlags));
      var constructArguments = new JsValue[] { r, flags };
      var matcher = Construct(speciesConstructor, constructArguments);

      // The iteration starts from wherever the original expression currently is.
      var startIndex = TypeConverter.ToLength(r.Get(JsRegExp.PropertyLastIndex));
      matcher.Set(JsRegExp.PropertyLastIndex, startIndex, true);

      var isGlobal = flags.IndexOf('g') != -1;
      var isFullUnicode = flags.IndexOf('u') != -1;
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, s, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index that follows <paramref name="index"/> in <paramref name="s"/>. The step is one code
  /// unit, except in unicode mode when <paramref name="index"/> sits on a high surrogate that is
  /// immediately followed by a low surrogate, in which case the whole pair is skipped.
  /// </summary>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      var startsSurrogatePair = unicode
          && next < (ulong) s.Length
          && char.IsHighSurrogate(s[(int) index])
          && char.IsLowSurrogate(s[(int) next]);

      return startsSurrogatePair ? index + 2 : next;
  }
  /// <summary>
  /// Executes <paramref name="r"/> against <paramref name="s"/>. A callable "exec" property is invoked and
  /// must return an object or null (TypeError otherwise). Without a callable exec the receiver must be
  /// a JsRegExp (TypeError otherwise) and the built-in algorithm is used.
  /// </summary>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      if (r.Get(PropertyExec) is not ICallable callable)
      {
          var ri = r as JsRegExp;
          if (ri is null)
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return RegExpBuiltinExec(ri, s);
      }

      var result = callable.Call(r, new JsValue[] { s });
      var isObjectOrNull = result.IsNull() || result.IsObject();
      if (!isObjectOrNull)
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return result;
  }
  /// <summary>
  /// Checks whether the "exec" property of <paramref name="o"/> is still the built-in implementation.
  /// On success <paramref name="exec"/> receives that built-in delegate; otherwise it is set to null.
  /// </summary>
  internal bool TryGetDefaultExec(ObjectInstance o, [NotNullWhen((true))] out Func<JsValue, JsValue[], JsValue>? exec)
  {
      exec = default;

      if (o.Get(PropertyExec) is not ClrFunctionInstance candidate)
      {
          return false;
      }

      if (candidate._func != _defaultExec)
      {
          return false;
      }

      exec = _defaultExec;
      return true;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <remarks>
  /// Matches <paramref name="s"/> with the .NET regex held by <paramref name="R"/>, starting at lastIndex
  /// for global or sticky expressions and at 0 otherwise. Returns the result array, or null when there
  /// is no match. A failed global/sticky attempt resets lastIndex to 0, and a successful one moves it
  /// to the end of the match.
  /// </remarks>
  private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(JsRegExp.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          lastIndex = 0;
      }

      if (R.Source == JsRegExp.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              // Only reachable for global/sticky expressions (lastIndex is 0 otherwise); a failed attempt
              // resets lastIndex, exactly like the stateful path below.
              R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          array.FastSetDataProperty(PropertyIndex._value, lastIndex);
          array.FastSetDataProperty(PropertyInput._value, s);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!global && !sticky && !fullUnicode && !hasIndices)
      {
          // we can use the non-stateful fast path which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R, m, s, fullUnicode: false, hasIndices: false);
      }

      // the stateful version
      Match match;

      if (lastIndex > length)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var startAt = (int) lastIndex;
      while (true)
      {
          match = R.Value.Match(s, startAt);

          // The conversion of Unicode regex patterns to .NET Regex has some flaws:
          // when the pattern may match empty strings, the adapted Regex will return empty string matches
          // in the middle of surrogate pairs. As a best effort solution, we remove these fake positive matches.
          // (See also: https://github.com/sebastienros/esprima-dotnet/pull/364#issuecomment-1606045259)
          if (match.Success
              && fullUnicode
              && match.Length == 0
              && 0 < match.Index && match.Index < s.Length
              && char.IsHighSurrogate(s[match.Index - 1]) && char.IsLowSurrogate(s[match.Index]))
          {
              startAt++;
              continue;
          }

          break;
      }

      // A sticky expression only succeeds when the match begins exactly at lastIndex.
      var success = match.Success && (!sticky || match.Index == (int) lastIndex);
      if (!success)
      {
          R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var e = match.Index + match.Length;

      // NOTE: Even in Unicode mode, we don't need to translate indices as .NET regexes always return code unit indices.

      if (global || sticky)
      {
          R.Set(JsRegExp.PropertyLastIndex, e, true);
      }

      return CreateReturnValueArray(R, match, s, fullUnicode, hasIndices);
  }
  /// <summary>
  /// Builds the match result array for a successful match: one element per capture group plus the
  /// index, input and groups properties, and an "indices" property when <paramref name="hasIndices"/> is set.
  /// </summary>
  /// <param name="rei">The regular expression that produced the match.</param>
  /// <param name="match">The successful .NET match.</param>
  /// <param name="s">The input string that was matched.</param>
  /// <param name="fullUnicode">Not read by this method.</param>
  /// <param name="hasIndices">Whether to collect capture positions and attach the "indices" array.</param>
  private static JsArray CreateReturnValueArray(
      JsRegExp rei,
      Match match,
      string s,
      bool fullUnicode,
      bool hasIndices)
  {
      var engine = rei.Engine;
      var actualGroupCount = GetActualRegexGroupCount(rei, match);

      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) actualGroupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, s);

      ObjectInstance? groups = null;

      // Both lists are only needed for the "indices" array. groupNames holds exactly one entry per
      // capture group 1..n-1 (string.Empty for unnamed groups) because MakeMatchIndicesIndexPairArray
      // looks names up by capture position (groupNames[i - 1]); adding only the named groups would
      // misalign names and positions as soon as named and unnamed groups are mixed.
      var indices = hasIndices ? new List<JsNumber[]?>(actualGroupCount) : null;
      var groupNames = hasIndices ? new List<string>(actualGroupCount) : null;

      for (uint i = 0; i < actualGroupCount; i++)
      {
          var capture = match.Groups[(int) i];
          var capturedValue = Undefined;
          JsNumber[]? captureRange = null;
          if (capture?.Success == true)
          {
              capturedValue = capture.Value;
              if (hasIndices)
              {
                  captureRange = new[] { JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length) };
              }
          }

          // groups that did not participate in the match are recorded as null
          indices?.Add(captureRange);

          var groupName = GetRegexGroupName(rei, (int) i);
          if (!string.IsNullOrWhiteSpace(groupName))
          {
              groups ??= OrdinaryObjectCreate(engine, null);
              groups.CreateDataPropertyOrThrow(groupName, capturedValue);
              groupNames?.Add(groupName!);
          }
          else if (i > 0)
          {
              // placeholder keeps groupNames aligned with the capture positions
              groupNames?.Add(string.Empty);
          }

          array.SetIndexValue(i, capturedValue, updateLength: false);
      }

      array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

      if (hasIndices)
      {
          // the groups object exists exactly when at least one named group was seen
          var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, groups is not null);
          array.CreateDataPropertyOrThrow("indices", indicesArray);
      }

      return array;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  /// </summary>
  /// <param name="engine">Engine used to create the result array and the groups object.</param>
  /// <param name="s">The matched input string; forwarded to <see cref="GetMatchIndexPair"/>.</param>
  /// <param name="indices">One entry per capture (entry 0 is the whole match); null entries become Undefined.</param>
  /// <param name="groupNames">Names looked up by capture position: the name for capture i is read from index i - 1.</param>
  /// <param name="hasGroups">When true a groups object is created and filled; otherwise "groups" is Undefined.</param>
  private static JsArray MakeMatchIndicesIndexPairArray(
      Engine engine,
      string s,
      List<JsNumber[]?> indices,
      List<string>? groupNames,
      bool hasGroups)
  {
      var result = engine.Realm.Intrinsics.Array.Construct((uint) indices.Count);

      ObjectInstance? namedPairs = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
      result.CreateDataPropertyOrThrow("groups", namedPairs ?? Undefined);

      var captureIndex = 0;
      foreach (var range in indices)
      {
          JsValue pair;
          if (range is null)
          {
              pair = Undefined;
          }
          else
          {
              pair = GetMatchIndexPair(engine, s, range);
          }

          result.Push(pair);

          // capture 0 is the whole match and never carries a name
          if (captureIndex > 0)
          {
              var name = groupNames?[captureIndex - 1];
              if (!string.IsNullOrWhiteSpace(name))
              {
                  namedPairs!.CreateDataPropertyOrThrow(name!, pair);
              }
          }

          captureIndex++;
      }

      return result;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getmatchindexpair
  /// </summary>
  /// <param name="engine">Engine whose intrinsic Array constructor creates the pair.</param>
  /// <param name="s">The matched input string; not read by this implementation.</param>
  /// <param name="match">The numbers to place into the new array.</param>
  /// <returns>An array created from the elements of <paramref name="match"/>.</returns>
  private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
      => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  /// <summary>
  /// Returns the group count recorded in the parse result when parsing succeeded,
  /// otherwise the number of groups reported by the .NET match.
  /// </summary>
  private static int GetActualRegexGroupCount(JsRegExp rei, Match match)
  {
      if (rei.ParseResult.Success)
      {
          return rei.ParseResult.ActualRegexGroupCount;
      }
      else
      {
          return match.Groups.Count;
      }
  }
  /// <summary>
  /// Resolves the name of the capture group at <paramref name="index"/>.
  /// </summary>
  /// <param name="rei">The regular expression owning the group.</param>
  /// <param name="index">The group number; 0 denotes the whole match.</param>
  /// <returns>
  /// Null for index 0 and for groups the .NET regex reports under their own number;
  /// otherwise the name from the parse result (when parsing succeeded) or from the .NET regex.
  /// </returns>
  private static string? GetRegexGroupName(JsRegExp rei, int index)
  {
      if (index == 0)
      {
          return null;
      }

      if (rei.ParseResult.Success)
      {
          return rei.ParseResult.GetRegexGroupName(index);
      }

      var groupNameFromNumber = rei.Value.GroupNameFromNumber(index);

      // regex defaults to index as group name when it's not a named group; compare against the
      // full decimal representation so that groups numbered 10 and above are recognised as well
      var defaultName = index.ToString(System.Globalization.CultureInfo.InvariantCulture);
      if (string.Equals(groupNameFromNumber, defaultName, System.StringComparison.Ordinal))
      {
          return null;
      }

      return groupNameFromNumber;
  }
  /// <summary>
  /// Runs the built-in exec algorithm on the receiver, using the first argument converted to a string as input.
  /// </summary>
  /// <param name="thisObject">The receiver; ExceptionHelper.ThrowTypeError is invoked when it is not a JsRegExp.</param>
  /// <param name="arguments">Call arguments; only the first one is read.</param>
  /// <returns>The result of <see cref="RegExpBuiltinExec"/>.</returns>
  private JsValue Exec(JsValue thisObject, JsValue[] arguments)
  {
      var regExp = thisObject as JsRegExp;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      // the input is converted only after the receiver check
      var input = TypeConverter.ToString(arguments.At(0));
      return RegExpBuiltinExec(regExp, input);
  }
  934. }
  935. }