2
0

RegExpPrototype.cs 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094
  1. using System.Diagnostics.CodeAnalysis;
  2. using System.Text.RegularExpressions;
  3. using Jint.Collections;
  4. using Jint.Native.Number;
  5. using Jint.Native.Object;
  6. using Jint.Native.String;
  7. using Jint.Native.Symbol;
  8. using Jint.Pooling;
  9. using Jint.Runtime;
  10. using Jint.Runtime.Descriptors;
  11. using Jint.Runtime.Interop;
  12. namespace Jint.Native.RegExp
  13. {
  14. internal sealed class RegExpPrototype : Prototype
  15. {
  // Property keys used by the methods of this prototype, allocated once as JsString instances.
  16. private static readonly JsString PropertyExec = new("exec");
  17. private static readonly JsString PropertyIndex = new("index");
  18. private static readonly JsString PropertyInput = new("input");
  19. private static readonly JsString PropertySticky = new("sticky");
  20. private static readonly JsString PropertyGlobal = new("global");
  21. internal static readonly JsString PropertySource = new("source");
  // Value the "source" getter returns when it is invoked on the prototype object itself.
  22. private static readonly JsString DefaultSource = new("(?:)");
  23. internal static readonly JsString PropertyFlags = new("flags");
  24. private static readonly JsString PropertyGroups = new("groups");
  // Constructor exposed through the "constructor" property.
  25. private readonly RegExpConstructor _constructor;
  // Delegate registered as the "exec" function; TryGetDefaultExec compares against it
  // to detect whether an object still uses this exec implementation.
  26. private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the RegExp prototype object.
  /// </summary>
  /// <param name="engine">Engine forwarded to the base class.</param>
  /// <param name="realm">Realm forwarded to the base class.</param>
  /// <param name="constructor">Constructor exposed through the "constructor" property.</param>
  /// <param name="objectPrototype">Object that becomes this object's own prototype.</param>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype) : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // Cache the delegate once so the registered "exec" function can later be
      // recognised by delegate comparison.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Registers the string-keyed members (methods and flag getters) and the
  /// symbol-keyed members (match, matchAll, replace, search, split) on the prototype.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Builds a configurable, getter-only accessor whose getter reads a value from a RegExpInstance.
      GetSetPropertyDescriptor FlagAccessor(string name, Func<RegExpInstance, JsValue> read, JsValue? onPrototype = null)
      {
          JsValue Getter(JsValue receiver, JsValue[] args)
          {
              // Reading the accessor from the prototype object itself does not throw.
              if (ReferenceEquals(receiver, this))
              {
                  return onPrototype ?? Undefined;
              }

              var regExp = receiver as RegExpInstance;
              if (regExp is null)
              {
                  ExceptionHelper.ThrowTypeError(_realm);
              }

              return read(regExp);
          }

          var getter = new ClrFunctionInstance(Engine, name, Getter, 0, lengthFlags);
          return new GetSetPropertyDescriptor(
              get: getter,
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      var properties = new PropertyDictionary(14, checkExistingKeys: false)
      {
          ["constructor"] = new PropertyDescriptor(_constructor, propertyFlags),
          ["toString"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "toString", ToRegExpString, 0, lengthFlags), propertyFlags),
          // exec is registered with the cached delegate so it can be identified later.
          ["exec"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "exec", _defaultExec, 1, lengthFlags), propertyFlags),
          ["test"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "test", Test, 1, lengthFlags), propertyFlags),
          ["dotAll"] = FlagAccessor("get dotAll", static r => r.DotAll),
          ["flags"] = new GetSetPropertyDescriptor(get: new ClrFunctionInstance(Engine, "get flags", Flags, 0, lengthFlags), set: Undefined, flags: PropertyFlag.Configurable),
          ["global"] = FlagAccessor("get global", static r => r.Global),
          ["hasIndices"] = FlagAccessor("get hasIndices", static r => r.Indices),
          ["ignoreCase"] = FlagAccessor("get ignoreCase", static r => r.IgnoreCase),
          ["multiline"] = FlagAccessor("get multiline", static r => r.Multiline),
          ["source"] = new GetSetPropertyDescriptor(get: new ClrFunctionInstance(Engine, "get source", Source, 0, lengthFlags), set: Undefined, flags: PropertyFlag.Configurable),
          ["sticky"] = FlagAccessor("get sticky", static r => r.Sticky),
          ["unicode"] = FlagAccessor("get unicode", static r => r.FullUnicode),
          ["unicodeSets"] = FlagAccessor("get unicodeSets", static r => r.UnicodeSets)
      };
      SetProperties(properties);

      var symbols = new SymbolDictionary(5)
      {
          [GlobalSymbolRegistry.Match] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.match]", Match, 1, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.MatchAll] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.matchAll]", MatchAll, 1, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.Replace] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.replace]", Replace, 2, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.Search] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.search]", Search, 1, lengthFlags), propertyFlags),
          [GlobalSymbolRegistry.Split] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.split]", Split, 2, lengthFlags), propertyFlags)
      };
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  /// <remarks>
  /// Returns the pattern text with "/" and line feeds escaped. Invoked on the
  /// prototype object itself it returns "(?:)"; any other receiver that is not
  /// a RegExpInstance raises a TypeError.
  /// </remarks>
  private JsValue Source(JsValue thisObj, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObj, this))
      {
          return DefaultSource;
      }

      var regExp = thisObj as RegExpInstance;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      var pattern = regExp.Source;
      if (pattern is null)
      {
          return JsString.Empty;
      }

      // Escape forward slashes first, then line feeds.
      var slashEscaped = pattern.Replace("/", "\\/");
      return slashEscaped.Replace("\n", "\\n");
  }
  108. /// <summary>
  109. /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  110. /// </summary>
  /// <remarks>
  /// arguments[0] is the subject string and arguments[1] the replacement, which is
  /// either a callable or a value converted to a replacement template string.
  /// A fast path hands the work to the .NET Regex.Replace overloads; otherwise the
  /// matches are collected through RegExpExec and the result is assembled manually.
  /// </remarks>
  111. private JsValue Replace(JsValue thisObj, JsValue[] arguments)
  112. {
  113. var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.replace");
  114. var s = TypeConverter.ToString(arguments.At(0));
  115. var lengthS = s.Length;
  116. var replaceValue = arguments.At(1);
  117. var functionalReplace = replaceValue is ICallable;
  118. // we need heavier logic if we have named captures
  119. var mayHaveNamedCaptures = false;
  120. if (!functionalReplace)
  121. {
  // A non-callable replacement is converted to a string once; any '$' in it
  // forces the slow path, where GetSubstitution expands the template.
  122. var value = TypeConverter.ToString(replaceValue);
  123. replaceValue = value;
  124. mayHaveNamedCaptures = value.IndexOf('$') != -1;
  125. }
  126. var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
  127. var global = flags.IndexOf('g') != -1;
  128. var fullUnicode = false;
  129. if (global)
  130. {
  // A global replace always starts scanning from index 0.
  131. fullUnicode = flags.IndexOf('u') != -1;
  132. rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
  133. }
  134. // check if we can access fast path
  // Requirements: no 'u' flag seen, no '$' in a string replacement, "sticky" is falsy,
  // and the receiver is a RegExpInstance for which TryGetDefaultRegExpExec succeeds.
  135. if (!fullUnicode
  136. && !mayHaveNamedCaptures
  137. && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
  138. && rx is RegExpInstance rei && rei.TryGetDefaultRegExpExec(out _))
  139. {
  // Replace every match when global, otherwise only the first one.
  140. var count = global ? int.MaxValue : 1;
  141. string result;
  142. if (functionalReplace)
  143. {
  // Builds the replacer argument list: matched text, one entry per capture
  // group, match index, subject string and, when any group is named, a groups object.
  144. string Evaluator(Match match)
  145. {
  146. var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
  147. replacerArgs.Add(match.Value);
  148. ObjectInstance? groups = null;
  149. for (var i = 1; i < match.Groups.Count; i++)
  150. {
  151. var capture = match.Groups[i];
  152. replacerArgs.Add(capture.Success ? capture.Value : Undefined);
  153. var groupName = GetRegexGroupName(rei.Value, i);
  154. if (!string.IsNullOrWhiteSpace(groupName))
  155. {
  // The groups object is created lazily, on the first named group.
  156. groups ??= OrdinaryObjectCreate(_engine, null);
  157. groups.CreateDataPropertyOrThrow(groupName, capture.Success ? capture.Value : Undefined);
  158. }
  159. }
  160. replacerArgs.Add(match.Index);
  161. replacerArgs.Add(s);
  162. if (groups is not null)
  163. {
  164. replacerArgs.Add(groups);
  165. }
  166. return CallFunctionalReplace(replaceValue, replacerArgs);
  167. }
  168. result = rei.Value.Replace(s, Evaluator, count);
  169. }
  170. else
  171. {
  172. result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
  173. }
  174. rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero);
  175. return result;
  176. }
  // Slow path: collect the match result objects first, then build the output.
  177. var results = new List<ObjectInstance>();
  178. while (true)
  179. {
  180. var result = RegExpExec(rx, s);
  181. if (result.IsNull())
  182. {
  183. break;
  184. }
  185. results.Add((ObjectInstance) result);
  186. if (!global)
  187. {
  188. break;
  189. }
  190. var matchStr = TypeConverter.ToString(result.Get(0));
  191. if (matchStr == "")
  192. {
  // An empty match would never move lastIndex forward, so advance it explicitly.
  // NOTE(review): this Set passes no throw flag, unlike the Set at the start of the
  // global branch above and the equivalent step in MatchSlow - confirm this is intended.
  193. var thisIndex = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
  194. var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
  195. rx.Set(RegExpInstance.PropertyLastIndex, nextIndex);
  196. }
  197. }
  // NOTE(review): the output is accumulated by string concatenation inside the loop
  // below; a pooled builder might be preferable for inputs with many matches.
  198. var accumulatedResult = "";
  199. var nextSourcePosition = 0;
  200. var captures = new List<string>();
  201. for (var i = 0; i < results.Count; i++)
  202. {
  203. var result = results[i];
  // Number of capture groups is the result length minus the full match at index 0.
  204. var nCaptures = (int) result.Length;
  205. nCaptures = System.Math.Max(nCaptures - 1, 0);
  206. var matched = TypeConverter.ToString(result.Get(0));
  207. var matchLength = matched.Length;
  // Clamp the reported index into [0, lengthS].
  208. var position = (int) TypeConverter.ToInteger(result.Get(PropertyIndex));
  209. position = System.Math.Max(System.Math.Min(position, lengthS), 0);
  210. uint n = 1;
  211. captures.Clear();
  212. while (n <= nCaptures)
  213. {
  // Undefined captures are stored as empty strings.
  214. var capN = result.Get(n);
  215. var value = !capN.IsUndefined() ? TypeConverter.ToString(capN) : "";
  216. captures.Add(value);
  217. n++;
  218. }
  219. var namedCaptures = result.Get(PropertyGroups);
  220. string replacement;
  221. if (functionalReplace)
  222. {
  223. var replacerArgs = new List<JsValue>();
  224. replacerArgs.Add(matched);
  225. foreach (var capture in captures)
  226. {
  227. replacerArgs.Add(capture);
  228. }
  229. replacerArgs.Add(position);
  230. replacerArgs.Add(s);
  231. if (!namedCaptures.IsUndefined())
  232. {
  233. replacerArgs.Add(namedCaptures);
  234. }
  235. replacement = CallFunctionalReplace(replaceValue, replacerArgs);
  236. }
  237. else
  238. {
  239. if (!namedCaptures.IsUndefined())
  240. {
  241. namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
  242. }
  243. replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
  244. }
  // Results that start before the already-consumed part of the subject are ignored.
  245. if (position >= nextSourcePosition)
  246. {
  247. accumulatedResult = accumulatedResult +
  248. s.Substring(nextSourcePosition, position - nextSourcePosition) +
  249. replacement;
  250. nextSourcePosition = position + matchLength;
  251. }
  252. }
  // Append whatever part of the subject follows the last replaced match.
  253. if (nextSourcePosition >= lengthS)
  254. {
  255. return accumulatedResult;
  256. }
  257. return accumulatedResult + s.Substring(nextSourcePosition);
  258. }
  /// <summary>
  /// Invokes a callable replacer with an undefined receiver and the collected
  /// arguments, and converts whatever it returns to a string.
  /// </summary>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var argumentArray = replacerArgs.ToArray();
      return TypeConverter.ToString(callable.Call(Undefined, argumentArray));
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The subject string the match was found in.</param>
  /// <param name="position">Index of the match inside <paramref name="str"/>.</param>
  /// <param name="captures">Captured groups, group 1 stored at index 0.</param>
  /// <param name="namedCaptures">Object holding named groups, or undefined when there are none.</param>
  /// <param name="replacement">Replacement template that may contain <c>$</c> patterns.</param>
  /// <returns>The template with every recognised <c>$</c> pattern expanded.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // Without a '$' there is nothing to expand, so the template is returned as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      // Patterns
      // $$ Inserts a "$".
      // $& Inserts the matched substring.
      // $` Inserts the portion of the string that precedes the matched substring.
      // $' Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string.
      // $<name> Inserts the named capture, when named captures are available.
      using var replacementBuilder = StringBuilderPool.Rent();
      var sb = replacementBuilder.Builder;
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];

          // A '$' that is the very last character is copied literally.
          if (c != '$' || i >= replacement.Length - 1)
          {
              sb.Append(c);
              continue;
          }

          c = replacement[++i];
          switch (c)
          {
              case '$':
                  sb.Append('$');
                  break;
              case '&':
                  sb.Append(matched);
                  break;
              case '`':
                  sb.Append(str.Substring(0, position));
                  break;
              case '\'':
                  // The match may claim to extend past the end of the subject (for example
                  // when the match result comes from a user-supplied exec). The specification
                  // substitutes the empty string in that case instead of failing.
                  var tailPos = position + matched.Length;
                  if (tailPos < str.Length)
                  {
                      sb.Append(str.Substring(tailPos));
                  }
                  break;
              case '<':
                  var gtPos = replacement.IndexOf('>', i + 1);
                  if (gtPos == -1 || namedCaptures.IsUndefined())
                  {
                      // No closing '>' or no named groups: "$<" is copied literally.
                      sb.Append('$');
                      sb.Append(c);
                  }
                  else
                  {
                      var startIndex = i + 1;
                      var groupName = replacement.Substring(startIndex, gtPos - startIndex);
                      var capture = namedCaptures.Get(groupName);
                      if (!capture.IsUndefined())
                      {
                          sb.Append(TypeConverter.ToString(capture));
                      }
                      // Skip past the closing '>'.
                      i = gtPos;
                  }
                  break;
              default:
              {
                  // Only ASCII decimal digits form capture references.
                  if (c is >= '0' and <= '9')
                  {
                      int matchNumber1 = c - '0';
                      // The match number can be one or two digits long.
                      int matchNumber2 = 0;
                      if (i < replacement.Length - 1 && replacement[i + 1] is >= '0' and <= '9')
                      {
                          matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                      }

                      // Try the two digit capture first.
                      if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                      {
                          // Two digit capture replacement.
                          sb.Append(TypeConverter.ToString(captures[matchNumber2 - 1]));
                          i++;
                      }
                      else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                      {
                          // Single digit capture replacement.
                          sb.Append(TypeConverter.ToString(captures[matchNumber1 - 1]));
                      }
                      else
                      {
                          // Capture does not exist: emit the '$' and step back so the
                          // digit is copied as an ordinary character on the next pass.
                          sb.Append('$');
                          i--;
                      }
                  }
                  else
                  {
                      // Unknown replacement pattern.
                      sb.Append('$');
                      sb.Append(c);
                  }
                  break;
              }
          }
      }

      return replacementBuilder.ToString();
  }
  374. /// <summary>
  375. /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  376. /// </summary>
  /// <remarks>
  /// arguments[0] is the subject string and arguments[1] the optional limit.
  /// A fast path based on the .NET Match/NextMatch API is taken when the 'u' flag is
  /// absent and the receiver is a RegExpInstance for which TryGetDefaultRegExpExec
  /// succeeds; every other case is delegated to SplitSlow.
  /// </remarks>
  377. private JsValue Split(JsValue thisObj, JsValue[] arguments)
  378. {
  379. var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.split");
  380. var s = TypeConverter.ToString(arguments.At(0));
  381. var limit = arguments.At(1);
  382. var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
  383. var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
  384. var unicodeMatching = flags.IndexOf('u') > -1;
  // The splitter is constructed with the sticky flag, which is appended when missing.
  385. var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
  386. var splitter = Construct(c, new JsValue[]
  387. {
  388. rx,
  389. newFlags
  390. });
  391. uint lengthA = 0;
  // An undefined limit falls back to NumberConstructor.MaxSafeInteger.
  392. var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);
  393. if (lim == 0)
  394. {
  395. return _realm.Intrinsics.Array.ArrayCreate(0);
  396. }
  397. if (s.Length == 0)
  398. {
  // Empty subject: the result is empty when the splitter matches it,
  // otherwise it is a one-element array holding the empty subject.
  399. var a = _realm.Intrinsics.Array.ArrayCreate(0);
  400. var z = RegExpExec(splitter, s);
  401. if (!z.IsNull())
  402. {
  403. return a;
  404. }
  405. a.SetIndexValue(0, s, updateLength: true);
  406. return a;
  407. }
  408. if (!unicodeMatching && rx is RegExpInstance R && R.TryGetDefaultRegExpExec(out _))
  409. {
  410. // we can take faster path
  411. if (R.Source == RegExpInstance.regExpForMatchingAllCharacters)
  412. {
  413. // if empty string, just a string split
  // NOTE(review): the last argument here is s.Length rather than lim - confirm what
  // SplitWithStringSeparator expects and whether a caller-supplied limit is honoured.
  414. return StringPrototype.SplitWithStringSeparator(_realm, "", s, (uint) s.Length);
  415. }
  416. var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);
  417. var match = R.Value.Match(s, 0);
  418. if (!match.Success) // No match at all return the string in an array
  419. {
  420. a.SetIndexValue(0, s, updateLength: true);
  421. return a;
  422. }
  423. int lastIndex = 0;
  424. uint index = 0;
  425. while (match.Success && index < lim)
  426. {
  // Empty matches at the start, at the end, or right after the previous
  // separator are skipped without producing an element.
  427. if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
  428. {
  429. match = match.NextMatch();
  430. continue;
  431. }
  432. // Add the match results to the array.
  433. a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);
  434. if (index >= lim)
  435. {
  436. return a;
  437. }
  438. lastIndex = match.Index + match.Length;
  // Capture groups of the separator are appended after each segment;
  // groups without captures are stored as undefined.
  439. for (int i = 1; i < match.Groups.Count; i++)
  440. {
  441. var group = match.Groups[i];
  442. var item = Undefined;
  443. if (group.Captures.Count > 0)
  444. {
  445. item = match.Groups[i].Value;
  446. }
  447. a.SetIndexValue(index++, item, updateLength: true);
  448. if (index >= lim)
  449. {
  450. return a;
  451. }
  452. }
  453. match = match.NextMatch();
  454. if (!match.Success) // Add the last part of the split
  455. {
  456. a.SetIndexValue(index++, s.Substring(lastIndex), updateLength: true);
  457. }
  458. }
  459. return a;
  460. }
  461. return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  462. }
  /// <summary>
  /// Generic split loop: repeatedly runs <paramref name="splitter"/> through RegExpExec
  /// at increasing positions and collects the segments between matches, followed by
  /// the capture groups of each match.
  /// </summary>
  /// <param name="s">Subject string.</param>
  /// <param name="splitter">Object whose lastIndex is driven by this loop.</param>
  /// <param name="unicodeMatching">Passed to AdvanceStringIndex when stepping forward.</param>
  /// <param name="lengthA">Index at which the next element is stored.</param>
  /// <param name="lim">Element count at which the result is returned early.</param>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var a = _realm.Intrinsics.Array.ArrayCreate(0);
      var size = (ulong) s.Length;
      ulong segmentStart = 0;
      ulong probe = 0;

      while (probe < size)
      {
          splitter.Set(RegExpInstance.PropertyLastIndex, probe, true);
          var z = RegExpExec(splitter, s);

          // A usable separator is a match whose (clamped) end differs from the segment start.
          ulong matchEnd = 0;
          var usable = !z.IsNull();
          if (usable)
          {
              matchEnd = System.Math.Min(TypeConverter.ToLength(splitter.Get(RegExpInstance.PropertyLastIndex)), size);
              usable = matchEnd != segmentStart;
          }

          if (!usable)
          {
              probe = AdvanceStringIndex(s, probe, unicodeMatching);
              continue;
          }

          // Text between the previous separator and this match.
          a.SetIndexValue(lengthA, s.Substring((int) segmentStart, (int) (probe - segmentStart)), updateLength: true);
          if (++lengthA == lim)
          {
              return a;
          }

          segmentStart = matchEnd;

          // Capture groups of the separator (index 0 is the whole match and is skipped).
          var captureCount = System.Math.Max((int) TypeConverter.ToLength(z.Get(CommonProperties.Length)) - 1, 0);
          for (var i = 1; i <= captureCount; i++)
          {
              a.SetIndexValue(lengthA, z.Get(i), updateLength: true);
              if (++lengthA == lim)
              {
                  return a;
              }
          }

          probe = segmentStart;
      }

      // Remainder after the last separator.
      a.SetIndexValue(lengthA, s.Substring((int) segmentStart, s.Length - (int) segmentStart), updateLength: true);
      return a;
  }
  /// <summary>
  /// Getter for "flags": reads the individual flag properties from the receiver and
  /// concatenates the letter of every truthy one, in the order d, g, i, m, s, u, v, y.
  /// </summary>
  private JsValue Flags(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.flags");
      var letters = "";

      // Reads one flag property and appends its letter when the value is truthy.
      void Probe(JsValue property, char letter)
      {
          if (TypeConverter.ToBoolean(r.Get(property)))
          {
              letters += letter;
          }
      }

      // The property reads happen in exactly this order.
      Probe("hasIndices", 'd');
      Probe(PropertyGlobal, 'g');
      Probe("ignoreCase", 'i');
      Probe("multiline", 'm');
      Probe("dotAll", 's');
      Probe("unicode", 'u');
      Probe("unicodeSets", 'v');
      Probe(PropertySticky, 'y');

      return letters;
  }
  /// <summary>
  /// Implements toString: reads "source" and then "flags" from the receiver and
  /// returns them in the form /source/flags.
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.toString");

      // "source" is read before "flags".
      var sourceText = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));

      return string.Concat("/", sourceText, "/", flagText);
  }
  /// <summary>
  /// Implements test: reports whether the receiver matches the string in arguments[0].
  /// </summary>
  /// <remarks>
  /// A fast path talks to the underlying .NET Regex directly. It is only taken when the
  /// receiver is a RegExpInstance without the unicode flag whose exec is still the
  /// default one; otherwise the generic RegExpExec route is used so that a replaced
  /// exec is honoured.
  /// </remarks>
  private JsValue Test(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths
      if (r is RegExpInstance R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          // Compare as ulong before narrowing, so an oversized lastIndex cannot wrap
          // into a negative start position.
          var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              // Past the end of the input: no match, and lastIndex is reset,
              // mirroring the stateful branch of RegExpBuiltinExec.
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          // lastIndex == s.Length is still attempted: a pattern may match the
          // empty string at the very end of the input.
          var start = (int) lastIndex;
          var m = R.Value.Match(s, start);
          if (!m.Success || (R.Sticky && m.Index != start))
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(RegExpInstance.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  /// </summary>
  /// <remarks>
  /// Runs a single RegExpExec from index 0 and restores the receiver's previous
  /// lastIndex afterwards. Returns the "index" of the match result, or -1 without a match.
  /// </remarks>
  private JsValue Search(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // Remember lastIndex and force the search to start at 0 (writing only when needed).
      var savedLastIndex = rx.Get(RegExpInstance.PropertyLastIndex);
      var startsAtZero = SameValue(savedLastIndex, 0);
      if (!startsAtZero)
      {
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      var result = RegExpExec(rx, s);

      // Put lastIndex back if executing the match changed it.
      var lastIndexAfterExec = rx.Get(RegExpInstance.PropertyLastIndex);
      var unchanged = SameValue(lastIndexAfterExec, savedLastIndex);
      if (!unchanged)
      {
          rx.Set(RegExpInstance.PropertyLastIndex, savedLastIndex, true);
      }

      if (!result.IsNull())
      {
          return result.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <remarks>
  /// Without the 'g' flag this is a single RegExpExec. With it, every match is collected
  /// into an array (or null is returned when there is none). A fast path uses the .NET
  /// Regex API directly when the 'u' flag is absent and the receiver is a RegExpInstance
  /// for which TryGetDefaultRegExpExec succeeds; everything else goes through MatchSlow.
  /// </remarks>
  private JsValue Match(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var flags = TypeConverter.ToString(rx.Get(PropertyFlags));
      var global = flags.IndexOf('g') != -1;
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = flags.IndexOf('u') != -1;
      rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is RegExpInstance rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          if (rei.Sticky)
          {
              // Sticky matches must be contiguous: each one has to start exactly where the
              // previous one ended. Track that position rather than the number of matches,
              // which only coincides with it when every match is one character long.
              uint found = 0;
              var expectedIndex = 0;
              var match = rei.Value.Match(s);
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(found, match.Value, updateLength: false);
                  found++;

                  // After an empty match the next attempt starts one position further on.
                  expectedIndex = match.Length == 0
                      ? match.Index + 1
                      : match.Index + match.Length;
                  match = match.NextMatch();
              }

              if (found == 0)
              {
                  return Null;
              }

              a.SetLength(found);
              return a;
          }

          var matches = rei.Value.Matches(s);
          if (matches.Count == 0)
          {
              return Null;
          }

          a.EnsureCapacity((uint) matches.Count);
          a.SetLength((uint) matches.Count);
          for (var i = 0; i < matches.Count; i++)
          {
              a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
          }

          return a;
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global-match loop: calls RegExpExec until it yields null and collects the
  /// text of every match. Returns null when nothing matched, otherwise the array.
  /// </summary>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var a = _realm.Intrinsics.Array.ArrayCreate(0);
      for (uint n = 0; ; n++)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              // The length is written once at the end because elements are stored without updating it.
              a.SetLength(n);
              if (n == 0)
              {
                  return Null;
              }

              return a;
          }

          var matchStr = TypeConverter.ToString(result.Get(JsString.NumberZeroString));
          a.SetIndexValue(n, matchStr, updateLength: false);

          if (matchStr == string.Empty)
          {
              // An empty match does not move lastIndex, so step it forward manually.
              var thisIndex = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              rx.Set(RegExpInstance.PropertyLastIndex, AdvanceStringIndex(s, thisIndex, fullUnicode), true);
          }
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// </summary>
  /// <remarks>
  /// Constructs a new matcher from the receiver and its flags via the species
  /// constructor, copies lastIndex onto it and returns a RegExp string iterator over it.
  /// </remarks>
  private JsValue MatchAll(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.matchAll");
      var s = TypeConverter.ToString(arguments.At(0));
      var c = SpeciesConstructor(r, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(r.Get(PropertyFlags));

      var constructorArguments = new JsValue[] { r, flags };
      var matcher = Construct(c, constructorArguments);

      // The matcher starts where the receiver currently stands.
      matcher.Set(
          RegExpInstance.PropertyLastIndex,
          TypeConverter.ToLength(r.Get(RegExpInstance.PropertyLastIndex)),
          true);

      var isGlobal = flags.IndexOf('g') != -1;
      var isFullUnicode = flags.IndexOf('u') != -1;
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, s, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>. In unicode mode a high
  /// surrogate immediately followed by a low surrogate is stepped over as one unit
  /// (index + 2); in every other case the result is index + 1.
  /// </summary>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      // A surrogate pair is only possible when a following code unit exists.
      var pairAtIndex = unicode
          && next < (ulong) s.Length
          && char.IsHighSurrogate(s[(int) index])
          && char.IsLowSurrogate(s[(int) next]);

      return pairAtIndex ? index + 2 : next;
  }
  /// <summary>
  /// Executes a match of <paramref name="r"/> against <paramref name="s"/>. A callable
  /// "exec" property is invoked and must return null or an object; without one the
  /// receiver has to be a RegExpInstance and the built-in matcher is used. A TypeError
  /// is raised in both failure cases.
  /// </summary>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      var exec = r.Get(PropertyExec);
      if (exec is not ICallable callable)
      {
          // No callable exec: only genuine RegExp instances can be matched.
          var ri = r as RegExpInstance;
          if (ri is null)
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return RegExpBuiltinExec(ri, s);
      }

      var result = callable.Call(r, new JsValue[] { s });
      if (!(result.IsNull() || result.IsObject()))
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return result;
  }
  /// <summary>
  /// Checks whether the "exec" property of <paramref name="o"/> is a ClrFunctionInstance
  /// wrapping this prototype's default exec delegate.
  /// </summary>
  /// <param name="o">Object whose "exec" property is inspected.</param>
  /// <param name="exec">The default exec delegate on success; otherwise the default value.</param>
  /// <returns>True when the default exec is in place.</returns>
  internal bool TryGetDefaultExec(ObjectInstance o, [NotNullWhen((true))] out Func<JsValue, JsValue[], JsValue>? exec)
  {
      exec = default;

      if (o.Get(PropertyExec) is not ClrFunctionInstance candidate)
      {
          return false;
      }

      if (candidate._func != _defaultExec)
      {
          return false;
      }

      exec = _defaultExec;
      return true;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// </summary>
  /// <remarks>
  /// Returns null when there is no match, otherwise the match result array. lastIndex is
  /// only written for global or sticky expressions: it is reset to 0 on failure and set
  /// to the end of the match on success.
  /// </remarks>
  private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));
      var global = R.Global;
      var sticky = R.Sticky;
      var stateful = global || sticky;

      // Expressions that are neither global nor sticky always search from the start.
      if (!stateful)
      {
          lastIndex = 0;
      }

      if (R.Source == RegExpInstance.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var emptyMatch = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          emptyMatch.FastSetDataProperty(PropertyIndex._value, lastIndex);
          emptyMatch.FastSetDataProperty(PropertyInput._value, s);
          emptyMatch.SetIndexValue(0, JsString.Empty, updateLength: false);
          return emptyMatch;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;
      var hasIndices = R.Indices;

      if (!stateful && !fullUnicode && !hasIndices)
      {
          // Common case: a single stateless match, lastIndex is left alone.
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
      }

      // the stateful version
      if (lastIndex > length)
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var match = R.Value.Match(s, (int) lastIndex);

      // A sticky expression only accepts a match that starts exactly at lastIndex.
      if (!match.Success || (sticky && match.Index != (int) lastIndex))
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var matchEnd = match.Index + match.Length;
      var e = fullUnicode ? GetStringIndex(s, matchEnd) : matchEnd;

      if (stateful)
      {
          R.Set(RegExpInstance.PropertyLastIndex, e, true);
      }

      return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getstringindex
  /// Converts a code point index into the matching UTF-16 code unit index of <paramref name="s"/>.
  /// </summary>
  /// <param name="s">The string to walk.</param>
  /// <param name="codePointIndex">Zero-based index counted in code points.</param>
  /// <returns>
  /// The code unit offset at which the requested code point starts, or the length of
  /// <paramref name="s"/> when the string holds no code point with that index.
  /// </returns>
  private static int GetStringIndex(string s, int codePointIndex)
  {
      var unitOffset = 0;

      // an empty string never enters the loop and yields its length, i.e. 0
      for (var seenCodePoints = 0; unitOffset < s.Length; seenCodePoints++)
      {
          if (seenCodePoints == codePointIndex)
          {
              return unitOffset;
          }

          // a surrogate pair is one code point that occupies two code units
          unitOffset += char.IsSurrogatePair(s, unitOffset) ? 2 : 1;
      }

      return s.Length;
  }
  /// <summary>
  /// Builds the result array for a successful match: one element per capture group (element 0 is the
  /// whole match), plus the <c>PropertyIndex</c>, <c>PropertyInput</c> and <c>PropertyGroups</c> data
  /// properties and, when <paramref name="hasIndices"/> is set, an "indices" property.
  /// </summary>
  /// <param name="engine">Engine whose realm supplies the array constructor.</param>
  /// <param name="regex">The expression that produced <paramref name="match"/>; used to resolve group names.</param>
  /// <param name="match">The successful match.</param>
  /// <param name="s">The input string that was matched.</param>
  /// <param name="fullUnicode">Not used by this method.</param>
  /// <param name="hasIndices">Whether to attach the "indices" array.</param>
  private static JsArray CreateReturnValueArray(
      Engine engine,
      Regex regex,
      Match match,
      string s,
      bool fullUnicode,
      bool hasIndices)
  {
      var groupCount = match.Groups.Count;
      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) groupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, s);

      ObjectInstance? groups = null;
      var indices = hasIndices ? new List<JsNumber[]?>(groupCount) : null;

      // Holds one entry per capture group (whole match excluded) so that entry i - 1 always belongs
      // to group i; unnamed groups are recorded as string.Empty. Only needed for the indices array.
      var groupNames = hasIndices ? new List<string>(groupCount) : null;

      for (uint i = 0; i < groupCount; i++)
      {
          var capture = match.Groups[(int) i];
          var capturedValue = Undefined;
          if (capture.Success)
          {
              capturedValue = capture.Value;
          }

          if (indices is not null)
          {
              if (capture.Success)
              {
                  indices.Add(new[] { JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length) });
              }
              else
              {
                  // groups that did not participate keep their slot
                  indices.Add(null);
              }
          }

          var groupName = GetRegexGroupName(regex, (int) i);
          if (!string.IsNullOrWhiteSpace(groupName))
          {
              groups ??= OrdinaryObjectCreate(engine, null);
              groups.CreateDataPropertyOrThrow(groupName, capturedValue);
          }
          else
          {
              groupName = string.Empty;
          }

          if (i > 0)
          {
              groupNames?.Add(groupName!);
          }

          array.SetIndexValue(i, capturedValue, updateLength: false);
      }

      array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

      if (hasIndices)
      {
          // the groups object exists exactly when at least one named group was seen
          var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, groups is not null);
          array.CreateDataPropertyOrThrow("indices", indicesArray);
      }

      return array;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  /// Builds the array of index pairs for a match and, when <paramref name="hasGroups"/> is set,
  /// a "groups" object that exposes the pairs of named groups by name.
  /// </summary>
  /// <param name="engine">Engine whose realm supplies the array constructor.</param>
  /// <param name="s">The matched input string; forwarded to <c>GetMatchIndexPair</c>.</param>
  /// <param name="indices">Index values per capture group, entry 0 being the whole match; <c>null</c> entries become <c>Undefined</c>.</param>
  /// <param name="groupNames">
  /// Group names looked up by capture position: entry <c>i - 1</c> is used for <c>indices[i]</c>.
  /// Null, empty or whitespace entries, and positions beyond the end of the list, are treated as unnamed.
  /// </param>
  /// <param name="hasGroups">Whether a "groups" object should be created.</param>
  private static JsArray MakeMatchIndicesIndexPairArray(
      Engine engine,
      string s,
      List<JsNumber[]?> indices,
      List<string>? groupNames,
      bool hasGroups)
  {
      var n = indices.Count;
      var a = engine.Realm.Intrinsics.Array.Construct((uint) n);

      ObjectInstance? groups = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
      a.CreateDataPropertyOrThrow("groups", groups ?? Undefined);

      for (var i = 0; i < n; ++i)
      {
          var matchIndices = indices[i];
          var matchIndexPair = matchIndices is not null
              ? GetMatchIndexPair(engine, s, matchIndices)
              : Undefined;

          a.Push(matchIndexPair);

          // Entry 0 is the whole match and never has a name. The range check keeps a name list
          // that is shorter than the capture list from throwing ArgumentOutOfRangeException.
          if (i == 0 || groups is null || groupNames is null || i - 1 >= groupNames.Count)
          {
              continue;
          }

          var groupName = groupNames[i - 1];
          if (!string.IsNullOrWhiteSpace(groupName))
          {
              groups.CreateDataPropertyOrThrow(groupName, matchIndexPair);
          }
      }

      return a;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getmatchindexpair
  /// Wraps the given index values in a newly created array.
  /// </summary>
  /// <param name="engine">Engine whose realm supplies the array constructor.</param>
  /// <param name="s">The matched input string; not used by this implementation.</param>
  /// <param name="match">The index values to place in the array.</param>
  private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
      => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
  /// <summary>
  /// Resolves the name of the capture group with the given number.
  /// </summary>
  /// <param name="regex">The expression that owns the group.</param>
  /// <param name="index">The group number; 0 denotes the whole match.</param>
  /// <returns>
  /// <c>null</c> for the whole match and for groups whose name is just their own number (unnamed groups);
  /// otherwise whatever <see cref="Regex.GroupNameFromNumber"/> reports for <paramref name="index"/>.
  /// </returns>
  private static string? GetRegexGroupName(Regex regex, int index)
  {
      if (index == 0)
      {
          return null;
      }

      var groupNameFromNumber = regex.GroupNameFromNumber(index);

      // regex defaults to the group number as name when it's not a named group; compare the whole
      // number instead of a single digit so that groups 10 and above are recognised as unnamed too
      var isNumericDefault = int.TryParse(
          groupNameFromNumber,
          System.Globalization.NumberStyles.None,
          System.Globalization.CultureInfo.InvariantCulture,
          out var groupNumber) && groupNumber == index;

      return isNumericDefault ? null : groupNameFromNumber;
  }
  /// <summary>
  /// Runs <c>RegExpBuiltinExec</c> for <paramref name="thisObj"/> against the string form of the first argument.
  /// </summary>
  /// <param name="thisObj">The receiver; a TypeError is raised through <c>ExceptionHelper</c> unless it is a <c>RegExpInstance</c>.</param>
  /// <param name="arguments">Call arguments; only the first one is read.</param>
  private JsValue Exec(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = thisObj as RegExpInstance;
      if (regExp is null)
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      // the receiver check above runs before the argument is converted to a string
      return RegExpBuiltinExec(regExp, TypeConverter.ToString(arguments.At(0)));
  }
  942. }
  943. }