2
0

RegExpPrototype.cs 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Text.RegularExpressions;
  5. using Jint.Collections;
  6. using Jint.Native.Array;
  7. using Jint.Native.Number;
  8. using Jint.Native.Object;
  9. using Jint.Native.String;
  10. using Jint.Native.Symbol;
  11. using Jint.Pooling;
  12. using Jint.Runtime;
  13. using Jint.Runtime.Descriptors;
  14. using Jint.Runtime.Interop;
  15. namespace Jint.Native.RegExp
  16. {
  17. public sealed class RegExpPrototype : Prototype
  18. {
  19. private static readonly JsString PropertyExec = new JsString("exec");
  20. private static readonly JsString PropertyIndex = new JsString("index");
  21. private static readonly JsString PropertyInput = new JsString("input");
  22. private static readonly JsString PropertySticky = new JsString("sticky");
  23. private static readonly JsString PropertyGlobal = new JsString("global");
  24. internal static readonly JsString PropertySource = new JsString("source");
  25. private static readonly JsValue DefaultSource = new JsString("(?:)");
  26. internal static readonly JsString PropertyFlags = new JsString("flags");
  27. private readonly RegExpConstructor _constructor;
  28. private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the RegExp prototype object for the given engine and realm.
  /// </summary>
  /// <param name="engine">Engine passed on to the base prototype.</param>
  /// <param name="realm">Realm passed on to the base prototype.</param>
  /// <param name="constructor">Value exposed through the "constructor" property.</param>
  /// <param name="objectPrototype">Object used as this prototype's own prototype.</param>
  internal RegExpPrototype(
      Engine engine,
      Realm realm,
      RegExpConstructor constructor,
      ObjectPrototype objectPrototype) : base(engine, realm)
  {
      _prototype = objectPrototype;
      _constructor = constructor;

      // One delegate instance is kept so TryGetDefaultExec can compare against it.
      _defaultExec = Exec;
  }
  /// <summary>
  /// Registers the string-keyed members (constructor, methods, flag accessors) and the
  /// well-known symbol methods on the prototype.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Writable/configurable data property holding a built-in function.
      PropertyDescriptor Method(string name, Func<JsValue, JsValue[], JsValue> func, int length)
      {
          return new PropertyDescriptor(new ClrFunctionInstance(Engine, name, func, length, lengthFlags), propertyFlags);
      }

      // Configurable accessor with a getter only.
      GetSetPropertyDescriptor Getter(string name, Func<JsValue, JsValue[], JsValue> getter)
      {
          return new GetSetPropertyDescriptor(
              get: new ClrFunctionInstance(Engine, name, getter, 0, lengthFlags),
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      // Getter that reads a value off a RegExpInstance; the prototype itself yields undefined
      // and any other receiver is a TypeError.
      GetSetPropertyDescriptor FlagGetter(string name, Func<RegExpInstance, JsValue> valueExtractor)
      {
          return Getter(name, (thisObj, arguments) =>
          {
              if (ReferenceEquals(thisObj, this))
              {
                  return Undefined;
              }

              var regExp = thisObj as RegExpInstance;
              if (regExp is null)
              {
                  ExceptionHelper.ThrowTypeError(_realm);
              }

              return valueExtractor(regExp);
          });
      }

      var properties = new PropertyDictionary(12, checkExistingKeys: false);
      properties["constructor"] = new PropertyDescriptor(_constructor, propertyFlags);
      properties["toString"] = Method("toString", ToRegExpString, 0);
      properties["exec"] = Method("exec", _defaultExec, 1);
      properties["test"] = Method("test", Test, 1);
      properties["dotAll"] = FlagGetter("get dotAll", r => r.DotAll);
      properties["flags"] = Getter("get flags", Flags);
      properties["global"] = FlagGetter("get global", r => r.Global);
      properties["ignoreCase"] = FlagGetter("get ignoreCase", r => r.IgnoreCase);
      properties["multiline"] = FlagGetter("get multiline", r => r.Multiline);
      properties["source"] = Getter("get source", Source);
      properties["sticky"] = FlagGetter("get sticky", r => r.Sticky);
      properties["unicode"] = FlagGetter("get unicode", r => r.FullUnicode);
      SetProperties(properties);

      var symbols = new SymbolDictionary(5);
      symbols[GlobalSymbolRegistry.Match] = Method("[Symbol.match]", Match, 1);
      symbols[GlobalSymbolRegistry.MatchAll] = Method("[Symbol.matchAll]", MatchAll, 1);
      symbols[GlobalSymbolRegistry.Replace] = Method("[Symbol.replace]", Replace, 2);
      symbols[GlobalSymbolRegistry.Search] = Method("[Symbol.search]", Search, 1);
      symbols[GlobalSymbolRegistry.Split] = Method("[Symbol.split]", Split, 2);
      SetSymbols(symbols);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  /// </summary>
  /// <remarks>
  /// Returns "(?:)" when called on the prototype itself, throws a TypeError for any other
  /// non-RegExp receiver, and otherwise returns the pattern text with unescaped forward
  /// slashes escaped. Escape sequences already present in the pattern are copied verbatim so
  /// that an existing "\/" is not escaped a second time.
  /// </remarks>
  private JsValue Source(JsValue thisObj, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObj, this))
      {
          return DefaultSource;
      }

      var r = thisObj as RegExpInstance;
      if (r is null)
      {
          ExceptionHelper.ThrowTypeError(_realm);
      }

      var source = r.Source;
      if (source is null)
      {
          return JsString.Empty;
      }

      if (source.IndexOf('/') < 0)
      {
          // nothing to escape
          return source;
      }

      using var escaped = StringBuilderPool.Rent();
      var sb = escaped.Builder;
      for (var i = 0; i < source.Length; i++)
      {
          var c = source[i];
          if (c == '\\' && i + 1 < source.Length)
          {
              // copy the escape sequence as-is; this keeps "\/" and "\\" intact
              sb.Append(c).Append(source[++i]);
          }
          else if (c == '/')
          {
              sb.Append("\\/");
          }
          else
          {
              sb.Append(c);
          }
      }

      return escaped.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// </summary>
  /// <remarks>
  /// arguments[0] is the input string and arguments[1] is either a callable replacer or a
  /// value converted to a replacement template. A fast path delegates to the .NET regex when
  /// the receiver is a non-sticky, non-unicode RegExpInstance with the default exec and the
  /// template contains no '$'. Otherwise the matches are collected through RegExpExec and the
  /// result is assembled match by match.
  /// </remarks>
  private JsValue Replace(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // we need heavier logic if we have named captures
      var mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.IndexOf('$') != -1;
      }

      var fullUnicode = false;
      var global = TypeConverter.ToBoolean(rx.Get(PropertyGlobal));
      if (global)
      {
          fullUnicode = TypeConverter.ToBoolean(rx.Get("unicode"));
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is RegExpInstance rei && rei.TryGetDefaultRegExpExec(out _))
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              string Evaluator(Match match)
              {
                  var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
                  replacerArgs.Add(match.Value);
                  for (var i = 1; i < match.Groups.Count; i++)
                  {
                      var capture = match.Groups[i];
                      replacerArgs.Add(capture.Value);
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);

                  // no named captures
                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      var results = new List<ObjectInstance>();
      while (true)
      {
          var execResult = RegExpExec(rx, s);
          if (execResult.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) execResult);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(execResult.Get(0));
          if (matchStr == "")
          {
              // an empty match would loop forever unless lastIndex is moved forward;
              // the write throws on failure, as in MatchSlow
              var thisIndex = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(RegExpInstance.PropertyLastIndex, nextIndex, true);
          }
      }

      using var accumulated = StringBuilderPool.Rent();
      var accumulatedResult = accumulated.Builder;
      var nextSourcePosition = 0;
      var captures = new List<string>();
      foreach (var matchResult in results)
      {
          var nCaptures = (int) matchResult.Length;
          nCaptures = System.Math.Max(nCaptures - 1, 0);

          var matched = TypeConverter.ToString(matchResult.Get(0));
          var matchLength = matched.Length;

          // clamp before narrowing to int so out-of-range values end up at the correct bound
          var position = (int) System.Math.Max(
              System.Math.Min(TypeConverter.ToInteger(matchResult.Get(PropertyIndex)), lengthS),
              0);

          captures.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = matchResult.Get(n);
              captures.Add(!capN.IsUndefined() ? TypeConverter.ToString(capN) : "");
          }

          var namedCaptures = matchResult.Get("groups");

          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>(captures.Count + 4);
              replacerArgs.Add(matched);
              foreach (var capture in captures)
              {
                  replacerArgs.Add(capture);
              }

              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_realm, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // matches that start before the already consumed part of the input are ignored
          if (position >= nextSourcePosition)
          {
              accumulatedResult.Append(s, nextSourcePosition, position - nextSourcePosition);
              accumulatedResult.Append(replacement);
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition < lengthS)
      {
          accumulatedResult.Append(s, nextSourcePosition, lengthS - nextSourcePosition);
      }

      return accumulated.ToString();
  }
  /// <summary>
  /// Calls the replacer with an undefined receiver and the collected arguments, and converts
  /// whatever it returns to a string.
  /// </summary>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      return TypeConverter.ToString(callable.Call(Undefined, replacerArgs.ToArray()));
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// Expands the '$' patterns of a replacement template for a single match.
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The string the match was found in.</param>
  /// <param name="position">Index of the match within <paramref name="str"/>.</param>
  /// <param name="captures">Captured groups; entry 0 corresponds to $1.</param>
  /// <param name="namedCaptures">Object holding named groups, or undefined when there are none.</param>
  /// <param name="replacement">The replacement template.</param>
  /// <returns>The template with all recognised patterns substituted.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // If there is no pattern, replace the pattern as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      // Patterns
      // $$ Inserts a "$".
      // $& Inserts the matched substring.
      // $` Inserts the portion of the string that precedes the matched substring.
      // $' Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string, provided the first argument was a RegExp object.
      // $<name> Inserts the named group "name" when named captures are available.
      using var replacementBuilder = StringBuilderPool.Rent();
      var sb = replacementBuilder.Builder;
      for (var i = 0; i < replacement.Length; i++)
      {
          char c = replacement[i];
          if (c != '$' || i >= replacement.Length - 1)
          {
              // ordinary character, or a '$' at the very end of the template
              sb.Append(c);
              continue;
          }

          c = replacement[++i];
          switch (c)
          {
              case '$':
                  sb.Append('$');
                  break;
              case '&':
                  sb.Append(matched);
                  break;
              case '`':
                  sb.Append(str, 0, position);
                  break;
              case '\'':
              {
                  // a match reported past the end of the input leaves an empty tail
                  var tailPosition = position + matched.Length;
                  if (tailPosition < str.Length)
                  {
                      sb.Append(str, tailPosition, str.Length - tailPosition);
                  }
                  break;
              }
              case '<':
              {
                  var closing = replacement.IndexOf('>', i + 1);
                  if (closing < 0 || !(namedCaptures is ObjectInstance groups))
                  {
                      // no named captures or unterminated name: keep the text literally
                      sb.Append("$<");
                  }
                  else
                  {
                      var groupName = replacement.Substring(i + 1, closing - i - 1);
                      var capture = groups.Get(groupName);
                      if (!capture.IsUndefined())
                      {
                          sb.Append(TypeConverter.ToString(capture));
                      }

                      i = closing;
                  }
                  break;
              }
              default:
              {
                  // only ASCII digits form capture references
                  if (c >= '0' && c <= '9')
                  {
                      int matchNumber1 = c - '0';

                      // The match number can be one or two digits long.
                      int matchNumber2 = 0;
                      if (i < replacement.Length - 1 && replacement[i + 1] >= '0' && replacement[i + 1] <= '9')
                      {
                          matchNumber2 = matchNumber1 * 10 + (replacement[i + 1] - '0');
                      }

                      // Try the two digit capture first.
                      if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                      {
                          // Two digit capture replacement.
                          sb.Append(captures[matchNumber2 - 1]);
                          i++;
                      }
                      else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                      {
                          // Single digit capture replacement.
                          sb.Append(captures[matchNumber1 - 1]);
                      }
                      else
                      {
                          // Capture does not exist: emit '$' and re-read the digit as plain text.
                          sb.Append('$');
                          i--;
                      }
                  }
                  else
                  {
                      // Unknown replacement pattern.
                      sb.Append('$');
                      sb.Append(c);
                  }
                  break;
              }
          }
      }

      return replacementBuilder.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// </summary>
  /// <remarks>
  /// arguments[0] is the string to split and arguments[1] the optional limit. A splitter is
  /// always constructed through the species constructor. When the receiver is a non-unicode
  /// RegExpInstance with the default exec, the split itself is done directly with the .NET
  /// regex; otherwise SplitSlow drives the splitter through RegExpExec.
  /// </remarks>
  private JsValue Split(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.IndexOf('u') > -1;
      var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });
      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _realm.Intrinsics.Array.ArrayCreate(0);
      }

      if (s.Length == 0)
      {
          // an empty input yields [] when the splitter matches it and [""] otherwise
          var a = _realm.Intrinsics.Array.ArrayCreate(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is RegExpInstance R && R.TryGetDefaultRegExpExec(out _))
      {
          // we can take faster path
          if (R.Source == RegExpInstance.regExpForMatchingAllCharacters)
          {
              // if empty string, just a string split; never hand back more items than the limit allows
              return StringPrototype.SplitWithStringSeparator(_realm, "", s, (uint) System.Math.Min(lim, s.Length));
          }

          var a = (ArrayInstance) _realm.Intrinsics.Array.Construct(Arguments.Empty);
          var match = R.Value.Match(s, 0);

          int lastIndex = 0;
          uint index = 0;
          while (match.Success)
          {
              // empty matches at the start, at the end, or right after the previous separator do not split
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  match = match.NextMatch();
                  continue;
              }

              // Add the match results to the array.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);
              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;
              for (int i = 1; i < match.Groups.Count; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = group.Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);
                  if (index >= lim)
                  {
                      return a;
                  }
              }

              match = match.NextMatch();
          }

          // Add the last part of the split. This runs whenever the matches are exhausted, including
          // when the trailing matches were skipped empty ones or when nothing matched at all.
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);
          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Splits <paramref name="s"/> by repeatedly running the splitter through RegExpExec at
  /// successive positions.
  /// </summary>
  /// <param name="s">The string to split.</param>
  /// <param name="splitter">The object whose lastIndex is set and read around each exec call.</param>
  /// <param name="unicodeMatching">Whether positions advance by code point instead of code unit.</param>
  /// <param name="lengthA">Index at which the first element is written to the result array.</param>
  /// <param name="lim">The array is returned as soon as <paramref name="lengthA"/> reaches this value.</param>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var size = (ulong) s.Length;
      var a = _realm.Intrinsics.Array.ArrayCreate(0);

      // p: end of the previous separator, q: position currently being probed
      ulong p = 0;
      ulong q = 0;
      while (q < size)
      {
          splitter.Set(RegExpInstance.PropertyLastIndex, q, true);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              var e = System.Math.Min(TypeConverter.ToLength(splitter.Get(RegExpInstance.PropertyLastIndex)), size);
              if (e != p)
              {
                  a.SetIndexValue(lengthA++, s.Substring((int) p, (int) (q - p)), updateLength: true);
                  if (lengthA == lim)
                  {
                      return a;
                  }

                  p = e;

                  // element 0 of the exec result is the match itself, the rest are captures
                  var numberOfCaptures = System.Math.Max((int) TypeConverter.ToLength(z.Get(CommonProperties.Length)) - 1, 0);
                  for (var i = 1; i <= numberOfCaptures; i++)
                  {
                      a.SetIndexValue(lengthA++, z.Get(i), updateLength: true);
                      if (lengthA == lim)
                      {
                          return a;
                      }
                  }

                  q = p;
                  continue;
              }
          }

          // no match here, or a match that ends where the previous separator ended
          q = AdvanceStringIndex(s, q, unicodeMatching);
      }

      a.SetIndexValue(lengthA, s.Substring((int) p), updateLength: true);
      return a;
  }
  /// <summary>
  /// Builds the flags string by reading the flag properties from the receiver in the order
  /// global, ignoreCase, multiline, dotAll, unicode, sticky and appending the matching letter
  /// for every truthy one.
  /// </summary>
  private JsValue Flags(JsValue thisObj, JsValue[] arguments)
  {
      var target = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.flags");
      var flags = string.Empty;

      void AppendWhenSet(JsValue property, char flag)
      {
          if (TypeConverter.ToBoolean(target.Get(property)))
          {
              flags += flag;
          }
      }

      AppendWhenSet(PropertyGlobal, 'g');
      AppendWhenSet("ignoreCase", 'i');
      AppendWhenSet("multiline", 'm');
      AppendWhenSet("dotAll", 's');
      AppendWhenSet("unicode", 'u');
      AppendWhenSet(PropertySticky, 'y');

      return flags;
  }
  /// <summary>
  /// Formats the receiver as "/source/flags", reading both parts through its "source" and
  /// "flags" properties.
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.toString");

      var source = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));

      return string.Concat("/", source, "/", flagText);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype.test
  /// Reports whether the receiver matches the string given in arguments[0].
  /// </summary>
  /// <remarks>
  /// The fast paths are taken only for a non-unicode RegExpInstance that still uses the default
  /// exec, so a user supplied exec is always honoured through RegExpExec.
  /// </remarks>
  private JsValue Test(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths
      if (r is RegExpInstance R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));
          if (lastIndex > (ulong) s.Length)
          {
              // nothing can match beyond the end of the input; lastIndex == length is still
              // attempted because an empty match is possible there
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          // safe narrowing: lastIndex is at most s.Length here
          var start = (int) lastIndex;
          var m = R.Value.Match(s, start);
          if (!m.Success || (R.Sticky && m.Index != start))
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(RegExpInstance.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// Runs a single exec from position 0 and returns the "index" of the result, or -1 when
  /// there is no match. The receiver's lastIndex is put back to its previous value afterwards.
  /// </summary>
  private JsValue Search(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.search");
      var input = TypeConverter.ToString(arguments.At(0));

      // only write lastIndex when it actually differs, both before and after the exec
      var savedLastIndex = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(savedLastIndex, 0))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, input);

      if (!SameValue(rx.Get(RegExpInstance.PropertyLastIndex), savedLastIndex))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  /// </summary>
  /// <remarks>
  /// Non-global receivers perform a single exec. Global receivers collect every match; when
  /// the receiver is a non-unicode RegExpInstance with the default exec this is done directly
  /// with the .NET regex, otherwise through MatchSlow. Returns null when nothing matches.
  /// </remarks>
  private JsValue Match(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var global = TypeConverter.ToBoolean(rx.Get(PropertyGlobal));
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = TypeConverter.ToBoolean(rx.Get("unicode"));
      rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is RegExpInstance rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = _realm.Intrinsics.Array.ArrayCreate(0);

          if (rei.Sticky)
          {
              var match = rei.Value.Match(s);
              if (!match.Success || match.Index != 0)
              {
                  return Null;
              }

              // every sticky match has to begin exactly where the previous one ended;
              // after an empty match the next attempt starts one position further
              uint count = 0;
              var expectedIndex = 0;
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(count++, match.Value, updateLength: false);
                  expectedIndex = match.Length == 0
                      ? match.Index + 1
                      : match.Index + match.Length;
                  match = match.NextMatch();
              }

              a.SetLength(count);
              return a;
          }

          var matches = rei.Value.Matches(s);
          if (matches.Count == 0)
          {
              return Null;
          }

          a.EnsureCapacity((uint) matches.Count);
          a.SetLength((uint) matches.Count);
          for (var i = 0; i < matches.Count; i++)
          {
              a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
          }

          return a;
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Collects the matched strings by calling RegExpExec until it returns null.
  /// Returns null when there was no match at all, otherwise the array of matches.
  /// </summary>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var matches = _realm.Intrinsics.Array.ArrayCreate(0);
      uint found = 0;

      for (var execResult = RegExpExec(rx, s); !execResult.IsNull(); execResult = RegExpExec(rx, s))
      {
          var text = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          matches.SetIndexValue(found++, text, updateLength: false);

          if (text.Length == 0)
          {
              // move lastIndex forward after an empty match
              var current = TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              rx.Set(RegExpInstance.PropertyLastIndex, AdvanceStringIndex(s, current, fullUnicode), true);
          }
      }

      matches.SetLength(found);
      if (found == 0)
      {
          return Null;
      }

      return matches;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// Constructs a matcher through the species constructor, copies the receiver's lastIndex
  /// onto it and returns a RegExp string iterator over the input.
  /// </summary>
  private JsValue MatchAll(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.matchAll");
      var input = TypeConverter.ToString(arguments.At(0));

      var species = SpeciesConstructor(regExp, _realm.Intrinsics.RegExp);
      var flags = TypeConverter.ToJsString(regExp.Get(PropertyFlags));
      var matcher = Construct(species, new JsValue[] { regExp, flags });

      matcher.Set(
          RegExpInstance.PropertyLastIndex,
          TypeConverter.ToLength(regExp.Get(RegExpInstance.PropertyLastIndex)),
          true);

      var isGlobal = flags.IndexOf('g') != -1;
      var isUnicode = flags.IndexOf('u') != -1;
      return _realm.Intrinsics.RegExpStringIteratorPrototype.Construct(matcher, input, isGlobal, isUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>. In unicode mode a high surrogate
  /// immediately followed by a low surrogate counts as one step of two code units; in every
  /// other case the step is a single code unit.
  /// </summary>
  private static ulong AdvanceStringIndex(string s, ulong index, bool unicode)
  {
      var next = index + 1;

      var startsSurrogatePair = unicode
          && next < (ulong) s.Length
          && char.IsHighSurrogate(s[(int) index])
          && char.IsLowSurrogate(s[(int) next]);

      return startsSurrogatePair ? index + 2 : next;
  }
  /// <summary>
  /// Executes <paramref name="r"/> against <paramref name="s"/>. A callable "exec" property is
  /// invoked and must return an object or null (TypeError otherwise). Without a callable exec
  /// the receiver has to be a RegExpInstance, which is then run through RegExpBuiltinExec.
  /// </summary>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      if (!(r.Get(PropertyExec) is ICallable callable))
      {
          if (!(r is RegExpInstance))
          {
              ExceptionHelper.ThrowTypeError(r.Engine.Realm);
          }

          return RegExpBuiltinExec((RegExpInstance) r, s);
      }

      var execResult = callable.Call(r, new JsValue[] { s });
      var isObjectOrNull = execResult.IsNull() || execResult.IsObject();
      if (!isObjectOrNull)
      {
          ExceptionHelper.ThrowTypeError(r.Engine.Realm);
      }

      return execResult;
  }
  /// <summary>
  /// Checks whether the "exec" property of <paramref name="o"/> is a ClrFunctionInstance
  /// wrapping this prototype's built-in exec delegate.
  /// </summary>
  /// <param name="o">Object whose "exec" property is inspected.</param>
  /// <param name="exec">The built-in exec delegate on success, otherwise null.</param>
  /// <returns>true when the built-in exec is in place.</returns>
  internal bool TryGetDefaultExec(ObjectInstance o, out Func<JsValue, JsValue[], JsValue> exec)
  {
      var usesBuiltIn = o.Get(PropertyExec) is ClrFunctionInstance functionInstance
                        && functionInstance._func == _defaultExec;

      exec = usesBuiltIn ? _defaultExec : null;
      return usesBuiltIn;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexpbuiltinexec
  /// Runs the .NET regex of <paramref name="R"/> against <paramref name="s"/>.
  /// </summary>
  /// <remarks>
  /// lastIndex is only honoured for global or sticky expressions. Returns null when nothing
  /// matches; in the stateful and empty-pattern paths lastIndex is reset to 0 in that case.
  /// On a stateful match lastIndex is set to the end of the match.
  /// </remarks>
  private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
  {
      var length = (ulong) s.Length;
      var lastIndex = TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          lastIndex = 0;
      }

      if (R.Source == RegExpInstance.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > length)
          {
              // lastIndex can only be non-zero here for global/sticky expressions,
              // which start over after a failed match
              R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Realm.Intrinsics.Array.ArrayCreate(1);
          array.FastAddProperty(PropertyIndex, lastIndex, true, true, true);
          array.FastAddProperty(PropertyInput, s, true, true, true);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;

      if (!global && !sticky && !fullUnicode)
      {
          // we can the non-stateful fast path which is the common case
          var m = matcher.Match(s, (int) lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R.Engine, m, s, fullUnicode: false);
      }

      // the stateful version
      if (lastIndex > length)
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      // Regex.Match searches forward from the start index, so a failed search cannot
      // succeed from any later index; there is no need to retry position by position.
      var match = matcher.Match(s, (int) lastIndex);
      if (!match.Success || (sticky && match.Index != (int) lastIndex))
      {
          R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
          return Null;
      }

      var e = match.Index + match.Length;
      if (fullUnicode)
      {
          // e is an index into the Input character list, derived from S, matched by matcher.
          // Let eUTF be the smallest index into S that corresponds to the character at element e of Input.
          // If e is greater than or equal to the number of elements in Input, then eUTF is the number of code units in S.
          // Set e to eUTF.
          var indexes = StringInfo.ParseCombiningCharacters(s);
          if (match.Index < indexes.Length)
          {
              var sub = StringInfo.GetNextTextElement(s, match.Index);
              e += sub.Length - 1;
          }
      }

      R.Set(RegExpInstance.PropertyLastIndex, e, true);

      return CreateReturnValueArray(R.Engine, match, s, fullUnicode);
  }
  /// <summary>
  /// Builds the exec result array: one element per regex group (undefined for groups that did
  /// not participate) plus the "index", "input" and "groups" properties. "groups" is always
  /// undefined because named groups are not detected yet.
  /// </summary>
  private static ArrayInstance CreateReturnValueArray(Engine engine, Match match, string inputValue, bool fullUnicode)
  {
      var regexGroups = match.Groups;
      var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) regexGroups.Count);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, inputValue);

      for (var i = 0; i < regexGroups.Count; i++)
      {
          var group = regexGroups[i];

          JsValue capturedValue;
          if (!group.Success)
          {
              capturedValue = Undefined;
          }
          else if (fullUnicode)
          {
              // in unicode mode the text element starting at the group's index is reported
              capturedValue = StringInfo.GetNextTextElement(inputValue, group.Index);
          }
          else
          {
              capturedValue = group.Value;
          }

          array.SetIndexValue((uint) i, capturedValue, updateLength: false);
      }

      // todo detect captured name
      array.CreateDataProperty("groups", Undefined);
      return array;
  }
  /// <summary>
  /// Built-in exec: requires a RegExpInstance receiver (TypeError otherwise), converts
  /// arguments[0] to a string and runs RegExpBuiltinExec on it.
  /// </summary>
  private JsValue Exec(JsValue thisObj, JsValue[] arguments)
  {
      if (!(thisObj is RegExpInstance))
      {
          ExceptionHelper.ThrowTypeError(_engine.Realm);
      }

      var input = TypeConverter.ToString(arguments.At(0));
      return RegExpBuiltinExec((RegExpInstance) thisObj, input);
  }
  802. }
  803. }