// RegExpPrototype.cs
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Text.RegularExpressions;
  5. using Jint.Collections;
  6. using Jint.Native.Array;
  7. using Jint.Native.Number;
  8. using Jint.Native.Object;
  9. using Jint.Native.String;
  10. using Jint.Native.Symbol;
  11. using Jint.Pooling;
  12. using Jint.Runtime;
  13. using Jint.Runtime.Descriptors;
  14. using Jint.Runtime.Descriptors.Specialized;
  15. using Jint.Runtime.Interop;
  16. namespace Jint.Native.RegExp
  17. {
  18. public sealed class RegExpPrototype : ObjectInstance
  19. {
  // Property keys shared with other types in the assembly.
  internal static readonly JsString PropertySource = new JsString("source");
  internal static readonly JsString PropertyFlags = new JsString("flags");

  // Property keys used only by this prototype; allocated once and reused on every lookup.
  private static readonly JsString PropertyExec = new JsString("exec");
  private static readonly JsString PropertyIndex = new JsString("index");
  private static readonly JsString PropertyInput = new JsString("input");
  private static readonly JsString PropertySticky = new JsString("sticky");
  private static readonly JsString PropertyGlobal = new JsString("global");

  // Value returned by the "source" getter when it is invoked on the prototype object itself.
  private static readonly JsValue DefaultSource = new JsString("(?:)");

  // Constructor exposed through the "constructor" property; assigned by CreatePrototypeObject.
  private RegExpConstructor _regExpConstructor;

  // The built-in exec delegate, kept so TryGetDefaultExec can compare an object's "exec" against it.
  private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;

  /// <summary>
  /// Creates the prototype object and captures the built-in <c>exec</c> implementation.
  /// </summary>
  private RegExpPrototype(Engine engine) : base(engine) => _defaultExec = Exec;
  /// <summary>
  /// Builds the RegExp prototype for <paramref name="engine"/>, chaining it to the engine's
  /// Object prototype and remembering the constructor it belongs to.
  /// </summary>
  /// <param name="engine">Engine that owns the new prototype object.</param>
  /// <param name="regExpConstructor">Constructor later exposed as the <c>constructor</c> property.</param>
  /// <returns>The newly created prototype object.</returns>
  public static RegExpPrototype CreatePrototypeObject(Engine engine, RegExpConstructor regExpConstructor)
  {
      var prototype = new RegExpPrototype(engine);
      prototype._prototype = engine.Object.PrototypeObject;
      prototype._regExpConstructor = regExpConstructor;
      return prototype;
  }
  /// <summary>
  /// Registers the string-keyed members (constructor, toString, exec, test and the flag/source
  /// accessors) and the symbol-keyed members (match, matchAll, replace, search, split).
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag memberFlags = PropertyFlag.Configurable | PropertyFlag.Writable;
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;

      // Wraps a getter function in a configurable accessor descriptor without a setter.
      GetSetPropertyDescriptor ReadOnlyAccessor(ClrFunctionInstance getter)
      {
          return new GetSetPropertyDescriptor(get: getter, set: Undefined, flags: PropertyFlag.Configurable);
      }

      // Builds an accessor that reads a value from a RegExpInstance receiver. Invoked on the
      // prototype itself it yields undefined; any other receiver raises a TypeError.
      GetSetPropertyDescriptor InstanceAccessor(string name, Func<RegExpInstance, JsValue> read)
      {
          JsValue Getter(JsValue thisObj, JsValue[] arguments)
          {
              if (ReferenceEquals(thisObj, this))
              {
                  return Undefined;
              }

              if (thisObj is RegExpInstance regExp)
              {
                  return read(regExp);
              }

              return ExceptionHelper.ThrowTypeError<JsValue>(_engine);
          }

          return ReadOnlyAccessor(new ClrFunctionInstance(Engine, name, Getter, 0, lengthFlags));
      }

      var properties = new PropertyDictionary(12, checkExistingKeys: false);
      properties["constructor"] = new PropertyDescriptor(_regExpConstructor, memberFlags);
      properties["toString"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "toString", ToRegExpString, 0, lengthFlags), memberFlags);
      properties["exec"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "exec", _defaultExec, 1, lengthFlags), memberFlags);
      properties["test"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "test", Test, 1, lengthFlags), memberFlags);
      properties["dotAll"] = InstanceAccessor("get dotAll", r => r.DotAll);
      properties["flags"] = ReadOnlyAccessor(new ClrFunctionInstance(Engine, "get flags", Flags, 0, lengthFlags));
      properties["global"] = InstanceAccessor("get global", r => r.Global);
      properties["ignoreCase"] = InstanceAccessor("get ignoreCase", r => r.IgnoreCase);
      properties["multiline"] = InstanceAccessor("get multiline", r => r.Multiline);
      properties["source"] = ReadOnlyAccessor(new ClrFunctionInstance(Engine, "get source", Source, 0, lengthFlags));
      properties["sticky"] = InstanceAccessor("get sticky", r => r.Sticky);
      properties["unicode"] = InstanceAccessor("get unicode", r => r.FullUnicode);
      SetProperties(properties);

      var symbols = new SymbolDictionary(5);
      symbols[GlobalSymbolRegistry.Match] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.match]", Match, 1, lengthFlags), memberFlags);
      symbols[GlobalSymbolRegistry.MatchAll] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.matchAll]", MatchAll, 1, lengthFlags), memberFlags);
      symbols[GlobalSymbolRegistry.Replace] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.replace]", Replace, 2, lengthFlags), memberFlags);
      symbols[GlobalSymbolRegistry.Search] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.search]", Search, 1, lengthFlags), memberFlags);
      symbols[GlobalSymbolRegistry.Split] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "[Symbol.split]", Split, 2, lengthFlags), memberFlags);
      SetSymbols(symbols);
  }
  /// <summary>
  /// Getter for <c>source</c>: returns the pattern text with every bare "/" escaped so the
  /// result can be placed between the slashes of a regular expression literal.
  /// </summary>
  /// <param name="thisObj">Receiver; the prototype itself yields "(?:)", anything that is not a RegExpInstance raises a TypeError.</param>
  /// <param name="arguments">Unused.</param>
  private JsValue Source(JsValue thisObj, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObj, this))
      {
          return DefaultSource;
      }

      if (!(thisObj is RegExpInstance r))
      {
          return ExceptionHelper.ThrowTypeError<JsValue>(_engine);
      }

      return EscapeUnescapedSlashes(r.Source);
  }

  /// <summary>
  /// Escapes "/" characters that are not already part of a backslash escape sequence.
  /// Existing escapes are copied verbatim, so an already escaped slash is not escaped twice.
  /// </summary>
  private static string EscapeUnescapedSlashes(string source)
  {
      if (source.IndexOf('/') < 0)
      {
          // nothing to escape, avoid the allocation
          return source;
      }

      var builder = new System.Text.StringBuilder(source.Length + 8);
      for (var i = 0; i < source.Length; i++)
      {
          var c = source[i];
          if (c == '\\' && i + 1 < source.Length)
          {
              // keep the escape sequence intact, whatever character it escapes
              builder.Append(c);
              builder.Append(source[++i]);
          }
          else if (c == '/')
          {
              builder.Append("\\/");
          }
          else
          {
              builder.Append(c);
          }
      }

      return builder.ToString();
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// Replaces the match (or every match when the receiver's "global" is truthy) in the first
  /// argument with the second argument, which is either a replacement template or a callable.
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">[0] input string, [1] replacement template or replacer function.</param>
  /// <returns>The string with replacements applied.</returns>
  private JsValue Replace(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // a "$" in the template may introduce substitutions, which need the heavier logic below
      bool mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.IndexOf('$') != -1;
      }

      var fullUnicode = false;
      var global = TypeConverter.ToBoolean(rx.Get(PropertyGlobal));
      if (global)
      {
          fullUnicode = TypeConverter.ToBoolean(rx.Get("unicode"));
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      // fast path: plain RegExpInstance with the built-in exec, no unicode/sticky handling
      // and no "$" in the template, so the .NET regex can perform the replacement directly
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is RegExpInstance rei && rei.TryGetDefaultRegExpExec(out _))
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              string Evaluator(Match match)
              {
                  var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
                  replacerArgs.Add(match.Value);
                  for (var i = 1; i < match.Groups.Count; i++)
                  {
                      // a group that did not participate in the match is passed as undefined
                      var group = match.Groups[i];
                      JsValue captured = Undefined;
                      if (group.Success)
                      {
                          captured = group.Value;
                      }

                      replacerArgs.Add(captured);
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);

                  // no named captures
                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // spec path: collect every exec result first
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // step over an empty match so the next exec makes progress
              var thisIndex = (int) TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(RegExpInstance.PropertyLastIndex, nextIndex, true);
          }
      }

      var accumulatedResult = "";
      var nextSourcePosition = 0;

      // captures as text for template substitution (missing groups become ""), and
      // captures as values for replacer functions (missing groups stay undefined)
      var captures = new List<string>();
      var captureValues = new List<JsValue>();
      foreach (var result in results)
      {
          var nCaptures = (int) result.Length;
          nCaptures = System.Math.Max(nCaptures - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;
          var position = (int) TypeConverter.ToInteger(result.Get(PropertyIndex));
          position = System.Math.Max(System.Math.Min(position, lengthS), 0);

          captures.Clear();
          captureValues.Clear();
          for (uint n = 1; n <= nCaptures; n++)
          {
              var capN = result.Get(n);
              if (capN.IsUndefined())
              {
                  captures.Add("");
                  captureValues.Add(Undefined);
              }
              else
              {
                  var text = TypeConverter.ToString(capN);
                  captures.Add(text);
                  captureValues.Add(text);
              }
          }

          var namedCaptures = result.Get("groups");
          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>(captureValues.Count + 4);
              replacerArgs.Add(matched);
              replacerArgs.AddRange(captureValues);
              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_engine, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // results positioned before the already consumed part of the input are ignored
          if (position >= nextSourcePosition)
          {
              accumulatedResult = accumulatedResult +
                                  s.Substring(nextSourcePosition, position - nextSourcePosition) +
                                  replacement;
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition >= lengthS)
      {
          return accumulatedResult;
      }

      return accumulatedResult + s.Substring(nextSourcePosition);
  }
  /// <summary>
  /// Invokes a replacer function with an undefined receiver and converts its result to a string.
  /// </summary>
  /// <param name="replacer">Value that must implement <see cref="ICallable"/>.</param>
  /// <param name="replacerArgs">Arguments handed to the replacer, in order.</param>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      return TypeConverter.ToString(callable.Call(Undefined, replacerArgs.ToArray()));
  }
  /// <summary>
  /// Expands the "$" patterns of a replacement template for a single match.
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The full input string.</param>
  /// <param name="position">Index of the match within <paramref name="str"/>.</param>
  /// <param name="captures">Captured groups; element 0 is capture 1.</param>
  /// <param name="namedCaptures">Object holding named groups, or undefined/null when there are none.</param>
  /// <param name="replacement">The replacement template.</param>
  /// <returns>The template with all recognized patterns substituted.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // If there is no pattern, return the replacement as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      var hasNamedCaptures = !(namedCaptures is null) && !namedCaptures.IsUndefined();

      // Patterns
      // $$        Inserts a "$".
      // $&        Inserts the matched substring.
      // $`        Inserts the portion of the string that precedes the matched substring.
      // $'        Inserts the portion of the string that follows the matched substring.
      // $n or $nn Where n or nn are decimal digits, inserts the nth parenthesized submatch string.
      // $<name>   Inserts the named group "name" when named captures are available.
      using (var replacementBuilder = StringBuilderPool.Rent())
      {
          var builder = replacementBuilder.Builder;
          for (int i = 0; i < replacement.Length; i++)
          {
              char c = replacement[i];
              if (c != '$' || i >= replacement.Length - 1)
              {
                  // ordinary character, or a "$" that ends the template
                  builder.Append(c);
                  continue;
              }

              c = replacement[++i];
              switch (c)
              {
                  case '$':
                      builder.Append('$');
                      break;
                  case '&':
                      builder.Append(matched);
                      break;
                  case '`':
                      // clamp so an out-of-range position cannot make Substring throw
                      builder.Append(str.Substring(0, System.Math.Max(0, System.Math.Min(position, str.Length))));
                      break;
                  case '\'':
                  {
                      // nothing follows the match when it ends at or beyond the end of the input
                      var tailPosition = position + matched.Length;
                      if (tailPosition >= 0 && tailPosition < str.Length)
                      {
                          builder.Append(str.Substring(tailPosition));
                      }

                      break;
                  }
                  case '<':
                  {
                      var closing = hasNamedCaptures ? replacement.IndexOf('>', i + 1) : -1;
                      if (closing < 0)
                      {
                          // no named captures or no closing bracket: literal text
                          builder.Append('$');
                          builder.Append(c);
                      }
                      else
                      {
                          var groupName = replacement.Substring(i + 1, closing - i - 1);
                          var capture = namedCaptures.Get(groupName);
                          if (!capture.IsUndefined())
                          {
                              builder.Append(TypeConverter.ToString(capture));
                          }

                          i = closing;
                      }

                      break;
                  }
                  default:
                  {
                      // only ASCII digits introduce a capture reference
                      if (c >= '0' && c <= '9')
                      {
                          int matchNumber1 = c - '0';

                          // The match number can be one or two digits long.
                          int matchNumber2 = 0;
                          if (i < replacement.Length - 1)
                          {
                              var next = replacement[i + 1];
                              if (next >= '0' && next <= '9')
                              {
                                  matchNumber2 = matchNumber1 * 10 + (next - '0');
                              }
                          }

                          // Try the two digit capture first.
                          if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                          {
                              builder.Append(captures[matchNumber2 - 1]);
                              i++;
                          }
                          else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                          {
                              builder.Append(captures[matchNumber1 - 1]);
                          }
                          else
                          {
                              // Capture does not exist: emit the "$" and let the next
                              // iteration append the digit as ordinary text.
                              builder.Append('$');
                              i--;
                          }
                      }
                      else
                      {
                          // Unknown replacement pattern.
                          builder.Append('$');
                          builder.Append(c);
                      }

                      break;
                  }
              }
          }

          return replacementBuilder.ToString();
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// Splits the first argument around matches of the receiver, returning at most
  /// <c>limit</c> (second argument) elements; capture groups are spliced into the result.
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">[0] input string, [1] optional limit.</param>
  /// <returns>An array holding the pieces.</returns>
  private JsValue Split(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _engine.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.IndexOf('u') > -1;

      // the splitter always carries the sticky flag
      var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });
      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _engine.Array.ConstructFast(0);
      }

      if (s.Length == 0)
      {
          // an empty input yields [] when the splitter matches it and [""] otherwise
          var a = _engine.Array.ConstructFast(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is RegExpInstance R && R.TryGetDefaultRegExpExec(out _))
      {
          // we can take faster path
          if (R.Source == RegExpInstance.regExpForMatchingAllCharacters)
          {
              // empty pattern: plain per-character split, still honoring the requested limit
              var characterLimit = (uint) System.Math.Min(lim, s.Length);
              return StringPrototype.SplitWithStringSeparator(_engine, "", s, characterLimit);
          }

          var a = (ArrayInstance) Engine.Array.Construct(Arguments.Empty);
          var match = R.Value.Match(s, 0);

          int lastIndex = 0;
          uint index = 0;
          while (match.Success)
          {
              // empty matches at the start, at the end, or right after the previous
              // separator do not split anything
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  match = match.NextMatch();
                  continue;
              }

              // Add the text preceding the separator.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);
              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;

              // Capture groups of the separator are included in the result.
              for (int i = 1; i < match.Groups.Count; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = group.Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);
                  if (index >= lim)
                  {
                      return a;
                  }
              }

              match = match.NextMatch();
          }

          // Whatever follows the last separator (the whole input when nothing split it) is the
          // final element. Every path that reaches the limit returned above, so there is room.
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);
          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Splits <paramref name="s"/> by repeatedly running <paramref name="splitter"/> through
  /// <see cref="RegExpExec"/> at each candidate index.
  /// </summary>
  /// <param name="s">Input string; the caller handles the empty string separately.</param>
  /// <param name="splitter">Regular expression object used for matching.</param>
  /// <param name="unicodeMatching">Whether index advancement steps over surrogate pairs.</param>
  /// <param name="lengthA">Number of elements already produced; used as the next array index.</param>
  /// <param name="lim">Maximum number of elements to produce.</param>
  /// <returns>The array of pieces and captures.</returns>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var pieces = _engine.Array.ConstructFast(0);
      var size = s.Length;
      var pieceStart = 0;
      var searchIndex = 0;

      while (searchIndex < size)
      {
          splitter.Set(RegExpInstance.PropertyLastIndex, searchIndex, true);
          var execResult = RegExpExec(splitter, s);
          if (execResult.IsNull())
          {
              searchIndex = AdvanceStringIndex(s, searchIndex, unicodeMatching);
              continue;
          }

          var matchEnd = System.Math.Min((int) TypeConverter.ToLength(splitter.Get(RegExpInstance.PropertyLastIndex)), size);
          if (matchEnd == pieceStart)
          {
              // the match consumed nothing beyond the current piece start; try the next index
              searchIndex = AdvanceStringIndex(s, searchIndex, unicodeMatching);
              continue;
          }

          pieces.SetIndexValue(lengthA, s.Substring(pieceStart, searchIndex - pieceStart), updateLength: true);
          lengthA++;
          if (lengthA == lim)
          {
              return pieces;
          }

          pieceStart = matchEnd;

          // element 0 of the exec result is the match itself; the rest are captures
          var captureCount = System.Math.Max((int) TypeConverter.ToLength(execResult.Get(CommonProperties.Length)) - 1, 0);
          for (var i = 1; i <= captureCount; i++)
          {
              pieces.SetIndexValue(lengthA, execResult.Get(i), updateLength: true);
              lengthA++;
              if (lengthA == lim)
              {
                  return pieces;
              }
          }

          searchIndex = pieceStart;
      }

      pieces.SetIndexValue(lengthA, s.Substring(pieceStart, size - pieceStart), updateLength: true);
      return pieces;
  }
  /// <summary>
  /// Getter for <c>flags</c>: reads global, ignoreCase, multiline, dotAll, unicode and sticky
  /// from the receiver, in that order, and returns the letters of the truthy ones ("gimsuy").
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">Unused.</param>
  private JsValue Flags(JsValue thisObj, JsValue[] arguments)
  {
      JsValue target = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.flags");

      var letters = new char[6];
      var count = 0;

      // Appends the flag letter when the named property of the receiver is truthy.
      void Probe(JsValue property, char letter)
      {
          if (TypeConverter.ToBoolean(target.Get(property)))
          {
              letters[count++] = letter;
          }
      }

      Probe(PropertyGlobal, 'g');
      Probe("ignoreCase", 'i');
      Probe("multiline", 'm');
      Probe("dotAll", 's');
      Probe("unicode", 'u');
      Probe(PropertySticky, 'y');

      return new string(letters, 0, count);
  }
  /// <summary>
  /// Implements <c>toString</c>: "/" + source + "/" + flags, both read from the receiver.
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">Unused.</param>
  private JsValue ToRegExpString(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.toString");
      var source = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));
      return string.Concat("/", source, "/", flagText);
  }
  /// <summary>
  /// Implements <c>test</c>: reports whether the receiver matches the first argument.
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to test.</param>
  /// <returns>A boolean value.</returns>
  private JsValue Test(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // Fast paths apply only while the built-in exec is in place; a user supplied
      // exec has to be honored through RegExpExec below.
      if (r is RegExpInstance R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          var lastIndex = (int) TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));

          // Starting beyond the end can never match: reset lastIndex so the next call starts
          // over. The negative check covers a value that overflowed the int conversion.
          // lastIndex == s.Length is still attempted because an empty match may succeed there.
          if (lastIndex < 0 || lastIndex > s.Length)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          var m = R.Value.Match(s, lastIndex);
          if (!m.Success || (R.Sticky && m.Index != lastIndex))
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(RegExpInstance.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// Implements <c>[Symbol.search]</c>: runs one exec from index 0 and returns the match index,
  /// or -1 when there is no match. The receiver's lastIndex is restored afterwards.
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to search.</param>
  private JsValue Search(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.search");
      var input = TypeConverter.ToString(arguments.At(0));

      // only write lastIndex when it actually has to change
      var savedLastIndex = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(savedLastIndex, 0))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, input);

      var lastIndexAfterExec = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(lastIndexAfterExec, savedLastIndex))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// Implements <c>[Symbol.match]</c>: a single exec for non-global receivers, otherwise an
  /// array of every matched substring (or null when nothing matches).
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to match against.</param>
  private JsValue Match(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var global = TypeConverter.ToBoolean(rx.Get(PropertyGlobal));
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = TypeConverter.ToBoolean(rx.Get("unicode"));
      rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is RegExpInstance rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = Engine.Array.ConstructFast(0);

          if (rei.Sticky)
          {
              var match = rei.Value.Match(s);
              if (!match.Success || match.Index != 0)
              {
                  return Null;
              }

              // Sticky matching only accepts a match that starts exactly where the previous
              // one ended (one position further after an empty match).
              uint count = 0;
              var expectedIndex = 0;
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(count++, match.Value, updateLength: false);
                  expectedIndex = match.Length == 0
                      ? match.Index + 1
                      : match.Index + match.Length;
                  match = match.NextMatch();
              }

              a.SetLength(count);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }

              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Global match through <see cref="RegExpExec"/>: collects the matched substring of every
  /// exec result until exec returns null.
  /// </summary>
  /// <param name="rx">Regular expression object to execute.</param>
  /// <param name="s">Input string.</param>
  /// <param name="fullUnicode">Whether stepping over an empty match honors surrogate pairs.</param>
  /// <returns>The array of matches, or null when there was none.</returns>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var matches = Engine.Array.ConstructFast(0);
      for (uint count = 0; ; count++)
      {
          var execResult = RegExpExec(rx, s);
          if (execResult.IsNull())
          {
              matches.SetLength(count);
              if (count == 0)
              {
                  return Null;
              }

              return matches;
          }

          var matched = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          matches.SetIndexValue(count, matched, updateLength: false);

          if (matched.Length == 0)
          {
              // an empty match would repeat forever; move lastIndex forward manually
              var current = (int) TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              rx.Set(RegExpInstance.PropertyLastIndex, AdvanceStringIndex(s, current, fullUnicode), true);
          }
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// Constructs a copy of the receiver through its species constructor, carries over
  /// lastIndex and returns a RegExp string iterator over the first argument.
  /// </summary>
  /// <param name="thisObj">Receiver; must be an object.</param>
  /// <param name="arguments">[0] the string to iterate matches of.</param>
  private JsValue MatchAll(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.matchAll");
      var input = TypeConverter.ToString(arguments.At(0));
      var species = SpeciesConstructor(regExp, _engine.RegExp);
      var flags = TypeConverter.ToJsString(regExp.Get(PropertyFlags));

      var matcher = Construct(species, new JsValue[] { regExp, flags });

      // the copy starts where the receiver currently is
      var lastIndex = TypeConverter.ToLength(regExp.Get(RegExpInstance.PropertyLastIndex));
      matcher.Set(RegExpInstance.PropertyLastIndex, lastIndex, true);

      var isGlobal = flags.IndexOf('g') != -1;
      var isFullUnicode = flags.IndexOf('u') != -1;
      return _engine.Iterator.CreateRegExpStringIterator(matcher, input, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>. In unicode mode a high surrogate
  /// immediately followed by a low surrogate is stepped over as one unit (index + 2).
  /// </summary>
  /// <param name="s">String being traversed.</param>
  /// <param name="index">Current index.</param>
  /// <param name="unicode">Whether surrogate pairs count as a single step.</param>
  private static int AdvanceStringIndex(string s, int index, bool unicode)
  {
      var next = index + 1;
      if (unicode
          && next < s.Length
          && char.IsHighSurrogate(s[index])
          && char.IsLowSurrogate(s[next]))
      {
          return index + 2;
      }

      return next;
  }
  /// <summary>
  /// Runs the object's own <c>exec</c> when it is callable, otherwise the built-in matcher.
  /// </summary>
  /// <param name="r">Object to execute against <paramref name="s"/>.</param>
  /// <param name="s">Input string.</param>
  /// <returns>The exec result: an object or null.</returns>
  /// <remarks>
  /// A TypeError is raised when a callable exec returns something other than an object or null,
  /// or when exec is not callable and <paramref name="r"/> is not a RegExpInstance.
  /// </remarks>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      if (!(r.Get(PropertyExec) is ICallable callable))
      {
          if (r is RegExpInstance regExp)
          {
              return RegExpBuiltinExec(regExp, s);
          }

          return ExceptionHelper.ThrowTypeError<ObjectInstance>(r.Engine);
      }

      var result = callable.Call(r, new JsValue[] { s });
      if (result.IsNull() || result.IsObject())
      {
          return result;
      }

      return ExceptionHelper.ThrowTypeError<ObjectInstance>(r.Engine);
  }
  /// <summary>
  /// Checks whether the "exec" property of <paramref name="o"/> is still the built-in
  /// implementation registered by this prototype.
  /// </summary>
  /// <param name="o">Object whose "exec" property is inspected.</param>
  /// <param name="exec">The built-in exec delegate on success; otherwise null.</param>
  /// <returns>True when the built-in exec is in place.</returns>
  internal bool TryGetDefaultExec(ObjectInstance o, out Func<JsValue, JsValue[], JsValue> exec)
  {
      var isDefault = o.Get(PropertyExec) is ClrFunctionInstance candidate
                      && candidate._func == _defaultExec;
      exec = isDefault ? _defaultExec : null;
      return isDefault;
  }
  /// <summary>
  /// Built-in matching for a RegExpInstance: matches <paramref name="s"/> starting at the
  /// instance's lastIndex (0 for non-global, non-sticky instances) and returns the result
  /// array, or null when nothing matches.
  /// </summary>
  /// <param name="R">The regular expression instance.</param>
  /// <param name="s">Input string.</param>
  /// <returns>The match array, or null.</returns>
  private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
  {
      var length = s.Length;
      var lastIndex = (int) TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      if (!global && !sticky)
      {
          lastIndex = 0;
      }

      if (R.Source == RegExpInstance.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > s.Length)
          {
              // Only reachable for global/sticky instances (lastIndex is 0 otherwise);
              // reset lastIndex on failure just like the stateful path below does.
              R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var array = R.Engine.Array.ConstructFast(1);
          array.FastAddProperty(PropertyIndex, lastIndex, true, true, true);
          array.FastAddProperty(PropertyInput, s, true, true, true);
          array.SetIndexValue(0, JsString.Empty, updateLength: false);
          return array;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;

      if (!global && !sticky && !fullUnicode)
      {
          // non-stateful fast path, which is the common case: lastIndex is neither used nor written
          var m = matcher.Match(s, lastIndex);
          if (!m.Success)
          {
              return Null;
          }

          return CreateReturnValueArray(R.Engine, m, s, fullUnicode: false);
      }

      // the stateful version
      Match match;
      while (true)
      {
          if (lastIndex > length)
          {
              R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          match = R.Value.Match(s, lastIndex);
          var success = match.Success && (!sticky || match.Index == lastIndex);
          if (!success)
          {
              if (sticky)
              {
                  // sticky matching must succeed exactly at lastIndex
                  R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
                  return Null;
              }

              lastIndex = AdvanceStringIndex(s, lastIndex, fullUnicode);
          }
          else
          {
              break;
          }
      }

      var e = match.Index + match.Length;
      if (fullUnicode)
      {
          // e is an index into the Input character list, derived from S, matched by matcher.
          // Let eUTF be the smallest index into S that corresponds to the character at element e of Input.
          // If e is greater than or equal to the number of elements in Input, then eUTF is the number of code units in S.
          // Set e to eUTF.
          var indexes = StringInfo.ParseCombiningCharacters(s);
          if (match.Index < indexes.Length)
          {
              var sub = StringInfo.GetNextTextElement(s, match.Index);
              e += sub.Length - 1;
          }
      }

      R.Set(RegExpInstance.PropertyLastIndex, e, true);

      return CreateReturnValueArray(R.Engine, match, s, fullUnicode);
  }
  /// <summary>
  /// Converts a successful .NET match into the exec result array: one element per group
  /// (element 0 is the whole match) plus the "index", "input" and "groups" data properties.
  /// </summary>
  /// <param name="engine">Engine used to create the array.</param>
  /// <param name="match">The .NET match to convert.</param>
  /// <param name="inputValue">The string that was matched.</param>
  /// <param name="fullUnicode">When true, each captured value is the text element found at the capture's index.</param>
  private static ArrayInstance CreateReturnValueArray(Engine engine, Match match, string inputValue, bool fullUnicode)
  {
      var groupCount = match.Groups.Count;
      var array = engine.Array.ConstructFast((ulong) groupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, inputValue);

      for (var i = 0; i < groupCount; i++)
      {
          var group = match.Groups[i];

          // groups that did not participate in the match stay undefined
          JsValue captured = Undefined;
          if (group.Success)
          {
              captured = fullUnicode
                  ? StringInfo.GetNextTextElement(inputValue, group.Index)
                  : group.Value;
              // todo detect captured name
          }

          array.SetIndexValue((uint) i, captured, updateLength: false);
      }

      // named groups are not detected yet, so "groups" is always undefined
      array.CreateDataProperty("groups", Undefined);
      return array;
  }
  /// <summary>
  /// Implements <c>exec</c>: runs the built-in matcher on a RegExpInstance receiver.
  /// </summary>
  /// <param name="thisObj">Receiver; anything that is not a RegExpInstance raises a TypeError.</param>
  /// <param name="arguments">[0] the string to match against.</param>
  private JsValue Exec(JsValue thisObj, JsValue[] arguments)
  {
      if (thisObj is RegExpInstance regExp)
      {
          var input = TypeConverter.ToString(arguments.At(0));
          return RegExpBuiltinExec(regExp, input);
      }

      return ExceptionHelper.ThrowTypeError<JsValue>(_engine);
  }
  795. }
  796. }