RegExpPrototype.cs 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Text.RegularExpressions;
  5. using Jint.Collections;
  6. using Jint.Native.Array;
  7. using Jint.Native.Number;
  8. using Jint.Native.Object;
  9. using Jint.Native.String;
  10. using Jint.Native.Symbol;
  11. using Jint.Pooling;
  12. using Jint.Runtime;
  13. using Jint.Runtime.Descriptors;
  14. using Jint.Runtime.Interop;
  15. namespace Jint.Native.RegExp
  16. {
  17. public sealed class RegExpPrototype : ObjectInstance
  18. {
  // Property keys that are only used inside this class.
  private static readonly JsString PropertyExec = new JsString("exec");
  private static readonly JsString PropertyIndex = new JsString("index");
  private static readonly JsString PropertyInput = new JsString("input");
  private static readonly JsString PropertySticky = new JsString("sticky");
  private static readonly JsString PropertyGlobal = new JsString("global");

  // Property keys that are also visible to the rest of the assembly.
  internal static readonly JsString PropertySource = new JsString("source");
  internal static readonly JsString PropertyFlags = new JsString("flags");

  // Value returned by the "source" getter when it is invoked on the prototype object itself.
  private static readonly JsValue DefaultSource = new JsString("(?:)");

  // Constructor stored under the "constructor" property during Initialize.
  private RegExpConstructor _regExpConstructor;

  // Delegate for the built-in exec; TryGetDefaultExec compares an object's exec function against it.
  private readonly Func<JsValue, JsValue[], JsValue> _defaultExec;
  /// <summary>
  /// Creates the prototype instance and stores a delegate for the built-in <c>exec</c> implementation.
  /// </summary>
  /// <param name="engine">Engine passed to the base object instance.</param>
  private RegExpPrototype(Engine engine)
      : base(engine)
  {
      // The delegate is installed as the "exec" property in Initialize and compared against in TryGetDefaultExec.
      _defaultExec = new Func<JsValue, JsValue[], JsValue>(Exec);
  }
  /// <summary>
  /// Builds the RegExp prototype object.
  /// </summary>
  /// <param name="engine">Engine that owns the new object; its Object prototype becomes this object's prototype.</param>
  /// <param name="regExpConstructor">Constructor later exposed through the "constructor" property.</param>
  /// <returns>The newly created prototype object.</returns>
  public static RegExpPrototype CreatePrototypeObject(Engine engine, RegExpConstructor regExpConstructor)
  {
      var prototype = new RegExpPrototype(engine);
      prototype._prototype = engine.Object.PrototypeObject;
      prototype._regExpConstructor = regExpConstructor;
      return prototype;
  }
  /// <summary>
  /// Installs the string-keyed members (constructor, toString, exec, test and the flag/source getters)
  /// and the symbol-keyed members (match, matchAll, replace, search, split) on the prototype.
  /// </summary>
  protected override void Initialize()
  {
      const PropertyFlag lengthFlags = PropertyFlag.Configurable;
      const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;

      // Wraps a CLR delegate as a script-callable function with the given name and length.
      ClrFunctionInstance CreateFunction(string name, Func<JsValue, JsValue[], JsValue> implementation, int length)
      {
          return new ClrFunctionInstance(Engine, name, implementation, length, lengthFlags);
      }

      // Data property holding a function.
      PropertyDescriptor CreateMethodDescriptor(string name, Func<JsValue, JsValue[], JsValue> implementation, int length)
      {
          return new PropertyDescriptor(CreateFunction(name, implementation, length), propertyFlags);
      }

      // Accessor property with a getter only.
      GetSetPropertyDescriptor CreateGetterDescriptor(string name, Func<JsValue, JsValue[], JsValue> getter)
      {
          return new GetSetPropertyDescriptor(
              get: CreateFunction(name, getter, 0),
              set: Undefined,
              flags: PropertyFlag.Configurable);
      }

      // Getter that reads a value off a RegExpInstance; yields undefined for the prototype itself
      // and raises a TypeError for any other receiver.
      GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<RegExpInstance, JsValue> valueExtractor)
      {
          return CreateGetterDescriptor(name, (thisObj, arguments) =>
          {
              if (ReferenceEquals(thisObj, this))
              {
                  return Undefined;
              }

              if (thisObj is RegExpInstance r)
              {
                  return valueExtractor(r);
              }

              return ExceptionHelper.ThrowTypeError<JsValue>(_engine);
          });
      }

      var properties = new PropertyDictionary(12, checkExistingKeys: false)
      {
          ["constructor"] = new PropertyDescriptor(_regExpConstructor, propertyFlags),
          ["toString"] = CreateMethodDescriptor("toString", ToRegExpString, 0),
          ["exec"] = CreateMethodDescriptor("exec", _defaultExec, 1),
          ["test"] = CreateMethodDescriptor("test", Test, 1),
          ["dotAll"] = CreateGetAccessorDescriptor("get dotAll", r => r.DotAll),
          ["flags"] = CreateGetterDescriptor("get flags", Flags),
          ["global"] = CreateGetAccessorDescriptor("get global", r => r.Global),
          ["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", r => r.IgnoreCase),
          ["multiline"] = CreateGetAccessorDescriptor("get multiline", r => r.Multiline),
          ["source"] = CreateGetterDescriptor("get source", Source),
          ["sticky"] = CreateGetAccessorDescriptor("get sticky", r => r.Sticky),
          ["unicode"] = CreateGetAccessorDescriptor("get unicode", r => r.FullUnicode)
      };
      SetProperties(properties);

      var symbols = new SymbolDictionary(5)
      {
          [GlobalSymbolRegistry.Match] = CreateMethodDescriptor("[Symbol.match]", Match, 1),
          [GlobalSymbolRegistry.MatchAll] = CreateMethodDescriptor("[Symbol.matchAll]", MatchAll, 1),
          [GlobalSymbolRegistry.Replace] = CreateMethodDescriptor("[Symbol.replace]", Replace, 2),
          [GlobalSymbolRegistry.Search] = CreateMethodDescriptor("[Symbol.search]", Search, 1),
          [GlobalSymbolRegistry.Split] = CreateMethodDescriptor("[Symbol.split]", Split, 2)
      };
      SetSymbols(symbols);
  }
  /// <summary>
  /// Getter for <c>RegExp.prototype.source</c>.
  /// Returns "(?:)" when invoked on the prototype itself, raises a TypeError for receivers that are
  /// not RegExp instances, and otherwise returns the pattern with forward slashes escaped.
  /// </summary>
  private JsValue Source(JsValue thisObj, JsValue[] arguments)
  {
      if (ReferenceEquals(thisObj, this))
      {
          return DefaultSource;
      }

      if (!(thisObj is RegExpInstance r))
      {
          return ExceptionHelper.ThrowTypeError<JsValue>(_engine);
      }

      return EscapeForwardSlashes(r.Source);
  }

  /// <summary>
  /// Inserts a backslash before every forward slash that is not already escaped.
  /// A blanket Replace("/", "\\/") would turn an existing "\/" into "\\/", i.e. an escaped
  /// backslash followed by a bare slash, which changes the meaning of the pattern.
  /// </summary>
  private static string EscapeForwardSlashes(string pattern)
  {
      if (pattern.IndexOf('/') < 0)
      {
          return pattern;
      }

      using (var escaped = StringBuilderPool.Rent())
      {
          // true while the previous character was a backslash that itself was not escaped
          var escapePending = false;
          foreach (var c in pattern)
          {
              if (c == '/' && !escapePending)
              {
                  escaped.Builder.Append('\\');
              }

              escaped.Builder.Append(c);
              escapePending = c == '\\' && !escapePending;
          }

          return escaped.ToString();
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  /// Replaces the match (or, for a global regular expression, every match) of the receiver in the
  /// first argument with the second argument, which is either a replacement template or a function.
  /// </summary>
  /// <remarks>
  /// Capture groups that did not participate in a match are handed to a replacer function as
  /// undefined (template replacement still treats them as empty strings), and the lastIndex
  /// update after an empty match uses the throwing form of Set, as MatchSlow does.
  /// </remarks>
  private JsValue Replace(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.replace");
      var s = TypeConverter.ToString(arguments.At(0));
      var lengthS = s.Length;
      var replaceValue = arguments.At(1);
      var functionalReplace = replaceValue is ICallable;

      // a '$' in the template means substitution patterns may be present, which the fast path cannot handle
      bool mayHaveNamedCaptures = false;
      if (!functionalReplace)
      {
          var value = TypeConverter.ToString(replaceValue);
          replaceValue = value;
          mayHaveNamedCaptures = value.IndexOf('$') != -1;
      }

      var fullUnicode = false;
      var global = TypeConverter.ToBoolean(rx.Get(PropertyGlobal));
      if (global)
      {
          fullUnicode = TypeConverter.ToBoolean(rx.Get("unicode"));
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      // check if we can access fast path
      if (!fullUnicode
          && !mayHaveNamedCaptures
          && !TypeConverter.ToBoolean(rx.Get(PropertySticky))
          && rx is RegExpInstance rei && rei.TryGetDefaultRegExpExec(out _))
      {
          var count = global ? int.MaxValue : 1;

          string result;
          if (functionalReplace)
          {
              string Evaluator(Match match)
              {
                  var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
                  replacerArgs.Add(match.Value);
                  for (var i = 1; i < match.Groups.Count; i++)
                  {
                      var capture = match.Groups[i];

                      // a group that did not participate in the match is undefined, not ""
                      var captureValue = Undefined;
                      if (capture.Success)
                      {
                          captureValue = capture.Value;
                      }

                      replacerArgs.Add(captureValue);
                  }

                  replacerArgs.Add(match.Index);
                  replacerArgs.Add(s);

                  // no named captures
                  return CallFunctionalReplace(replaceValue, replacerArgs);
              }

              result = rei.Value.Replace(s, Evaluator, count);
          }
          else
          {
              result = rei.Value.Replace(s, TypeConverter.ToString(replaceValue), count);
          }

          // NOTE(review): this resets lastIndex for non-global expressions as well — confirm that is intended
          rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero);
          return result;
      }

      // slow path: collect every exec result first, then build the output
      var results = new List<ObjectInstance>();
      while (true)
      {
          var result = RegExpExec(rx, s);
          if (result.IsNull())
          {
              break;
          }

          results.Add((ObjectInstance) result);
          if (!global)
          {
              break;
          }

          var matchStr = TypeConverter.ToString(result.Get(0));
          if (matchStr == "")
          {
              // step past an empty match so the loop makes progress
              var thisIndex = (int) TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              var nextIndex = AdvanceStringIndex(s, thisIndex, fullUnicode);
              rx.Set(RegExpInstance.PropertyLastIndex, nextIndex, true);
          }
      }

      var accumulatedResult = "";
      var nextSourcePosition = 0;

      // string form of the captures (undefined becomes "") for template substitution
      var captures = new List<string>();
      // value form of the captures (undefined is preserved) for replacer functions
      var captureValues = new List<JsValue>();
      foreach (var result in results)
      {
          var nCaptures = (int) result.Length;
          nCaptures = System.Math.Max(nCaptures - 1, 0);
          var matched = TypeConverter.ToString(result.Get(0));
          var matchLength = matched.Length;
          var position = (int) TypeConverter.ToInteger(result.Get(PropertyIndex));
          position = System.Math.Max(System.Math.Min(position, lengthS), 0);

          uint n = 1;
          captures.Clear();
          captureValues.Clear();
          while (n <= nCaptures)
          {
              var capN = result.Get(n);
              if (capN.IsUndefined())
              {
                  captures.Add("");
                  captureValues.Add(Undefined);
              }
              else
              {
                  var value = TypeConverter.ToString(capN);
                  captures.Add(value);
                  captureValues.Add(value);
              }

              n++;
          }

          var namedCaptures = result.Get("groups");
          string replacement;
          if (functionalReplace)
          {
              var replacerArgs = new List<JsValue>(captureValues.Count + 4);
              replacerArgs.Add(matched);
              replacerArgs.AddRange(captureValues);
              replacerArgs.Add(position);
              replacerArgs.Add(s);
              if (!namedCaptures.IsUndefined())
              {
                  replacerArgs.Add(namedCaptures);
              }

              replacement = CallFunctionalReplace(replaceValue, replacerArgs);
          }
          else
          {
              if (!namedCaptures.IsUndefined())
              {
                  namedCaptures = TypeConverter.ToObject(_engine, namedCaptures);
              }

              replacement = GetSubstitution(matched, s, position, captures.ToArray(), namedCaptures, TypeConverter.ToString(replaceValue));
          }

          // results whose position lies before the already consumed part are ignored
          if (position >= nextSourcePosition)
          {
              accumulatedResult = accumulatedResult +
                                  s.Substring(nextSourcePosition, position - nextSourcePosition) +
                                  replacement;
              nextSourcePosition = position + matchLength;
          }
      }

      if (nextSourcePosition >= lengthS)
      {
          return accumulatedResult;
      }

      return accumulatedResult + s.Substring(nextSourcePosition);
  }
  /// <summary>
  /// Invokes the replacer function with an undefined receiver and the collected arguments,
  /// and converts whatever it returns to a string.
  /// </summary>
  private static string CallFunctionalReplace(JsValue replacer, List<JsValue> replacerArgs)
  {
      var callable = (ICallable) replacer;
      var returned = callable.Call(Undefined, replacerArgs.ToArray());
      return TypeConverter.ToString(returned);
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-getsubstitution
  /// Expands the substitution patterns of a replacement template:
  /// <c>$$</c> inserts "$", <c>$&amp;</c> the matched text, <c>$`</c> the text before the match,
  /// <c>$'</c> the text after the match, <c>$n</c>/<c>$nn</c> the n-th capture and
  /// <c>$&lt;name&gt;</c> the named capture (only when <paramref name="namedCaptures"/> is defined).
  /// Anything else following a "$" is copied through unchanged.
  /// </summary>
  /// <param name="matched">The matched substring.</param>
  /// <param name="str">The string the match was found in.</param>
  /// <param name="position">Index of the match inside <paramref name="str"/>.</param>
  /// <param name="captures">Capture values, first capture at index 0.</param>
  /// <param name="namedCaptures">Object holding named captures, or undefined.</param>
  /// <param name="replacement">The replacement template.</param>
  /// <returns>The expanded replacement text.</returns>
  internal static string GetSubstitution(
      string matched,
      string str,
      int position,
      string[] captures,
      JsValue namedCaptures,
      string replacement)
  {
      // Without any '$' the template is used as is.
      if (replacement.IndexOf('$') < 0)
      {
          return replacement;
      }

      using (var replacementBuilder = StringBuilderPool.Rent())
      {
          var builder = replacementBuilder.Builder;
          for (int i = 0; i < replacement.Length; i++)
          {
              char c = replacement[i];
              if (c != '$' || i >= replacement.Length - 1)
              {
                  // ordinary character, or a '$' at the very end of the template
                  builder.Append(c);
                  continue;
              }

              c = replacement[++i];
              switch (c)
              {
                  case '$':
                      builder.Append('$');
                      break;

                  case '&':
                      builder.Append(matched);
                      break;

                  case '`':
                      builder.Append(str.Substring(0, position));
                      break;

                  case '\'':
                  {
                      // The match may claim to extend past the end of the string (for example when it
                      // comes from a user supplied exec); the tail is empty in that case.
                      var tailPosition = position + matched.Length;
                      if (tailPosition < str.Length)
                      {
                          builder.Append(str.Substring(tailPosition));
                      }

                      break;
                  }

                  case '<':
                  {
                      var closingIndex = replacement.IndexOf('>', i + 1);
                      if (closingIndex == -1 || namedCaptures is null || namedCaptures.IsUndefined())
                      {
                          // no named groups available or unterminated name: keep "$<" literally
                          builder.Append('$');
                          builder.Append(c);
                      }
                      else
                      {
                          var groupName = replacement.Substring(i + 1, closingIndex - i - 1);
                          var capture = namedCaptures.Get(groupName);
                          if (!capture.IsUndefined())
                          {
                              builder.Append(TypeConverter.ToString(capture));
                          }

                          // continue after the closing '>'
                          i = closingIndex;
                      }

                      break;
                  }

                  default:
                  {
                      // Only ASCII digits form capture references.
                      if (c < '0' || c > '9')
                      {
                          // Unknown replacement pattern.
                          builder.Append('$');
                          builder.Append(c);
                          break;
                      }

                      int matchNumber1 = c - '0';

                      // The match number can be one or two digits long.
                      int matchNumber2 = 0;
                      if (i < replacement.Length - 1)
                      {
                          var next = replacement[i + 1];
                          if (next >= '0' && next <= '9')
                          {
                              matchNumber2 = matchNumber1 * 10 + (next - '0');
                          }
                      }

                      // Try the two digit capture first.
                      if (matchNumber2 > 0 && matchNumber2 <= captures.Length)
                      {
                          builder.Append(captures[matchNumber2 - 1]);
                          i++;
                      }
                      else if (matchNumber1 > 0 && matchNumber1 <= captures.Length)
                      {
                          builder.Append(captures[matchNumber1 - 1]);
                      }
                      else
                      {
                          // Capture does not exist: emit the '$' and let the next iteration copy the digit.
                          builder.Append('$');
                          i--;
                      }

                      break;
                  }
              }
          }

          return replacementBuilder.ToString();
      }
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  /// Splits the first argument around matches of the receiver; the optional second argument
  /// limits the number of entries in the resulting array.
  /// </summary>
  /// <remarks>
  /// The fast path always appends the text after the last separator, also when the final matches
  /// are skipped empty matches, and the empty-pattern shortcut honours the limit.
  /// </remarks>
  private JsValue Split(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.split");
      var s = TypeConverter.ToString(arguments.At(0));
      var limit = arguments.At(1);
      var c = SpeciesConstructor(rx, _engine.RegExp);
      var flags = TypeConverter.ToJsString(rx.Get(PropertyFlags));
      var unicodeMatching = flags.IndexOf('u') > -1;

      // the splitter always carries the sticky flag
      var newFlags = flags.IndexOf('y') > -1 ? flags : new JsString(flags.ToString() + 'y');
      var splitter = Construct(c, new JsValue[]
      {
          rx,
          newFlags
      });

      uint lengthA = 0;
      var lim = limit.IsUndefined() ? NumberConstructor.MaxSafeInteger : TypeConverter.ToUint32(limit);

      if (lim == 0)
      {
          return _engine.Array.ConstructFast(0);
      }

      if (s.Length == 0)
      {
          // empty input: the result is [] when the splitter matches it and [""] otherwise
          var a = _engine.Array.ConstructFast(0);
          var z = RegExpExec(splitter, s);
          if (!z.IsNull())
          {
              return a;
          }

          a.SetIndexValue(0, s, updateLength: true);
          return a;
      }

      if (!unicodeMatching && rx is RegExpInstance R && R.TryGetDefaultRegExpExec(out _))
      {
          // we can take faster path
          if (R.Source == RegExpInstance.regExpForMatchingAllCharacters)
          {
              // if empty string, just a string split; never produce more entries than the limit allows
              var entryLimit = (uint) System.Math.Min(lim, s.Length);
              return StringPrototype.SplitWithStringSeparator(_engine, "", s, entryLimit);
          }

          var a = (ArrayInstance) Engine.Array.Construct(Arguments.Empty);

          int lastIndex = 0;
          uint index = 0;
          for (var match = R.Value.Match(s, 0); match.Success; match = match.NextMatch())
          {
              // empty matches at the start, at the end or directly after the previous separator do not split
              if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
              {
                  continue;
              }

              // Add the text between the previous separator and this one.
              a.SetIndexValue(index++, s.Substring(lastIndex, match.Index - lastIndex), updateLength: true);
              if (index >= lim)
              {
                  return a;
              }

              lastIndex = match.Index + match.Length;

              // Captured groups of the separator are spliced into the result.
              for (int i = 1; i < match.Groups.Count; i++)
              {
                  var group = match.Groups[i];
                  var item = Undefined;
                  if (group.Captures.Count > 0)
                  {
                      item = group.Value;
                  }

                  a.SetIndexValue(index++, item, updateLength: true);
                  if (index >= lim)
                  {
                      return a;
                  }
              }
          }

          // Add the last part of the split (the whole string when nothing matched).
          a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);
          return a;
      }

      return SplitSlow(s, splitter, unicodeMatching, lengthA, lim);
  }
  /// <summary>
  /// Generic split loop that drives the splitter through RegExpExec and its lastIndex property.
  /// </summary>
  /// <param name="s">The string to split.</param>
  /// <param name="splitter">The object whose exec is used to find separators.</param>
  /// <param name="unicodeMatching">Whether index advancement skips whole surrogate pairs.</param>
  /// <param name="lengthA">Index at which the first entry is written.</param>
  /// <param name="lim">Entry count at which the array is returned early.</param>
  private JsValue SplitSlow(string s, ObjectInstance splitter, bool unicodeMatching, uint lengthA, long lim)
  {
      var parts = _engine.Array.ConstructFast(0);
      var size = s.Length;
      var segmentStart = 0;
      var searchIndex = 0;

      while (searchIndex < size)
      {
          splitter.Set(RegExpInstance.PropertyLastIndex, searchIndex, true);
          var execResult = RegExpExec(splitter, s);
          if (execResult.IsNull())
          {
              // no separator at this position, try the next one
              searchIndex = AdvanceStringIndex(s, searchIndex, unicodeMatching);
              continue;
          }

          var reportedEnd = (int) TypeConverter.ToLength(splitter.Get(RegExpInstance.PropertyLastIndex));
          var separatorEnd = System.Math.Min(reportedEnd, size);
          if (separatorEnd == segmentStart)
          {
              // separator ends where the current segment starts, keep scanning
              searchIndex = AdvanceStringIndex(s, searchIndex, unicodeMatching);
              continue;
          }

          parts.SetIndexValue(lengthA, s.Substring(segmentStart, searchIndex - segmentStart), updateLength: true);
          lengthA++;
          if (lengthA == lim)
          {
              return parts;
          }

          segmentStart = separatorEnd;

          // everything after element 0 of the exec result is appended as well
          var reportedLength = (int) TypeConverter.ToLength(execResult.Get(CommonProperties.Length));
          var captureCount = System.Math.Max(reportedLength - 1, 0);
          for (var i = 1; i <= captureCount; i++)
          {
              parts.SetIndexValue(lengthA, execResult.Get(i), updateLength: true);
              lengthA++;
              if (lengthA == lim)
              {
                  return parts;
              }
          }

          searchIndex = segmentStart;
      }

      // remainder after the last separator
      parts.SetIndexValue(lengthA, s.Substring(segmentStart, size - segmentStart), updateLength: true);
      return parts;
  }
  /// <summary>
  /// Getter for <c>RegExp.prototype.flags</c>: reads global, ignoreCase, multiline, dotAll, unicode
  /// and sticky from the receiver, in that order, and concatenates the letters of those that are truthy.
  /// </summary>
  private JsValue Flags(JsValue thisObj, JsValue[] arguments)
  {
      JsValue target = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.flags");
      var letters = "";

      // appends the letter when the named property converts to true
      void AppendWhenSet(JsValue property, char letter)
      {
          if (TypeConverter.ToBoolean(target.Get(property)))
          {
              letters = letters + letter;
          }
      }

      AppendWhenSet(PropertyGlobal, 'g');
      AppendWhenSet("ignoreCase", 'i');
      AppendWhenSet("multiline", 'm');
      AppendWhenSet("dotAll", 's');
      AppendWhenSet("unicode", 'u');
      AppendWhenSet(PropertySticky, 'y');

      return letters;
  }
  /// <summary>
  /// Implements <c>RegExp.prototype.toString</c>: "/" + source + "/" + flags, where both parts
  /// are read from the receiver's properties and converted to strings.
  /// </summary>
  private JsValue ToRegExpString(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.toString");

      // source is read before flags
      var sourceText = TypeConverter.ToString(regExp.Get(PropertySource));
      var flagText = TypeConverter.ToString(regExp.Get(PropertyFlags));

      return string.Concat("/", sourceText, "/", flagText);
  }
  /// <summary>
  /// Implements <c>RegExp.prototype.test</c>: reports whether the receiver matches the first argument.
  /// </summary>
  /// <remarks>
  /// The fast paths are only taken when the receiver still uses the built-in exec, so that a
  /// user supplied exec is honoured through RegExpExec. A lastIndex beyond the end of the input
  /// resets lastIndex to 0 and yields false; a lastIndex equal to the input length is still matched,
  /// because an empty match at the end of the input is possible.
  /// </remarks>
  private JsValue Test(JsValue thisObj, JsValue[] arguments)
  {
      var r = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.test");
      var s = TypeConverter.ToString(arguments.At(0));

      // check couple fast paths
      if (r is RegExpInstance R && !R.FullUnicode && R.TryGetDefaultRegExpExec(out _))
      {
          if (!R.Sticky && !R.Global)
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return R.Value.IsMatch(s);
          }

          var lastIndex = (int) TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));
          if (lastIndex > s.Length)
          {
              // nothing can match past the end of the input; start over on the next call
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          var m = R.Value.Match(s, lastIndex);
          if (!m.Success || (R.Sticky && m.Index != lastIndex))
          {
              R.Set(RegExpInstance.PropertyLastIndex, 0, throwOnError: true);
              return JsBoolean.False;
          }

          R.Set(RegExpInstance.PropertyLastIndex, m.Index + m.Length, throwOnError: true);
          return JsBoolean.True;
      }

      var match = RegExpExec(r, s);
      return !match.IsNull();
  }
  /// <summary>
  /// Implements <c>RegExp.prototype[Symbol.search]</c>: runs a single exec from position 0 and returns the
  /// "index" of the result, or -1 when there is no match. The receiver's lastIndex is restored afterwards.
  /// </summary>
  private JsValue Search(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.search");
      var s = TypeConverter.ToString(arguments.At(0));

      // remember lastIndex and force the search to start at 0 (written only when it differs)
      var savedLastIndex = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(savedLastIndex, 0))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, 0, true);
      }

      var execResult = RegExpExec(rx, s);

      // put the remembered lastIndex back if exec changed it
      var lastIndexAfterExec = rx.Get(RegExpInstance.PropertyLastIndex);
      if (!SameValue(lastIndexAfterExec, savedLastIndex))
      {
          rx.Set(RegExpInstance.PropertyLastIndex, savedLastIndex, true);
      }

      if (!execResult.IsNull())
      {
          return execResult.Get(PropertyIndex);
      }

      return -1;
  }
  /// <summary>
  /// Implements <c>RegExp.prototype[Symbol.match]</c>. Without the global flag this is a single exec;
  /// with it, an array of all matched substrings is returned, or null when there is none.
  /// </summary>
  /// <remarks>
  /// In the sticky fast path every match has to start exactly where the previous one ended
  /// (one position later after an empty match), and the array length equals the number of
  /// matches stored.
  /// </remarks>
  private JsValue Match(JsValue thisObj, JsValue[] arguments)
  {
      var rx = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.match");
      var s = TypeConverter.ToString(arguments.At(0));
      var global = TypeConverter.ToBoolean(rx.Get(PropertyGlobal));
      if (!global)
      {
          return RegExpExec(rx, s);
      }

      var fullUnicode = TypeConverter.ToBoolean(rx.Get("unicode"));
      rx.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);

      if (!fullUnicode
          && rx is RegExpInstance rei
          && rei.TryGetDefaultRegExpExec(out _))
      {
          // fast path
          var a = Engine.Array.ConstructFast(0);

          if (rei.Sticky)
          {
              var match = rei.Value.Match(s);
              if (!match.Success || match.Index != 0)
              {
                  return Null;
              }

              uint count = 0;
              var expectedIndex = 0;
              while (match.Success && match.Index == expectedIndex)
              {
                  a.SetIndexValue(count++, match.Value, updateLength: false);

                  // the next match must begin at the end of this one; after an empty match the
                  // search resumes one position further
                  expectedIndex = match.Length == 0
                      ? match.Index + 1
                      : match.Index + match.Length;

                  match = match.NextMatch();
              }

              a.SetLength(count);
              return a;
          }
          else
          {
              var matches = rei.Value.Matches(s);
              if (matches.Count == 0)
              {
                  return Null;
              }

              a.EnsureCapacity((uint) matches.Count);
              a.SetLength((uint) matches.Count);
              for (var i = 0; i < matches.Count; i++)
              {
                  a.SetIndexValue((uint) i, matches[i].Value, updateLength: false);
              }

              return a;
          }
      }

      return MatchSlow(rx, s, fullUnicode);
  }
  /// <summary>
  /// Generic global match: calls RegExpExec until it returns null and collects element 0 of every
  /// result. Returns null when nothing matched, otherwise the array of matched strings.
  /// </summary>
  private JsValue MatchSlow(ObjectInstance rx, string s, bool fullUnicode)
  {
      var collected = Engine.Array.ConstructFast(0);
      uint count = 0;

      for (var execResult = RegExpExec(rx, s); !execResult.IsNull(); execResult = RegExpExec(rx, s))
      {
          var matchedText = TypeConverter.ToString(execResult.Get(JsString.NumberZeroString));
          collected.SetIndexValue(count, matchedText, updateLength: false);

          if (matchedText.Length == 0)
          {
              // move lastIndex forward after an empty match so the loop terminates
              var currentIndex = (int) TypeConverter.ToLength(rx.Get(RegExpInstance.PropertyLastIndex));
              var advancedIndex = AdvanceStringIndex(s, currentIndex, fullUnicode);
              rx.Set(RegExpInstance.PropertyLastIndex, advancedIndex, true);
          }

          count++;
      }

      collected.SetLength(count);
      if (count == 0)
      {
          return Null;
      }

      return collected;
  }
  /// <summary>
  /// https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  /// Constructs a new matcher from the receiver through its species constructor, copies the
  /// receiver's lastIndex to it and returns a RegExp string iterator over the first argument.
  /// </summary>
  private JsValue MatchAll(JsValue thisObj, JsValue[] arguments)
  {
      var regExp = AssertThisIsObjectInstance(thisObj, "RegExp.prototype.matchAll");
      var input = TypeConverter.ToString(arguments.At(0));

      var speciesConstructor = SpeciesConstructor(regExp, _engine.RegExp);
      var flagText = TypeConverter.ToJsString(regExp.Get(PropertyFlags));
      var constructArguments = new JsValue[] { regExp, flagText };
      var matcher = Construct(speciesConstructor, constructArguments);

      // the matcher starts where the receiver currently is
      var startIndex = TypeConverter.ToLength(regExp.Get(RegExpInstance.PropertyLastIndex));
      matcher.Set(RegExpInstance.PropertyLastIndex, startIndex, true);

      var isGlobal = flagText.IndexOf('g') != -1;
      var isFullUnicode = flagText.IndexOf('u') != -1;
      return _engine.Iterator.CreateRegExpStringIterator(matcher, input, isGlobal, isFullUnicode);
  }
  /// <summary>
  /// Returns the index following <paramref name="index"/>. In unicode mode a high surrogate that is
  /// immediately followed by a low surrogate is stepped over as one unit (index + 2); in every
  /// other case the result is index + 1.
  /// </summary>
  private static int AdvanceStringIndex(string s, int index, bool unicode)
  {
      var next = index + 1;

      var startsSurrogatePair = unicode
                                && next < s.Length
                                && char.IsHighSurrogate(s[index])
                                && char.IsLowSurrogate(s[next]);

      return startsSurrogatePair ? index + 2 : next;
  }
  /// <summary>
  /// Runs the "exec" found on <paramref name="r"/> when it is callable; its result must be null or an
  /// object, otherwise a TypeError is raised. Without a callable exec the built-in algorithm is used,
  /// which requires <paramref name="r"/> to be a RegExpInstance (TypeError otherwise).
  /// </summary>
  internal static JsValue RegExpExec(ObjectInstance r, string s)
  {
      var exec = r.Get(PropertyExec);
      if (!(exec is ICallable callable))
      {
          if (r is RegExpInstance regExp)
          {
              return RegExpBuiltinExec(regExp, s);
          }

          return ExceptionHelper.ThrowTypeError<ObjectInstance>(r.Engine);
      }

      var execResult = callable.Call(r, new JsValue[] { s });
      if (execResult.IsNull() || execResult.IsObject())
      {
          return execResult;
      }

      return ExceptionHelper.ThrowTypeError<ObjectInstance>(r.Engine);
  }
  /// <summary>
  /// Checks whether the "exec" property of <paramref name="o"/> is still the built-in implementation.
  /// </summary>
  /// <param name="o">Object whose "exec" property is inspected.</param>
  /// <param name="exec">The built-in exec delegate on success, otherwise null.</param>
  /// <returns>true when the built-in exec is in place.</returns>
  internal bool TryGetDefaultExec(ObjectInstance o, out Func<JsValue, JsValue[], JsValue> exec)
  {
      var usesBuiltInExec = o.Get(PropertyExec) is ClrFunctionInstance clrFunction
                            && clrFunction._func == _defaultExec;

      exec = usesBuiltInExec ? _defaultExec : null;
      return usesBuiltInExec;
  }
  /// <summary>
  /// Built-in exec: matches <paramref name="R"/> against <paramref name="s"/> starting at lastIndex
  /// (or at 0 when neither global nor sticky is set) and returns the result array, or null when
  /// there is no match. For global or sticky expressions lastIndex is written back.
  /// </summary>
  private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
  {
      var length = s.Length;
      var lastIndex = (int) TypeConverter.ToLength(R.Get(RegExpInstance.PropertyLastIndex));

      var global = R.Global;
      var sticky = R.Sticky;
      var stateful = global || sticky;
      if (!stateful)
      {
          lastIndex = 0;
      }

      if (R.Source == RegExpInstance.regExpForMatchingAllCharacters) // Reg Exp is really ""
      {
          if (lastIndex > s.Length)
          {
              return Null;
          }

          // "aaa".match() => [ '', index: 0, input: 'aaa' ]
          var emptyMatch = R.Engine.Array.ConstructFast(1);
          emptyMatch.FastAddProperty(PropertyIndex, lastIndex, true, true, true);
          emptyMatch.FastAddProperty(PropertyInput, s, true, true, true);
          emptyMatch.SetIndexValue(0, JsString.Empty, updateLength: false);
          return emptyMatch;
      }

      var matcher = R.Value;
      var fullUnicode = R.FullUnicode;

      if (!stateful && !fullUnicode)
      {
          // the non-stateful fast path, which is the common case
          var m = matcher.Match(s, lastIndex);
          if (m.Success)
          {
              return CreateReturnValueArray(R.Engine, m, s, fullUnicode: false);
          }

          return Null;
      }

      // the stateful version
      Match match;
      for (;;)
      {
          if (lastIndex > length)
          {
              R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          match = R.Value.Match(s, lastIndex);
          if (match.Success && (!sticky || match.Index == lastIndex))
          {
              break;
          }

          if (sticky)
          {
              // a sticky expression must match exactly at lastIndex
              R.Set(RegExpInstance.PropertyLastIndex, JsNumber.PositiveZero, true);
              return Null;
          }

          lastIndex = AdvanceStringIndex(s, lastIndex, fullUnicode);
      }

      var e = match.Index + match.Length;
      if (fullUnicode)
      {
          // e is an index into the Input character list, derived from S, matched by matcher.
          // Let eUTF be the smallest index into S that corresponds to the character at element e of Input.
          // If e is greater than or equal to the number of elements in Input, then eUTF is the number of code units in S.
          // Set e to eUTF.
          var indexes = StringInfo.ParseCombiningCharacters(s);
          if (match.Index < indexes.Length)
          {
              var textElement = StringInfo.GetNextTextElement(s, match.Index);
              e += textElement.Length - 1;
          }
      }

      R.Set(RegExpInstance.PropertyLastIndex, e, true);
      return CreateReturnValueArray(R.Engine, match, s, fullUnicode);
  }
  /// <summary>
  /// Builds the array returned by exec: one element per regex group (undefined for groups that did
  /// not succeed) plus the "index", "input" and "groups" properties. "groups" is always undefined here.
  /// </summary>
  private static ArrayInstance CreateReturnValueArray(Engine engine, Match match, string inputValue, bool fullUnicode)
  {
      var groupCount = match.Groups.Count;
      var array = engine.Array.ConstructFast((ulong) groupCount);
      array.CreateDataProperty(PropertyIndex, match.Index);
      array.CreateDataProperty(PropertyInput, inputValue);

      for (uint i = 0; i < groupCount; i++)
      {
          var group = match.Groups[(int) i];
          var element = Undefined;
          if (group.Success)
          {
              if (fullUnicode)
              {
                  // in unicode mode the text element starting at the group's index is used
                  element = StringInfo.GetNextTextElement(inputValue, group.Index);
              }
              else
              {
                  element = group.Value;
              }

              // todo detect captured name
          }

          array.SetIndexValue(i, element, updateLength: false);
      }

      // named groups are not detected yet
      array.CreateDataProperty("groups", Undefined);
      return array;
  }
  /// <summary>
  /// Implements <c>RegExp.prototype.exec</c>: the receiver must be a RegExpInstance (TypeError otherwise);
  /// the first argument is converted to a string and matched with the built-in algorithm.
  /// </summary>
  private JsValue Exec(JsValue thisObj, JsValue[] arguments)
  {
      if (thisObj is RegExpInstance regExp)
      {
          var input = TypeConverter.ToString(arguments.At(0));
          return RegExpBuiltinExec(regExp, input);
      }

      return ExceptionHelper.ThrowTypeError<JsValue>(_engine);
  }
  794. }
  795. }