2 ani în urmă · 6eddc61db4
--- a/Jint.Tests.Test262/Test262Harness.settings.json
+++ b/Jint.Tests.Test262/Test262Harness.settings.json
@@ -44,14 +44,10 @@
 
															     "language/literals/regexp/named-groups/forward-reference.js",
														
 
															     // RegExp handling problems
														
 
															-    "built-ins/RegExp/match-indices/indices-array-unicode-property-names.js",
														
 
															     "built-ins/RegExp/named-groups/non-unicode-match.js",
														
 
															     "built-ins/RegExp/named-groups/non-unicode-property-names-valid.js",
														
 
															-    "built-ins/RegExp/named-groups/non-unicode-property-names.js",
														
 
															     "built-ins/RegExp/named-groups/unicode-match.js",
														
 
															     "built-ins/RegExp/named-groups/unicode-property-names-valid.js",
														
 
															-    "built-ins/RegExp/named-groups/unicode-property-names.js",
														
 
															-    "built-ins/RegExp/prototype/Symbol.replace/named-groups.js",
														
 
															     "built-ins/RegExp/prototype/exec/S15.10.6.2_A1_T6.js",
														
 
															     "built-ins/String/prototype/split/separator-regexp.js",
														
 
															     "language/literals/regexp/u-case-mapping.js",
														
--- a/Jint.Tests/Runtime/RegExpTests.cs
+++ b/Jint.Tests/Runtime/RegExpTests.cs
@@ -35,7 +35,7 @@ public class RegExpTests
 
															     public void PreventsInfiniteLoop()
														
 
															     {
														
 
															         var engine = new Engine();
														
 
															-        var result = (ArrayInstance)engine.Evaluate("'x'.match(/|/g);");
														
 
															+        var result = (ArrayInstance) engine.Evaluate("'x'.match(/|/g);");
														
 
															         Assert.Equal((uint) 2, result.Length);
														
 
															         Assert.Equal("", result[0]);
														
 
															         Assert.Equal("", result[1]);
														
@@ -103,4 +103,41 @@ public class RegExpTests
 
															         var source = engine.Evaluate(@"/\/\//.source");
														
 
															         Assert.Equal("\\/\\/", source);
														
 
															     }
														
 
															+
														
 
															+    [Theory]
														
 
															+    [InlineData("", "/()/ug", new[] { "" }, new[] { 0 })]
														
 
															+    [InlineData("💩", "/()/ug", new[] { "", "" }, new[] { 0, 2 })]
														
 
															+    [InlineData("ᴜⁿᵢ𝒸ₒᵈₑ is a 💩", "/i?/ug",
														
 
															+        new[] { "", "", "", "", "", "", "", "", "i", "", "", "", "", "", "" },
														
 
															+        new[] { 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 })]
														
 
															+    public void ShouldNotMatchEmptyStringsWithinSurrogatePairsInUnicodeMode(string input, string pattern, string[] expectedCaptures, int[] expectedIndices)
														
 
															+    {
														
 
															+        var engine = new Engine();
														
 
															+        var matches = engine.Evaluate($"[...'{input}'.matchAll({pattern})]").AsArray();
														
 
															+        Assert.Equal((ulong) expectedCaptures.Length, matches.Length);
														
 
															+        Assert.Equal(expectedCaptures, matches.Select((m, i) => m.Get(0).AsString()));
														
 
															+        Assert.Equal(expectedIndices, matches.Select(m => m.Get("index").AsInteger()));
														
 
															+    }
														
 
															+
														
 
															+    [Fact]
														
 
															+    public void ShouldAllowProblematicGroupNames()
														
 
															+    {
														
 
															+        var engine = new Engine();
														
 
															+
														
 
															+        var match = engine.Evaluate("'abc'.match(/(?<$group>b)/)").AsArray();
														
 
															+        var groups = match.Get("groups").AsObject();
														
 
															+        Assert.Equal(new[] { "$group" }, groups.GetOwnPropertyKeys().Select(k => k.AsString()));
														
 
															+        Assert.Equal("b", groups["$group"]);
														
 
															+
														
 
															+        var result = engine.Evaluate("'abc'.replace(/(?<$group>b)/g, '-$<$group>-')").AsString();
														
 
															+        Assert.Equal("a-b-c", result);
														
 
															+    }
														
 
															+
														
 
															+    [Fact]
														
 
															+    public void Issue506()
														
 
															+    {
														
 
															+        var engine = new Engine();
														
 
															+        var result = engine.Evaluate("/[^]?(:[rp][el]a[\\w-]+)[^]/.test(':reagent-')").AsBoolean();
														
 
															+        Assert.True(result);
														
 
															+    }
														
 
															 }
														
--- a/Jint/Native/RegExp/RegExpPrototype.cs
+++ b/Jint/Native/RegExp/RegExpPrototype.cs
@@ -1,4 +1,5 @@
 
															 using System.Diagnostics.CodeAnalysis;
														
 
															+using System.Text;
														
 
															 using System.Text.RegularExpressions;
														
 
															 using Jint.Collections;
														
 
															 using Jint.Native.Number;
														
@@ -824,7 +825,7 @@ namespace Jint.Native.RegExp
 
															             var exec = r.Get(PropertyExec);
														
 
															             if (exec is ICallable callable)
														
 
															             {
														
 
															-                var result = callable.Call(r, new JsValue[]  { s });
														
 
															+                var result = callable.Call(r, new JsValue[] { s });
														
 
															                 if (!result.IsNull() && !result.IsObject())
														
 
															                 {
														
 
															                     ExceptionHelper.ThrowTypeError(r.Engine.Realm);
														
@@ -902,31 +903,47 @@ namespace Jint.Native.RegExp
 
															             // the stateful version
														
 
															             Match match;
														
 
															-            while (true)
														
 
															+
														
 
															+            if (lastIndex > length)
														
 
															             {
														
 
															-                if (lastIndex > length)
														
 
															-                {
														
 
															-                    R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
														
 
															-                    return Null;
														
 
															-                }
														
 
															+                R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
														
 
															+                return Null;
														
 
															+            }
														
 
															-                match = R.Value.Match(s, (int) lastIndex);
														
 
															-                var success = match.Success && (!sticky || match.Index == (int) lastIndex);
														
 
															-                if (!success)
														
 
															+            var startAt = (int) lastIndex;
														
 
															+            while (true)
														
 
															+            {
														
 
															+                match = R.Value.Match(s, startAt);
														
 
															+
														
 
															+                // The conversion of Unicode regex patterns to .NET Regex has some flaws:
														
 
															+                // when the pattern may match empty strings, the adapted Regex will return empty string matches
														
 
															+                // in the middle of surrogate pairs. As a best effort solution, we remove these fake positive matches.
														
 
															+                // (See also: https://github.com/sebastienros/esprima-dotnet/pull/364#issuecomment-1606045259)
														
 
															+
														
 
															+                if (match.Success
														
 
															+                    && fullUnicode
														
 
															+                    && match.Length == 0
														
 
															+                    && 0 < match.Index && match.Index < s.Length
														
 
															+                    && char.IsHighSurrogate(s[match.Index - 1]) && char.IsLowSurrogate(s[match.Index]))
														
 
															                 {
														
 
															-                    R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
														
 
															-                    return Null;
														
 
															+                    startAt++;
														
 
															+                    continue;
														
 
															                 }
														
 
															                 break;
														
 
															             }
														
 
															-            var e = match.Index + match.Length;
														
 
															-            if (fullUnicode)
														
 
															+            var success = match.Success && (!sticky || match.Index == (int) lastIndex);
														
 
															+            if (!success)
														
 
															             {
														
 
															-                e = GetStringIndex(s, e);
														
 
															+                R.Set(JsRegExp.PropertyLastIndex, JsNumber.PositiveZero, true);
														
 
															+                return Null;
														
 
															             }
														
 
															+            var e = match.Index + match.Length;
														
 
															+
														
 
															+            // NOTE: Even in Unicode mode, we don't need to translate indices as .NET regexes always return code unit indices.
														
 
															+
														
 
															             if (global || sticky)
														
 
															             {
														
 
															                 R.Set(JsRegExp.PropertyLastIndex, e, true);
														
@@ -935,35 +952,6 @@ namespace Jint.Native.RegExp
 
															             return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
														
 
															         }
														
 
															-        /// <summary>
														
 
															-        /// https://tc39.es/ecma262/#sec-getstringindex
														
 
															-        /// </summary>
														
 
															-        private static int GetStringIndex(string s, int codePointIndex)
														
 
															-        {
														
 
															-            if (s.Length == 0)
														
 
															-            {
														
 
															-                return 0;
														
 
															-            }
														
 
															-
														
 
															-            var len = s.Length;
														
 
															-            var codeUnitCount = 0;
														
 
															-            var codePointCount = 0;
														
 
															-
														
 
															-            while (codeUnitCount < len)
														
 
															-            {
														
 
															-                if (codePointCount == codePointIndex)
														
 
															-                {
														
 
															-                    return codeUnitCount;
														
 
															-                }
														
 
															-
														
 
															-                var isSurrogatePair = char.IsSurrogatePair(s, codeUnitCount);
														
 
															-                codeUnitCount += isSurrogatePair ? 2 : 1;
														
 
															-                codePointCount += 1;
														
 
															-            }
														
 
															-
														
 
															-            return len;
														
 
															-        }
														
 
															-
														
 
															         private static JsArray CreateReturnValueArray(
														
 
															             Engine engine,
														
 
															             Regex regex,
														
@@ -1080,6 +1068,24 @@ namespace Jint.Native.RegExp
 
															                 return null;
														
 
															             }
														
 
															+
														
 
															+            // The characters allowed in group names differs between the JS and .NET regex engines.
														
 
															+            // For example the group name "$group" is valid in JS but invalid in .NET.
														
 
															+            // As a workaround for this issue, the parser make an attempt to encode the problematic group names to
														
 
															+            // names which are valid in .NET and probably won't collide with other group names present in the pattern
														
 
															+            // (https://github.com/sebastienros/esprima-dotnet/blob/v3.0.0-rc-03/src/Esprima/Scanner.RegExpParser.cs#L942).
														
 
															+            // We need to decode such group names.
														
 
															+            const string encodedGroupNamePrefix = "__utf8_";
														
 
															+            if (groupNameFromNumber.StartsWith(encodedGroupNamePrefix, StringComparison.Ordinal))
														
 
															+            {
														
 
															+                try
														
 
															+                {
														
 
															+                    var bytes = groupNameFromNumber.AsSpan(encodedGroupNamePrefix.Length).BytesFromHexString();
														
 
															+                    groupNameFromNumber = Encoding.UTF8.GetString(bytes);
														
 
															+                }
														
 
															+                catch { /* intentional no-op */ }
														
 
															+            }
														
 
															+
														
 
															             return groupNameFromNumber;
														
 
															         }
														
--- a/Jint/Shims.cs
+++ b/Jint/Shims.cs
@@ -0,0 +1,41 @@
 
															+namespace Jint;
														
 
															+
														
 
															+internal static class Shims
														
 
															+{
														
 
															+    public static byte[] BytesFromHexString(this ReadOnlySpan<char> value)
														
 
															+    {
														
 
															+#if NET6_0_OR_GREATER
														
 
															+        return Convert.FromHexString(value);
														
 
															+#else
														
 
															+        if ((value.Length & 1) != 0)
														
 
															+        {
														
 
															+            throw new FormatException();
														
 
															+        }
														
 
															+
														
 
															+        var byteCount = value.Length >> 1;
														
 
															+        var result = new byte[byteCount];
														
 
															+        var index = 0;
														
 
															+        for (var i = 0; i < byteCount; i++)
														
 
															+        {
														
 
															+            int hi, lo;
														
 
															+            if ((hi = GetDigitValue(value[index++])) < 0
														
 
															+                || (lo = GetDigitValue(value[index++])) < 0)
														
 
															+            {
														
 
															+                throw new FormatException();
														
 
															+            }
														
 
															+
														
 
															+            result[i] = (byte) (hi << 4 | lo);
														
 
															+        }
														
 
															+
														
 
															+        return result;
														
 
															+
														
 
															+        static int GetDigitValue(char ch) => ch switch
														
 
															+        {
														
 
															+            >= '0' and <= '9' => ch - 0x30,
														
 
															+            >= 'a' and <= 'f' => ch - 0x57,
														
 
															+            >= 'A' and <= 'F' => ch - 0x37,
														
 
															+            _ => -1
														
 
															+        };
														
 
															+#endif
														
 
															+    }
														
 
															+}