Ver código fonte

Fix capturing group numbering bug and a few more regex-related issues (#1614)

adams85 2 anos atrás
pai
commit
e9ace1a5a1

+ 0 - 6
Jint.Tests.Test262/Test262Harness.settings.json

@@ -44,14 +44,8 @@
     "language/literals/regexp/named-groups/forward-reference.js",
     "language/literals/regexp/named-groups/forward-reference.js",
 
 
     // RegExp handling problems
     // RegExp handling problems
-    "built-ins/RegExp/named-groups/non-unicode-match.js",
-    "built-ins/RegExp/named-groups/non-unicode-property-names-valid.js",
-    "built-ins/RegExp/named-groups/unicode-match.js",
-    "built-ins/RegExp/named-groups/unicode-property-names-valid.js",
     "built-ins/RegExp/prototype/exec/S15.10.6.2_A1_T6.js",
     "built-ins/RegExp/prototype/exec/S15.10.6.2_A1_T6.js",
-    "built-ins/String/prototype/split/separator-regexp.js",
     "language/literals/regexp/u-case-mapping.js",
     "language/literals/regexp/u-case-mapping.js",
-    "language/literals/regexp/u-surrogate-pairs-atom-escape-decimal.js",
 
 
     // requires investigation how to process complex function name evaluation for property
     // requires investigation how to process complex function name evaluation for property
     "built-ins/Function/prototype/toString/method-computed-property-name.js",
     "built-ins/Function/prototype/toString/method-computed-property-name.js",

+ 1 - 1
Jint/Jint.csproj

@@ -16,7 +16,7 @@
   </PropertyGroup>
   </PropertyGroup>
 
 
   <ItemGroup>
   <ItemGroup>
-    <PackageReference Include="Esprima" Version="3.0.0-rc-03" />
+    <PackageReference Include="Esprima" Version="3.0.0-rc-04" />
     <PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="all" />
     <PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="all" />
     <PackageReference Include="Nullable" Version="1.3.1" PrivateAssets="all" />
     <PackageReference Include="Nullable" Version="1.3.1" PrivateAssets="all" />
     <PackageReference Include="PolySharp" Version="1.13.1">
     <PackageReference Include="PolySharp" Version="1.13.1">

+ 3 - 0
Jint/Native/RegExp/JsRegExp.cs

@@ -1,4 +1,5 @@
 using System.Text.RegularExpressions;
 using System.Text.RegularExpressions;
+using Esprima;
 using Jint.Native.Object;
 using Jint.Native.Object;
 using Jint.Runtime;
 using Jint.Runtime;
 using Jint.Runtime.Descriptors;
 using Jint.Runtime.Descriptors;
@@ -62,6 +63,8 @@ namespace Jint.Native.RegExp
             }
             }
         }
         }
 
 
+        public RegExpParseResult ParseResult { get; set; }
+
         public bool DotAll { get; private set; }
         public bool DotAll { get; private set; }
         public bool Global { get; private set; }
         public bool Global { get; private set; }
         public bool Indices { get; private set; }
         public bool Indices { get; private set; }

+ 7 - 5
Jint/Native/RegExp/RegExpConstructor.cs

@@ -104,14 +104,15 @@ namespace Jint.Native.RegExp
 
 
             try
             try
             {
             {
-                var regExp = Scanner.AdaptRegExp(p, f, compiled: false, _engine.Options.Constraints.RegexTimeout);
+                var regExpParseResult = Scanner.AdaptRegExp(p, f, compiled: false, _engine.Options.Constraints.RegexTimeout);
 
 
-                if (regExp is null)
+                if (!regExpParseResult.Success)
                 {
                 {
-                    ExceptionHelper.ThrowSyntaxError(_realm, $"Unsupported regular expression: '/{p}/{flags}'");
+                    ExceptionHelper.ThrowSyntaxError(_realm, $"Unsupported regular expression. {regExpParseResult.ConversionError!.Description}");
                 }
                 }
 
 
-                r.Value = regExp;
+                r.Value = regExpParseResult.Regex!;
+                r.ParseResult = regExpParseResult;
             }
             }
             catch (Exception ex)
             catch (Exception ex)
             {
             {
@@ -135,12 +136,13 @@ namespace Jint.Native.RegExp
             return r;
             return r;
         }
         }
 
 
-        public JsRegExp Construct(Regex regExp, string source, string flags)
+        public JsRegExp Construct(Regex regExp, string source, string flags, RegExpParseResult regExpParseResult = default)
         {
         {
             var r = RegExpAlloc(this);
             var r = RegExpAlloc(this);
             r.Value = regExp;
             r.Value = regExp;
             r.Source = source;
             r.Source = source;
             r.Flags = flags;
             r.Flags = flags;
+            r.ParseResult = regExpParseResult;
 
 
             RegExpInitialize(r);
             RegExpInitialize(r);
 
 

+ 32 - 48
Jint/Native/RegExp/RegExpPrototype.cs

@@ -1,5 +1,4 @@
 using System.Diagnostics.CodeAnalysis;
 using System.Diagnostics.CodeAnalysis;
-using System.Text;
 using System.Text.RegularExpressions;
 using System.Text.RegularExpressions;
 using Jint.Collections;
 using Jint.Collections;
 using Jint.Native.Number;
 using Jint.Native.Number;
@@ -168,16 +167,17 @@ namespace Jint.Native.RegExp
                 {
                 {
                     string Evaluator(Match match)
                     string Evaluator(Match match)
                     {
                     {
-                        var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
+                        var actualGroupCount = GetActualRegexGroupCount(rei, match);
+                        var replacerArgs = new List<JsValue>(actualGroupCount + 2);
                         replacerArgs.Add(match.Value);
                         replacerArgs.Add(match.Value);
 
 
                         ObjectInstance? groups = null;
                         ObjectInstance? groups = null;
-                        for (var i = 1; i < match.Groups.Count; i++)
+                        for (var i = 1; i < actualGroupCount; i++)
                         {
                         {
                             var capture = match.Groups[i];
                             var capture = match.Groups[i];
                             replacerArgs.Add(capture.Success ? capture.Value : Undefined);
                             replacerArgs.Add(capture.Success ? capture.Value : Undefined);
 
 
-                            var groupName = GetRegexGroupName(rei.Value, i);
+                            var groupName = GetRegexGroupName(rei, i);
                             if (!string.IsNullOrWhiteSpace(groupName))
                             if (!string.IsNullOrWhiteSpace(groupName))
                             {
                             {
                                 groups ??= OrdinaryObjectCreate(_engine, null);
                                 groups ??= OrdinaryObjectCreate(_engine, null);
@@ -475,21 +475,13 @@ namespace Jint.Native.RegExp
                 }
                 }
 
 
                 var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);
                 var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);
-                var match = R.Value.Match(s, 0);
-
-                if (!match.Success) // No match at all return the string in an array
-                {
-                    a.SetIndexValue(0, s, updateLength: true);
-                    return a;
-                }
 
 
                 int lastIndex = 0;
                 int lastIndex = 0;
                 uint index = 0;
                 uint index = 0;
-                while (match.Success && index < lim)
+                for (var match = R.Value.Match(s, 0); match.Success; match = match.NextMatch())
                 {
                 {
                     if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
                     if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
                     {
                     {
-                        match = match.NextMatch();
                         continue;
                         continue;
                     }
                     }
 
 
@@ -502,7 +494,8 @@ namespace Jint.Native.RegExp
                     }
                     }
 
 
                     lastIndex = match.Index + match.Length;
                     lastIndex = match.Index + match.Length;
-                    for (int i = 1; i < match.Groups.Count; i++)
+                    var actualGroupCount = GetActualRegexGroupCount(R, match);
+                    for (int i = 1; i < actualGroupCount; i++)
                     {
                     {
                         var group = match.Groups[i];
                         var group = match.Groups[i];
                         var item = Undefined;
                         var item = Undefined;
@@ -518,14 +511,11 @@ namespace Jint.Native.RegExp
                             return a;
                             return a;
                         }
                         }
                     }
                     }
-
-                    match = match.NextMatch();
-                    if (!match.Success) // Add the last part of the split
-                    {
-                        a.SetIndexValue(index++, s.Substring(lastIndex), updateLength: true);
-                    }
                 }
                 }
 
 
+                // Add the last part of the split
+                a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);
+
                 return a;
                 return a;
             }
             }
 
 
@@ -720,7 +710,7 @@ namespace Jint.Native.RegExp
                         match = match.NextMatch();
                         match = match.NextMatch();
                         if (!match.Success || match.Index != ++li)
                         if (!match.Success || match.Index != ++li)
                             break;
                             break;
-                        a.SetIndexValue(li,  match.Value, updateLength: false);
+                        a.SetIndexValue(li, match.Value, updateLength: false);
                     }
                     }
                     a.SetLength(li);
                     a.SetLength(li);
                     return a;
                     return a;
@@ -898,7 +888,7 @@ namespace Jint.Native.RegExp
                     return Null;
                     return Null;
                 }
                 }
 
 
-                return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
+                return CreateReturnValueArray(R, m, s, fullUnicode: false, hasIndices: false);
             }
             }
 
 
             // the stateful version
             // the stateful version
@@ -949,25 +939,26 @@ namespace Jint.Native.RegExp
                 R.Set(JsRegExp.PropertyLastIndex, e, true);
                 R.Set(JsRegExp.PropertyLastIndex, e, true);
             }
             }
 
 
-            return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
+            return CreateReturnValueArray(R, match, s, fullUnicode, hasIndices);
         }
         }
 
 
         private static JsArray CreateReturnValueArray(
         private static JsArray CreateReturnValueArray(
-            Engine engine,
-            Regex regex,
+            JsRegExp rei,
             Match match,
             Match match,
             string s,
             string s,
             bool fullUnicode,
             bool fullUnicode,
             bool hasIndices)
             bool hasIndices)
         {
         {
-            var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) match.Groups.Count);
+            var engine = rei.Engine;
+            var actualGroupCount = GetActualRegexGroupCount(rei, match);
+            var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) actualGroupCount);
             array.CreateDataProperty(PropertyIndex, match.Index);
             array.CreateDataProperty(PropertyIndex, match.Index);
             array.CreateDataProperty(PropertyInput, s);
             array.CreateDataProperty(PropertyInput, s);
 
 
             ObjectInstance? groups = null;
             ObjectInstance? groups = null;
             List<string>? groupNames = null;
             List<string>? groupNames = null;
-            var indices = hasIndices ? new List<JsNumber[]?>(match.Groups.Count) : null;
-            for (uint i = 0; i < match.Groups.Count; i++)
+            var indices = hasIndices ? new List<JsNumber[]?>(actualGroupCount) : null;
+            for (uint i = 0; i < actualGroupCount; i++)
             {
             {
                 var capture = match.Groups[(int) i];
                 var capture = match.Groups[(int) i];
                 var capturedValue = Undefined;
                 var capturedValue = Undefined;
@@ -988,7 +979,7 @@ namespace Jint.Native.RegExp
                     }
                     }
                 }
                 }
 
 
-                var groupName = GetRegexGroupName(regex, (int) i);
+                var groupName = GetRegexGroupName(rei, (int) i);
                 if (!string.IsNullOrWhiteSpace(groupName))
                 if (!string.IsNullOrWhiteSpace(groupName))
                 {
                 {
                     groups ??= OrdinaryObjectCreate(engine, null);
                     groups ??= OrdinaryObjectCreate(engine, null);
@@ -1055,35 +1046,28 @@ namespace Jint.Native.RegExp
             return engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
             return engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
         }
         }
 
 
-        private static string? GetRegexGroupName(Regex regex, int index)
+        private static int GetActualRegexGroupCount(JsRegExp rei, Match match)
+        {
+            return rei.ParseResult.Success ? rei.ParseResult.ActualRegexGroupCount : match.Groups.Count;
+        }
+
+        private static string? GetRegexGroupName(JsRegExp rei, int index)
         {
         {
             if (index == 0)
             if (index == 0)
             {
             {
                 return null;
                 return null;
             }
             }
+            var regex = rei.Value;
+            if (rei.ParseResult.Success)
+            {
+                return rei.ParseResult.GetRegexGroupName(index);
+            }
+
             var groupNameFromNumber = regex.GroupNameFromNumber(index);
             var groupNameFromNumber = regex.GroupNameFromNumber(index);
             if (groupNameFromNumber.Length == 1 && groupNameFromNumber[0] == 48 + index)
             if (groupNameFromNumber.Length == 1 && groupNameFromNumber[0] == 48 + index)
             {
             {
                 // regex defaults to index as group name when it's not a named group
                 // regex defaults to index as group name when it's not a named group
                 return null;
                 return null;
-
-            }
-
-            // The characters allowed in group names differs between the JS and .NET regex engines.
-            // For example the group name "$group" is valid in JS but invalid in .NET.
-            // As a workaround for this issue, the parser make an attempt to encode the problematic group names to
-            // names which are valid in .NET and probably won't collide with other group names present in the pattern
-            // (https://github.com/sebastienros/esprima-dotnet/blob/v3.0.0-rc-03/src/Esprima/Scanner.RegExpParser.cs#L942).
-            // We need to decode such group names.
-            const string encodedGroupNamePrefix = "__utf8_";
-            if (groupNameFromNumber.StartsWith(encodedGroupNamePrefix, StringComparison.Ordinal))
-            {
-                try
-                {
-                    var bytes = groupNameFromNumber.AsSpan(encodedGroupNamePrefix.Length).BytesFromHexString();
-                    groupNameFromNumber = Encoding.UTF8.GetString(bytes);
-                }
-                catch { /* intentional no-op */ }
             }
             }
 
 
             return groupNameFromNumber;
             return groupNameFromNumber;

+ 4 - 3
Jint/Runtime/Interpreter/Expressions/JintLiteralExpression.cs

@@ -76,12 +76,13 @@ namespace Jint.Runtime.Interpreter.Expressions
             if (expression.TokenType == TokenType.RegularExpression)
             if (expression.TokenType == TokenType.RegularExpression)
             {
             {
                 var regExpLiteral = (RegExpLiteral) _expression;
                 var regExpLiteral = (RegExpLiteral) _expression;
-                if (regExpLiteral.Value is System.Text.RegularExpressions.Regex regex)
+                var regExpParseResult = regExpLiteral.ParseResult;
+                if (regExpParseResult.Success)
                 {
                 {
-                    return context.Engine.Realm.Intrinsics.RegExp.Construct(regex, regExpLiteral.Regex.Pattern, regExpLiteral.Regex.Flags);
+                    return context.Engine.Realm.Intrinsics.RegExp.Construct(regExpParseResult.Regex!, regExpLiteral.Regex.Pattern, regExpLiteral.Regex.Flags, regExpParseResult);
                 }
                 }
 
 
-                ExceptionHelper.ThrowSyntaxError(context.Engine.Realm, $"Unsupported regular expression: '{regExpLiteral.Regex.Pattern}/{regExpLiteral.Regex.Flags}'");
+                ExceptionHelper.ThrowSyntaxError(context.Engine.Realm, $"Unsupported regular expression. {regExpParseResult.ConversionError!.Description}");
             }
             }
 
 
             return JsValue.FromObject(context.Engine, expression.Value);
             return JsValue.FromObject(context.Engine, expression.Value);