Browse Source

fix: enhance string pattern matching Empty pattern case

Akeit0 6 months ago
parent
commit
9e5b0063e4
2 changed files with 138 additions and 11 deletions
  1. 9 11
      src/Lua/Standard/StringLibrary.cs
  2. 129 0
      tests/Lua.Tests/PatternMatchingTests.cs

+ 9 - 11
src/Lua/Standard/StringLibrary.cs

@@ -693,7 +693,15 @@ public sealed class StringLibrary
             init = s.Length + init + 1;
         }
 
-        init = Math.Max(0, Math.Min(init - 1, s.Length)); // Convert from 1-based to 0-based and clamp
+        init--; // Convert from 1-based to 0-based
+        
+        // Check if init is beyond string bounds
+        if (init > s.Length)
+        {
+            return new(context.Return(LuaValue.Nil));
+        }
+        
+        init = Math.Max(0, init); // Clamp to 0 if negative
 
         // Check for plain search mode (4th parameter = true)
         if (find && context.GetArgumentOrDefault(3).ToBoolean())
@@ -712,11 +720,6 @@ public sealed class StringLibrary
 
     private static ValueTask<int> PlainSearch(LuaFunctionExecutionContext context, string s, string pattern, int init)
     {
-        if (init > s.Length)
-        {
-            return new(context.Return(LuaValue.Nil));
-        }
-
         var index = s.AsSpan(init).IndexOf(pattern);
         if (index == -1)
         {
@@ -729,11 +732,6 @@ public sealed class StringLibrary
 
     private static ValueTask<int> SimplePatternSearch(LuaFunctionExecutionContext context, string s, string pattern, int init)
     {
-        if (init > s.Length)
-        {
-            return new(context.Return(LuaValue.Nil));
-        }
-
         var index = s.AsSpan(init).IndexOf(pattern);
         if (index == -1)
         {

+ 129 - 0
tests/Lua.Tests/PatternMatchingTests.cs

@@ -252,6 +252,12 @@ public class PatternMatchingTests
         Assert.That(result[0].Read<double>(), Is.EqualTo(1));
         Assert.That(result[1].Read<double>(), Is.EqualTo(0));
         
+        // Empty pattern with empty string (should match at position 1)
+        result = await state.DoStringAsync("return string.find('', '')");
+        Assert.That(result.Length, Is.EqualTo(2));
+        Assert.That(result[0].Read<double>(), Is.EqualTo(1));
+        Assert.That(result[1].Read<double>(), Is.EqualTo(0));
+        
         // Negative start position
         result = await state.DoStringAsync("return string.find('hello', 'l', -2)");
         Assert.That(result.Length, Is.EqualTo(2));
@@ -263,6 +269,11 @@ public class PatternMatchingTests
         Assert.That(result.Length, Is.EqualTo(1));
         Assert.That(result[0].Type, Is.EqualTo(LuaValueType.Nil));
         
+        // Empty string with init beyond length
+        result = await state.DoStringAsync("return string.find('', '', 2)");
+        Assert.That(result.Length, Is.EqualTo(1));
+        Assert.That(result[0].Type, Is.EqualTo(LuaValueType.Nil));
+        
         // Position captures
         result = await state.DoStringAsync("return string.find('hello', '()l()l()')");
         Assert.That(result.Length, Is.EqualTo(5)); // start, end, pos1, pos2, pos3
@@ -798,4 +809,122 @@ public class PatternMatchingTests
             await state.DoStringAsync("return string.match(string.rep('a', 1000), string.rep('a?', 1000) .. string.rep('a', 1000))"));
         Assert.That(exception.Message, Does.Contain("pattern too complex"));
     }
+
+    [Test]
+    public async Task Test_DollarSignPattern_EscapingIssue()
+    {
+        var state = LuaState.Create();
+        state.OpenStringLibrary();
+        state.OpenTableLibrary();
+        
+        // Test the problematic pattern from the user's code
+        // The pattern "$([^$]+)" won't work because $ needs to be escaped as %$
+        var result = await state.DoStringAsync(@"
+            local prog = 'Hello $world$ and $123$ test'
+            local matches = {}
+            
+            -- Wrong pattern (will not match correctly)
+            for s in string.gmatch(prog, '$([^$]+)') do
+                table.insert(matches, s)
+            end
+            
+            return #matches
+        ");
+        Assert.That(result[0].Read<double>(), Is.EqualTo(0)); // No matches because pattern is wrong
+        
+        // Test the correct pattern with escaped dollar signs
+        result = await state.DoStringAsync(@"
+            local prog = 'Hello $world$ and $123$ test'
+            local matches = {}
+            
+            -- Correct pattern (with escaped dollar signs)
+            for s in string.gmatch(prog, '%$([^%$]+)') do
+                table.insert(matches, s)
+            end
+            
+            return table.unpack(matches)
+        ");
+        Assert.That(result.Length, Is.EqualTo(2));
+        Assert.That(result[0].Read<string>(), Is.EqualTo("world"));
+        Assert.That(result[1].Read<string>(), Is.EqualTo("123"));
+    }
+
+    [Test]
+    public async Task Test_DollarSignPattern_CompleteExample()
+    {
+        var state = LuaState.Create();
+        state.OpenStringLibrary();
+        state.OpenTableLibrary();
+        state.OpenBasicLibrary();
+        
+        // Simulate the user's use case with corrected pattern
+        var result = await state.DoStringAsync(@"
+            local prog = 'Start $1$ middle $hello$ end $2$'
+            local F = {
+                [1] = function() return 'FIRST' end,
+                [2] = function() return 'SECOND' end
+            }
+            local output = {}
+            
+            -- Process the string with correct pattern
+            local lastPos = 1
+            for match, content in string.gmatch(prog, '()%$([^%$]+)%$()') do
+                -- Add text before the match
+                if match > lastPos then
+                    table.insert(output, prog:sub(lastPos, match - 1))
+                end
+                
+                -- Process the content
+                local n = tonumber(content)
+                if n and F[n] then
+                    table.insert(output, F[n]())
+                else
+                    table.insert(output, content)
+                end
+                
+                lastPos = match + #content + 2 -- +2 for the two $ signs
+            end
+            
+            -- Add remaining text
+            if lastPos <= #prog then
+                table.insert(output, prog:sub(lastPos))
+            end
+            
+            return table.concat(output)
+        ");
+        
+        Assert.That(result[0].Read<string>(), Is.EqualTo("Start FIRST middle hello end SECOND"));
+    }
+
+    [Test]
+    public async Task Test_DollarSignPattern_EdgeCases()
+    {
+        var state = LuaState.Create();
+        state.OpenStringLibrary();
+        state.OpenTableLibrary();
+        
+        // Test empty content between dollar signs
+        var result = await state.DoStringAsync(@"
+            local matches = {}
+            for s in string.gmatch('$$ and $empty$', '%$([^%$]*)') do
+                table.insert(matches, s)
+            end
+            return table.unpack(matches)
+        ");
+        Assert.That(result.Length, Is.EqualTo(2));
+        Assert.That(result[0].Read<string>(), Is.EqualTo("")); // Empty match
+        Assert.That(result[1].Read<string>(), Is.EqualTo("empty"));
+        
+        // Test nested or adjacent dollar signs
+        result = await state.DoStringAsync(@"
+            local matches = {}
+            for s in string.gmatch('$a$$b$', '%$([^%$]+)') do
+                table.insert(matches, s)
+            end
+            return table.unpack(matches)
+        ");
+        Assert.That(result.Length, Is.EqualTo(2));
+        Assert.That(result[0].Read<string>(), Is.EqualTo("a"));
+        Assert.That(result[1].Read<string>(), Is.EqualTo("b"));
+    }
 }