Browse Source

Update: support escape sequences

AnnulusGames 1 year ago
parent
commit
c6e4fb43fe

+ 5 - 1
src/Lua/CodeAnalysis/Compilation/LuaCompiler.cs

@@ -69,7 +69,11 @@ public sealed class LuaCompiler : ISyntaxNodeVisitor<ScopeCompilationContext, bo
 
+    /// <summary>
+    /// Registers the literal's value as a constant of the current function and pushes a
+    /// LoadK instruction that references it at the current stack position. Always returns true.
+    /// </summary>
     public bool VisitStringLiteralNode(StringLiteralNode node, ScopeCompilationContext context)
     {
-        var index = context.Function.GetConstantIndex(node.Text);
+        // Short literals are passed through StringHelper.FromStringLiteral before being
+        // stored as a constant; all other literals use their text exactly as written.
+        var str = node.IsShortLiteral
+            ? StringHelper.FromStringLiteral(node.Text.Span)
+            : node.Text.ToString();
+
+        var index = context.Function.GetConstantIndex(str);
         context.PushInstruction(Instruction.LoadK(context.StackPosition, index), node.Position, true);
         return true;
     }

+ 13 - 4
src/Lua/CodeAnalysis/Syntax/DisplayStringSyntaxVisitor.cs

@@ -193,7 +193,7 @@ public sealed class DisplayStringSyntaxVisitor : ISyntaxNodeVisitor<DisplayStrin
     public bool VisitTableMethodDeclarationStatementNode(TableMethodDeclarationStatementNode node, Context context)
     {
         context.Append("function ");
-        
+
         for (int i = 0; i < node.MemberPath.Length; i++)
         {
             context.Append(node.MemberPath[i].Name.ToString());
@@ -411,9 +411,18 @@ public sealed class DisplayStringSyntaxVisitor : ISyntaxNodeVisitor<DisplayStrin
 
+    /// <summary>
+    /// Appends the literal to the output, wrapped in double quotes when it is a short
+    /// literal and in [[ ]] otherwise. Always returns true.
+    /// </summary>
     public bool VisitStringLiteralNode(StringLiteralNode node, Context context)
     {
-        context.Append("\"");
-        context.Append(node.Text);
-        context.Append("\"");
+        // The node's text is appended unchanged in both branches; nothing is decoded or re-escaped here.
+        // NOTE(review): the delimiters are fixed (" and [[ ]]), so the quote character or
+        // long-bracket level used in the original source is not reproduced - confirm this is intended.
+        if (node.IsShortLiteral)
+        {
+            context.Append("\"");
+            context.Append(node.Text.ToString());
+            context.Append("\"");
+        }
+        else
+        {
+            context.Append("[[");
+            context.Append(node.Text.ToString());
+            context.Append("]]");
+        }
         return true;
     }
 

+ 18 - 9
src/Lua/CodeAnalysis/Syntax/Lexer.cs

@@ -1,4 +1,5 @@
 using System.Runtime.CompilerServices;
+using Lua.Internal;
 
 namespace Lua.CodeAnalysis.Syntax;
 
@@ -311,20 +312,26 @@ public ref struct Lexer
             var stringStartOffset = offset;
 
             var isTerminated = false;
+            var prevC = char.MinValue;
             while (span.Length > offset)
             {
                 var c = span[offset];
-                if (c == quote)
-                {
-                    isTerminated = true;
-                    break;
-                }
 
-                if (c is '\n' or '\r')
+                if (prevC is not '\\')
                 {
-                    break;
+                    if (c == quote)
+                    {
+                        isTerminated = true;
+                        break;
+                    }
+
+                    if (c is '\n' or '\r')
+                    {
+                        break;
+                    }
                 }
 
+                prevC = c;
                 Advance(1);
             }
 
@@ -350,7 +357,7 @@ public ref struct Lexer
                     throw new LuaParseException(ChunkName, this.position, "error: Unterminated string");
                 }
 
-                current = SyntaxToken.String(Source[start..end], position);
+                current = SyntaxToken.RawString(Source[start..end], position);
                 return true;
             }
             else
@@ -450,6 +457,7 @@ public ref struct Lexer
         var startOffset = offset;
         var endOffset = 0;
         var isTerminated = false;
+        var prevC = char.MinValue;
 
         while (span.Length > offset + level + 1)
         {
@@ -472,7 +480,7 @@ public ref struct Lexer
                 }
             }
 
-            if (current is ']')
+            if (current is ']' && prevC is not '\\')
             {
                 endOffset = offset;
 
@@ -489,6 +497,7 @@ public ref struct Lexer
             }
 
         CONTINUE:
+            prevC = current;
             Advance(1);
         }
 

+ 1 - 1
src/Lua/CodeAnalysis/Syntax/Nodes/StringLiteralNode.cs

@@ -1,6 +1,6 @@
 namespace Lua.CodeAnalysis.Syntax.Nodes;
 
-public record StringLiteralNode(string Text, SourcePosition Position) : ExpressionNode(Position)
+public record StringLiteralNode(ReadOnlyMemory<char> Text, bool IsShortLiteral, SourcePosition Position) : ExpressionNode(Position)
 {
     public override TResult Accept<TContext, TResult>(ISyntaxNodeVisitor<TContext, TResult> visitor, TContext context)
     {

+ 9 - 4
src/Lua/CodeAnalysis/Syntax/Parser.cs

@@ -556,12 +556,13 @@ public ref struct Parser
         {
             SyntaxTokenType.Identifier => enumerator.GetNext(true).Type switch
             {
-                SyntaxTokenType.LParen or SyntaxTokenType.String => ParseCallFunctionExpression(ref enumerator, null),
+                SyntaxTokenType.LParen or SyntaxTokenType.String or SyntaxTokenType.RawString => ParseCallFunctionExpression(ref enumerator, null),
                 SyntaxTokenType.LSquare or SyntaxTokenType.Dot or SyntaxTokenType.Colon => ParseTableAccessExpression(ref enumerator, null),
                 _ => new IdentifierNode(enumerator.Current.Text, enumerator.Current.Position),
             },
             SyntaxTokenType.Number => new NumericLiteralNode(ConvertTextToNumber(enumerator.Current.Text.Span), enumerator.Current.Position),
-            SyntaxTokenType.String => new StringLiteralNode(enumerator.Current.Text.ToString(), enumerator.Current.Position),
+            SyntaxTokenType.String => new StringLiteralNode(enumerator.Current.Text, true, enumerator.Current.Position),
+            SyntaxTokenType.RawString => new StringLiteralNode(enumerator.Current.Text, false, enumerator.Current.Position),
             SyntaxTokenType.True => new BooleanLiteralNode(true, enumerator.Current.Position),
             SyntaxTokenType.False => new BooleanLiteralNode(false, enumerator.Current.Position),
             SyntaxTokenType.Nil => new NilLiteralNode(enumerator.Current.Position),
@@ -587,7 +588,7 @@ public ref struct Parser
             result = ParseTableAccessExpression(ref enumerator, result);
             goto RECURSIVE;
         }
-        else if (nextType is SyntaxTokenType.LParen or SyntaxTokenType.String or SyntaxTokenType.LCurly)
+        else if (nextType is SyntaxTokenType.LParen or SyntaxTokenType.String or SyntaxTokenType.RawString or SyntaxTokenType.LCurly)
         {
             MoveNextWithValidation(ref enumerator);
             result = ParseCallFunctionExpression(ref enumerator, result);
@@ -859,7 +860,11 @@ public ref struct Parser
     {
         if (enumerator.Current.Type is SyntaxTokenType.String)
         {
-            return [new StringLiteralNode(enumerator.Current.Text.ToString(), enumerator.Current.Position)];
+            return [new StringLiteralNode(enumerator.Current.Text, true, enumerator.Current.Position)];
+        }
+        else if (enumerator.Current.Type is SyntaxTokenType.RawString)
+        {
+            return [new StringLiteralNode(enumerator.Current.Text, false, enumerator.Current.Position)];
         }
         else if (enumerator.Current.Type is SyntaxTokenType.LCurly)
         {

+ 10 - 4
src/Lua/CodeAnalysis/Syntax/SyntaxToken.cs

@@ -91,14 +91,14 @@ public readonly struct SyntaxToken(SyntaxTokenType type, ReadOnlyMemory<char> te
         return new(SyntaxTokenType.Identifier, text, position);
     }
 
+    /// <summary>
+    /// Creates a token of type <see cref="SyntaxTokenType.String"/> carrying the given text and position.
+    /// </summary>
-    public static SyntaxToken String(string text, SourcePosition position)
+    public static SyntaxToken String(ReadOnlyMemory<char> text, SourcePosition position)
     {
-        return new(SyntaxTokenType.String, text.AsMemory(), position);
+        return new(SyntaxTokenType.String, text, position);
     }
 
-    public static SyntaxToken String(ReadOnlyMemory<char> text, SourcePosition position)
+    /// <summary>
+    /// Creates a token of type <see cref="SyntaxTokenType.RawString"/> carrying the given text and position.
+    /// </summary>
+    public static SyntaxToken RawString(ReadOnlyMemory<char> text, SourcePosition position)
     {
-        return new(SyntaxTokenType.String, text, position);
+        return new(SyntaxTokenType.RawString, text, position);
     }
 
     public static SyntaxToken Label(ReadOnlyMemory<char> text, SourcePosition position)
@@ -126,6 +126,7 @@ public readonly struct SyntaxToken(SyntaxTokenType type, ReadOnlyMemory<char> te
             SyntaxTokenType.Comma => ",",
             SyntaxTokenType.Number => Text.ToString(),
             SyntaxTokenType.String => $"\"{Text}\"",
+            SyntaxTokenType.RawString => $"[[{Text}]]",
             SyntaxTokenType.Nil => Keywords.Nil,
             SyntaxTokenType.True => Keywords.True,
             SyntaxTokenType.False => Keywords.False,
@@ -266,6 +267,11 @@ public enum SyntaxTokenType
     /// </summary>
     String,
 
+    /// <summary>
+    /// Raw string literal (e.g. [[Hello, World!]])
+    /// </summary>
+    RawString,
+
     /// <summary>
     /// Nil literal (nil)
     /// </summary>

+ 79 - 0
src/Lua/Internal/StringHelper.cs

@@ -0,0 +1,79 @@
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Lua.Internal;
+
+/// <summary>
+/// String helpers: Lua-style substring selection and decoding of quoted string literals.
+/// </summary>
+internal static class StringHelper
+{
+    /// <summary>
+    /// Returns the part of <paramref name="s"/> selected by the 1-based, inclusive indices
+    /// <paramref name="i"/> and <paramref name="j"/>.
+    /// </summary>
+    /// <param name="s">The string to slice.</param>
+    /// <param name="i">Start index; a negative value counts back from the end (-1 is the last character).</param>
+    /// <param name="j">End index; a negative value counts back from the end (-1 is the last character).</param>
+    /// <returns>The selected characters, or an empty span when the start lies after the end.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static ReadOnlySpan<char> Slice(string s, int i, int j)
+    {
+        // Translate negative (from-the-end) indices into 1-based positions.
+        if (i < 0) i = s.Length + i + 1;
+        if (j < 0) j = s.Length + j + 1;
+
+        // Clamp the range to the bounds of the string.
+        if (i < 1) i = 1;
+        if (j > s.Length) j = s.Length;
+
+        return i > j ? "" : s.AsSpan()[(i - 1)..j];
+    }
+
+    /// <summary>
+    /// Decodes the body of a quoted string literal (the text between the quotes) by
+    /// replacing backslash escape sequences with the characters they denote.
+    /// </summary>
+    /// <remarks>
+    /// Handled escapes: <c>\a \b \f \n \r \t \v \\ \" \'</c>, a backslash followed by a line
+    /// break, <c>\z</c> (skips the following whitespace), <c>\ddd</c> (up to three decimal
+    /// digits) and <c>\xXX</c> (exactly two hexadecimal digits). Any other escaped character is
+    /// kept as that character, and a lone trailing backslash is kept as a backslash, so no
+    /// input character is ever silently discarded.
+    /// </remarks>
+    /// <param name="literal">The literal's text, still containing its escape sequences.</param>
+    /// <returns>The decoded string.</returns>
+    public static string FromStringLiteral(ReadOnlySpan<char> literal)
+    {
+        var builder = new ValueStringBuilder(literal.Length);
+        for (int i = 0; i < literal.Length; i++)
+        {
+            var c = literal[i];
+            if (c is not '\\' || i == literal.Length - 1)
+            {
+                // Ordinary character, or a trailing backslash with nothing left to escape.
+                builder.Append(c);
+                continue;
+            }
+
+            i++;
+            c = literal[i];
+
+            switch (c)
+            {
+                case 'a':
+                    builder.Append('\a');
+                    break;
+                case 'b':
+                    builder.Append('\b');
+                    break;
+                case 'f':
+                    builder.Append('\f');
+                    break;
+                case 'n':
+                    builder.Append('\n');
+                    break;
+                case 'r':
+                    builder.Append('\r');
+                    break;
+                case 't':
+                    builder.Append('\t');
+                    break;
+                case 'v':
+                    builder.Append('\v');
+                    break;
+                case '\n':
+                case '\r':
+                {
+                    // A backslash followed by a line break embeds a newline in the string.
+                    // "\r\n" and "\n\r" count as a single line break.
+                    builder.Append('\n');
+                    if (i + 1 < literal.Length)
+                    {
+                        var next = literal[i + 1];
+                        if ((next is '\n' or '\r') && next != c)
+                        {
+                            i++;
+                        }
+                    }
+                    break;
+                }
+                case 'z':
+                    // \z produces nothing and swallows the whitespace that follows it.
+                    while (i + 1 < literal.Length && IsWhiteSpace(literal[i + 1]))
+                    {
+                        i++;
+                    }
+                    break;
+                case 'x':
+                {
+                    // \xXX needs exactly two hexadecimal digits; otherwise keep the 'x' itself.
+                    var high = i + 2 < literal.Length ? HexValue(literal[i + 1]) : -1;
+                    var low = high >= 0 ? HexValue(literal[i + 2]) : -1;
+                    if (low >= 0)
+                    {
+                        builder.Append((char)(high * 16 + low));
+                        i += 2;
+                    }
+                    else
+                    {
+                        builder.Append(c);
+                    }
+                    break;
+                }
+                case >= '0' and <= '9':
+                {
+                    // \ddd: one to three decimal digits giving the character code.
+                    // NOTE(review): reference Lua rejects codes above 255; here the value is used as-is.
+                    var value = c - '0';
+                    for (var digits = 1; digits < 3 && i + 1 < literal.Length && literal[i + 1] is >= '0' and <= '9'; digits++)
+                    {
+                        i++;
+                        value = value * 10 + (literal[i] - '0');
+                    }
+                    builder.Append((char)value);
+                    break;
+                }
+                default:
+                    // '\\', '"', '\'', '[' and ']' stand for themselves. Any other character is
+                    // kept as well rather than dropped.
+                    // NOTE(review): reference Lua reports "invalid escape sequence" for unknown
+                    // escapes; no source position is available here to raise a parse error.
+                    builder.Append(c);
+                    break;
+            }
+        }
+
+        return builder.ToString();
+    }
+
+    /// <summary>Returns the value of a hexadecimal digit, or -1 when <paramref name="c"/> is not one.</summary>
+    private static int HexValue(char c)
+    {
+        return c switch
+        {
+            >= '0' and <= '9' => c - '0',
+            >= 'a' and <= 'f' => c - 'a' + 10,
+            >= 'A' and <= 'F' => c - 'A' + 10,
+            _ => -1,
+        };
+    }
+
+    /// <summary>Returns true for the whitespace characters skipped by the <c>\z</c> escape.</summary>
+    private static bool IsWhiteSpace(char c)
+    {
+        return c is ' ' or '\t' or '\n' or '\v' or '\f' or '\r';
+    }
+}

+ 2 - 0
src/Lua/Standard/Text/ByteFunction.cs

@@ -1,4 +1,6 @@
 
+using Lua.Internal;
+
 namespace Lua.Standard.Text;
 
 public sealed class ByteFunction : LuaFunction

+ 0 - 24
src/Lua/Standard/Text/StringHelper.cs

@@ -1,24 +0,0 @@
-using System.Runtime.CompilerServices;
-
-namespace Lua.Standard.Text;
-
-internal static class StringHelper
-{
-    public static int UnicodeToAscii(int i)
-    {
-        if (i >= 0 && i <= 255) return i;
-        throw new ArgumentOutOfRangeException(nameof(i));
-    }
-
-    [MethodImpl(MethodImplOptions.AggressiveInlining)]
-    public static ReadOnlySpan<char> Slice(string s, int i, int j)
-    {
-        if (i < 0) i = s.Length + i + 1;
-        if (j < 0) j = s.Length + j + 1;
-
-        if (i < 1) i = 1;
-        if (j > s.Length) j = s.Length;
-
-        return i > j ? "" : s.AsSpan()[(i - 1)..j];
-    }
-}

+ 1 - 1
src/Lua/Standard/Text/SubFunction.cs

@@ -1,5 +1,5 @@
 
-using System.Text;
+using Lua.Internal;
 
 namespace Lua.Standard.Text;