Browse Source

Add: implement BOM utility for encoding detection in Lua chunk loading

Akeit0 7 months ago
parent
commit
022610245a
2 changed files with 52 additions and 6 deletions
  1. 48 0
      src/Lua/Internal/BomUtility.cs
  2. 4 6
      src/Lua/LuaState.cs

+ 48 - 0
src/Lua/Internal/BomUtility.cs

@@ -0,0 +1,48 @@
+using System.Text;
+
+namespace Lua.Internal;
+
+/// <summary>
+/// Detects a Unicode byte-order mark (BOM) at the start of a byte sequence and
+/// maps it to the <see cref="Encoding"/> that should be used to decode the rest.
+/// </summary>
+internal static class BomUtility
+{
+    static ReadOnlySpan<byte> BomUtf8 => [0xEF, 0xBB, 0xBF];
+    static ReadOnlySpan<byte> BomUtf16Little => [0xFF, 0xFE];
+    static ReadOnlySpan<byte> BomUtf16Big => [0xFE, 0xFF];
+    static ReadOnlySpan<byte> BomUtf32Little => [0xFF, 0xFE, 0x00, 0x00];
+    static ReadOnlySpan<byte> BomUtf32Big => [0x00, 0x00, 0xFE, 0xFF];
+
+    // Encoding.UTF32 decodes little-endian data, so big-endian input needs its own instance.
+    static readonly Encoding Utf32BigEndian = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
+
+    /// <summary>
+    /// Strips a leading BOM from <paramref name="text"/> and reports the encoding it denotes.
+    /// </summary>
+    /// <param name="text">Raw bytes to inspect; may be empty and may or may not start with a BOM.</param>
+    /// <param name="encoding">
+    /// The encoding selected by the BOM, or <see cref="Encoding.UTF8"/> when no BOM is present.
+    /// </param>
+    /// <returns>
+    /// <paramref name="text"/> with the BOM bytes removed, or <paramref name="text"/> unchanged
+    /// when it does not start with a recognized BOM.
+    /// </returns>
+    public static ReadOnlySpan<byte> GetEncodingFromBytes(ReadOnlySpan<byte> text, out Encoding encoding)
+    {
+        if (text.StartsWith(BomUtf8))
+        {
+            encoding = Encoding.UTF8;
+            return text.Slice(BomUtf8.Length);
+        }
+
+        // The UTF-32 LE BOM (FF FE 00 00) begins with the UTF-16 LE BOM (FF FE), so the
+        // longer pattern has to be tested first or it can never match. The trade-off is
+        // that UTF-16 LE text whose first character is U+0000 is read as UTF-32 LE.
+        if (text.StartsWith(BomUtf32Little))
+        {
+            encoding = Encoding.UTF32;
+            return text.Slice(BomUtf32Little.Length);
+        }
+
+        if (text.StartsWith(BomUtf32Big))
+        {
+            encoding = Utf32BigEndian;
+            return text.Slice(BomUtf32Big.Length);
+        }
+
+        if (text.StartsWith(BomUtf16Little))
+        {
+            encoding = Encoding.Unicode;
+            return text.Slice(BomUtf16Little.Length);
+        }
+
+        if (text.StartsWith(BomUtf16Big))
+        {
+            encoding = Encoding.BigEndianUnicode;
+            return text.Slice(BomUtf16Big.Length);
+        }
+
+        // No recognized BOM: fall back to UTF-8 and leave the bytes untouched.
+        encoding = Encoding.UTF8;
+        return text;
+    }
+}

+ 4 - 6
src/Lua/LuaState.cs

@@ -159,18 +159,16 @@ public sealed class LuaState
             {
             {
                 return new LuaClosure(MainThread, Parser.UnDump(chunk, chunkName), environment);
                 return new LuaClosure(MainThread, Parser.UnDump(chunk, chunkName), environment);
             }
             }
-            if(chunk[0] == 0xef && chunk[1] == 0xbb && chunk[2] == 0xbf)
-            {
-                chunk= chunk[3..];
-            }
         }
         }
 
 
-        var charCount = Encoding.UTF8.GetCharCount(chunk);
+        chunk = BomUtility.GetEncodingFromBytes(chunk, out var encoding);
+
+        var charCount = encoding.GetCharCount(chunk);
         var pooled = ArrayPool<char>.Shared.Rent(charCount);
         var pooled = ArrayPool<char>.Shared.Rent(charCount);
         try
         try
         {
         {
             var chars = pooled.AsSpan(0, charCount);
             var chars = pooled.AsSpan(0, charCount);
-            Encoding.UTF8.GetChars(chunk, chars);
+            encoding.GetChars(chunk, chars);
             return Load(chars, chunkName, environment);
             return Load(chars, chunkName, environment);
         }
         }
         finally
         finally