BomUtility.cs 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. using System.Text;
  2. namespace Lua.CodeAnalysis.Compilation;
  /// <summary>
  /// Helpers for detecting and stripping a Unicode byte order mark (BOM) from raw bytes.
  /// </summary>
  static class BomUtility
  {
      /// <summary>UTF-8 BOM: EF BB BF.</summary>
      public static ReadOnlySpan<byte> BomUtf8 => [0xEF, 0xBB, 0xBF];

      /// <summary>UTF-16 little-endian BOM: FF FE.</summary>
      private static ReadOnlySpan<byte> BomUtf16Little => [0xFF, 0xFE];

      /// <summary>UTF-16 big-endian BOM: FE FF.</summary>
      private static ReadOnlySpan<byte> BomUtf16Big => [0xFE, 0xFF];

      /// <summary>UTF-32 little-endian BOM: FF FE 00 00. Begins with the UTF-16 little-endian BOM.</summary>
      private static ReadOnlySpan<byte> BomUtf32Little => [0xFF, 0xFE, 0x00, 0x00];

      /// <summary>
      /// Removes the BOM from the beginning of the text and returns the encoding.
      /// Supported encodings are UTF-8, UTF-16 (little and big endian), and UTF-32 (little endian).
      /// Unknown BOMs are ignored, and the encoding is set to UTF-8 by default.
      /// </summary>
      /// <remarks>
      /// Input starting with FF FE 00 00 is treated as UTF-32 little endian rather than as
      /// UTF-16 little endian followed by a U+0000 character.
      /// </remarks>
      /// <param name="text">The text to check for BOM.</param>
      /// <param name="encoding">The encoding of the text.</param>
      /// <returns>The text without the BOM.</returns>
      public static ReadOnlySpan<byte> GetEncodingFromBytes(ReadOnlySpan<byte> text, out Encoding encoding)
      {
          if (text.StartsWith(BomUtf8))
          {
              encoding = Encoding.UTF8;
              return text[BomUtf8.Length..];
          }

          // The UTF-32 LE BOM must be tested before the UTF-16 LE BOM: the latter is a
          // prefix of the former, so checking it first would make this branch unreachable.
          if (text.StartsWith(BomUtf32Little))
          {
              encoding = Encoding.UTF32;
              return text[BomUtf32Little.Length..];
          }

          if (text.StartsWith(BomUtf16Little))
          {
              encoding = Encoding.Unicode;
              return text[BomUtf16Little.Length..];
          }

          if (text.StartsWith(BomUtf16Big))
          {
              encoding = Encoding.BigEndianUnicode;
              return text[BomUtf16Big.Length..];
          }

          // No recognized BOM: leave the bytes untouched and assume UTF-8.
          encoding = Encoding.UTF8;
          return text;
      }
  }