RuneExtensions.cs 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #nullable enable
  2. using System.Globalization;
  3. using Wcwidth;
  4. namespace Terminal.Gui;
  5. /// <summary>Extends <see cref="System.Text.Rune"/> to support TUI text manipulation.</summary>
  6. public static class RuneExtensions
  7. {
  /// <summary>The highest code point in the Unicode code space (U+10FFFF).</summary>
  public static readonly int MaxUnicodeCodePoint = 0x10_FFFF;
  /// <summary>Reports whether the provided bytes are well-formed UTF-8 text.</summary>
  /// <remarks>
  ///     The bytes are decoded as UTF-8 and the result is scanned for <see cref="Rune.ReplacementChar"/>, which the
  ///     decoder substitutes for every ill-formed sequence. As a consequence, input that legitimately contains an
  ///     encoded U+FFFD is also reported as invalid.
  /// </remarks>
  /// <param name="buffer">The byte array to probe.</param>
  /// <returns>
  ///     <see langword="true"/> if no replacement character appears after decoding; otherwise,
  ///     <see langword="false"/>.
  /// </returns>
  public static bool CanBeEncodedAsRune (byte [] buffer)
  {
      // The documented contract is UTF-8. Decoding with Encoding.Unicode (UTF-16LE) rejected valid odd-length
      // UTF-8 such as a single ASCII byte and accepted ill-formed UTF-8 whenever the byte pairs happened to be
      // valid UTF-16 code units.
      string decoded = Encoding.UTF8.GetString (buffer);

      foreach (Rune rune in decoded.EnumerateRunes ())
      {
          if (rune == Rune.ReplacementChar)
          {
              return false;
          }
      }

      return true;
  }
  /// <summary>Gets the UTF-16 surrogate pair of a rune that does not fit in a single UTF-16 code unit.</summary>
  /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  /// <param name="rune">The rune to decode.</param>
  /// <param name="chars">
  ///     On success, a two-element array holding the high surrogate followed by the low surrogate; otherwise
  ///     <see langword="null"/>.
  /// </param>
  /// <returns>
  ///     <see langword="true"/> if the rune encodes to a surrogate pair; <see langword="false"/> if it lies in the
  ///     Basic Multilingual Plane.
  /// </returns>
  public static bool DecodeSurrogatePair (this Rune rune, out char []? chars)
  {
      chars = null;

      // A BMP rune is a single UTF-16 code unit, so there is no pair to hand back.
      if (rune.IsBmp)
      {
          return false;
      }

      Span<char> utf16 = stackalloc char[2];
      int written = rune.EncodeToUtf16 (utf16);

      if (written < 2 || !char.IsSurrogatePair (utf16 [0], utf16 [1]))
      {
          return false;
      }

      chars = utf16 [..written].ToArray ();

      return true;
  }
  /// <summary>
  ///     Copies the UTF-8 encoding of the rune into <paramref name="dest"/>, beginning at index
  ///     <paramref name="start"/>.
  /// </summary>
  /// <remarks>
  ///     This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     <para>Copying stops at a zero byte, so the null rune (U+0000) writes nothing and the result is 0.</para>
  /// </remarks>
  /// <param name="rune">The rune to encode.</param>
  /// <param name="dest">The destination buffer.</param>
  /// <param name="start">Index in <paramref name="dest"/> that receives the first encoded byte.</param>
  /// <param name="count">
  ///     Upper limit on the number of bytes to copy, or -1 to copy the rune's complete UTF-8 encoding.
  /// </param>
  /// <returns>The number of bytes written into the destination buffer.</returns>
  public static int Encode (this Rune rune, byte [] dest, int start = 0, int count = -1)
  {
      // Four bytes is the longest UTF-8 sequence a single rune can produce.
      Span<byte> utf8 = stackalloc byte[4];
      int available = rune.EncodeToUtf8 (utf8);

      int limit = available;

      if (count != -1)
      {
          limit = Math.Min (count, available);
      }

      int written = 0;

      while (written < limit && utf8 [written] != '\0')
      {
          dest [start + written] = utf8 [written];
          written++;
      }

      return written;
  }
  /// <summary>Attempts to combine a UTF-16 surrogate pair into a single rune.</summary>
  /// <param name="highSurrogate">The high (leading) surrogate code unit.</param>
  /// <param name="lowSurrogate">The low (trailing) surrogate code unit.</param>
  /// <param name="result">
  ///     The rune formed by the pair, or the default rune (U+0000) when the two values are not a valid pair.
  /// </param>
  /// <returns><see langword="true"/> if the two values form a valid surrogate pair; <see langword="false"/> otherwise.</returns>
  public static bool EncodeSurrogatePair (char highSurrogate, char lowSurrogate, out Rune result)
  {
      // Rune.TryCreate validates the pair and leaves result at default on failure.
      return Rune.TryCreate (highSurrogate, lowSurrogate, out result);
  }
  /// <summary>Gets the number of terminal columns the rune occupies.</summary>
  /// <remarks>
  ///     This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     The value is whatever <c>UnicodeCalculator.GetWidth</c> reports for the rune.
  /// </remarks>
  /// <param name="rune">The rune to measure.</param>
  /// <returns>
  ///     The number of columns required to fit the rune. Documented as 0 for the null character and -1 for a value
  ///     that is not printable; those values come from the Wcwidth library and are not checked here.
  /// </returns>
  public static int GetColumns (this Rune rune) => UnicodeCalculator.GetWidth (rune);
  /// <summary>Gets the number of bytes required to encode the rune in the given encoding.</summary>
  /// <remarks>
  ///     This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     <para>The null rune (U+0000) is reported as needing 0 bytes, whatever the encoding.</para>
  /// </remarks>
  /// <param name="rune">The rune to probe.</param>
  /// <param name="encoding">The encoding used; the default is UTF8.</param>
  /// <returns>The number of bytes required.</returns>
  public static int GetEncodingLength (this Rune rune, Encoding? encoding = null)
  {
      // Keep the established result for the null rune under the default UTF-8 encoding, but decide it from the
      // rune itself. Inspecting the last encoded byte also fired for ordinary characters in UTF-16/UTF-32
      // (for example 'a' is 61 00 in UTF-16LE), which made their reported length too short.
      if (rune.Value == 0)
      {
          return 0;
      }

      encoding ??= Encoding.UTF8;

      // Go through UTF-16 so the measurement is independent of the selected encoding.
      Span<char> utf16 = stackalloc char[2];
      int charsWritten = rune.EncodeToUtf16 (utf16);

      // GetByteCount gives the exact encoded size without a scratch byte buffer, and cannot fail on an
      // encoding that produces no bytes at all.
      return encoding.GetByteCount (utf16 [..charsWritten]);
  }
  /// <summary>Reports whether the rune is a combining mark.</summary>
  /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  /// <param name="rune">The rune to classify.</param>
  /// <returns>
  ///     <see langword="true"/> if the rune's Unicode category is <see cref="UnicodeCategory.NonSpacingMark"/>,
  ///     <see cref="UnicodeCategory.SpacingCombiningMark"/> or <see cref="UnicodeCategory.EnclosingMark"/>;
  ///     <see langword="false"/> otherwise.
  /// </returns>
  public static bool IsCombiningMark (this Rune rune)
  {
      return Rune.GetUnicodeCategory (rune) is UnicodeCategory.NonSpacingMark
                                            or UnicodeCategory.SpacingCombiningMark
                                            or UnicodeCategory.EnclosingMark;
  }
  /// <summary>Reports whether the rune needs a surrogate pair (two code units) when encoded as UTF-16.</summary>
  /// <remarks>
  ///     This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     <para>
  ///         A <see cref="System.Text.Rune"/> is always a Unicode scalar value, so it is never itself a surrogate code
  ///         point; this method only tells whether its UTF-16 form is a pair.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to probe.</param>
  /// <returns>
  ///     <see langword="true"/> if the rune lies outside the Basic Multilingual Plane and therefore encodes to a
  ///     surrogate pair; <see langword="false"/> otherwise.
  /// </returns>
  public static bool IsSurrogatePair (this Rune rune)
  {
      // Every rune outside the BMP encodes to exactly one high/low surrogate pair and every BMP rune to a single
      // code unit, so the plane test answers the question without encoding anything.
      return !rune.IsBmp;
  }
  /// <summary>
  ///     Ensures the rune is not a control character and can be displayed, by translating control characters to
  ///     printable Unicode chars.
  /// </summary>
  /// <remarks>
  ///     This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     <para>
  ///         Characters below 0x20 map to the matching symbol in the Control Pictures block (U+2400 onwards) and
  ///         DEL (U+007F) maps to U+2421 SYMBOL FOR DELETE. Any other control character is offset by 0x2400.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to make printable.</param>
  /// <returns>The printable substitute for a control character; otherwise <paramref name="rune"/> unchanged.</returns>
  public static Rune MakePrintable (this Rune rune)
  {
      if (!Rune.IsControl (rune))
      {
          return rune;
      }

      // DEL has its own picture at U+2421. The plain 0x2400 offset lands on U+247F, which is
      // PARENTHESIZED NUMBER TWELVE and has nothing to do with DEL.
      if (rune.Value == 0x7F)
      {
          return new Rune (0x2421);
      }

      // Characters below 0x20 line up one-to-one with the Control Pictures block. The remaining control characters
      // have no picture there; they keep the existing 0x2400 offset so the result is still a non-control rune.
      return new Rune (rune.Value + 0x2400);
  }
  157. }