RuneExtensions.cs 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. #nullable enable
  2. using System.Globalization;
  3. using Wcwidth;
  4. namespace Terminal.Gui.Text;
  5. /// <summary>Extends <see cref="System.Text.Rune"/> to support TUI text manipulation.</summary>
  6. public static class RuneExtensions
  7. {
  8. /// <summary>Maximum Unicode code point.</summary>
  9. public static readonly int MaxUnicodeCodePoint = 0x10FFFF;
  10. /// <summary>Reports if the provided array of bytes can be encoded as UTF-8.</summary>
  11. /// <param name="buffer">The byte array to probe.</param>
  12. /// <value><c>true</c> if is valid; otherwise, <c>false</c>.</value>
  13. public static bool CanBeEncodedAsRune (byte [] buffer)
  14. {
  15. string str = Encoding.Unicode.GetString (buffer);
  16. foreach (Rune rune in str.EnumerateRunes ())
  17. {
  18. if (rune == Rune.ReplacementChar)
  19. {
  20. return false;
  21. }
  22. }
  23. return true;
  24. }
  25. /// <summary>Attempts to decode the rune as a surrogate pair to UTF-16.</summary>
  26. /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  27. /// <param name="rune">The rune to decode.</param>
  28. /// <param name="chars">The chars if the rune is a surrogate pair. Null otherwise.</param>
  29. /// <returns><see langword="true"/> if the rune is a valid surrogate pair; <see langword="false"/> otherwise.</returns>
  30. public static bool DecodeSurrogatePair (this Rune rune, out char []? chars)
  31. {
  32. bool isSingleUtf16CodeUnit = rune.IsBmp;
  33. if (isSingleUtf16CodeUnit)
  34. {
  35. chars = null;
  36. return false;
  37. }
  38. const int maxCharsPerRune = 2;
  39. Span<char> charBuffer = stackalloc char[maxCharsPerRune];
  40. int charsWritten = rune.EncodeToUtf16 (charBuffer);
  41. if (charsWritten >= 2 && char.IsSurrogatePair (charBuffer [0], charBuffer [1]))
  42. {
  43. chars = charBuffer [..charsWritten].ToArray ();
  44. return true;
  45. }
  46. chars = null;
  47. return false;
  48. }
  49. /// <summary>Writes into the destination buffer starting at offset the UTF8 encoded version of the rune.</summary>
  50. /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  51. /// <param name="rune">The rune to encode.</param>
  52. /// <param name="dest">The destination buffer.</param>
  53. /// <param name="start">Starting offset to look into.</param>
  54. /// <param name="count">Number of bytes valid in the buffer, or -1 to make it the length of the buffer.</param>
  55. /// <returns>he number of bytes written into the destination buffer.</returns>
  56. public static int Encode (this Rune rune, byte [] dest, int start = 0, int count = -1)
  57. {
  58. const int maxUtf8BytesPerRune = 4;
  59. Span<byte> bytes = stackalloc byte[maxUtf8BytesPerRune];
  60. int writtenBytes = rune.EncodeToUtf8 (bytes);
  61. int bytesToCopy = count == -1
  62. ? writtenBytes
  63. : Math.Min (count, writtenBytes);
  64. int bytesWritten = 0;
  65. for (int i = 0; i < bytesToCopy; i++)
  66. {
  67. if (bytes [i] == '\0')
  68. {
  69. break;
  70. }
  71. dest [start + i] = bytes [i];
  72. bytesWritten++;
  73. }
  74. return bytesWritten;
  75. }
  76. /// <summary>Attempts to encode (as UTF-16) a surrogate pair.</summary>
  77. /// <param name="highSurrogate">The high surrogate code point.</param>
  78. /// <param name="lowSurrogate">The low surrogate code point.</param>
  79. /// <param name="result">The encoded rune.</param>
  80. /// <returns><see langword="true"/> if the encoding succeeded; <see langword="false"/> otherwise.</returns>
  81. public static bool EncodeSurrogatePair (char highSurrogate, char lowSurrogate, out Rune result)
  82. {
  83. result = default (Rune);
  84. if (char.IsSurrogatePair (highSurrogate, lowSurrogate))
  85. {
  86. result = (Rune)char.ConvertToUtf32 (highSurrogate, lowSurrogate);
  87. return true;
  88. }
  89. return false;
  90. }
  91. /// <summary>Gets the number of columns the rune occupies in the terminal.</summary>
  92. /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  93. /// <param name="rune">The rune to measure.</param>
  94. /// <returns>
  95. /// The number of columns required to fit the rune, 0 if the argument is the null character, or -1 if the value is
  96. /// not printable, otherwise the number of columns that the rune occupies.
  97. /// </returns>
  98. public static int GetColumns (this Rune rune)
  99. {
  100. int value = rune.Value;
  101. // TODO: Remove this code when #4259 is fixed
  102. // TODO: See https://github.com/gui-cs/Terminal.Gui/issues/4259
  103. if (value is >= 0x2630 and <= 0x2637 || // Trigrams
  104. value is >= 0x268A and <= 0x268F || // Monograms/Digrams
  105. value is >= 0x4DC0 and <= 0x4DFF) // Hexagrams
  106. {
  107. return 2; // Assume double-width due to Windows Terminal font rendering
  108. }
  109. // Fallback to original GetWidth for other code points
  110. return UnicodeCalculator.GetWidth (rune);
  111. }
  112. /// <summary>Get number of bytes required to encode the rune, based on the provided encoding.</summary>
  113. /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  114. /// <param name="rune">The rune to probe.</param>
  115. /// <param name="encoding">The encoding used; the default is UTF8.</param>
  116. /// <returns>The number of bytes required.</returns>
  117. public static int GetEncodingLength (this Rune rune, Encoding? encoding = null)
  118. {
  119. encoding ??= Encoding.UTF8;
  120. const int maxCharsPerRune = 2;
  121. // Get characters with UTF16 to keep that part independent of selected encoding.
  122. Span<char> charBuffer = stackalloc char[maxCharsPerRune];
  123. int charsWritten = rune.EncodeToUtf16(charBuffer);
  124. Span<char> chars = charBuffer[..charsWritten];
  125. int maxEncodedLength = encoding.GetMaxByteCount (charsWritten);
  126. Span<byte> byteBuffer = stackalloc byte[maxEncodedLength];
  127. int bytesEncoded = encoding.GetBytes (chars, byteBuffer);
  128. ReadOnlySpan<byte> encodedBytes = byteBuffer[..bytesEncoded];
  129. if (encodedBytes [^1] == '\0')
  130. {
  131. return encodedBytes.Length - 1;
  132. }
  133. return encodedBytes.Length;
  134. }
  135. /// <summary>Returns <see langword="true"/> if the rune is a combining character.</summary>
  136. /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  137. /// <param name="rune"></param>
  138. /// <returns></returns>
  139. public static bool IsCombiningMark (this Rune rune)
  140. {
  141. UnicodeCategory category = Rune.GetUnicodeCategory (rune);
  142. return category == UnicodeCategory.NonSpacingMark
  143. || category == UnicodeCategory.SpacingCombiningMark
  144. || category == UnicodeCategory.EnclosingMark;
  145. }
  146. /// <summary>Reports whether a rune is a surrogate code point.</summary>
  147. /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  148. /// <param name="rune">The rune to probe.</param>
  149. /// <returns><see langword="true"/> if the rune is a surrogate code point; <see langword="false"/> otherwise.</returns>
  150. public static bool IsSurrogatePair (this Rune rune)
  151. {
  152. bool isSingleUtf16CodeUnit = rune.IsBmp;
  153. if (isSingleUtf16CodeUnit)
  154. {
  155. return false;
  156. }
  157. const int maxCharsPerRune = 2;
  158. Span<char> charBuffer = stackalloc char[maxCharsPerRune];
  159. int charsWritten = rune.EncodeToUtf16 (charBuffer);
  160. return charsWritten >= 2 && char.IsSurrogatePair (charBuffer [0], charBuffer [1]);
  161. }
  162. /// <summary>
  163. /// Ensures the rune is not a control character and can be displayed by translating characters below 0x20 to
  164. /// equivalent, printable, Unicode chars.
  165. /// </summary>
  166. /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
  167. /// <param name="rune"></param>
  168. /// <returns></returns>
  169. public static Rune MakePrintable (this Rune rune) { return Rune.IsControl (rune) ? new Rune (rune.Value + 0x2400) : rune; }
  170. }