RuneExtensions.cs 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. 
  2. using System.Globalization;
  3. using Wcwidth;
  4. namespace Terminal.Gui.Text;
  5. /// <summary>Extends <see cref="System.Text.Rune"/> to support TUI text manipulation.</summary>
  6. public static class RuneExtensions
  7. {
  /// <summary>The highest code point defined by Unicode (<c>U+10FFFF</c>).</summary>
  public static readonly int MaxUnicodeCodePoint = 0x10_FFFF;
  /// <summary>
  ///     Reports whether the provided bytes, decoded as UTF-16 (little-endian) text, contain no Unicode replacement
  ///     character.
  /// </summary>
  /// <remarks>
  ///     <para>
  ///         The buffer is decoded with <see cref="Encoding.Unicode"/> and every resulting rune is compared against
  ///         <see cref="Rune.ReplacementChar"/>. A buffer that legitimately contains U+FFFD is therefore also reported
  ///         as <see langword="false"/>.
  ///     </para>
  ///     <para>
  ///         NOTE(review): the decoder used here is UTF-16, not UTF-8; confirm that this is the encoding callers
  ///         actually hand in before changing it.
  ///     </para>
  /// </remarks>
  /// <param name="buffer">The byte array to probe.</param>
  /// <returns>
  ///     <see langword="true"/> if no decoded rune equals <see cref="Rune.ReplacementChar"/> (an empty buffer
  ///     qualifies); otherwise, <see langword="false"/>.
  /// </returns>
  public static bool CanBeEncodedAsRune (byte [] buffer)
  {
      StringRuneEnumerator decoded = Encoding.Unicode.GetString (buffer).EnumerateRunes ();

      while (decoded.MoveNext ())
      {
          if (decoded.Current == Rune.ReplacementChar)
          {
              return false;
          }
      }

      return true;
  }
  /// <summary>Gets the UTF-16 surrogate pair that encodes the rune, if the rune needs one.</summary>
  /// <remarks>
  ///     <para>
  ///         A rune inside the Basic Multilingual Plane is a single UTF-16 code unit and has no surrogate pair; every
  ///         other rune encodes to exactly one high/low surrogate pair, so no further validation is required.
  ///     </para>
  ///     <para>
  ///         This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to decode.</param>
  /// <param name="chars">
  ///     When the method returns <see langword="true"/>, a two-element array holding the high surrogate followed by the
  ///     low surrogate; otherwise <see langword="null"/>.
  /// </param>
  /// <returns>
  ///     <see langword="true"/> if the rune is encoded as a surrogate pair in UTF-16; <see langword="false"/> otherwise.
  /// </returns>
  public static bool DecodeSurrogatePair (
      this Rune rune,
      [System.Diagnostics.CodeAnalysis.NotNullWhen (true)] out char []? chars
  )
  {
      if (rune.IsBmp)
      {
          chars = null;

          return false;
      }

      const int CHARS_PER_SURROGATE_PAIR = 2;

      // Encode straight into the result array; a supplementary-plane rune always fills both slots.
      chars = new char [CHARS_PER_SURROGATE_PAIR];
      rune.EncodeToUtf16 (chars);

      return true;
  }
  /// <summary>Writes the UTF-8 encoding of the rune into the destination buffer, starting at the given offset.</summary>
  /// <remarks>
  ///     <para>
  ///         At most <paramref name="count"/> bytes are written, and never more than fit between
  ///         <paramref name="start"/> and the end of <paramref name="dest"/>. If the limit is smaller than the encoded
  ///         length, only the leading bytes of the sequence are written.
  ///     </para>
  ///     <para>The null rune (U+0000) is never written; the method returns 0 for it.</para>
  ///     <para>
  ///         This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to encode.</param>
  /// <param name="dest">The destination buffer.</param>
  /// <param name="start">Offset in <paramref name="dest"/> at which writing starts.</param>
  /// <param name="count">
  ///     Maximum number of bytes to write, or -1 to allow as many as fit in the buffer from <paramref name="start"/>.
  /// </param>
  /// <returns>The number of bytes written into the destination buffer.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="dest"/> is <see langword="null"/>.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  ///     <paramref name="start"/> is negative or greater than the length of <paramref name="dest"/>.
  /// </exception>
  public static int Encode (this Rune rune, byte [] dest, int start = 0, int count = -1)
  {
      if (dest is null)
      {
          throw new ArgumentNullException (nameof (dest));
      }

      if (start < 0 || start > dest.Length)
      {
          throw new ArgumentOutOfRangeException (nameof (start), start, "The offset must lie within the destination buffer.");
      }

      // U+0000 is the only rune whose UTF-8 form contains a zero byte; it is skipped rather than written.
      if (rune.Value == 0)
      {
          return 0;
      }

      const int MAX_UTF8_BYTES_PER_RUNE = 4;
      Span<byte> utf8 = stackalloc byte [MAX_UTF8_BYTES_PER_RUNE];
      int encodedLength = rune.EncodeToUtf8 (utf8);

      // -1 means "whatever fits"; an explicit count is still capped by the room left in the buffer.
      int capacity = dest.Length - start;
      int limit = count == -1 ? capacity : Math.Min (count, capacity);
      int bytesToCopy = Math.Min (limit, encodedLength);

      if (bytesToCopy <= 0)
      {
          return 0;
      }

      utf8 [..bytesToCopy].CopyTo (dest.AsSpan (start, bytesToCopy));

      return bytesToCopy;
  }
  /// <summary>Attempts to combine a UTF-16 surrogate pair into the rune it represents.</summary>
  /// <param name="highSurrogate">The high (leading) surrogate code unit.</param>
  /// <param name="lowSurrogate">The low (trailing) surrogate code unit.</param>
  /// <param name="result">
  ///     The rune encoded by the pair, or the default <see cref="Rune"/> if the two code units do not form a valid
  ///     surrogate pair.
  /// </param>
  /// <returns><see langword="true"/> if the pair was valid and combined; <see langword="false"/> otherwise.</returns>
  public static bool EncodeSurrogatePair (char highSurrogate, char lowSurrogate, out Rune result)
  {
      // Rune.TryCreate validates the pair and yields default(Rune) on failure.
      return Rune.TryCreate (highSurrogate, lowSurrogate, out result);
  }
  /// <summary>Gets the number of terminal columns the rune occupies.</summary>
  /// <remarks>
  ///     <para>
  ///         The width is delegated to <c>UnicodeCalculator.GetWidth</c>, a port of
  ///         <a href="https://man7.org/linux/man-pages/man3/wcwidth.3.html">wcwidth</a>.
  ///     </para>
  ///     <para>
  ///         This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to measure.</param>
  /// <returns>
  ///     The value reported by <c>UnicodeCalculator.GetWidth</c>. Following the wcwidth convention this is expected to
  ///     be 0 for the null character, -1 for a non-printable rune, and otherwise the number of columns required to
  ///     display the rune.
  /// </returns>
  public static int GetColumns (this Rune rune) => UnicodeCalculator.GetWidth (rune);
  /// <summary>Gets the number of bytes the rune occupies in the given encoding.</summary>
  /// <remarks>
  ///     <para>
  ///         A single trailing zero byte in the encoded output is not counted. As a result the null rune reports 0 in
  ///         UTF-8, and any encoding whose last byte is legitimately zero (for example UTF-16 LE for an ASCII
  ///         character) reports one byte less than the encoded size.
  ///         NOTE(review): confirm whether callers rely on this before changing it.
  ///     </para>
  ///     <para>If the encoding produces no bytes for the rune at all, 0 is returned.</para>
  ///     <para>
  ///         This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to probe.</param>
  /// <param name="encoding">The encoding used; the default is UTF8.</param>
  /// <returns>The number of bytes required, excluding a trailing zero byte.</returns>
  public static int GetEncodingLength (this Rune rune, Encoding? encoding = null)
  {
      encoding ??= Encoding.UTF8;

      const int MAX_CHARS_PER_RUNE = 2;
      const int MAX_STACK_BYTES = 64;

      // Go through UTF-16 first so that this step is independent of the selected encoding.
      Span<char> charBuffer = stackalloc char [MAX_CHARS_PER_RUNE];
      ReadOnlySpan<char> chars = charBuffer [..rune.EncodeToUtf16 (charBuffer)];

      // The worst case reported by an encoding is unbounded (e.g. a long replacement fallback),
      // so only small buffers are taken from the stack.
      int maxEncodedLength = encoding.GetMaxByteCount (chars.Length);

      Span<byte> byteBuffer = maxEncodedLength <= MAX_STACK_BYTES
                                  ? stackalloc byte [MAX_STACK_BYTES]
                                  : new byte [maxEncodedLength];
      int bytesEncoded = encoding.GetBytes (chars, byteBuffer);

      // An encoding may emit nothing (e.g. an empty replacement fallback); there is no last byte to inspect then.
      if (bytesEncoded == 0)
      {
          return 0;
      }

      return byteBuffer [bytesEncoded - 1] == 0 ? bytesEncoded - 1 : bytesEncoded;
  }
  /// <summary>Reports whether the rune is a combining mark.</summary>
  /// <remarks>
  ///     <para>
  ///         A rune counts as a combining mark when its Unicode general category is
  ///         <see cref="UnicodeCategory.NonSpacingMark"/>, <see cref="UnicodeCategory.SpacingCombiningMark"/> or
  ///         <see cref="UnicodeCategory.EnclosingMark"/>.
  ///     </para>
  ///     <para>
  ///         This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to classify.</param>
  /// <returns><see langword="true"/> if the rune is a combining mark; <see langword="false"/> otherwise.</returns>
  public static bool IsCombiningMark (this Rune rune)
  {
      return Rune.GetUnicodeCategory (rune) is UnicodeCategory.NonSpacingMark
                                            or UnicodeCategory.SpacingCombiningMark
                                            or UnicodeCategory.EnclosingMark;
  }
  /// <summary>Reports whether the rune is encoded as a surrogate pair in UTF-16.</summary>
  /// <remarks>
  ///     <para>
  ///         A rune is never itself a surrogate code point. It needs a surrogate pair exactly when it lies outside the
  ///         Basic Multilingual Plane, so no encoding round-trip is required to answer the question.
  ///     </para>
  ///     <para>
  ///         This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to probe.</param>
  /// <returns>
  ///     <see langword="true"/> if the rune takes two UTF-16 code units (a surrogate pair); <see langword="false"/> if
  ///     it fits in one.
  /// </returns>
  public static bool IsSurrogatePair (this Rune rune) { return !rune.IsBmp; }
  /// <summary>
  ///     Ensures the rune is not a control character and can be displayed, by translating control characters to
  ///     printable Unicode stand-ins.
  /// </summary>
  /// <remarks>
  ///     <para>
  ///         C0 controls (U+0000 to U+001F) map to the matching Control Pictures symbol (U+2400 to U+241F), and DEL
  ///         (U+007F) maps to SYMBOL FOR DELETE (U+2421).
  ///     </para>
  ///     <para>
  ///         C1 controls (U+0080 to U+009F) have no dedicated control picture; they are offset by 0x2400 so that the
  ///         result is still a printable, non-control rune.
  ///     </para>
  ///     <para>
  ///         This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  ///     </para>
  /// </remarks>
  /// <param name="rune">The rune to make printable.</param>
  /// <returns>The printable stand-in for a control character; otherwise the rune itself, unchanged.</returns>
  public static Rune MakePrintable (this Rune rune)
  {
      const int CONTROL_PICTURES_OFFSET = 0x2400;
      const int DELETE = 0x7F;
      const int SYMBOL_FOR_DELETE = 0x2421;

      if (!Rune.IsControl (rune))
      {
          return rune;
      }

      // DEL is not contiguous with the C0 range; adding the plain offset would yield U+247F,
      // which is PARENTHESIZED NUMBER TWELVE rather than a control picture.
      if (rune.Value == DELETE)
      {
          return new (SYMBOL_FOR_DELETE);
      }

      return new (rune.Value + CONTROL_PICTURES_OFFSET);
  }
  164. }