RuneExtensions.cs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. using System.Globalization;
  2. using System.Text;
  3. namespace Terminal.Gui;
  4. /// <summary>
  5. /// Extends <see cref="System.Text.Rune"/> to support TUI text manipulation.
  6. /// </summary>
  7. public static class RuneExtensions {
  8. /// <summary>
  9. /// Maximum Unicode code point.
  10. /// </summary>
  11. public static int MaxUnicodeCodePoint = 0x10FFFF;
  12. /// <summary>
  13. /// Gets the number of columns the rune occupies in the terminal.
  14. /// </summary>
  15. /// <remarks>
  16. /// This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  17. /// </remarks>
  18. /// <param name="rune">The rune to measure.</param>
  19. /// <returns>
  20. /// The number of columns required to fit the rune, 0 if the argument is the null character, or
  21. /// -1 if the value is not printable,
  22. /// otherwise the number of columns that the rune occupies.
  23. /// </returns>
  24. public static int GetColumns (this Rune rune)
  25. {
  26. // TODO: I believe there is a way to do this without using our own tables, using Rune.
  27. var codePoint = rune.Value;
  28. switch (codePoint) {
  29. case < 0x20:
  30. case >= 0x7f and < 0xa0:
  31. return -1;
  32. case < 0x7f:
  33. return 1;
  34. }
  35. /* binary search in table of non-spacing characters */
  36. if (BiSearch (codePoint, _combining, _combining.GetLength (0) - 1) != 0) {
  37. return 0;
  38. }
  39. /* if we arrive here, ucs is not a combining or C0/C1 control character */
  40. return 1 + (BiSearch (codePoint, _combiningWideChars, _combiningWideChars.GetLength (0) - 1) != 0 ? 1 : 0);
  41. }
  42. /// <summary>
  43. /// Returns <see langword="true"/> if the rune is a combining character.
  44. /// </summary>
  45. /// <remarks>
  46. /// This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  47. /// </remarks>
  48. /// <param name="rune"></param>
  49. /// <returns></returns>
  50. public static bool IsCombiningMark (this System.Text.Rune rune)
  51. {
  52. UnicodeCategory category = Rune.GetUnicodeCategory (rune);
  53. return Rune.GetUnicodeCategory (rune) == UnicodeCategory.NonSpacingMark
  54. || category == UnicodeCategory.SpacingCombiningMark
  55. || category == UnicodeCategory.EnclosingMark;
  56. }
  57. /// <summary>
  58. /// Ensures the rune is not a control character and can be displayed by translating characters below 0x20
  59. /// to equivalent, printable, Unicode chars.
  60. /// </summary>
  61. /// <remarks>
  62. /// This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  63. /// </remarks>
  64. /// <param name="rune"></param>
  65. /// <returns></returns>
  66. public static Rune MakePrintable (this System.Text.Rune rune) => Rune.IsControl (rune) ? new Rune (rune.Value + 0x2400) : rune;
  67. /// <summary>
  68. /// Get number of bytes required to encode the rune, based on the provided encoding.
  69. /// </summary>
  70. /// <remarks>
  71. /// This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  72. /// </remarks>
  73. /// <param name="rune">The rune to probe.</param>
  74. /// <param name="encoding">The encoding used; the default is UTF8.</param>
  75. /// <returns>The number of bytes required.</returns>
  76. public static int GetEncodingLength (this Rune rune, Encoding encoding = null)
  77. {
  78. encoding ??= Encoding.UTF8;
  79. var bytes = encoding.GetBytes (rune.ToString ().ToCharArray ());
  80. var offset = 0;
  81. if (bytes [^1] == 0) {
  82. offset++;
  83. }
  84. return bytes.Length - offset;
  85. }
  86. /// <summary>
  87. /// Writes into the destination buffer starting at offset the UTF8 encoded version of the rune.
  88. /// </summary>
  89. /// <remarks>
  90. /// This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  91. /// </remarks>
  92. /// <param name="rune">The rune to encode.</param>
  93. /// <param name="dest">The destination buffer.</param>
  94. /// <param name="start">Starting offset to look into.</param>
  95. /// <param name="count">Number of bytes valid in the buffer, or -1 to make it the length of the buffer.</param>
  96. /// <returns>he number of bytes written into the destination buffer.</returns>
  97. public static int Encode (this Rune rune, byte [] dest, int start = 0, int count = -1)
  98. {
  99. var bytes = Encoding.UTF8.GetBytes (rune.ToString ());
  100. var length = 0;
  101. for (var i = 0; i < (count == -1 ? bytes.Length : count); i++) {
  102. if (bytes [i] == 0) {
  103. break;
  104. }
  105. dest [start + i] = bytes [i];
  106. length++;
  107. }
  108. return length;
  109. }
  110. /// <summary>
  111. /// Attempts to decode the rune as a surrogate pair to UTF-16.
  112. /// </summary>
  113. /// <remarks>
  114. /// This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  115. /// </remarks>
  116. /// <param name="rune">The rune to decode.</param>
  117. /// <param name="chars">The chars if the rune is a surrogate pair. Null otherwise.</param>
  118. /// <returns><see langword="true"/> if the rune is a valid surrogate pair; <see langword="false"/> otherwise.</returns>
  119. public static bool DecodeSurrogatePair (this Rune rune, out char [] chars)
  120. {
  121. if (rune.IsSurrogatePair ()) {
  122. chars = rune.ToString ().ToCharArray ();
  123. return true;
  124. }
  125. chars = null;
  126. return false;
  127. }
  128. /// <summary>
  129. /// Attempts to encode (as UTF-16) a surrogate pair.
  130. /// </summary>
  131. /// <param name="highSurrogate">The high surrogate code point.</param>
  132. /// <param name="lowSurrogate">The low surrogate code point.</param>
  133. /// <param name="result">The encoded rune.</param>
  134. /// <returns><see langword="true"/> if the encoding succeeded; <see langword="false"/> otherwise.</returns>
  135. public static bool EncodeSurrogatePair (char highSurrogate, char lowSurrogate, out Rune result)
  136. {
  137. result = default;
  138. if (char.IsSurrogatePair (highSurrogate, lowSurrogate)) {
  139. result = (Rune)char.ConvertToUtf32 (highSurrogate, lowSurrogate);
  140. return true;
  141. }
  142. return false;
  143. }
  144. /// <summary>
  145. /// Reports whether a rune is a surrogate code point.
  146. /// </summary>
  147. /// <remarks>
  148. /// This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.
  149. /// </remarks>
  150. /// <param name="rune">The rune to probe.</param>
  151. /// <returns><see langword="true"/> if the rune is a surrogate code point; <see langword="false"/> otherwise.</returns>
  152. public static bool IsSurrogatePair (this Rune rune)
  153. {
  154. return char.IsSurrogatePair (rune.ToString (), 0);
  155. }
  156. /// <summary>
  157. /// Reports if the provided array of bytes can be encoded as UTF-8.
  158. /// </summary>
  159. /// <param name="buffer">The byte array to probe.</param>
  160. /// <value><c>true</c> if is valid; otherwise, <c>false</c>.</value>
  161. public static bool CanBeEncodedAsRune (byte [] buffer)
  162. {
  163. var str = Encoding.Unicode.GetString (buffer);
  164. foreach (var rune in str.EnumerateRunes ()) {
  165. if (rune == Rune.ReplacementChar) {
  166. return false;
  167. }
  168. }
  169. return true;
  170. }
  171. // ---------------- implementation details ------------------
  172. // TODO: Can this be handled by the new .NET 8 Rune type?
  173. static readonly int [,] _combining = new int [,] {
  174. { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
  175. { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
  176. { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
  177. { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
  178. { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
  179. { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
  180. { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
  181. { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
  182. { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
  183. { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
  184. { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
  185. { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
  186. { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
  187. { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
  188. { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
  189. { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
  190. { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
  191. { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
  192. { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
  193. { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
  194. { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
  195. { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
  196. { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
  197. { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
  198. { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
  199. { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
  200. { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
  201. { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
  202. { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
  203. { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
  204. { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
  205. { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
  206. { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
  207. { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
  208. { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
  209. { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
  210. { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
  211. { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
  212. { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
  213. { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2E9A, 0x2E9A },
  214. { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
  215. { 0x31E4, 0x31EF }, { 0x321F, 0x321F }, { 0xA48D, 0xA48F },
  216. { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
  217. { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE1A, 0xFE1F },
  218. { 0xFE20, 0xFE23 }, { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 },
  219. { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
  220. { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
  221. { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
  222. { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
  223. { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
  224. { 0xE0100, 0xE01EF }
  225. };
  226. static readonly int [,] _combiningWideChars = new int [,] {
  227. /* Hangul Jamo init. consonants - 0x1100, 0x11ff */
  228. /* Miscellaneous Technical - 0x2300, 0x23ff */
  229. /* Hangul Syllables - 0x11a8, 0x11c2 */
  230. /* CJK Compatibility Ideographs - f900, fad9 */
  231. /* Vertical forms - fe10, fe19 */
  232. /* CJK Compatibility Forms - fe30, fe4f */
  233. /* Fullwidth Forms - ff01, ffee */
  234. /* Alphabetic Presentation Forms - 0xFB00, 0xFb4f */
  235. /* Chess Symbols - 0x1FA00, 0x1FA0f */
  236. { 0x1100, 0x115f }, { 0x231a, 0x231b }, { 0x2329, 0x232a },
  237. { 0x23e9, 0x23ec }, { 0x23f0, 0x23f0 }, { 0x23f3, 0x23f3 },
  238. { 0x25fd, 0x25fe }, { 0x2614, 0x2615 }, { 0x2648, 0x2653 },
  239. { 0x267f, 0x267f }, { 0x2693, 0x2693 }, { 0x26a1, 0x26a1 },
  240. { 0x26aa, 0x26ab }, { 0x26bd, 0x26be }, { 0x26c4, 0x26c5 },
  241. { 0x26ce, 0x26ce }, { 0x26d4, 0x26d4 }, { 0x26ea, 0x26ea },
  242. { 0x26f2, 0x26f3 }, { 0x26f5, 0x26f5 }, { 0x26fa, 0x26fa },
  243. { 0x26fd, 0x26fd }, { 0x2705, 0x2705 }, { 0x270a, 0x270b },
  244. { 0x2728, 0x2728 }, { 0x274c, 0x274c }, { 0x274e, 0x274e },
  245. { 0x2753, 0x2755 }, { 0x2757, 0x2757 }, { 0x2795, 0x2797 },
  246. { 0x27b0, 0x27b0 }, { 0x27bf, 0x27bf }, { 0x2b1b, 0x2b1c },
  247. { 0x2b50, 0x2b50 }, { 0x2b55, 0x2b55 }, { 0x2e80, 0x303e },
  248. { 0x3041, 0x3096 }, { 0x3099, 0x30ff }, { 0x3105, 0x312f },
  249. { 0x3131, 0x318e }, { 0x3190, 0x3247 }, { 0x3250, 0x4dbf },
  250. { 0x4e00, 0xa4c6 }, { 0xa960, 0xa97c }, { 0xac00, 0xd7a3 },
  251. { 0xf900, 0xfaff }, { 0xfe10, 0xfe1f }, { 0xfe30, 0xfe6b },
  252. { 0xff01, 0xff60 }, { 0xffe0, 0xffe6 },
  253. { 0x16fe0, 0x16fe4 }, { 0x16ff0, 0x16ff1 }, { 0x17000, 0x187f7 },
  254. { 0x18800, 0x18cd5 }, { 0x18d00, 0x18d08 }, { 0x1aff0, 0x1affc },
  255. { 0x1b000, 0x1b122 }, { 0x1b150, 0x1b152 }, { 0x1b164, 0x1b167 }, { 0x1b170, 0x1b2fb }, { 0x1d538, 0x1d550 },
  256. { 0x1f004, 0x1f004 }, { 0x1f0cf, 0x1f0cf }, /*{ 0x1f100, 0x1f10a },*/
  257. //{ 0x1f110, 0x1f12d }, { 0x1f130, 0x1f169 }, { 0x1f170, 0x1f1ac },
  258. { 0x1f18f, 0x1f199 },
  259. { 0x1f1e6, 0x1f1ff }, { 0x1f200, 0x1f202 }, { 0x1f210, 0x1f23b },
  260. { 0x1f240, 0x1f248 }, { 0x1f250, 0x1f251 }, { 0x1f260, 0x1f265 },
  261. { 0x1f300, 0x1f320 }, { 0x1f32d, 0x1f33e }, { 0x1f340, 0x1f37e },
  262. { 0x1f380, 0x1f393 }, { 0x1f3a0, 0x1f3ca }, { 0x1f3cf, 0x1f3d3 },
  263. { 0x1f3e0, 0x1f3f0 }, { 0x1f3f4, 0x1f3f4 }, { 0x1f3f8, 0x1f43e },
  264. { 0x1f440, 0x1f44e }, { 0x1f450, 0x1f4fc }, { 0x1f4ff, 0x1f53d },
  265. { 0x1f54b, 0x1f54e }, { 0x1f550, 0x1f567 }, { 0x1f57a, 0x1f57a },
  266. { 0x1f595, 0x1f596 }, { 0x1f5a4, 0x1f5a4 }, { 0x1f5fb, 0x1f606 },
  267. { 0x1f607, 0x1f64f }, { 0x1f680, 0x1f6c5 }, { 0x1f6cc, 0x1f6cc },
  268. { 0x1f6d0, 0x1f6d2 }, { 0x1f6d5, 0x1f6d7 }, { 0x1f6dd, 0x1f6df }, { 0x1f6eb, 0x1f6ec },
  269. { 0x1f6f4, 0x1f6fc }, { 0x1f7e0, 0x1f7eb }, { 0x1f7f0, 0x1f7f0 }, { 0x1f90c, 0x1f93a },
  270. { 0x1f93c, 0x1f945 }, { 0x1f947, 0x1f97f }, { 0x1f980, 0x1f9cc },
  271. { 0x1f9cd, 0x1f9ff }, { 0x1fa70, 0x1fa74 }, { 0x1fa78, 0x1fa7c }, { 0x1fa80, 0x1fa86 },
  272. { 0x1fa90, 0x1faac }, { 0x1fab0, 0x1faba }, { 0x1fac0, 0x1fac5 },
  273. { 0x1fad0, 0x1fad9 }, { 0x1fae0, 0x1fae7 }, { 0x1faf0, 0x1faf6 }, { 0x20000, 0x2fffd }, { 0x30000, 0x3fffd },
  274. //{ 0xe0100, 0xe01ef }, { 0xf0000, 0xffffd }, { 0x100000, 0x10fffd }
  275. };
  276. static int BiSearch (int rune, int [,] table, int max)
  277. {
  278. var min = 0;
  279. if (rune < table [0, 0] || rune > table [max, 1]) {
  280. return 0;
  281. }
  282. while (max >= min) {
  283. var mid = (min + max) / 2;
  284. if (rune > table [mid, 1]) {
  285. min = mid + 1;
  286. } else if (rune < table [mid, 0]) {
  287. max = mid - 1;
  288. } else {
  289. return 1;
  290. }
  291. }
  292. return 0;
  293. }
  294. }