using System.Globalization;
using System.Text;
namespace Terminal.Gui;
/// <summary>
/// Extends <see cref="System.Text.Rune"/> to support TUI text manipulation.
/// </summary>
public static class RuneExtensions {
/// <summary>
/// Maximum Unicode code point (0x10FFFF).
/// </summary>
// NOTE(review): declared as a mutable public static field (neither const nor readonly) — confirm whether
// any caller assigns it before tightening the declaration.
public static int MaxUnicodeCodePoint = 0x10FFFF;
/// <summary>
/// Gets the number of columns the rune occupies in the terminal.
/// </summary>
/// <remarks>
/// This is a Terminal.Gui extension method to support TUI text manipulation.
/// </remarks>
/// <param name="rune">The rune to measure.</param>
/// <returns>
/// -1 for code points below 0x20 (the null character included) and for 0x7F through 0x9F;
/// 0 if the code point lies in a range of the <c>_combining</c> table;
/// 2 if it lies in a range of the <c>_combiningWideChars</c> table;
/// otherwise 1.
/// </returns>
public static int GetColumns (this Rune rune)
{
    // TODO: I believe there is a way to do this without using our own tables, using Rune.
    int value = rune.Value;

    // C0 controls, DEL and C1 controls are reported as non-printable.
    if (value < 0x20 || (value >= 0x7f && value < 0xa0)) {
        return -1;
    }

    // Remaining ASCII is always a single column.
    if (value < 0x7f) {
        return 1;
    }

    // Zero-width (non-spacing) ranges are checked before the wide ranges.
    bool isZeroWidth = BiSearch (value, _combining, _combining.GetLength (0) - 1) != 0;
    if (isZeroWidth) {
        return 0;
    }

    // Not a combining or C0/C1 control character: one column, or two when listed as wide.
    bool isWide = BiSearch (value, _combiningWideChars, _combiningWideChars.GetLength (0) - 1) != 0;
    return isWide ? 2 : 1;
}
/// <summary>
/// Returns <see langword="true"/> if the rune is a combining character.
/// </summary>
/// <remarks>
/// This is a Terminal.Gui extension method to support TUI text manipulation.
/// </remarks>
/// <param name="rune">The rune to classify.</param>
/// <returns>
/// <see langword="true"/> when the rune's Unicode category is <see cref="UnicodeCategory.NonSpacingMark"/>,
/// <see cref="UnicodeCategory.SpacingCombiningMark"/> or <see cref="UnicodeCategory.EnclosingMark"/>;
/// otherwise <see langword="false"/>.
/// </returns>
public static bool IsCombiningMark (this System.Text.Rune rune)
{
    // The category is looked up once and matched against the three mark categories.
    return Rune.GetUnicodeCategory (rune) is UnicodeCategory.NonSpacingMark
        or UnicodeCategory.SpacingCombiningMark
        or UnicodeCategory.EnclosingMark;
}
/// <summary>
/// Ensures the rune is not a control character and can be displayed, by translating control characters
/// to the code point 0x2400 above them (for values below 0x20 this is the matching Unicode
/// "Control Pictures" symbol).
/// </summary>
/// <remarks>
/// This is a Terminal.Gui extension method to support TUI text manipulation.
/// </remarks>
/// <param name="rune">The rune to make printable.</param>
/// <returns>
/// A rune whose value is <paramref name="rune"/>'s value plus 0x2400 when <see cref="Rune.IsControl(Rune)"/>
/// reports it as a control character; otherwise <paramref name="rune"/> unchanged.
/// </returns>
public static Rune MakePrintable (this System.Text.Rune rune)
{
    if (!Rune.IsControl (rune)) {
        return rune;
    }

    // NOTE(review): Rune.IsControl is documented as also covering U+007F..U+009F; those are shifted to
    // U+247F..U+249F, which is outside the Control Pictures block — confirm this is intended.
    return new Rune (rune.Value + 0x2400);
}
/// <summary>
/// Gets the number of bytes required to encode the rune, based on the provided encoding.
/// </summary>
/// <remarks>
/// This is a Terminal.Gui extension method to support TUI text manipulation.
/// When the last encoded byte is zero it is not counted, so the result is one less than the
/// raw encoded length in that case.
/// </remarks>
/// <param name="rune">The rune to probe.</param>
/// <param name="encoding">The encoding used; the default is UTF8.</param>
/// <returns>The number of bytes required.</returns>
public static int GetEncodingLength (this Rune rune, Encoding encoding = null)
{
    Encoding effective = encoding ?? Encoding.UTF8;
    byte [] encoded = effective.GetBytes (rune.ToString ());

    // A single trailing zero byte is excluded from the reported length.
    return encoded [^1] == 0 ? encoded.Length - 1 : encoded.Length;
}
/// <summary>
/// Writes into the destination buffer, starting at offset <paramref name="start"/>, the UTF8 encoded
/// version of the rune.
/// </summary>
/// <remarks>
/// This is a Terminal.Gui extension method to support TUI text manipulation.
/// Copying stops at the first zero byte of the encoded sequence, so the null character writes nothing.
/// The capacity of <paramref name="dest"/> is not checked here; the caller must provide room for the
/// bytes written from <paramref name="start"/> onwards.
/// </remarks>
/// <param name="rune">The rune to encode.</param>
/// <param name="dest">The destination buffer.</param>
/// <param name="start">Starting offset to write at.</param>
/// <param name="count">
/// Maximum number of bytes to write, or -1 to write the whole encoded sequence. Values larger than the
/// encoded sequence are treated as the length of the sequence.
/// </param>
/// <returns>The number of bytes written into the destination buffer.</returns>
public static int Encode (this Rune rune, byte [] dest, int start = 0, int count = -1)
{
    var bytes = Encoding.UTF8.GetBytes (rune.ToString ());

    // Never iterate past the encoded sequence: a count larger than the sequence used to index
    // beyond the end of 'bytes' and throw.
    var limit = (count == -1 || count > bytes.Length) ? bytes.Length : count;

    var length = 0;
    for (var i = 0; i < limit; i++) {
        if (bytes [i] == 0) {
            break;
        }
        dest [start + i] = bytes [i];
        length++;
    }
    return length;
}
/// <summary>
/// Attempts to decode the rune as a surrogate pair to UTF-16.
/// </summary>
/// <remarks>
/// This is a Terminal.Gui extension method to support TUI text manipulation.
/// </remarks>
/// <param name="rune">The rune to decode.</param>
/// <param name="chars">
/// The two UTF-16 code units (high surrogate, then low surrogate) if the rune is encoded as a surrogate
/// pair; <see langword="null"/> otherwise.
/// </param>
/// <returns>
/// <see langword="true"/> if the rune is encoded in UTF-16 as a surrogate pair; <see langword="false"/>
/// otherwise.
/// </returns>
public static bool DecodeSurrogatePair (this Rune rune, out char [] chars)
{
    // Runes inside the Basic Multilingual Plane fit in a single UTF-16 code unit, so there is no pair
    // to decode; checking this first also avoids converting the rune to a string twice.
    if (rune.IsBmp) {
        chars = null;
        return false;
    }

    chars = rune.ToString ().ToCharArray ();
    return true;
}
/// <summary>
/// Attempts to encode (as UTF-16) a surrogate pair.
/// </summary>
/// <param name="highSurrogate">The high surrogate code point.</param>
/// <param name="lowSurrogate">The low surrogate code point.</param>
/// <param name="result">
/// The encoded rune, or the default <see cref="Rune"/> when the two values do not form a surrogate pair.
/// </param>
/// <returns><see langword="true"/> if the encoding succeeded; <see langword="false"/> otherwise.</returns>
public static bool EncodeSurrogatePair (char highSurrogate, char lowSurrogate, out Rune result)
    => Rune.TryCreate (highSurrogate, lowSurrogate, out result);
/// <summary>
/// Reports whether a rune is represented in UTF-16 as a surrogate pair, that is, whether it lies outside
/// the Basic Multilingual Plane.
/// </summary>
/// <remarks>
/// This is a Terminal.Gui extension method to support TUI text manipulation.
/// </remarks>
/// <param name="rune">The rune to probe.</param>
/// <returns>
/// <see langword="true"/> if the rune needs two UTF-16 code units; <see langword="false"/> otherwise.
/// </returns>
public static bool IsSurrogatePair (this Rune rune)
{
    // Equivalent to inspecting the rune's UTF-16 string for a surrogate pair, without allocating it.
    return !rune.IsBmp;
}
/// <summary>
/// Reports if the provided array of bytes can be encoded as UTF-8.
/// </summary>
/// <remarks>
/// The buffer is decoded as UTF-8 and rejected if any decoded rune is <see cref="Rune.ReplacementChar"/>.
/// A buffer that itself contains an encoded U+FFFD is therefore also reported as invalid.
/// </remarks>
/// <param name="buffer">The byte array to probe.</param>
/// <returns><see langword="true"/> if the buffer is valid; otherwise, <see langword="false"/>.</returns>
public static bool CanBeEncodedAsRune (byte [] buffer)
{
    // Decode as UTF-8, matching the documented contract; malformed sequences surface as U+FFFD.
    // (Decoding as UTF-16 accepted malformed UTF-8 whenever the bytes happened to form valid code units.)
    var str = Encoding.UTF8.GetString (buffer);
    foreach (var rune in str.EnumerateRunes ()) {
        if (rune == Rune.ReplacementChar) {
            return false;
        }
    }
    return true;
}
// ---------------- implementation details ------------------
// TODO: Can this be handled by the new .NET 8 Rune type?
// Table of inclusive code point ranges, one { first, last } pair per row. GetColumns reports a width
// of 0 for any code point that falls inside one of these ranges.
// The table is probed with BiSearch (a binary search), so rows must stay sorted in ascending order
// and must not overlap.
static readonly int [,] _combining = new int [,] {
{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2E9A, 0x2E9A },
{ 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
{ 0x31E4, 0x31EF }, { 0x321F, 0x321F }, { 0xA48D, 0xA48F },
{ 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
{ 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE1A, 0xFE1F },
{ 0xFE20, 0xFE23 }, { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 },
{ 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
{ 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
{ 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
{ 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
{ 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
{ 0xE0100, 0xE01EF }
};
// Table of inclusive code point ranges, one { first, last } pair per row. GetColumns reports a width
// of 2 for any code point that falls inside one of these ranges (after the _combining table has been
// ruled out), so despite its name this is the table of wide (two-column) characters.
// The table is probed with BiSearch (a binary search), so rows must stay sorted in ascending order
// and must not overlap. Some ranges are deliberately left commented out below.
static readonly int [,] _combiningWideChars = new int [,] {
/* Hangul Jamo init. consonants - 0x1100, 0x11ff */
/* Miscellaneous Technical - 0x2300, 0x23ff */
/* Hangul Syllables - 0x11a8, 0x11c2 */
/* CJK Compatibility Ideographs - f900, fad9 */
/* Vertical forms - fe10, fe19 */
/* CJK Compatibility Forms - fe30, fe4f */
/* Fullwidth Forms - ff01, ffee */
/* Alphabetic Presentation Forms - 0xFB00, 0xFb4f */
/* Chess Symbols - 0x1FA00, 0x1FA0f */
{ 0x1100, 0x115f }, { 0x231a, 0x231b }, { 0x2329, 0x232a },
{ 0x23e9, 0x23ec }, { 0x23f0, 0x23f0 }, { 0x23f3, 0x23f3 },
{ 0x25fd, 0x25fe }, { 0x2614, 0x2615 }, { 0x2648, 0x2653 },
{ 0x267f, 0x267f }, { 0x2693, 0x2693 }, { 0x26a1, 0x26a1 },
{ 0x26aa, 0x26ab }, { 0x26bd, 0x26be }, { 0x26c4, 0x26c5 },
{ 0x26ce, 0x26ce }, { 0x26d4, 0x26d4 }, { 0x26ea, 0x26ea },
{ 0x26f2, 0x26f3 }, { 0x26f5, 0x26f5 }, { 0x26fa, 0x26fa },
{ 0x26fd, 0x26fd }, { 0x2705, 0x2705 }, { 0x270a, 0x270b },
{ 0x2728, 0x2728 }, { 0x274c, 0x274c }, { 0x274e, 0x274e },
{ 0x2753, 0x2755 }, { 0x2757, 0x2757 }, { 0x2795, 0x2797 },
{ 0x27b0, 0x27b0 }, { 0x27bf, 0x27bf }, { 0x2b1b, 0x2b1c },
{ 0x2b50, 0x2b50 }, { 0x2b55, 0x2b55 }, { 0x2e80, 0x303e },
{ 0x3041, 0x3096 }, { 0x3099, 0x30ff }, { 0x3105, 0x312f },
{ 0x3131, 0x318e }, { 0x3190, 0x3247 }, { 0x3250, 0x4dbf },
{ 0x4e00, 0xa4c6 }, { 0xa960, 0xa97c }, { 0xac00, 0xd7a3 },
{ 0xf900, 0xfaff }, { 0xfe10, 0xfe1f }, { 0xfe30, 0xfe6b },
{ 0xff01, 0xff60 }, { 0xffe0, 0xffe6 },
{ 0x16fe0, 0x16fe4 }, { 0x16ff0, 0x16ff1 }, { 0x17000, 0x187f7 },
{ 0x18800, 0x18cd5 }, { 0x18d00, 0x18d08 }, { 0x1aff0, 0x1affc },
{ 0x1b000, 0x1b122 }, { 0x1b150, 0x1b152 }, { 0x1b164, 0x1b167 }, { 0x1b170, 0x1b2fb }, { 0x1d538, 0x1d550 },
{ 0x1f004, 0x1f004 }, { 0x1f0cf, 0x1f0cf }, /*{ 0x1f100, 0x1f10a },*/
//{ 0x1f110, 0x1f12d }, { 0x1f130, 0x1f169 }, { 0x1f170, 0x1f1ac },
{ 0x1f18f, 0x1f199 },
{ 0x1f1e6, 0x1f1ff }, { 0x1f200, 0x1f202 }, { 0x1f210, 0x1f23b },
{ 0x1f240, 0x1f248 }, { 0x1f250, 0x1f251 }, { 0x1f260, 0x1f265 },
{ 0x1f300, 0x1f320 }, { 0x1f32d, 0x1f33e }, { 0x1f340, 0x1f37e },
{ 0x1f380, 0x1f393 }, { 0x1f3a0, 0x1f3ca }, { 0x1f3cf, 0x1f3d3 },
{ 0x1f3e0, 0x1f3f0 }, { 0x1f3f4, 0x1f3f4 }, { 0x1f3f8, 0x1f43e },
{ 0x1f440, 0x1f44e }, { 0x1f450, 0x1f4fc }, { 0x1f4ff, 0x1f53d },
{ 0x1f54b, 0x1f54e }, { 0x1f550, 0x1f567 }, { 0x1f57a, 0x1f57a },
{ 0x1f595, 0x1f596 }, { 0x1f5a4, 0x1f5a4 }, { 0x1f5fb, 0x1f606 },
{ 0x1f607, 0x1f64f }, { 0x1f680, 0x1f6c5 }, { 0x1f6cc, 0x1f6cc },
{ 0x1f6d0, 0x1f6d2 }, { 0x1f6d5, 0x1f6d7 }, { 0x1f6dd, 0x1f6df }, { 0x1f6eb, 0x1f6ec },
{ 0x1f6f4, 0x1f6fc }, { 0x1f7e0, 0x1f7eb }, { 0x1f7f0, 0x1f7f0 }, { 0x1f90c, 0x1f93a },
{ 0x1f93c, 0x1f945 }, { 0x1f947, 0x1f97f }, { 0x1f980, 0x1f9cc },
{ 0x1f9cd, 0x1f9ff }, { 0x1fa70, 0x1fa74 }, { 0x1fa78, 0x1fa7c }, { 0x1fa80, 0x1fa86 },
{ 0x1fa90, 0x1faac }, { 0x1fab0, 0x1faba }, { 0x1fac0, 0x1fac5 },
{ 0x1fad0, 0x1fad9 }, { 0x1fae0, 0x1fae7 }, { 0x1faf0, 0x1faf6 }, { 0x20000, 0x2fffd }, { 0x30000, 0x3fffd },
//{ 0xe0100, 0xe01ef }, { 0xf0000, 0xffffd }, { 0x100000, 0x10fffd }
};
// Binary search over a table of inclusive { first, last } code point ranges.
//   rune  - the code point to look for.
//   table - the range table; rows are expected in ascending order.
//   max   - index of the last row to consider.
// Returns 1 when some row contains the code point, 0 otherwise.
static int BiSearch (int rune, int [,] table, int max)
{
    // Quick reject: below the first row's start or above the last row's end.
    if (rune < table [0, 0] || rune > table [max, 1]) {
        return 0;
    }

    int low = 0;
    int high = max;
    while (low <= high) {
        int middle = (low + high) / 2;
        if (rune > table [middle, 1]) {
            // Past the end of this range: continue in the upper half.
            low = middle + 1;
            continue;
        }
        if (rune < table [middle, 0]) {
            // Before the start of this range: continue in the lower half.
            high = middle - 1;
            continue;
        }
        return 1;
    }
    return 0;
}
}