123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
- #nullable enable
- using System.Globalization;
- using Wcwidth;
- namespace Terminal.Gui;
- /// <summary>Extends <see cref="System.Text.Rune"/> to support TUI text manipulation.</summary>
- public static class RuneExtensions
- {
- /// <summary>Maximum Unicode code point.</summary>
- public static readonly int MaxUnicodeCodePoint = 0x10FFFF;
- /// <summary>Reports if the provided array of bytes can be encoded as UTF-8.</summary>
- /// <param name="buffer">The byte array to probe.</param>
- /// <value><c>true</c> if is valid; otherwise, <c>false</c>.</value>
- public static bool CanBeEncodedAsRune (byte [] buffer)
- {
- string str = Encoding.Unicode.GetString (buffer);
- foreach (Rune rune in str.EnumerateRunes ())
- {
- if (rune == Rune.ReplacementChar)
- {
- return false;
- }
- }
- return true;
- }
- /// <summary>Attempts to decode the rune as a surrogate pair to UTF-16.</summary>
- /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
- /// <param name="rune">The rune to decode.</param>
- /// <param name="chars">The chars if the rune is a surrogate pair. Null otherwise.</param>
- /// <returns><see langword="true"/> if the rune is a valid surrogate pair; <see langword="false"/> otherwise.</returns>
- public static bool DecodeSurrogatePair (this Rune rune, out char []? chars)
- {
- bool isSingleUtf16CodeUnit = rune.IsBmp;
- if (isSingleUtf16CodeUnit)
- {
- chars = null;
- return false;
- }
- const int maxCharsPerRune = 2;
- Span<char> charBuffer = stackalloc char[maxCharsPerRune];
- int charsWritten = rune.EncodeToUtf16 (charBuffer);
- if (charsWritten >= 2 && char.IsSurrogatePair (charBuffer [0], charBuffer [1]))
- {
- chars = charBuffer [..charsWritten].ToArray ();
- return true;
- }
- chars = null;
- return false;
- }
- /// <summary>Writes into the destination buffer starting at offset the UTF8 encoded version of the rune.</summary>
- /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
- /// <param name="rune">The rune to encode.</param>
- /// <param name="dest">The destination buffer.</param>
- /// <param name="start">Starting offset to look into.</param>
- /// <param name="count">Number of bytes valid in the buffer, or -1 to make it the length of the buffer.</param>
- /// <returns>he number of bytes written into the destination buffer.</returns>
- public static int Encode (this Rune rune, byte [] dest, int start = 0, int count = -1)
- {
- const int maxUtf8BytesPerRune = 4;
- Span<byte> bytes = stackalloc byte[maxUtf8BytesPerRune];
- int writtenBytes = rune.EncodeToUtf8 (bytes);
- int bytesToCopy = count == -1
- ? writtenBytes
- : Math.Min (count, writtenBytes);
- int bytesWritten = 0;
- for (int i = 0; i < bytesToCopy; i++)
- {
- if (bytes [i] == '\0')
- {
- break;
- }
- dest [start + i] = bytes [i];
- bytesWritten++;
- }
- return bytesWritten;
- }
- /// <summary>Attempts to encode (as UTF-16) a surrogate pair.</summary>
- /// <param name="highSurrogate">The high surrogate code point.</param>
- /// <param name="lowSurrogate">The low surrogate code point.</param>
- /// <param name="result">The encoded rune.</param>
- /// <returns><see langword="true"/> if the encoding succeeded; <see langword="false"/> otherwise.</returns>
- public static bool EncodeSurrogatePair (char highSurrogate, char lowSurrogate, out Rune result)
- {
- result = default (Rune);
- if (char.IsSurrogatePair (highSurrogate, lowSurrogate))
- {
- result = (Rune)char.ConvertToUtf32 (highSurrogate, lowSurrogate);
- return true;
- }
- return false;
- }
- /// <summary>Gets the number of columns the rune occupies in the terminal.</summary>
- /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
- /// <param name="rune">The rune to measure.</param>
- /// <returns>
- /// The number of columns required to fit the rune, 0 if the argument is the null character, or -1 if the value is
- /// not printable, otherwise the number of columns that the rune occupies.
- /// </returns>
- public static int GetColumns (this Rune rune) { return UnicodeCalculator.GetWidth (rune); }
- /// <summary>Get number of bytes required to encode the rune, based on the provided encoding.</summary>
- /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
- /// <param name="rune">The rune to probe.</param>
- /// <param name="encoding">The encoding used; the default is UTF8.</param>
- /// <returns>The number of bytes required.</returns>
- public static int GetEncodingLength (this Rune rune, Encoding? encoding = null)
- {
- encoding ??= Encoding.UTF8;
- const int maxCharsPerRune = 2;
- // Get characters with UTF16 to keep that part independent of selected encoding.
- Span<char> charBuffer = stackalloc char[maxCharsPerRune];
- int charsWritten = rune.EncodeToUtf16(charBuffer);
- Span<char> chars = charBuffer[..charsWritten];
- int maxEncodedLength = encoding.GetMaxByteCount (charsWritten);
- Span<byte> byteBuffer = stackalloc byte[maxEncodedLength];
- int bytesEncoded = encoding.GetBytes (chars, byteBuffer);
- ReadOnlySpan<byte> encodedBytes = byteBuffer[..bytesEncoded];
- if (encodedBytes [^1] == '\0')
- {
- return encodedBytes.Length - 1;
- }
- return encodedBytes.Length;
- }
- /// <summary>Returns <see langword="true"/> if the rune is a combining character.</summary>
- /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
- /// <param name="rune"></param>
- /// <returns></returns>
- public static bool IsCombiningMark (this Rune rune)
- {
- UnicodeCategory category = Rune.GetUnicodeCategory (rune);
- return category == UnicodeCategory.NonSpacingMark
- || category == UnicodeCategory.SpacingCombiningMark
- || category == UnicodeCategory.EnclosingMark;
- }
- /// <summary>Reports whether a rune is a surrogate code point.</summary>
- /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
- /// <param name="rune">The rune to probe.</param>
- /// <returns><see langword="true"/> if the rune is a surrogate code point; <see langword="false"/> otherwise.</returns>
- public static bool IsSurrogatePair (this Rune rune)
- {
- bool isSingleUtf16CodeUnit = rune.IsBmp;
- if (isSingleUtf16CodeUnit)
- {
- return false;
- }
- const int maxCharsPerRune = 2;
- Span<char> charBuffer = stackalloc char[maxCharsPerRune];
- int charsWritten = rune.EncodeToUtf16 (charBuffer);
- return charsWritten >= 2 && char.IsSurrogatePair (charBuffer [0], charBuffer [1]);
- }
- /// <summary>
- /// Ensures the rune is not a control character and can be displayed by translating characters below 0x20 to
- /// equivalent, printable, Unicode chars.
- /// </summary>
- /// <remarks>This is a Terminal.Gui extension method to <see cref="System.Text.Rune"/> to support TUI text manipulation.</remarks>
- /// <param name="rune"></param>
- /// <returns></returns>
- public static Rune MakePrintable (this Rune rune) { return Rune.IsControl (rune) ? new Rune (rune.Value + 0x2400) : rune; }
- }
|