c
/
mono
огледало од https://github.com/mono/mono.git


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338
							// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Buffers;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Text.Unicode;

namespace System.Text
{
    /// <summary>
    /// Represents a Unicode scalar value ([ U+0000..U+D7FF ], inclusive; or [ U+E000..U+10FFFF ], inclusive).
    /// </summary>
    /// <remarks>
    /// This type's constructors and conversion operators validate the input, so consumers can call the APIs
    /// assuming that the underlying <see cref="Rune"/> instance is well-formed.
    /// </remarks>
    [DebuggerDisplay("{DebuggerDisplay,nq}")]
    public readonly struct Rune : IComparable<Rune>, IEquatable<Rune>
    {
        // Bit set in an AsciiCharInfo entry when the character is whitespace.
        private const byte IsWhiteSpaceFlag = 0x80;
        // Bit set in an AsciiCharInfo entry when the character is a letter or digit.
        private const byte IsLetterOrDigitFlag = 0x40;
        // Mask that selects the bottom 5 bits of an AsciiCharInfo entry (the UnicodeCategory).
        private const byte UnicodeCategoryMask = 0x1F;

        // Contains information about the ASCII character range [ U+0000..U+007F ], with:
        // - 0x80 bit if set means 'is whitespace'
        // - 0x40 bit if set means 'is letter or digit'
        // - 0x20 bit is reserved for future use
        // - bottom 5 bits are the UnicodeCategory of the character
        // Lookup table with one entry per ASCII code point (8 rows of 16 entries = 128 entries),
        // indexed by the code point value.
        private static ReadOnlySpan<byte> AsciiCharInfo => new byte[]
        {
            0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x8E, 0x8E, 0x8E, 0x8E, 0x8E, 0x0E, 0x0E, // U+0000..U+000F (U+0009..U+000D carry the whitespace bit)
            0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, // U+0010..U+001F
            0x8B, 0x18, 0x18, 0x18, 0x1A, 0x18, 0x18, 0x18, 0x14, 0x15, 0x18, 0x19, 0x18, 0x13, 0x18, 0x18, // U+0020..U+002F (U+0020 carries the whitespace bit)
            0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x18, 0x18, 0x19, 0x19, 0x19, 0x18, // U+0030..U+003F ('0'..'9' carry the letter-or-digit bit)
            0x18, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // U+0040..U+004F ('A'..'O' carry the letter-or-digit bit)
            0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x14, 0x18, 0x15, 0x1B, 0x12, // U+0050..U+005F ('P'..'Z' carry the letter-or-digit bit)
            0x1B, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // U+0060..U+006F ('a'..'o' carry the letter-or-digit bit)
            0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x14, 0x19, 0x15, 0x19, 0x0E  // U+0070..U+007F ('p'..'z' carry the letter-or-digit bit)
        };

        // The wrapped scalar value. The constructors either validate their input before storing it
        // here or (for the private non-validating constructor) assert that it is already valid.
        private readonly uint _value;

        /// <summary>
        /// Creates a <see cref="Rune"/> from the provided UTF-16 code unit.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// If <paramref name="ch"/> represents a UTF-16 surrogate code point
        /// U+D800..U+DFFF, inclusive.
        /// </exception>
        public Rune(char ch)
        {
            // Widen first so the surrogate test and the stored value share one representation.
            uint codePoint = ch;

            // A lone UTF-16 surrogate code unit is not a scalar value and cannot be wrapped.
            if (UnicodeUtility.IsSurrogateCodePoint(codePoint))
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.ch);
            }

            this._value = codePoint;
        }

        /// <summary>
        /// Creates a <see cref="Rune"/> from the provided UTF-16 surrogate pair.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// If <paramref name="highSurrogate"/> does not represent a UTF-16 high surrogate code point
        /// or <paramref name="lowSurrogate"/> does not represent a UTF-16 low surrogate code point.
        /// </exception>
        public Rune(char highSurrogate, char lowSurrogate)
            : this(
                scalarValue: (uint)char.ConvertToUtf32(highSurrogate, lowSurrogate),
                unused: false)
        {
            // char.ConvertToUtf32 is responsible for rejecting a mismatched surrogate pair,
            // so its result is handed to the non-validating constructor.
        }

        /// <summary>
        /// Creates a <see cref="Rune"/> from the provided Unicode scalar value.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// If <paramref name="value"/> does not represent a valid Unicode scalar value.
        /// </exception>
        public Rune(int value)
            : this(value: (uint)value)
        {
            // All validation is performed by the uint overload this constructor chains to.
        }

        /// <summary>
        /// Creates a <see cref="Rune"/> from the provided Unicode scalar value.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// If <paramref name="value"/> does not represent a valid Unicode scalar value.
        /// </exception>
        [CLSCompliant(false)]
        public Rune(uint value)
        {
            bool isScalar = UnicodeUtility.IsValidUnicodeScalar(value);
            if (!isScalar)
            {
                // Anything that is not a Unicode scalar value is rejected before it can be stored.
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.value);
            }

            this._value = value;
        }

        // non-validating ctor
        private Rune(uint scalarValue, bool unused)
        {
            // The 'unused' parameter exists only to give this overload a distinct signature
            // from the validating Rune(uint) constructor. Callers must pass a valid scalar;
            // this is only asserted here, not enforced with an exception.
            UnicodeDebug.AssertIsValidScalar(scalarValue);
            this._value = scalarValue;
        }

        /// <summary>Returns true if both operands wrap the same scalar value.</summary>
        public static bool operator ==(Rune left, Rune right) => left._value == right._value;

        /// <summary>Returns true if the operands wrap different scalar values.</summary>
        public static bool operator !=(Rune left, Rune right) => left._value != right._value;

        /// <summary>Returns true if the left scalar value is numerically less than the right one.</summary>
        public static bool operator <(Rune left, Rune right) => left._value < right._value;

        /// <summary>Returns true if the left scalar value is numerically less than or equal to the right one.</summary>
        public static bool operator <=(Rune left, Rune right) => left._value <= right._value;

        /// <summary>Returns true if the left scalar value is numerically greater than the right one.</summary>
        public static bool operator >(Rune left, Rune right) => left._value > right._value;

        /// <summary>Returns true if the left scalar value is numerically greater than or equal to the right one.</summary>
        public static bool operator >=(Rune left, Rune right) => left._value >= right._value;

        // Operators below are explicit because they may throw.

        public static explicit operator Rune(char ch)
        {
            // Delegates to the validating constructor, so the conversion can throw.
            return new Rune(ch);
        }

        [CLSCompliant(false)]
        public static explicit operator Rune(uint value)
        {
            // Delegates to the validating constructor, so the conversion can throw.
            return new Rune(value);
        }

        public static explicit operator Rune(int value)
        {
            // Delegates to the validating constructor, so the conversion can throw.
            return new Rune(value);
        }

        // Displayed as "U+XXXX '<char>'"; e.g., "U+0065 'e'". The hex portion is padded to at least
        // four digits. If IsValid reports that the stored value is not valid, U+FFFD is shown in
        // place of the character.
        private string DebuggerDisplay => FormattableString.Invariant($"U+{_value:X4} '{(IsValid(_value) ? ToString() : "\uFFFD")}'");

        /// <summary>
        /// Returns true if and only if this scalar value is ASCII ([ U+0000..U+007F ])
        /// and therefore representable by a single UTF-8 code unit.
        /// </summary>
        public bool IsAscii
        {
            get { return UnicodeUtility.IsAsciiCodePoint(_value); }
        }

        /// <summary>
        /// Returns true if and only if this scalar value is within the BMP ([ U+0000..U+FFFF ])
        /// and therefore representable by a single UTF-16 code unit.
        /// </summary>
        public bool IsBmp
        {
            get { return UnicodeUtility.IsBmpCodePoint(_value); }
        }

        /// <summary>
        /// Returns the Unicode plane (0 to 16, inclusive) which contains this scalar.
        /// </summary>
        public int Plane
        {
            get { return UnicodeUtility.GetPlane(_value); }
        }

        /// <summary>
        /// A <see cref="Rune"/> instance that represents the Unicode replacement character U+FFFD.
        /// </summary>
        public static Rune ReplacementChar
        {
            // The constant is wrapped through the non-validating factory.
            get { return UnsafeCreate(UnicodeUtility.ReplacementChar); }
        }

        /// <summary>
        /// Returns the length in code units (<see cref="Char"/>) of the
        /// UTF-16 sequence required to represent this scalar value.
        /// </summary>
        /// <remarks>
        /// The return value will be 1 or 2.
        /// </remarks>
        public int Utf16SequenceLength
        {
            get { return UnicodeUtility.GetUtf16SequenceLength(_value); }
        }

        /// <summary>
        /// Returns the length in code units of the
        /// UTF-8 sequence required to represent this scalar value.
        /// </summary>
        /// <remarks>
        /// The return value will be 1 through 4, inclusive.
        /// </remarks>
        public int Utf8SequenceLength
        {
            get { return UnicodeUtility.GetUtf8SequenceLength(_value); }
        }

        /// <summary>
        /// Returns the Unicode scalar value as an integer.
        /// </summary>
        public int Value
        {
            get { return (int)_value; }
        }

        private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool toUpper)
        {
            Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller.");
            Debug.Assert(textInfo != null, "This should've been checked by the caller.");

            // Two code units is the worst case (a surrogate pair). The destination is given the
            // same size because the conversion is expected to keep the UTF-16 code unit count.
            Span<char> sourceBuffer = stackalloc char[2];
            Span<char> resultBuffer = stackalloc char[2];

            // Trim both buffers to the number of code units the rune actually occupies.
            int unitCount = rune.EncodeToUtf16(sourceBuffer);
            Span<char> input = sourceBuffer.Slice(0, unitCount);
            Span<char> output = resultBuffer.Slice(0, unitCount);

            if (toUpper)
            {
                textInfo.ChangeCaseToUpper(input, output);
            }
            else
            {
                textInfo.ChangeCaseToLower(input, output);
            }

            // The conversion is not expected to move a scalar between the BMP and the
            // supplementary planes, so the shape of the input rune decides how the output
            // is reassembled. The helpers used below carry debug asserts for that assumption.
            return rune.IsBmp
                ? UnsafeCreate(output[0])
                : UnsafeCreate(UnicodeUtility.GetScalarFromUtf16SurrogatePair(output[0], output[1]));
        }

        public int CompareTo(Rune other)
        {
            // Ordering is the numeric ordering of the underlying scalar values.
            return _value.CompareTo(other._value);
        }

        /// <summary>
        /// Decodes the <see cref="Rune"/> at the beginning of the provided UTF-16 source buffer.
        /// </summary>
        /// <returns>
        /// <para>
        /// If the source buffer begins with a valid UTF-16 encoded scalar value, returns <see cref="OperationStatus.Done"/>,
        /// and outs via <paramref name="result"/> the decoded <see cref="Rune"/> and via <paramref name="charsConsumed"/> the
        /// number of <see langword="char"/>s used in the input buffer to encode the <see cref="Rune"/>.
        /// </para>
        /// <para>
        /// If the source buffer is empty or contains only a standalone UTF-16 high surrogate character, returns <see cref="OperationStatus.NeedMoreData"/>,
        /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="charsConsumed"/> the length of the input buffer.
        /// </para>
        /// <para>
        /// If the source buffer begins with an ill-formed UTF-16 encoded scalar value, returns <see cref="OperationStatus.InvalidData"/>,
        /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="charsConsumed"/> the number of
        /// <see langword="char"/>s used in the input buffer to encode the ill-formed sequence.
        /// </para>
        /// </returns>
        /// <remarks>
        /// The general calling convention is to call this method in a loop, slicing the <paramref name="source"/> buffer by
        /// <paramref name="charsConsumed"/> elements on each iteration of the loop. On each iteration of the loop <paramref name="result"/>
        /// will contain the real scalar value if successfully decoded, or it will contain <see cref="ReplacementChar"/> if
        /// the data could not be successfully decoded. This pattern provides convenient automatic U+FFFD substitution of
        /// invalid sequences while iterating through the loop.
        /// </remarks>
        public static OperationStatus DecodeFromUtf16(ReadOnlySpan<char> source, out Rune result, out int charsConsumed)
        {
            // Nothing to decode: ask the caller for more input.
            if (source.IsEmpty)
            {
                result = ReplacementChar;
                charsConsumed = source.Length;
                return OperationStatus.NeedMoreData;
            }

            // Common case: the first code unit is a BMP scalar value on its own.
            char leadUnit = source[0];
            if (TryCreate(leadUnit, out result))
            {
                charsConsumed = 1;
                return OperationStatus.Done;
            }

            // The first code unit is a surrogate. Optimistically treat it as a high surrogate
            // and try to pair it with the code unit that follows, if there is one.
            if ((uint)source.Length > 1)
            {
                if (TryCreate(leadUnit, source[1], out result))
                {
                    // Formed a supplementary scalar value.
                    charsConsumed = 2;
                    return OperationStatus.Done;
                }

                // Either the first unit was a low surrogate, or the second unit was not a
                // low surrogate. Falls through to the error exit below.
            }
            else if (char.IsHighSurrogate(leadUnit))
            {
                // The buffer is a single high surrogate; a following low surrogate could still
                // complete it, so this is not an error yet.
                result = ReplacementChar;
                charsConsumed = source.Length;
                return OperationStatus.NeedMoreData;
            }

            // Reached for a mismatched pair, or for a single-element buffer holding a standalone
            // low surrogate (which no additional data can ever make valid).
            // The maximal invalid subsequence for UTF-16 is always a single code unit in length.
            result = ReplacementChar;
            charsConsumed = 1;
            return OperationStatus.InvalidData;
        }

        /// <summary>
        /// Decodes the <see cref="Rune"/> at the beginning of the provided UTF-8 source buffer.
        /// </summary>
        /// <returns>
        /// <para>
        /// If the source buffer begins with a valid UTF-8 encoded scalar value, returns <see cref="OperationStatus.Done"/>,
        /// and outs via <paramref name="result"/> the decoded <see cref="Rune"/> and via <paramref name="bytesConsumed"/> the
        /// number of <see langword="byte"/>s used in the input buffer to encode the <see cref="Rune"/>.
        /// </para>
        /// <para>
        /// If the source buffer is empty or contains only a partial (incomplete) UTF-8 sequence, returns <see cref="OperationStatus.NeedMoreData"/>,
        /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="bytesConsumed"/> the length of the input buffer.
        /// </para>
        /// <para>
        /// If the source buffer begins with an ill-formed UTF-8 encoded scalar value, returns <see cref="OperationStatus.InvalidData"/>,
        /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="bytesConsumed"/> the number of
        /// <see langword="byte"/>s used in the input buffer to encode the ill-formed sequence.
        /// </para>
        /// </returns>
        /// <remarks>
        /// The general calling convention is to call this method in a loop, slicing the <paramref name="source"/> buffer by
        /// <paramref name="bytesConsumed"/> elements on each iteration of the loop. On each iteration of the loop <paramref name="result"/>
        /// will contain the real scalar value if successfully decoded, or it will contain <see cref="ReplacementChar"/> if
        /// the data could not be successfully decoded. This pattern provides convenient automatic U+FFFD substitution of
        /// invalid sequences while iterating through the loop.
        /// </remarks>
        public static OperationStatus DecodeFromUtf8(ReadOnlySpan<byte> source, out Rune result, out int bytesConsumed)
        {
            // This method follows the Unicode Standard's recommendation for detecting
            // the maximal subpart of an ill-formed subsequence. See The Unicode Standard,
            // Ch. 3.9 for more details. In summary, when reporting an invalid subsequence,
            // it tries to consume as many code units as possible as long as those code
            // units constitute the beginning of a longer well-formed subsequence per Table 3-7.

            // 'index' is the position of the byte currently being examined. On the error and
            // incomplete exits it is reported directly as the number of bytes consumed; on the
            // success exit the sequence length is index + 1.
            int index = 0;

            // Try reading input[0].

            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            uint tempValue = source[index];
            if (!UnicodeUtility.IsAsciiCodePoint(tempValue))
            {
                goto NotAscii;
            }

        Finish:

            // Shared success exit: tempValue holds the decoded scalar value and index is the
            // position of the final byte of the sequence.
            bytesConsumed = index + 1;
            Debug.Assert(1 <= bytesConsumed && bytesConsumed <= 4); // Valid subsequences are always length [1..4]
            result = UnsafeCreate(tempValue);
            return OperationStatus.Done;

        NotAscii:

            // Per Table 3-7, the beginning of a multibyte sequence must be a code unit in
            // the range [C2..F4]. If it's outside of that range, it's either a standalone
            // continuation byte, or it's an overlong two-byte sequence, or it's an out-of-range
            // four-byte sequence.

            if (!UnicodeUtility.IsInRangeInclusive(tempValue, 0xC2, 0xF4))
            {
                goto FirstByteInvalid;
            }

            // Rebase the leading byte against 0xC2 (the smallest legal leading byte) and make room
            // for the six payload bits of the next byte. The 0xC2 bias is corrected below, once
            // the continuation byte has been merged in.
            tempValue = (tempValue - 0xC2) << 6;

            // Try reading input[1].

            index++;
            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            // Continuation bytes are of the form [10xxxxxx], which means that their two's
            // complement representation is in the range [-65..-128]. This allows us to
            // perform a single comparison to see if a byte is a continuation byte.

            int thisByteSignExtended = (sbyte)source[index];
            if (thisByteSignExtended >= -64)
            {
                goto Invalid;
            }

            // The sign-extended byte equals (byte - 0x100). Adding it with unsigned wraparound and
            // then adding 0x80 leaves only the byte's low six payload bits added to tempValue.
            tempValue += (uint)thisByteSignExtended;
            tempValue += 0x80; // remove the continuation byte marker
            tempValue += (0xC2 - 0xC0) << 6; // remove the leading byte marker

            if (tempValue < 0x0800)
            {
                Debug.Assert(UnicodeUtility.IsInRangeInclusive(tempValue, 0x0080, 0x07FF));
                goto Finish; // this is a valid 2-byte sequence
            }

            // This appears to be a 3- or 4-byte sequence. Since per Table 3-7 we now have
            // enough information (from just two code units) to detect overlong or surrogate
            // sequences, we need to perform these checks now.

            // In the range checks below, each bound is written as
            // ((leadingByte - 0xC0) << 6) + (secondByte - 0x80), which is the form tempValue is
            // in at this point.
            if (!UnicodeUtility.IsInRangeInclusive(tempValue, ((0xE0 - 0xC0) << 6) + (0xA0 - 0x80), ((0xF4 - 0xC0) << 6) + (0x8F - 0x80)))
            {
                // The first two bytes were not in the range [[E0 A0]..[F4 8F]].
                // This is an overlong 3-byte sequence or an out-of-range 4-byte sequence.
                goto Invalid;
            }

            if (UnicodeUtility.IsInRangeInclusive(tempValue, ((0xED - 0xC0) << 6) + (0xA0 - 0x80), ((0xED - 0xC0) << 6) + (0xBF - 0x80)))
            {
                // This is a UTF-16 surrogate code point, which is invalid in UTF-8.
                goto Invalid;
            }

            if (UnicodeUtility.IsInRangeInclusive(tempValue, ((0xF0 - 0xC0) << 6) + (0x80 - 0x80), ((0xF0 - 0xC0) << 6) + (0x8F - 0x80)))
            {
                // This is an overlong 4-byte sequence.
                goto Invalid;
            }

            // The first two bytes were just fine. We don't need to perform any other checks
            // on the remaining bytes other than to see that they're valid continuation bytes.

            // Try reading input[2].

            index++;
            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            thisByteSignExtended = (sbyte)source[index];
            if (thisByteSignExtended >= -64)
            {
                goto Invalid; // this byte is not a UTF-8 continuation byte
            }

            tempValue <<= 6;
            tempValue += (uint)thisByteSignExtended;
            tempValue += 0x80; // remove the continuation byte marker
            tempValue -= (0xE0 - 0xC0) << 12; // remove the leading byte marker

            // Anything that still exceeds 0xFFFF after the 3-byte adjustment is treated as the
            // first three bytes of a 4-byte sequence.
            if (tempValue <= 0xFFFF)
            {
                Debug.Assert(UnicodeUtility.IsInRangeInclusive(tempValue, 0x0800, 0xFFFF));
                goto Finish; // this is a valid 3-byte sequence
            }

            // Try reading input[3].

            index++;
            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            thisByteSignExtended = (sbyte)source[index];
            if (thisByteSignExtended >= -64)
            {
                goto Invalid; // this byte is not a UTF-8 continuation byte
            }

            tempValue <<= 6;
            tempValue += (uint)thisByteSignExtended;
            tempValue += 0x80; // remove the continuation byte marker
            tempValue -= (0xF0 - 0xE0) << 18; // remove the leading byte marker

            UnicodeDebug.AssertIsValidSupplementaryPlaneScalar(tempValue);
            goto Finish; // this is a valid 4-byte sequence

        FirstByteInvalid:

            // index is still 0 when this label is reached, so it is bumped to 1 here: the
            // offending first byte itself must be reported as consumed.
            index = 1; // Invalid subsequences are always at least length 1.

        Invalid:

            // Shared error exit: index is the count of bytes that formed the invalid subsequence
            // (the byte that failed its check is not included).
            Debug.Assert(1 <= index && index <= 3); // Invalid subsequences are always length 1..3
            bytesConsumed = index;
            result = ReplacementChar;
            return OperationStatus.InvalidData;

        NeedsMoreData:

            // Shared incomplete exit: the read at position index ran past the end of the buffer,
            // so index equals the length of the input buffer.
            Debug.Assert(0 <= index && index <= 3); // Incomplete subsequences are always length 0..3
            bytesConsumed = index;
            result = ReplacementChar;
            return OperationStatus.NeedMoreData;
        }

        /// <summary>
        /// Decodes the <see cref="Rune"/> at the end of the provided UTF-16 source buffer.
        /// </summary>
        /// <remarks>
        /// This method is very similar to <see cref="DecodeFromUtf16(ReadOnlySpan{char}, out Rune, out int)"/>, but it allows
        /// the caller to loop backward instead of forward. The typical calling convention is that on each iteration
        /// of the loop, the caller should slice off the final <paramref name="charsConsumed"/> elements of
        /// the <paramref name="source"/> buffer.
        /// </remarks>
        public static OperationStatus DecodeLastFromUtf16(ReadOnlySpan<char> source, out Rune result, out int charsConsumed)
        {
            int lastIndex = source.Length - 1;

            // An empty buffer yields lastIndex == -1, which fails the unsigned bounds test.
            // This isn't an error since it could be valid given more input data.
            if ((uint)lastIndex >= (uint)source.Length)
            {
                result = ReplacementChar;
                charsConsumed = 0;
                return OperationStatus.NeedMoreData;
            }

            // Common case: the final code unit is a BMP scalar value on its own.
            char trailingUnit = source[lastIndex];
            if (TryCreate(trailingUnit, out result))
            {
                charsConsumed = 1;
                return OperationStatus.Done;
            }

            // The final code unit is a surrogate. If it is not a low surrogate, the buffer ends
            // with a high surrogate, which could still become valid given more input data.
            if (!char.IsLowSurrogate(trailingUnit))
            {
                result = ReplacementChar;
                charsConsumed = 1;
                return OperationStatus.NeedMoreData;
            }

            // A trailing low surrogate must be preceded by a high surrogate to be valid.
            int precedingIndex = lastIndex - 1;
            if ((uint)precedingIndex < (uint)source.Length
                && TryCreate(source[precedingIndex], trailingUnit, out result))
            {
                // Formed a supplementary scalar value.
                charsConsumed = 2;
                return OperationStatus.Done;
            }

            // Standalone low surrogate: always invalid, and exactly one code unit long.
            result = ReplacementChar;
            charsConsumed = 1;
            return OperationStatus.InvalidData;
        }

        /// <summary>
        /// Decodes the <see cref="Rune"/> at the end of the provided UTF-8 source buffer.
        /// </summary>
        /// <remarks>
        /// This method is very similar to <see cref="DecodeFromUtf8(ReadOnlySpan{byte}, out Rune, out int)"/>, but it allows
        /// the caller to loop backward instead of forward. The typical calling convention is that on each iteration
        /// of the loop, the caller should slice off the final <paramref name="bytesConsumed"/> elements of
        /// the <paramref name="source"/> buffer.
        /// </remarks>
        public static OperationStatus DecodeLastFromUtf8(ReadOnlySpan<byte> source, out Rune value, out int bytesConsumed)
        {
            int index = source.Length - 1;
            if ((uint)index < (uint)source.Length)
            {
                // The buffer contains at least one byte. Let's check the fast case where the
                // buffer ends with an ASCII byte.

                uint tempValue = source[index];
                if (UnicodeUtility.IsAsciiCodePoint(tempValue))
                {
                    bytesConsumed = 1;
                    value = UnsafeCreate(tempValue);
                    return OperationStatus.Done;
                }

                // If the final byte is not an ASCII byte, we may be beginning or in the middle of
                // a UTF-8 multi-code unit sequence. We need to back up until we see the start of
                // the multi-code unit sequence; we can detect the leading byte because all multi-byte
                // sequences begin with a byte whose 0x40 bit is set. Since all multi-byte sequences
                // are no greater than 4 code units in length, we only need to search back a maximum
                // of four bytes.

                if (((byte)tempValue & 0x40) != 0)
                {
                    // This is a UTF-8 leading byte. We'll do a forward read from here.
                    // It'll return invalid (if given C0, F5, etc.) or incomplete. Both are fine.

                    return DecodeFromUtf8(source.Slice(index), out value, out bytesConsumed);
                }

                // If we got to this point, the final byte was a UTF-8 continuation byte.
                // Let's check the three bytes immediately preceding this, looking for the starting byte.

                for (int i = 3; i > 0; i--)
                {
                    index--;
                    if ((uint)index >= (uint)source.Length)
                    {
                        goto Invalid; // out of data
                    }

                    // The check below will get hit for ASCII (values 00..7F) and for UTF-8 starting bytes
                    // (bits 0xC0 set, values C0..FF). In two's complement this is the range [-64..127].
                    // It's just a fast way for us to terminate the search.

                    if ((sbyte)source[index] >= -64)
                    {
                        goto ForwardDecode;
                    }
                }

            Invalid:

                // If we got to this point, either:
                // - the last 4 bytes of the input buffer are continuation bytes;
                // - the entire input buffer (if fewer than 4 bytes) consists only of continuation bytes; or
                // - there's no UTF-8 leading byte between the final continuation byte of the buffer and
                //   the previous well-formed subsequence or maximal invalid subsequence.
                //
                // In all of these cases, the final byte must be a maximal invalid subsequence of length 1.
                // See comment near the end of this method for more information.

                value = ReplacementChar;
                bytesConsumed = 1;
                return OperationStatus.InvalidData;

            ForwardDecode:

                // If we got to this point, we found an ASCII byte or a UTF-8 starting byte at position source[index].
                // Technically this could also mean we found an invalid byte like C0 or F5 at this position, but that's
                // fine since it'll be handled by the forward read. From this position, we'll perform a forward read
                // and see if we consumed the entirety of the buffer.

                source = source.Slice(index);
                Debug.Assert(!source.IsEmpty, "Shouldn't reach this for empty inputs.");

                OperationStatus operationStatus = DecodeFromUtf8(source, out Rune tempRune, out int tempBytesConsumed);
                if (tempBytesConsumed == source.Length)
                {
                    // If this forward read consumed the entirety of the end of the input buffer, we can return it
                    // as the result of this function. It could be well-formed, incomplete, or invalid. If it's
                    // invalid and we consumed the remainder of the buffer, we know we've found the maximal invalid
                    // subsequence, which is what we wanted anyway.

                    bytesConsumed = tempBytesConsumed;
                    value = tempRune;
                    return operationStatus;
                }

                // If we got to this point, we know that the final continuation byte wasn't consumed by the forward
                // read that we just performed above. This means that the continuation byte has to be part of an
                // invalid subsequence since there's no UTF-8 leading byte between what we just consumed and the
                // continuation byte at the end of the input. Furthermore, since any maximal invalid subsequence
                // of length > 1 must have a UTF-8 leading byte as its first code unit, this implies that the
                // continuation byte at the end of the buffer is itself a maximal invalid subsequence of length 1.

                goto Invalid;
            }
            else
            {
                // Source buffer was empty.
                value = ReplacementChar;
                bytesConsumed = 0;
                return OperationStatus.NeedMoreData;
            }
        }

        /// <summary>
        /// Writes the UTF-16 representation of this <see cref="Rune"/> into <paramref name="destination"/>.
        /// </summary>
        /// <param name="destination">The buffer that receives the UTF-16 code units.</param>
        /// <returns>The count of <see cref="char"/>s that were written to <paramref name="destination"/>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="destination"/> is too small to hold the output.
        /// </exception>
        public int EncodeToUtf16(Span<char> destination)
        {
            // Delegate to the non-throwing variant and turn a failure into an exception.
            bool succeeded = TryEncodeToUtf16(destination, out int written);
            if (!succeeded)
            {
                ThrowHelper.ThrowArgumentException_DestinationTooShort();
            }

            return written;
        }

        /// <summary>
        /// Writes the UTF-8 representation of this <see cref="Rune"/> into <paramref name="destination"/>.
        /// </summary>
        /// <param name="destination">The buffer that receives the UTF-8 code units.</param>
        /// <returns>The count of <see cref="byte"/>s that were written to <paramref name="destination"/>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="destination"/> is too small to hold the output.
        /// </exception>
        public int EncodeToUtf8(Span<byte> destination)
        {
            // Delegate to the non-throwing variant and turn a failure into an exception.
            bool succeeded = TryEncodeToUtf8(destination, out int written);
            if (!succeeded)
            {
                ThrowHelper.ThrowArgumentException_DestinationTooShort();
            }

            return written;
        }

        /// <summary>
        /// Determines whether <paramref name="obj"/> is a <see cref="Rune"/> that equals this instance.
        /// </summary>
        public override bool Equals(object? obj)
        {
            // Anything that is not a Rune (including null) compares unequal.
            if (obj is Rune candidate)
            {
                return Equals(candidate);
            }

            return false;
        }

        /// <summary>
        /// Determines whether <paramref name="other"/> equals this instance, as decided by
        /// the type's equality operator.
        /// </summary>
        public bool Equals(Rune other)
        {
            return this == other;
        }

        /// <summary>
        /// Returns the hash code of this instance, which is simply its <see cref="Value"/>.
        /// </summary>
        public override int GetHashCode()
        {
            return Value;
        }

        /// <summary>
        /// Reads the <see cref="Rune"/> that starts at position <paramref name="index"/> of
        /// string <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// Throws when <paramref name="input"/> is null, when <paramref name="index"/> is out of range, or
        /// when <paramref name="index"/> is not the start of a valid scalar value within <paramref name="input"/>.
        /// </remarks>
        public static Rune GetRuneAt(string input, int index)
        {
            // The reader validates the arguments itself and signals malformed data with a negative result.
            int scalarOrError = ReadRuneFromString(input, index);

            bool extractionFailed = scalarOrError < 0;
            if (extractionFailed)
            {
                ThrowHelper.ThrowArgumentException_CannotExtractScalar(ExceptionArgument.index);
            }

            return UnsafeCreate((uint)scalarOrError);
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is a valid Unicode scalar value, i.e., whether it lies
        /// in [ U+0000..U+D7FF ] or [ U+E000..U+10FFFF ] (both ranges inclusive).
        /// </summary>
        public static bool IsValid(int value)
        {
            // Reinterpret as unsigned and defer to the unsigned overload.
            return IsValid((uint)value);
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is a valid Unicode scalar value, i.e., whether it lies
        /// in [ U+0000..U+D7FF ] or [ U+E000..U+10FFFF ] (both ranges inclusive).
        /// </summary>
        [CLSCompliant(false)]
        public static bool IsValid(uint value)
        {
            return UnicodeUtility.IsValidUnicodeScalar(value);
        }

        // Reads the scalar value that starts at the beginning of a UTF-16 buffer.
        // Returns a negative number on failure (empty buffer or ill-formed surrogate data).
        internal static int ReadFirstRuneFromUtf16Buffer(ReadOnlySpan<char> input)
        {
            if (input.IsEmpty)
            {
                return -1;
            }

            uint first = input[0];

            // Fast path: a non-surrogate code unit is already a complete scalar value.
            if (!UnicodeUtility.IsSurrogateCodePoint(first))
            {
                return (int)first;
            }

            // A surrogate in first position must be a high surrogate...
            if (!UnicodeUtility.IsHighSurrogateCodePoint(first))
            {
                return -1;
            }

            // ...and it must be followed by another code unit...
            if (input.Length < 2)
            {
                return -1; // not an argument exception - just a "bad data" failure
            }

            // ...which in turn must be a low surrogate.
            uint second = input[1];
            if (!UnicodeUtility.IsLowSurrogateCodePoint(second))
            {
                return -1;
            }

            return (int)UnicodeUtility.GetScalarFromUtf16SurrogatePair(first, second);
        }

        // Reads the scalar value that starts at input[index]. Argument problems (null string,
        // index out of range) are reported through ThrowHelper; ill-formed surrogate data is
        // reported by returning a negative number.
        private static int ReadRuneFromString(string input, int index)
        {
            if (input is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
            }

            // The unsigned comparison rejects negative indices as well as indices past the end.
            if ((uint)index >= (uint)input!.Length)
            {
                ThrowHelper.ThrowArgumentOutOfRange_IndexException();
            }

            uint first = input[index];

            // Fast path: a non-surrogate code unit is already a complete scalar value.
            if (!UnicodeUtility.IsSurrogateCodePoint(first))
            {
                return (int)first;
            }

            if (!UnicodeUtility.IsHighSurrogateCodePoint(first))
            {
                return -1;
            }

            // 'first' is a high surrogate, so a low surrogate must follow it.
            //
            // If this becomes a hot code path, the bounds check below could be skipped by reading
            // off the end of the string using unsafe code. Since strings are null-terminated,
            // such a read can never yield a valid low surrogate, so the method would still fail
            // correctly if the string terminates unexpectedly.

            int next = index + 1;
            if ((uint)next >= (uint)input.Length)
            {
                return -1; // not an argument exception - just a "bad data" failure
            }

            uint second = input[next];
            if (!UnicodeUtility.IsLowSurrogateCodePoint(second))
            {
                return -1;
            }

            return (int)UnicodeUtility.GetScalarFromUtf16SurrogatePair(first, second);
        }

        /// <summary>
        /// Converts this <see cref="Rune"/> instance to its <see cref="string"/> representation:
        /// one <see cref="char"/> for a BMP value, or a surrogate pair otherwise.
        /// </summary>
        public override string ToString()
        {
            if (!IsBmp)
            {
                // Supplementary-plane values need two UTF-16 code units.
                UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out char leading, out char trailing);
                return string.CreateFromChar(leading, trailing);
            }

            return string.CreateFromChar((char)_value);
        }

        /// <summary>
        /// Attempts to create a <see cref="Rune"/> from a single UTF-16 code unit.
        /// </summary>
        /// <param name="ch">The code unit to convert.</param>
        /// <param name="result">The created <see cref="Rune"/> on success; otherwise the default value.</param>
        /// <returns><see langword="false"/> if <paramref name="ch"/> is a surrogate; otherwise <see langword="true"/>.</returns>
        public static bool TryCreate(char ch, out Rune result)
        {
            uint codeUnit = ch;

            // A lone surrogate is not a scalar value; every other UTF-16 code unit is.
            bool isScalar = !UnicodeUtility.IsSurrogateCodePoint(codeUnit);
            result = isScalar ? UnsafeCreate(codeUnit) : default;
            return isScalar;
        }

        /// <summary>
        /// Attempts to create a <see cref="Rune"/> from the provided UTF-16 surrogate pair.
        /// Returns <see langword="false"/> if the input values don't represent a well-formed UTF-16 surrogate pair.
        /// </summary>
        /// <param name="highSurrogate">The candidate high (leading) surrogate.</param>
        /// <param name="lowSurrogate">The candidate low (trailing) surrogate.</param>
        /// <param name="result">The created <see cref="Rune"/> on success; otherwise the default value.</param>
        public static bool TryCreate(char highSurrogate, char lowSurrogate, out Rune result)
        {
            // First, extend both to 32 bits, then calculate the offset of
            // each candidate surrogate char from the start of its range.
            // The subtraction is unsigned, so a char below the start of its range
            // wraps around to a very large offset.

            uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
            uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;

            // This is a single comparison which allows us to check both for validity at once since
            // both the high surrogate range and the low surrogate range are the same length.
            // If the comparison fails, we report failure through the return value; nothing is thrown.

            if ((highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE)
            {
                // The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding.
                result = UnsafeCreate((highSurrogateOffset << 10) + lowSurrogateOffset + (0x40u << 10));
                return true;
            }
            else
            {
                // Didn't have a high surrogate followed by a low surrogate.
                result = default;
                return false;
            }
        }

        /// <summary>
        /// Attempts to create a <see cref="Rune"/> from a signed 32-bit value.
        /// </summary>
        public static bool TryCreate(int value, out Rune result)
        {
            // Reinterpret as unsigned and defer to the unsigned overload.
            return TryCreate((uint)value, out result);
        }

        /// <summary>
        /// Attempts to create a <see cref="Rune"/> from an unsigned 32-bit value.
        /// </summary>
        /// <param name="value">The candidate scalar value.</param>
        /// <param name="result">The created <see cref="Rune"/> on success; otherwise the default value.</param>
        /// <returns><see langword="true"/> if <paramref name="value"/> is a valid Unicode scalar value.</returns>
        [CLSCompliant(false)]
        public static bool TryCreate(uint value, out Rune result)
        {
            if (!UnicodeUtility.IsValidUnicodeScalar(value))
            {
                result = default;
                return false;
            }

            // Validation already happened above, so the unchecked factory is safe to use here.
            result = UnsafeCreate(value);
            return true;
        }

        /// <summary>
        /// Attempts to write the UTF-16 representation of this <see cref="Rune"/> into a destination buffer.
        /// </summary>
        /// <param name="destination">The buffer that receives the UTF-16 code units.</param>
        /// <param name="charsWritten">
        /// The count of <see cref="char"/>s written to <paramref name="destination"/>,
        /// or 0 if the destination buffer is too small to contain the output.</param>
        /// <returns>True if the value was written to the buffer; otherwise, false.</returns>
        /// <remarks>
        /// Query the <see cref="Utf16SequenceLength"/> property beforehand to learn
        /// the required size of the <paramref name="destination"/> buffer.
        /// </remarks>
        public bool TryEncodeToUtf16(Span<char> destination, out int charsWritten)
        {
            if (IsBmp)
            {
                // BMP values occupy exactly one code unit.
                if (destination.Length >= 1)
                {
                    destination[0] = (char)_value;
                    charsWritten = 1;
                    return true;
                }
            }
            else if (destination.Length >= 2)
            {
                // Supplementary-plane values are written as a surrogate pair.
                UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out destination[0], out destination[1]);
                charsWritten = 2;
                return true;
            }

            // The destination cannot hold the encoded value.
            charsWritten = 0;
            return false;
        }

        /// <summary>
        /// Attempts to write the UTF-8 representation of this <see cref="Rune"/> into a destination buffer.
        /// </summary>
        /// <param name="destination">The buffer that receives the UTF-8 code units.</param>
        /// <param name="bytesWritten">
        /// The count of <see cref="byte"/>s written to <paramref name="destination"/>,
        /// or 0 if the destination buffer is too small to contain the output.</param>
        /// <returns>True if the value was written to the buffer; otherwise, false.</returns>
        /// <remarks>
        /// Query the <see cref="Utf8SequenceLength"/> property beforehand to learn
        /// the required size of the <paramref name="destination"/> buffer.
        /// </remarks>
        public bool TryEncodeToUtf8(Span<byte> destination, out int bytesWritten)
        {
            // The bit layouts below follow the Unicode Standard, Table 3-6.
            // Each case is tried from shortest to longest encoding; a case applies only
            // when the buffer has room for it and the scalar fits in that many bytes.

            uint scalar = _value;
            int capacity = destination.Length;

            if (capacity >= 1 && IsAscii)
            {
                destination[0] = (byte)scalar;
                bytesWritten = 1;
                return true;
            }

            if (capacity >= 2 && scalar <= 0x7FFu)
            {
                // Scalar 00000yyy yyxxxxxx -> bytes [ 110yyyyy 10xxxxxx ]
                destination[0] = (byte)((scalar + (0b110u << 11)) >> 6);
                destination[1] = (byte)((scalar & 0x3Fu) | 0x80u);
                bytesWritten = 2;
                return true;
            }

            if (capacity >= 3 && scalar <= 0xFFFFu)
            {
                // Scalar zzzzyyyy yyxxxxxx -> bytes [ 1110zzzz 10yyyyyy 10xxxxxx ]
                destination[0] = (byte)((scalar + (0b1110u << 16)) >> 12);
                destination[1] = (byte)(((scalar >> 6) & 0x3Fu) | 0x80u);
                destination[2] = (byte)((scalar & 0x3Fu) | 0x80u);
                bytesWritten = 3;
                return true;
            }

            if (capacity >= 4)
            {
                // Reaching this point with room for four bytes means none of the shorter forms fit.
                // Scalar 000uuuuu zzzzyyyy yyxxxxxx -> bytes [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]
                destination[0] = (byte)((scalar + (0b11110u << 21)) >> 18);
                destination[1] = (byte)(((scalar >> 12) & 0x3Fu) | 0x80u);
                destination[2] = (byte)(((scalar >> 6) & 0x3Fu) | 0x80u);
                destination[3] = (byte)((scalar & 0x3Fu) | 0x80u);
                bytesWritten = 4;
                return true;
            }

            // The destination cannot hold the encoded value.
            bytesWritten = 0;
            return false;
        }

        /// <summary>
        /// Attempts to read the <see cref="Rune"/> that starts at position <paramref name="index"/> of
        /// string <paramref name="input"/>.
        /// </summary>
        /// <returns><see langword="true"/> when a scalar value was extracted from the given index;
        /// <see langword="false"/> when the data at that position is invalid.</returns>
        /// <remarks>
        /// Throws only when <paramref name="input"/> is null or <paramref name="index"/> is out of range.
        /// </remarks>
        public static bool TryGetRuneAt(string input, int index, out Rune value)
        {
            // The reader validates the arguments itself and signals malformed data with a negative result.
            int scalarOrError = ReadRuneFromString(input, index);
            if (scalarOrError < 0)
            {
                value = default;
                return false;
            }

            value = UnsafeCreate((uint)scalarOrError);
            return true;
        }

        // Allows constructing a Unicode scalar value from an arbitrary 32-bit integer without
        // validation. It is the caller's responsibility to have performed manual validation
        // before calling this method. If a Rune instance is forcibly constructed
        // from invalid input, the APIs on this type have undefined behavior, potentially including
        // introducing a security hole in the consuming application.
        //
        // The following is an example of a security hole that an invalid Rune value could
        // introduce (in this case, a potential stack overflow):
        //
        // public int GetMarvin32HashCode(Rune r) {
        //   Span<char> buffer = stackalloc char[r.Utf16SequenceLength];
        //   r.TryEncodeToUtf16(buffer, ...);
        //   return Marvin32.ComputeHash(buffer.AsBytes());
        // }

        /// <summary>
        /// Wraps <paramref name="scalarValue"/> in a <see cref="Rune"/> without validating it.
        /// Callers are responsible for having validated the value beforehand.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static Rune UnsafeCreate(uint scalarValue)
        {
            return new Rune(scalarValue, false);
        }

        // These are analogs of APIs on System.Char

        /// <summary>
        /// Gets the numeric value associated with <paramref name="value"/>. For ASCII input this is
        /// the digit value 0..9, or -1 when the character is not a digit; non-ASCII input is looked
        /// up through <see cref="CharUnicodeInfo"/>.
        /// </summary>
        public static double GetNumericValue(Rune value)
        {
            if (!value.IsAscii)
            {
                // Outside ASCII the globalization table is the source of truth.
                return CharUnicodeInfo.InternalGetNumericValue(value.Value);
            }

            // Unsigned subtraction: anything below '0' wraps to a large number and fails the range test.
            uint digit = value._value - '0';
            if (digit <= 9)
            {
                return (double)digit;
            }

            return -1;
        }

        /// <summary>
        /// Gets the <see cref="UnicodeCategory"/> of <paramref name="value"/>, using the
        /// ASCII lookup table when possible and the non-ASCII path otherwise.
        /// </summary>
        public static UnicodeCategory GetUnicodeCategory(Rune value)
        {
            // For ASCII the category is stored in the masked bits of the per-character table entry.
            return value.IsAscii
                ? (UnicodeCategory)(AsciiCharInfo[value.Value] & UnicodeCategoryMask)
                : GetUnicodeCategoryNonAscii(value);
        }

        // Looks up the category of a non-ASCII value through CharUnicodeInfo.
        // ASCII callers are expected to use the table-based path instead (asserted in debug builds).
        private static UnicodeCategory GetUnicodeCategoryNonAscii(Rune value)
        {
            Debug.Assert(!value.IsAscii, "Shouldn't use this non-optimized code path for ASCII characters.");

            UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(value.Value);
            return category;
        }

        // Returns true iff the category lies in the contiguous enum range
        // [UppercaseLetter..OtherLetter], i.e., it represents a letter.
        private static bool IsCategoryLetter(UnicodeCategory category)
            => UnicodeUtility.IsInRangeInclusive(
                (uint)category,
                (uint)UnicodeCategory.UppercaseLetter,
                (uint)UnicodeCategory.OtherLetter);

        // Returns true iff this Unicode category represents a letter or a decimal digit.
        // The letter test is delegated to IsCategoryLetter so the letter range is defined in one place.
        private static bool IsCategoryLetterOrDecimalDigit(UnicodeCategory category)
        {
            return IsCategoryLetter(category)
                || (category == UnicodeCategory.DecimalDigitNumber);
        }

        // Returns true iff the category lies in the contiguous enum range
        // [DecimalDigitNumber..OtherNumber], i.e., it represents a number.
        private static bool IsCategoryNumber(UnicodeCategory category)
            => UnicodeUtility.IsInRangeInclusive(
                (uint)category,
                (uint)UnicodeCategory.DecimalDigitNumber,
                (uint)UnicodeCategory.OtherNumber);

        // Returns true iff the category lies in the contiguous enum range
        // [ConnectorPunctuation..OtherPunctuation], i.e., it represents a punctuation mark.
        private static bool IsCategoryPunctuation(UnicodeCategory category)
            => UnicodeUtility.IsInRangeInclusive(
                (uint)category,
                (uint)UnicodeCategory.ConnectorPunctuation,
                (uint)UnicodeCategory.OtherPunctuation);

        // Returns true iff the category lies in the contiguous enum range
        // [SpaceSeparator..ParagraphSeparator], i.e., it represents a separator.
        private static bool IsCategorySeparator(UnicodeCategory category)
            => UnicodeUtility.IsInRangeInclusive(
                (uint)category,
                (uint)UnicodeCategory.SpaceSeparator,
                (uint)UnicodeCategory.ParagraphSeparator);

        // Returns true iff the category lies in the contiguous enum range
        // [MathSymbol..OtherSymbol], i.e., it represents a symbol.
        private static bool IsCategorySymbol(UnicodeCategory category)
            => UnicodeUtility.IsInRangeInclusive(
                (uint)category,
                (uint)UnicodeCategory.MathSymbol,
                (uint)UnicodeCategory.OtherSymbol);

        /// <summary>
        /// Reports whether <paramref name="value"/> is a control character, i.e., whether it lies
        /// in [ U+0000..U+001F ] or [ U+007F..U+009F ].
        /// </summary>
        public static bool IsControl(Rune value)
        {
            // The Unicode stability policy fixes the set of control characters forever at
            // [ U+0000..U+001F ] and [ U+007F..U+009F ]; nothing will ever be added to it.
            // See http://www.unicode.org/policies/stability_policy.html.

            // The trick below relies on Rune.Value never being -1 (Rune is a validating type):
            //   00..1F  --(+1)-->  01..20  --(&~80)-->  01..20
            //   7F..9F  --(+1)-->  80..A0  --(&~80)-->  00..20

            uint shifted = value._value + 1;
            uint folded = shifted & ~0x80u;
            return folded <= 0x20u;
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is a decimal digit: '0'..'9' for ASCII input,
        /// or a member of <see cref="UnicodeCategory.DecimalDigitNumber"/> otherwise.
        /// </summary>
        public static bool IsDigit(Rune value)
        {
            return value.IsAscii
                ? UnicodeUtility.IsInRangeInclusive(value._value, '0', '9')
                : GetUnicodeCategoryNonAscii(value) == UnicodeCategory.DecimalDigitNumber;
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is a letter: [A-Za-z] for ASCII input,
        /// or a member of one of the letter categories otherwise.
        /// </summary>
        public static bool IsLetter(Rune value)
        {
            if (!value.IsAscii)
            {
                return IsCategoryLetter(GetUnicodeCategoryNonAscii(value));
            }

            // Clearing bit 0x20 of the offset from 'A' folds 'a'..'z' onto 'A'..'Z',
            // so a single unsigned range test covers [A-Za-z].
            uint offsetFromA = value._value - 'A';
            return (offsetFromA & ~0x20u) <= (uint)('Z' - 'A');
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is a letter or a decimal digit, using the
        /// ASCII lookup table when possible and the Unicode category otherwise.
        /// </summary>
        public static bool IsLetterOrDigit(Rune value)
        {
            if (!value.IsAscii)
            {
                return IsCategoryLetterOrDecimalDigit(GetUnicodeCategoryNonAscii(value));
            }

            // For ASCII the answer is stored as a flag bit in the per-character table entry.
            return (AsciiCharInfo[value.Value] & IsLetterOrDigitFlag) != 0;
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is a lowercase letter: 'a'..'z' for ASCII input,
        /// or a member of <see cref="UnicodeCategory.LowercaseLetter"/> otherwise.
        /// </summary>
        public static bool IsLower(Rune value)
        {
            return value.IsAscii
                ? UnicodeUtility.IsInRangeInclusive(value._value, 'a', 'z')
                : GetUnicodeCategoryNonAscii(value) == UnicodeCategory.LowercaseLetter;
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is a number: '0'..'9' for ASCII input,
        /// or a member of one of the number categories otherwise.
        /// </summary>
        public static bool IsNumber(Rune value)
        {
            return value.IsAscii
                ? UnicodeUtility.IsInRangeInclusive(value._value, '0', '9')
                : IsCategoryNumber(GetUnicodeCategoryNonAscii(value));
        }

        /// <summary>
        /// Reports whether the Unicode category of <paramref name="value"/> is a punctuation category.
        /// </summary>
        public static bool IsPunctuation(Rune value)
        {
            UnicodeCategory category = GetUnicodeCategory(value);
            return IsCategoryPunctuation(category);
        }

        /// <summary>
        /// Reports whether the Unicode category of <paramref name="value"/> is a separator category.
        /// </summary>
        public static bool IsSeparator(Rune value)
        {
            UnicodeCategory category = GetUnicodeCategory(value);
            return IsCategorySeparator(category);
        }

        /// <summary>
        /// Reports whether the Unicode category of <paramref name="value"/> is a symbol category.
        /// </summary>
        public static bool IsSymbol(Rune value)
        {
            UnicodeCategory category = GetUnicodeCategory(value);
            return IsCategorySymbol(category);
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is an uppercase letter: 'A'..'Z' for ASCII input,
        /// or a member of <see cref="UnicodeCategory.UppercaseLetter"/> otherwise.
        /// </summary>
        public static bool IsUpper(Rune value)
        {
            return value.IsAscii
                ? UnicodeUtility.IsInRangeInclusive(value._value, 'A', 'Z')
                : GetUnicodeCategoryNonAscii(value) == UnicodeCategory.UppercaseLetter;
        }

        /// <summary>
        /// Reports whether <paramref name="value"/> is white space, using the ASCII lookup table
        /// when possible and otherwise treating U+0085 and all separator categories as white space.
        /// </summary>
        public static bool IsWhiteSpace(Rune value)
        {
            if (value.IsAscii)
            {
                // For ASCII the answer is stored as a flag bit in the per-character table entry.
                return (AsciiCharInfo[value.Value] & IsWhiteSpaceFlag) != 0;
            }

            // U+0085 needs its own test: it is white space, yet it is classified as a Control
            // character rather than as a separator. It is the only code point outside the ASCII
            // range with this mismatch.

            return value._value == 0x0085u
                || IsCategorySeparator(GetUnicodeCategoryNonAscii(value));
        }

        /// <summary>
        /// Converts <paramref name="value"/> to lowercase using the casing rules of <paramref name="culture"/>.
        /// </summary>
        /// <param name="value">The value to convert.</param>
        /// <param name="culture">The culture whose casing rules apply; must not be null.</param>
        public static Rune ToLower(Rune value, CultureInfo culture)
        {
            if (culture is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture);
            }

            // ASCII is deliberately not special-cased here: the given culture may treat ASCII
            // characters differently than the invariant culture does (e.g., Turkish I). The
            // globalization tables are consulted directly whenever they are available.

            return GlobalizationMode.Invariant
                ? ToLowerInvariant(value)
                : ChangeCaseCultureAware(value, culture!.TextInfo, toUpper: false);
        }

        /// <summary>
        /// Converts <paramref name="value"/> to lowercase using the casing rules of the invariant culture.
        /// </summary>
        public static Rune ToLowerInvariant(Rune value)
        {
            if (!value.IsAscii)
            {
                // Without globalization tables, case changing has no effect on non-ASCII values;
                // with them, the conversion goes through the invariant culture's tables.
                return GlobalizationMode.Invariant
                    ? value
                    : ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: false);
            }

            // ASCII is the most common case, and a mix of lowercase and uppercase chars is
            // expected within it, so the conversion is done with a branchless helper.
            // Using the UTF-16 utility is fine: the high 16 bits of the value are never set,
            // so they are left unchanged.
            return UnsafeCreate(Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(value._value));
        }

        /// <summary>
        /// Converts <paramref name="value"/> to uppercase using the casing rules of <paramref name="culture"/>.
        /// </summary>
        /// <param name="value">The value to convert.</param>
        /// <param name="culture">The culture whose casing rules apply; must not be null.</param>
        public static Rune ToUpper(Rune value, CultureInfo culture)
        {
            if (culture is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture);
            }

            // ASCII is deliberately not special-cased here: the given culture may treat ASCII
            // characters differently than the invariant culture does (e.g., Turkish I). The
            // globalization tables are consulted directly whenever they are available.

            return GlobalizationMode.Invariant
                ? ToUpperInvariant(value)
                : ChangeCaseCultureAware(value, culture!.TextInfo, toUpper: true);
        }

        /// <summary>
        /// Converts <paramref name="value"/> to uppercase using the casing rules of the invariant culture.
        /// </summary>
        public static Rune ToUpperInvariant(Rune value)
        {
            if (!value.IsAscii)
            {
                // Without globalization tables, case changing has no effect on non-ASCII values;
                // with them, the conversion goes through the invariant culture's tables.
                return GlobalizationMode.Invariant
                    ? value
                    : ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: true);
            }

            // ASCII is the most common case, and a mix of lowercase and uppercase chars is
            // expected within it, so the conversion is done with a branchless helper.
            // Using the UTF-16 utility is fine: the high 16 bits of the value are never set,
            // so they are left unchanged.
            return UnsafeCreate(Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(value._value));
        }
    }
}