c
/
mono
peilaus alkaen https://github.com/mono/mono.git


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870
							// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Serialization;
using System.Text;
using System.Text.Unicode;
using Internal.Runtime.CompilerServices;

#pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
#if BIT64
using nuint = System.UInt64;
#else // BIT64
using nuint = System.UInt32;
#endif // BIT64

namespace System.Globalization
{
    /// <summary>
    /// This Class defines behaviors specific to a writing system.
    /// A writing system is the collection of scripts and orthographic rules
    /// required to represent a language as text.
    /// </summary>
    public partial class TextInfo : ICloneable, IDeserializationCallback
    {
        // Three-valued flag used by this class to cache a boolean that is computed on first use.
        private enum Tristate : byte
        {
            NotInitialized = 0, // the cached answer has not been computed yet
            False = 1,
            True = 2
        }

        // Cached list separator; stays null until the ListSeparator getter or setter stores a value.
        private string? _listSeparator;
        // When true, VerifyWritable throws, which blocks setters that call it.
        private bool _isReadOnly = false;

        private readonly string _cultureName;
        private readonly CultureData _cultureData;

        // Name of the text info we're using (ie: _cultureData.TextInfoName)
        private readonly string _textInfoName;

        // Lazily populated cache backing the IsAsciiCasingSameAsInvariant property.
        private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized;

        // Backing store for the lazily created invariant text info.
        private static volatile TextInfo? s_invariant;

        /// <summary>
        /// Gets the text info built from <c>CultureData.Invariant</c>, creating and caching it on first use.
        /// </summary>
        internal static TextInfo Invariant
        {
            get
            {
                TextInfo? cached = s_invariant;
                if (cached == null)
                {
                    cached = new TextInfo(CultureData.Invariant);
                    s_invariant = cached;
                }

                return cached;
            }
        }

        /// <summary>
        /// Creates a text info backed by the given culture data and caches the culture name
        /// and text info name read from it.
        /// </summary>
        /// <param name="cultureData">Culture data that serves as the primary data source.</param>
        internal TextInfo(CultureData cultureData)
        {
            // Everything else this instance exposes is derived from the culture data.
            _cultureData = cultureData;
            _cultureName = cultureData.CultureName;
            _textInfoName = cultureData.TextInfoName;

            FinishInitialization();
        }

        /// <summary>
        /// Deserialization callback; always throws <see cref="PlatformNotSupportedException"/>.
        /// </summary>
        void IDeserializationCallback.OnDeserialization(object? sender) =>
            throw new PlatformNotSupportedException();

        /// <summary>Gets the ANSI code page reported by the underlying culture data.</summary>
        public virtual int ANSICodePage
        {
            get { return _cultureData.ANSICodePage; }
        }

        /// <summary>Gets the OEM code page reported by the underlying culture data.</summary>
        public virtual int OEMCodePage
        {
            get { return _cultureData.OEMCodePage; }
        }

        /// <summary>Gets the Mac code page reported by the underlying culture data.</summary>
        public virtual int MacCodePage
        {
            get { return _cultureData.MacCodePage; }
        }

        /// <summary>Gets the EBCDIC code page reported by the underlying culture data.</summary>
        public virtual int EBCDICCodePage
        {
            get { return _cultureData.EBCDICCodePage; }
        }

        /// <summary>Gets the LCID of the culture that is looked up by this instance's text info name.</summary>
        public int LCID
        {
            get
            {
                CultureInfo textInfoCulture = CultureInfo.GetCultureInfo(_textInfoName);
                return textInfoCulture.LCID;
            }
        }

        /// <summary>Gets the text info name this instance was created with.</summary>
        public string CultureName
        {
            get { return _textInfoName; }
        }

        /// <summary>Gets a value indicating whether this instance is marked read-only.</summary>
        public bool IsReadOnly
        {
            get { return _isReadOnly; }
        }

        /// <summary>
        /// Creates a shallow copy of this instance; the copy is always writable.
        /// </summary>
        public virtual object Clone()
        {
            TextInfo copy = (TextInfo)MemberwiseClone();
            copy.SetReadOnlyState(false);
            return copy;
        }

        /// <summary>
        /// Returns a read-only view of <paramref name="textInfo"/>: the same instance when it is
        /// already read-only, otherwise a shallow copy flagged as read-only.
        /// </summary>
        /// <param name="textInfo">The instance to wrap.</param>
        /// <exception cref="ArgumentNullException"><paramref name="textInfo"/> is null.</exception>
        public static TextInfo ReadOnly(TextInfo textInfo)
        {
            if (textInfo == null)
            {
                throw new ArgumentNullException(nameof(textInfo));
            }

            TextInfo result = textInfo;
            if (!result.IsReadOnly)
            {
                // Only writable instances need a copy; the original stays writable.
                result = (TextInfo)textInfo.MemberwiseClone();
                result.SetReadOnlyState(true);
            }

            return result;
        }

        /// <summary>
        /// Throws <see cref="InvalidOperationException"/> when this instance is read-only.
        /// </summary>
        private void VerifyWritable()
        {
            if (!_isReadOnly)
            {
                return;
            }

            throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
        }

        /// <summary>Sets the read-only flag of this instance.</summary>
        internal void SetReadOnlyState(bool readOnly) => _isReadOnly = readOnly;

        /// <summary>
        /// Gets or sets the string used to separate items in a list. The getter falls back to
        /// the culture data's separator (and caches it) when no value has been stored yet.
        /// </summary>
        /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
        /// <exception cref="InvalidOperationException">The instance is read-only.</exception>
        public virtual string ListSeparator
        {
            get
            {
                string? separator = _listSeparator;
                if (separator == null)
                {
                    separator = _cultureData.ListSeparator;
                    _listSeparator = separator;
                }

                return separator;
            }
            set
            {
                // The null check runs before the writability check, so a null value on a
                // read-only instance reports ArgumentNullException.
                string validated = value ?? throw new ArgumentNullException(nameof(value));

                VerifyWritable();
                _listSeparator = validated;
            }
        }

        /// <summary>
        /// Converts a character to lower case. The ASCII-invariant mapping is used in invariant
        /// globalization mode, or for ASCII input when this culture cases ASCII like invariant;
        /// everything else goes through the culture-aware conversion.
        /// </summary>
        public virtual char ToLower(char c)
        {
            bool useInvariantMapping =
                GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

            return useInvariantMapping
                ? ToLowerAsciiInvariant(c)
                : ChangeCase(c, toUpper: false);
        }

        /// <summary>
        /// Converts a string to lower case, using the ASCII-invariant routine in invariant
        /// globalization mode and the culture-aware routine otherwise.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public virtual string ToLower(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            return GlobalizationMode.Invariant
                ? ToLowerAsciiInvariant(str)
                : ChangeCaseCommon<ToLowerConversion>(str);
        }

        /// <summary>
        /// Changes the case of a single character by passing one-element buffers to the
        /// pointer-based ChangeCase overload. Must not be called in invariant globalization mode.
        /// </summary>
        private unsafe char ChangeCase(char c, bool toUpper)
        {
            Debug.Assert(!GlobalizationMode.Invariant);

            char converted = '\0';
            char* pInput = &c;
            char* pOutput = &converted;
            ChangeCase(pInput, 1, pOutput, 1, toUpper);
            return converted;
        }

        /// <summary>
        /// Lowercases <paramref name="source"/> into <paramref name="destination"/>, which must be
        /// at least as long as the source.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal void ChangeCaseToLower(ReadOnlySpan<char> source, Span<char> destination)
        {
            Debug.Assert(destination.Length >= source.Length);

            ref char input = ref MemoryMarshal.GetReference(source);
            ref char output = ref MemoryMarshal.GetReference(destination);
            ChangeCaseCommon<ToLowerConversion>(ref input, ref output, source.Length);
        }

        /// <summary>
        /// Uppercases <paramref name="source"/> into <paramref name="destination"/>, which must be
        /// at least as long as the source.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal void ChangeCaseToUpper(ReadOnlySpan<char> source, Span<char> destination)
        {
            Debug.Assert(destination.Length >= source.Length);

            ref char input = ref MemoryMarshal.GetReference(source);
            ref char output = ref MemoryMarshal.GetReference(destination);
            ChangeCaseCommon<ToUpperConversion>(ref input, ref output, source.Length);
        }

        /// <summary>
        /// Span-based entry point that forwards to the ref-based ChangeCaseCommon overload
        /// with the source length as the character count.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char> destination) where TConversion : struct
        {
            Debug.Assert(destination.Length >= source.Length);

            int charCount = source.Length;
            ref char input = ref MemoryMarshal.GetReference(source);
            ref char output = ref MemoryMarshal.GetReference(destination);
            ChangeCaseCommon<TConversion>(ref input, ref output, charCount);
        }

        /// <summary>
        /// Converts <paramref name="charCount"/> UTF-16 code units starting at <paramref name="source"/>
        /// to upper or lower case (selected by <typeparamref name="TConversion"/>) and writes them to
        /// <paramref name="destination"/>. When this culture cases ASCII like the invariant culture,
        /// leading ASCII data is converted inline; the remainder (starting at the first chunk that
        /// contains non-ASCII data) is handed to the pointer-based ChangeCase overload. Cultures with
        /// different ASCII casing hand the whole buffer to that overload.
        /// </summary>
        /// <param name="source">Reference to the first input character.</param>
        /// <param name="destination">Reference to the first output character.</param>
        /// <param name="charCount">Number of characters to convert; asserted to be non-negative.</param>
        private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
        {
            Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
            bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(charCount >= 0);

            if (charCount == 0)
            {
                goto Return;
            }

            fixed (char* pSource = &source)
            fixed (char* pDestination = &destination)
            {
                nuint currIdx = 0; // in chars

                if (IsAsciiCasingSameAsInvariant)
                {
                    // Read 4 chars (two 32-bit integers) at a time

                    if (charCount >= 4)
                    {
                        nuint lastIndexWhereCanReadFourChars = (uint)charCount - 4;
                        do
                        {
                            // This is a mostly branchless case change routine. Generally speaking, we assume that the majority
                            // of input is ASCII, so the 'if' checks below should normally evaluate to false. However, within
                            // the ASCII data, we expect that characters of either case might be about equally distributed, so
                            // we want the case change operation itself to be branchless. This gives optimal performance in the
                            // common case. We also expect that developers aren't passing very long (16+ character) strings into
                            // this method, so we won't bother vectorizing until data shows us that it's worthwhile to do so.

                            uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                goto NonAscii;
                            }
                            tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                            Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);

                            tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx + 2);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                // The first two chars of this group were already written above,
                                // so the fallback must start two chars further on.
                                goto NonAsciiSkipTwoChars;
                            }
                            tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                            Unsafe.WriteUnaligned<uint>(pDestination + currIdx + 2, tempValue);
                            currIdx += 4;
                        } while (currIdx <= lastIndexWhereCanReadFourChars);

                        // At this point, there are fewer than 4 characters remaining to convert.
                        Debug.Assert((uint)charCount - currIdx < 4);
                    }

                    // If there are 2 or 3 characters left to convert, we'll convert 2 of them now.
                    // (currIdx is a multiple of 4 here, so bit 1 of charCount tells whether at least 2 chars remain.)
                    if ((charCount & 2) != 0)
                    {
                        uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                        if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                        {
                            goto NonAscii;
                        }
                        tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                        Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);
                        currIdx += 2;
                    }

                    // If there's a single character left to convert, do it now.
                    if ((charCount & 1) != 0)
                    {
                        uint tempValue = pSource[currIdx];
                        if (tempValue > 0x7Fu)
                        {
                            goto NonAscii;
                        }
                        tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                        pDestination[currIdx] = (char)tempValue;
                    }

                    // And we're finished!

                    goto Return;

                // If we reached this point, we found non-ASCII data.
                // Fall back down the p/invoke code path.

                NonAsciiSkipTwoChars:
                    currIdx += 2;

                NonAscii:
                    Debug.Assert(currIdx < (uint)charCount, "We somehow read past the end of the buffer.");
                    // Only the characters from currIdx onward still need converting.
                    charCount -= (int)currIdx;
                }

                // We encountered non-ASCII data and therefore can't perform invariant case conversion; or the requested culture
                // has a case conversion that's different from the invariant culture, even for ASCII data (e.g., tr-TR converts
                // 'i' (U+0069) to Latin Capital Letter I With Dot Above (U+0130)).

                ChangeCase(pSource + currIdx, charCount, pDestination + currIdx, charCount, toUpper);
            }

        Return:
            return;
        }

        /// <summary>
        /// Returns <paramref name="source"/> converted to upper or lower case (selected by
        /// <typeparamref name="TConversion"/>). Returns <see cref="string.Empty"/> for an empty input and
        /// the original string instance when the ASCII fast path finds nothing that needs to change;
        /// otherwise a new string of the same length is allocated and filled.
        /// </summary>
        /// <param name="source">The string to convert; asserted to be non-null.</param>
        private unsafe string ChangeCaseCommon<TConversion>(string source) where TConversion : struct
        {
            Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
            bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(source != null);

            // If the string is empty, we're done.
            if (source.Length == 0)
            {
                return string.Empty;
            }

            fixed (char* pSource = source)
            {
                nuint currIdx = 0; // in chars

                // If this culture's casing for ASCII is the same as invariant, try to take
                // a fast path that'll work in managed code and ASCII rather than calling out
                // to the OS for culture-aware casing.
                if (IsAsciiCasingSameAsInvariant)
                {
                    // Read 2 chars (one 32-bit integer) at a time

                    if (source.Length >= 2)
                    {
                        nuint lastIndexWhereCanReadTwoChars = (uint)source.Length - 2;
                        do
                        {
                            // See the comments in ChangeCaseCommon<TConversion>(ref char, ref char, int) for a full explanation of the below code.
                            // Unlike that overload, this loop only scans: it stops at the first chunk that is non-ASCII or needs a case change.

                            uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                goto NotAscii;
                            }
                            if ((toUpper) ? Utf16Utility.UInt32ContainsAnyLowercaseAsciiChar(tempValue) : Utf16Utility.UInt32ContainsAnyUppercaseAsciiChar(tempValue))
                            {
                                goto AsciiMustChangeCase;
                            }

                            currIdx += 2;
                        } while (currIdx <= lastIndexWhereCanReadTwoChars);
                    }

                    // If there's a single character left to convert, do it now.
                    if ((source.Length & 1) != 0)
                    {
                        uint tempValue = pSource[currIdx];
                        if (tempValue > 0x7Fu)
                        {
                            goto NotAscii;
                        }
                        // Unsigned range check: true when the char is a letter of the case that must change.
                        if ((toUpper) ? ((tempValue - 'a') <= (uint)('z' - 'a')) : ((tempValue - 'A') <= (uint)('Z' - 'A')))
                        {
                            goto AsciiMustChangeCase;
                        }
                    }

                    // We got through all characters without finding anything that needed to change - done!
                    return source;

                AsciiMustChangeCase:
                    {
                        // We reached ASCII data that requires a case change.
                        // This will necessarily allocate a new string, but let's try to stay within the managed (non-localization tables)
                        // conversion code path if we can.

                        string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count

                        // copy existing known-good data into the result
                        Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
                        source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);

                        // and re-run the fast span-based logic over the remainder of the data
                        ChangeCaseCommon<TConversion>(source.AsSpan((int)currIdx), resultSpan.Slice((int)currIdx));
                        return result;
                    }
                }

            NotAscii:
                {
                    // We reached non-ASCII data *or* the requested culture doesn't map ASCII data the same way as the invariant culture.
                    // In either case we need to fall back to the localization tables.

                    string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count

                    if (currIdx > 0)
                    {
                        // copy existing known-good data into the result
                        Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
                        source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);
                    }

                    // and run the culture-aware logic over the remainder of the data
                    fixed (char* pResult = result)
                    {
                        ChangeCase(pSource + currIdx, source.Length - (int)currIdx, pResult + currIdx, result.Length - (int)currIdx, toUpper);
                    }
                    return result;
                }
            }
        }

        /// <summary>
        /// Lowercases the ASCII letters 'A'-'Z' of <paramref name="s"/> and leaves every other
        /// character untouched. Returns <see cref="string.Empty"/> for an empty input and the
        /// original instance when it contains no ASCII uppercase letter.
        /// </summary>
        internal static unsafe string ToLowerAsciiInvariant(string s)
        {
            int length = s.Length;
            if (length == 0)
            {
                return string.Empty;
            }

            fixed (char* pSource = s)
            {
                // Locate the first character that actually needs to change.
                int firstUpper = 0;
                for (; firstUpper < length; firstUpper++)
                {
                    if ((uint)(pSource[firstUpper] - 'A') <= (uint)('Z' - 'A'))
                    {
                        break;
                    }
                }

                if (firstUpper >= length)
                {
                    // Nothing to change, so no allocation is needed.
                    return s;
                }

                string result = string.FastAllocateString(length);
                fixed (char* pResult = result)
                {
                    // Copy the unchanged prefix verbatim.
                    int index = 0;
                    for (; index < firstUpper; index++)
                    {
                        pResult[index] = pSource[index];
                    }

                    // The character at firstUpper is known to be 'A'-'Z'; setting bit 0x20 lowercases it.
                    pResult[index] = (char)(pSource[index] | 0x20);

                    // Every remaining character is checked individually.
                    for (index++; index < length; index++)
                    {
                        pResult[index] = ToLowerAsciiInvariant(pSource[index]);
                    }
                }

                return result;
            }
        }

        /// <summary>
        /// Writes the ASCII-lowercased form of each character of <paramref name="source"/> to the
        /// same position in <paramref name="destination"/>.
        /// </summary>
        internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
        {
            Debug.Assert(destination.Length >= source.Length);

            int position = 0;
            foreach (char current in source)
            {
                destination[position] = ToLowerAsciiInvariant(current);
                position++;
            }
        }

        /// <summary>
        /// Uppercases the ASCII letters 'a'-'z' of <paramref name="s"/> and leaves every other
        /// character untouched. Returns <see cref="string.Empty"/> for an empty input and the
        /// original instance when it contains no ASCII lowercase letter.
        /// </summary>
        private static unsafe string ToUpperAsciiInvariant(string s)
        {
            int length = s.Length;
            if (length == 0)
            {
                return string.Empty;
            }

            fixed (char* pSource = s)
            {
                // Locate the first character that actually needs to change.
                int firstLower = 0;
                for (; firstLower < length; firstLower++)
                {
                    if ((uint)(pSource[firstLower] - 'a') <= (uint)('z' - 'a'))
                    {
                        break;
                    }
                }

                if (firstLower >= length)
                {
                    // Nothing to change, so no allocation is needed.
                    return s;
                }

                string result = string.FastAllocateString(length);
                fixed (char* pResult = result)
                {
                    // Copy the unchanged prefix verbatim.
                    int index = 0;
                    for (; index < firstLower; index++)
                    {
                        pResult[index] = pSource[index];
                    }

                    // The character at firstLower is known to be 'a'-'z'; clearing bit 0x20 uppercases it.
                    pResult[index] = (char)(pSource[index] & ~0x20);

                    // Every remaining character is checked individually.
                    for (index++; index < length; index++)
                    {
                        pResult[index] = ToUpperAsciiInvariant(pSource[index]);
                    }
                }

                return result;
            }
        }

        /// <summary>
        /// Writes the ASCII-uppercased form of each character of <paramref name="source"/> to the
        /// same position in <paramref name="destination"/>.
        /// </summary>
        internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
        {
            Debug.Assert(destination.Length >= source.Length);

            int position = 0;
            foreach (char current in source)
            {
                destination[position] = ToUpperAsciiInvariant(current);
                position++;
            }
        }

        /// <summary>
        /// Maps 'A'-'Z' to 'a'-'z' and returns every other character unchanged.
        /// </summary>
        private static char ToLowerAsciiInvariant(char c)
        {
            // A single unsigned comparison covers both bounds of the 'A'-'Z' range.
            bool isAsciiUpper = (uint)(c - 'A') <= (uint)('Z' - 'A');
            return isAsciiUpper ? (char)(c | 0x20) : c;
        }

        /// <summary>
        /// Converts a character to upper case. The ASCII-invariant mapping is used in invariant
        /// globalization mode, or for ASCII input when this culture cases ASCII like invariant;
        /// everything else goes through the culture-aware conversion.
        /// </summary>
        public virtual char ToUpper(char c)
        {
            bool useInvariantMapping =
                GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

            return useInvariantMapping
                ? ToUpperAsciiInvariant(c)
                : ChangeCase(c, toUpper: true);
        }

        /// <summary>
        /// Converts a string to upper case, using the ASCII-invariant routine in invariant
        /// globalization mode and the culture-aware routine otherwise.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public virtual string ToUpper(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            return GlobalizationMode.Invariant
                ? ToUpperAsciiInvariant(str)
                : ChangeCaseCommon<ToUpperConversion>(str);
        }

        /// <summary>
        /// Maps 'a'-'z' to 'A'-'Z' and returns every other character unchanged.
        /// </summary>
        internal static char ToUpperAsciiInvariant(char c)
        {
            // A single unsigned comparison covers both bounds of the 'a'-'z' range.
            bool isAsciiLower = (uint)(c - 'a') <= (uint)('z' - 'a');
            return isAsciiLower ? (char)(c & ~0x20) : c;
        }

        // True for code units in the range U+0000..U+007F.
        private static bool IsAscii(char c)
        {
            return c <= '\u007F';
        }

        /// <summary>
        /// Gets whether this culture cases ASCII the same way as the invariant culture.
        /// The answer is computed on first access and cached in a tristate field.
        /// </summary>
        private bool IsAsciiCasingSameAsInvariant
        {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get
            {
                if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
                {
                    // The computation lives in a separate non-inlined method.
                    PopulateIsAsciiCasingSameAsInvariant();
                }

                Tristate cached = _isAsciiCasingSameAsInvariant;
                Debug.Assert(cached == Tristate.True || cached == Tristate.False);
                return cached == Tristate.True;
            }
        }

        /// <summary>
        /// Computes and caches the ASCII-casing flag by asking the culture's CompareInfo whether
        /// the lowercase and uppercase ASCII alphabets compare equal when case is ignored.
        /// </summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        private void PopulateIsAsciiCasingSameAsInvariant()
        {
            CompareInfo comparer = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo;
            int ordering = comparer.Compare("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", CompareOptions.IgnoreCase);

            if (ordering == 0)
            {
                _isAsciiCasingSameAsInvariant = Tristate.True;
            }
            else
            {
                _isAsciiCasingSameAsInvariant = Tristate.False;
            }
        }

        /// <summary>
        /// Gets the right-to-left flag reported by the underlying culture data.
        /// </summary>
        public bool IsRightToLeft
        {
            get { return _cultureData.IsRightToLeft; }
        }

        /// <summary>
        /// Two text infos are equal when <paramref name="obj"/> is a TextInfo whose
        /// <see cref="CultureName"/> equals this instance's.
        /// </summary>
        public override bool Equals(object? obj)
        {
            if (!(obj is TextInfo otherTextInfo))
            {
                return false;
            }

            return CultureName.Equals(otherTextInfo.CultureName);
        }

        /// <summary>Returns the hash code of <see cref="CultureName"/>.</summary>
        public override int GetHashCode()
        {
            return CultureName.GetHashCode();
        }

        /// <summary>Returns "TextInfo - " followed by the culture name of the underlying culture data.</summary>
        public override string ToString() => "TextInfo - " + _cultureData.CultureName;

        /// <summary>
        /// Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
        /// and the rest of the letters are lowercase.  The choice of which words to titlecase in headings
        /// and titles is dependent on language and local conventions.  For example, "The Merry Wives of Windsor"
        /// is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
        /// In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
        /// are not titlecased.  In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
        ///
        /// Moreover, the determination of what actually constitutes a word is language dependent, and this can
        /// influence which letter or letters of a "word" are uppercased when titlecasing strings.  For example
        /// "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
        /// </summary>
        /// <param name="str">The string to titlecase.</param>
        /// <returns>The titlecased string; an empty input is returned as-is.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public unsafe string ToTitleCase(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            if (str.Length == 0)
            {
                return str;
            }

            StringBuilder result = new StringBuilder();
            // Lowercased copy of the whole input, created only when a word actually needs lowercasing.
            string? lowercaseData = null;
            // Store if the current culture is Dutch (special case)
            bool isDutchCulture = CultureName.StartsWith("nl-", StringComparison.OrdinalIgnoreCase);

            for (int i = 0; i < str.Length; i++)
            {
                int charLen;
                UnicodeCategory charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
                if (char.CheckLetter(charType))
                {
                    // Special case to check for Dutch specific titlecasing with "IJ" characters
                    // at the beginning of a word
                    if (isDutchCulture && i < str.Length - 1 && (str[i] == 'i' || str[i] == 'I') && (str[i + 1] == 'j' || str[i + 1] == 'J'))
                    {
                        result.Append("IJ");
                        i += 2;
                    }
                    else
                    {
                        // Do the titlecasing for the first character of the word.
                        i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
                    }

                    // Convert the characters until the end of this word
                    // to lowercase.
                    int lowercaseStart = i;

                    // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
                    // This is in line with Word 2000 behavior of titlecasing.
                    bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);

                    // Use a loop to find all of the other letters following this letter.
                    while (i < str.Length)
                    {
                        charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
                        if (IsLetterCategory(charType))
                        {
                            if (charType == UnicodeCategory.LowercaseLetter)
                            {
                                hasLowerCase = true;
                            }
                            i += charLen;
                        }
                        else if (str[i] == '\'')
                        {
                            // An apostrophe stays inside the word: flush the segment up to and
                            // including it, then force lowercasing for whatever follows.
                            i++;
                            if (hasLowerCase)
                            {
                                if (lowercaseData == null)
                                {
                                    lowercaseData = ToLower(str);
                                }
                                result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
                            }
                            else
                            {
                                result.Append(str, lowercaseStart, i - lowercaseStart);
                            }
                            lowercaseStart = i;
                            hasLowerCase = true;
                        }
                        else if (!IsWordSeparator(charType))
                        {
                            // This category is considered to be part of the word.
                            // This is any category that IsWordSeparator does not report as a separator.
                            i += charLen;
                        }
                        else
                        {
                            // A word separator. Break out of the loop.
                            break;
                        }
                    }

                    // Flush the rest of the word: lowercased if it contained a lowercase letter,
                    // otherwise copied unchanged.
                    int count = i - lowercaseStart;

                    if (count > 0)
                    {
                        if (hasLowerCase)
                        {
                            if (lowercaseData == null)
                            {
                                lowercaseData = ToLower(str);
                            }
                            result.Append(lowercaseData, lowercaseStart, count);
                        }
                        else
                        {
                            result.Append(str, lowercaseStart, count);
                        }
                    }

                    if (i < str.Length)
                    {
                        // not a letter, just append it
                        i = AddNonLetter(ref result, ref str, i, charLen);
                    }
                }
                else
                {
                    // not a letter, just append it
                    i = AddNonLetter(ref result, ref str, i, charLen);
                }
            }
            return result.ToString();
        }

        // Copies the character found at inputIndex from input to result without changing it.
        // charLen is the number of UTF-16 code units the character occupies: 1 for an ordinary
        // char, 2 for a surrogate pair (in which case both code units are copied).
        // Returns the index of the last code unit that was appended.
        private static int AddNonLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
        {
            Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");

            // The first (or only) code unit is always copied.
            result.Append(input[inputIndex]);

            if (charLen == 2)
            {
                // Surrogate pair: step onto the second code unit and copy it as well.
                inputIndex++;
                result.Append(input[inputIndex]);
            }

            return inputIndex;
        }

        // Appends the title-cased form of the character that starts at inputIndex to result.
        // charLen is the number of UTF-16 code units the character occupies (1, or 2 for a
        // surrogate pair). Returns the index of the last code unit that was consumed.
        private int AddTitlecaseLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
        {
            Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");

            if (charLen != 2)
            {
                char current = input[inputIndex];
                char titlecased;

                switch (current)
                {
                    // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
                    case '\u01C4':  // DZ with Caron
                    case '\u01C5':  // Dz with Caron
                    case '\u01C6':  // dz with Caron
                        titlecased = '\u01C5';  // -> Dz with Caron
                        break;
                    case '\u01C7':  // LJ
                    case '\u01C8':  // Lj
                    case '\u01C9':  // lj
                        titlecased = '\u01C8';  // -> Lj
                        break;
                    case '\u01CA':  // NJ
                    case '\u01CB':  // Nj
                    case '\u01CC':  // nj
                        titlecased = '\u01CB';  // -> Nj
                        break;
                    case '\u01F1':  // DZ
                    case '\u01F2':  // Dz
                    case '\u01F3':  // dz
                        titlecased = '\u01F2';  // -> Dz
                        break;
                    default:
                        // Everything else simply gets upper-cased.
                        titlecased = ToUpper(current);
                        break;
                }

                result.Append(titlecased);
                return inputIndex;
            }

            // Surrogate pair: run a ToUpper operation over the two code units.
            ReadOnlySpan<char> pair = input.AsSpan(inputIndex, 2);
            if (!GlobalizationMode.Invariant)
            {
                Span<char> upperPair = stackalloc char[2];
                ChangeCaseToUpper(pair, upperPair);
                result.Append(upperPair);
            }
            else
            {
                // surrogate pair in invariant mode, so changing case is a nop
                result.Append(pair);
            }

            // Report the index of the second code unit of the pair.
            return inputIndex + 1;
        }

        // Used in ToTitleCase():
        // When we find a starting letter, the following bit mask decides if a category should be
        // considered as word separator or not. Bit N of the mask corresponds to the
        // UnicodeCategory whose numeric value is N; a set bit means "word separator".
        //
        // Categories whose bit is left clear (i.e. considered part of the word):
        //   UppercaseLetter (0), LowercaseLetter (1), TitlecaseLetter (2), ModifierLetter (3),
        //   OtherLetter (4), NonSpacingMark (5), SpacingCombiningMark (6), EnclosingMark (7),
        //   DecimalDigitNumber (8), LetterNumber (9), OtherNumber (10), Surrogate (16),
        //   PrivateUse (17), OtherNotAssigned (29).
        private const int c_wordSeparatorMask =
            (1 << (int)UnicodeCategory.SpaceSeparator) |            // 11
            (1 << (int)UnicodeCategory.LineSeparator) |             // 12
            (1 << (int)UnicodeCategory.ParagraphSeparator) |        // 13
            (1 << (int)UnicodeCategory.Control) |                   // 14
            (1 << (int)UnicodeCategory.Format) |                    // 15
            (1 << (int)UnicodeCategory.ConnectorPunctuation) |      // 18
            (1 << (int)UnicodeCategory.DashPunctuation) |           // 19
            (1 << (int)UnicodeCategory.OpenPunctuation) |           // 20
            (1 << (int)UnicodeCategory.ClosePunctuation) |          // 21
            (1 << (int)UnicodeCategory.InitialQuotePunctuation) |   // 22
            (1 << (int)UnicodeCategory.FinalQuotePunctuation) |     // 23
            (1 << (int)UnicodeCategory.OtherPunctuation) |          // 24
            (1 << (int)UnicodeCategory.MathSymbol) |                // 25
            (1 << (int)UnicodeCategory.CurrencySymbol) |            // 26
            (1 << (int)UnicodeCategory.ModifierSymbol) |            // 27
            (1 << (int)UnicodeCategory.OtherSymbol);                // 28

        // Returns true when the bit for the given category is set in c_wordSeparatorMask.
        private static bool IsWordSeparator(UnicodeCategory category)
        {
            // Move the category's bit down to position 0 and test it there.
            int bitIndex = (int)category;
            return ((c_wordSeparatorMask >> bitIndex) & 1) != 0;
        }

        // Returns true when the category is one of the five letter categories
        // (uppercase, lowercase, titlecase, modifier or other letter); false otherwise.
        private static bool IsLetterCategory(UnicodeCategory uc)
        {
            switch (uc)
            {
                case UnicodeCategory.UppercaseLetter:
                case UnicodeCategory.LowercaseLetter:
                case UnicodeCategory.TitlecaseLetter:
                case UnicodeCategory.ModifierLetter:
                case UnicodeCategory.OtherLetter:
                    return true;

                default:
                    return false;
            }
        }

        // A dummy, member-less marker struct that is used for 'ToUpper' in generic parameters.
        // NOTE(review): presumably passed as a type argument so generic case-changing code can
        // select the upper-casing path - confirm against the generic callers.
        private readonly struct ToUpperConversion { }

        // A dummy, member-less marker struct that is used for 'ToLower' in generic parameters.
        // NOTE(review): presumably passed as a type argument so generic case-changing code can
        // select the lower-casing path - confirm against the generic callers.
        private readonly struct ToLowerConversion { }
    }
}