| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957 |
- // Licensed to the .NET Foundation under one or more agreements.
- // The .NET Foundation licenses this file to you under the MIT license.
- // See the LICENSE file in the project root for more information.
- ////////////////////////////////////////////////////////////////////////////
- //
- //
- // Purpose: This Class defines behaviors specific to a writing system.
- // A writing system is the collection of scripts and
- // orthographic rules required to represent a language as text.
- //
- //
- ////////////////////////////////////////////////////////////////////////////
- using System.Diagnostics;
- using System.Runtime.CompilerServices;
- using System.Runtime.InteropServices;
- using System.Runtime.Serialization;
- using System.Text;
- using Internal.Runtime.CompilerServices;
- #if BIT64
- using nuint = System.UInt64;
- using nint = System.Int64;
- #else // BIT64
- using nuint = System.UInt32;
- using nint = System.Int32;
- #endif // BIT64
- namespace System.Globalization
- {
- public partial class TextInfo : ICloneable, IDeserializationCallback
- {
- // Three-state flag for a boolean that is computed lazily and then cached.
- // NotInitialized (the zero value) means the answer has not been computed yet;
- // False and True hold the computed answer.
- private enum Tristate : byte
- {
- NotInitialized = 0,
- False = 1,
- True = 2
- }
- private string _listSeparator; // Filled from _cultureData.SLIST on first read of ListSeparator, or assigned through its setter
- private bool _isReadOnly = false; // When true, VerifyWritable() throws InvalidOperationException
- /* _cultureName is the name of the creating culture.
- _cultureData is the data that backs this class.
- _textInfoName is the actual name of the textInfo (from cultureData.STEXTINFO)
- In the desktop, when we call the sorting dll, it doesn't
- know how to resolve custom locale names to sort ids so we have to have already resolved this.
- */
- private readonly string _cultureName; // Name of the culture that created this text info
- private readonly CultureData _cultureData; // Data record for the culture that made us, not for this textinfo
- private readonly string _textInfoName; // Name of the text info we're using (ie: _cultureData.STEXTINFO)
- private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized; // Cached answer for the IsAsciiCasingSameAsInvariant property; computed on first use
- // Backing field for the Invariant property; stays null until the property is first read.
- internal static volatile TextInfo s_Invariant;
- // Gets the TextInfo built from CultureData.Invariant, creating it on first access.
- internal static TextInfo Invariant
- {
-     get
-     {
-         if (s_Invariant is null)
-         {
-             s_Invariant = new TextInfo(CultureData.Invariant);
-         }
-         return s_Invariant;
-     }
- }
- //////////////////////////////////////////////////////////////////////////
- ////
- ////  TextInfo Constructors
- ////
- ////  Implements CultureInfo.TextInfo.
- ////
- //////////////////////////////////////////////////////////////////////////
- internal TextInfo(CultureData cultureData)
- {
-     // The culture data record is the primary data source; both names are cached from it.
-     _cultureData = cultureData;
-     _cultureName = cultureData.CultureName;
-     _textInfoName = cultureData.STEXTINFO;
-     FinishInitialization();
- }
- // Deserialization callbacks are not supported: this always throws PlatformNotSupportedException.
- void IDeserializationCallback.OnDeserialization(object sender) => throw new PlatformNotSupportedException();
- // Code page properties: each one returns the matching default code page value from the backing culture data.
- public virtual int ANSICodePage
- {
-     get { return _cultureData.IDEFAULTANSICODEPAGE; }
- }
- public virtual int OEMCodePage
- {
-     get { return _cultureData.IDEFAULTOEMCODEPAGE; }
- }
- public virtual int MacCodePage
- {
-     get { return _cultureData.IDEFAULTMACCODEPAGE; }
- }
- public virtual int EBCDICCodePage
- {
-     get { return _cultureData.IDEFAULTEBCDICCODEPAGE; }
- }
- // The LCID is looked up through the culture identified by our text info name.
- public int LCID
- {
-     get { return CultureInfo.GetCultureInfo(_textInfoName).LCID; }
- }
- // Returns the text info name (_textInfoName), not the name of the creating culture.
- public string CultureName
- {
-     get { return _textInfoName; }
- }
- // True when this instance has been marked read-only.
- public bool IsReadOnly
- {
-     get { return _isReadOnly; }
- }
- //////////////////////////////////////////////////////////////////////////
- ////
- ////  Clone
- ////
- ////  Implements ICloneable. Returns a memberwise copy of this instance
- ////  whose read-only state is cleared, so the copy is always writable.
- ////
- //////////////////////////////////////////////////////////////////////////
- public virtual object Clone()
- {
-     TextInfo copy = (TextInfo)MemberwiseClone();
-     copy.SetReadOnlyState(false);
-     return copy;
- }
- ////////////////////////////////////////////////////////////////////////
- //
- //  ReadOnly
- //
- //  Returns the input instance when it is already read-only; otherwise
- //  returns a memberwise copy that is marked read-only.
- //  Throws ArgumentNullException when textInfo is null.
- //
- ////////////////////////////////////////////////////////////////////////
- public static TextInfo ReadOnly(TextInfo textInfo)
- {
-     if (textInfo == null)
-     {
-         throw new ArgumentNullException(nameof(textInfo));
-     }
-     if (!textInfo.IsReadOnly)
-     {
-         TextInfo readOnlyCopy = (TextInfo)textInfo.MemberwiseClone();
-         readOnlyCopy.SetReadOnlyState(true);
-         return readOnlyCopy;
-     }
-     return textInfo;
- }
- // Guard used by setters: throws InvalidOperationException when this instance is read-only.
- private void VerifyWritable()
- {
-     if (!_isReadOnly)
-     {
-         return;
-     }
-     throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
- }
- // Sets the read-only flag directly, without any writability check.
- internal void SetReadOnlyState(bool readOnly) => _isReadOnly = readOnly;
- ////////////////////////////////////////////////////////////////////////
- //
- //  ListSeparator
- //
- //  Gets or sets the string used to separate items in a list.
- //  The getter caches the culture data's SLIST value on first read.
- //  The setter throws ArgumentNullException for null and then
- //  InvalidOperationException when this instance is read-only.
- //
- ////////////////////////////////////////////////////////////////////////
- public virtual string ListSeparator
- {
-     get
-     {
-         return _listSeparator ?? (_listSeparator = _cultureData.SLIST);
-     }
-     set
-     {
-         // The null check comes first so that a null value is reported even on a read-only instance.
-         if (value == null)
-         {
-             throw new ArgumentNullException(nameof(value), SR.ArgumentNull_String);
-         }
-         VerifyWritable();
-         _listSeparator = value;
-     }
- }
- ////////////////////////////////////////////////////////////////////////
- //
- //  ToLower
- //
- //  Converts the character or string to lower case.  Certain locales
- //  have different casing semantics from the file systems in Win32.
- //
- ////////////////////////////////////////////////////////////////////////
- public virtual char ToLower(char c)
- {
-     // The ASCII-only conversion is used in invariant globalization mode, or for an ASCII
-     // character when this culture cases ASCII the same way the invariant culture does.
-     bool useAsciiInvariant = GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);
-     return useAsciiInvariant ? ToLowerAsciiInvariant(c) : ChangeCase(c, toUpper: false);
- }
- // Returns str converted to lower case. Throws ArgumentNullException when str is null.
- // In invariant globalization mode only ASCII letters are converted.
- public virtual string ToLower(string str)
- {
-     if (str == null)
-     {
-         throw new ArgumentNullException(nameof(str));
-     }
-     return GlobalizationMode.Invariant
-         ? ToLowerAsciiInvariant(str)
-         : ChangeCaseCommon<ToLowerConversion>(str);
- }
- // Changes the case of a single character by passing one-element buffers to the pointer-based ChangeCase overload.
- // Must not be called in invariant globalization mode (debug-asserted).
- private unsafe char ChangeCase(char c, bool toUpper)
- {
-     Debug.Assert(!GlobalizationMode.Invariant);
-     char converted = default;
-     ChangeCase(&c, 1, &converted, 1, toUpper);
-     return converted;
- }
-
- // Lower-cases source into destination. destination must be at least as long as source (debug-asserted).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal void ChangeCaseToLower(ReadOnlySpan<char> source, Span<char> destination)
- {
-     Debug.Assert(destination.Length >= source.Length);
-     ChangeCaseCommon<ToLowerConversion>(source, destination);
- }
- // Upper-cases source into destination. destination must be at least as long as source (debug-asserted).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal void ChangeCaseToUpper(ReadOnlySpan<char> source, Span<char> destination)
- {
-     Debug.Assert(destination.Length >= source.Length);
-     ChangeCaseCommon<ToUpperConversion>(source, destination);
- }
- // Span front end for the ref-based ChangeCaseCommon overload: passes references to the first
- // element of each span together with the source length.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char> destination) where TConversion : struct
- {
-     Debug.Assert(destination.Length >= source.Length);
-     ref char firstSourceChar = ref MemoryMarshal.GetReference(source);
-     ref char firstDestinationChar = ref MemoryMarshal.GetReference(destination);
-     ChangeCaseCommon<TConversion>(ref firstSourceChar, ref firstDestinationChar, source.Length);
- }
- // Converts charCount UTF-16 code units starting at 'source' to upper or lower case (selected by TConversion)
- // and writes them to the same offsets starting at 'destination'.
- // When IsAsciiCasingSameAsInvariant is true, ASCII data is converted inline; as soon as a non-ASCII code unit
- // is seen, the not-yet-converted remainder is handed to the pointer-based ChangeCase overload.
- // When it is false, the whole buffer is handed to that overload.
- // Debug-asserts that TConversion is ToUpperConversion or ToLowerConversion, that GlobalizationMode.Invariant
- // is false, and that charCount is non-negative. A charCount of zero returns immediately.
- private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
- {
- Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
- bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds
- Debug.Assert(!GlobalizationMode.Invariant);
- Debug.Assert(charCount >= 0);
- if (charCount == 0)
- {
- goto Return;
- }
- fixed (char* pSource = &source) // pin both buffers so they can be addressed through raw pointers
- fixed (char* pDestination = &destination)
- {
- nuint currIdx = 0; // in chars
- if (IsAsciiCasingSameAsInvariant)
- {
- // Read 4 chars (two 32-bit integers) at a time
- if (charCount >= 4)
- {
- nuint lastIndexWhereCanReadFourChars = (uint)charCount - 4; // last start index from which four chars still fit inside charCount
- do
- {
- // This is a mostly branchless case change routine. Generally speaking, we assume that the majority
- // of input is ASCII, so the 'if' checks below should normally evaluate to false. However, within
- // the ASCII data, we expect that characters of either case might be about equally distributed, so
- // we want the case change operation itself to be branchless. This gives optimal performance in the
- // common case. We also expect that developers aren't passing very long (16+ character) strings into
- // this method, so we won't bother vectorizing until data shows us that it's worthwhile to do so.
- uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
- if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
- {
- goto NonAscii;
- }
- tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
- Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);
- tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx + 2);
- if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
- {
- goto NonAsciiSkipTwoChars; // the first two chars of this group were already converted and written above
- }
- tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
- Unsafe.WriteUnaligned<uint>(pDestination + currIdx + 2, tempValue);
- currIdx += 4;
- } while (currIdx <= lastIndexWhereCanReadFourChars);
- // At this point, there are fewer than 4 characters remaining to convert.
- Debug.Assert((uint)charCount - currIdx < 4);
- }
- // If there are 2 or 3 characters left to convert, we'll convert 2 of them now.
- if ((charCount & 2) != 0)
- {
- uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
- if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
- {
- goto NonAscii;
- }
- tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
- Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);
- currIdx += 2;
- }
- // If there's a single character left to convert, do it now.
- if ((charCount & 1) != 0)
- {
- uint tempValue = pSource[currIdx];
- if (tempValue > 0x7Fu)
- {
- goto NonAscii;
- }
- tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
- pDestination[currIdx] = (char)tempValue;
- }
- // And we're finished!
- goto Return;
- // If we reached this point, we found non-ASCII data.
- // Fall back down the p/invoke code path.
- NonAsciiSkipTwoChars:
- currIdx += 2;
- NonAscii:
- Debug.Assert(currIdx < (uint)charCount, "We somehow read past the end of the buffer.");
- charCount -= (int)currIdx; // from here on charCount is the length of the unconverted tail only
- }
- // We encountered non-ASCII data and therefore can't perform invariant case conversion; or the requested culture
- // has a case conversion that's different from the invariant culture, even for ASCII data (e.g., tr-TR converts
- // 'i' (U+0069) to Latin Capital Letter I With Dot Above (U+0130)).
- ChangeCase(pSource + currIdx, charCount, pDestination + currIdx, charCount, toUpper);
- }
- Return:
- return;
- }
- // Returns 'source' converted to upper or lower case (selected by TConversion).
- // Returns string.Empty for an empty input, and the original string instance when the ASCII scan finds
- // nothing that needs to change. Otherwise a new string of the same length is allocated: the already-scanned
- // prefix is copied unchanged, and the remainder is converted either by the span-based ChangeCaseCommon overload
- // (ASCII data that needs a case change) or by the pointer-based ChangeCase overload (non-ASCII data, or a
- // culture for which IsAsciiCasingSameAsInvariant is false).
- // Debug-asserts that TConversion is ToUpperConversion or ToLowerConversion, that GlobalizationMode.Invariant
- // is false, and that source is not null.
- private unsafe string ChangeCaseCommon<TConversion>(string source) where TConversion : struct
- {
- Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
- bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds
- Debug.Assert(!GlobalizationMode.Invariant);
- Debug.Assert(source != null);
- // If the string is empty, we're done.
- if (source.Length == 0)
- {
- return string.Empty;
- }
- fixed (char* pSource = source)
- {
- nuint currIdx = 0; // in chars
- // If this culture's casing for ASCII is the same as invariant, try to take
- // a fast path that'll work in managed code and ASCII rather than calling out
- // to the OS for culture-aware casing.
- if (IsAsciiCasingSameAsInvariant)
- {
- // Read 2 chars (one 32-bit integer) at a time
- if (source.Length >= 2)
- {
- nuint lastIndexWhereCanReadTwoChars = (uint)source.Length - 2;
- do
- {
- // See the comments in ChangeCaseCommon<TConversion>(ref char, ref char, int) for a full explanation of the below code.
- uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
- if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
- {
- goto NotAscii;
- }
- if ((toUpper) ? Utf16Utility.UInt32ContainsAnyLowercaseAsciiChar(tempValue) : Utf16Utility.UInt32ContainsAnyUppercaseAsciiChar(tempValue))
- {
- goto AsciiMustChangeCase;
- }
- currIdx += 2;
- } while (currIdx <= lastIndexWhereCanReadTwoChars);
- }
- // If there's a single character left to convert, do it now.
- if ((source.Length & 1) != 0)
- {
- uint tempValue = pSource[currIdx];
- if (tempValue > 0x7Fu)
- {
- goto NotAscii;
- }
- // Unsigned range check: the subtraction wraps for values below the range start, so one comparison covers both bounds.
- if ((toUpper) ? ((tempValue - 'a') <= (uint)('z' - 'a')) : ((tempValue - 'A') <= (uint)('Z' - 'A')))
- {
- goto AsciiMustChangeCase;
- }
- }
- // We got through all characters without finding anything that needed to change - done!
- return source;
- AsciiMustChangeCase:
- {
- // We reached ASCII data that requires a case change.
- // This will necessarily allocate a new string, but let's try to stay within the managed (non-localization tables)
- // conversion code path if we can.
- string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count
- // copy existing known-good data into the result
- Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
- source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);
- // and re-run the fast span-based logic over the remainder of the data
- ChangeCaseCommon<TConversion>(source.AsSpan((int)currIdx), resultSpan.Slice((int)currIdx));
- return result;
- }
- }
- NotAscii:
- {
- // We reached non-ASCII data *or* the requested culture doesn't map ASCII data the same way as the invariant culture.
- // In either case we need to fall back to the localization tables.
- string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count
- if (currIdx > 0)
- {
- // copy existing known-good data into the result
- Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
- source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);
- }
- // and run the culture-aware logic over the remainder of the data
- fixed (char* pResult = result)
- {
- ChangeCase(pSource + currIdx, source.Length - (int)currIdx, pResult + currIdx, result.Length - (int)currIdx, toUpper);
- }
- return result;
- }
- }
- }
- // Lower-cases the ASCII letters 'A'-'Z' in s and leaves every other character untouched.
- // Returns string.Empty for an empty input and the original instance when s contains no ASCII
- // uppercase letter; otherwise returns a newly allocated string of the same length.
- internal static unsafe string ToLowerAsciiInvariant(string s)
- {
-     int length = s.Length;
-     if (length == 0)
-     {
-         return string.Empty;
-     }
-     fixed (char* pSource = s)
-     {
-         // Find the first character that actually needs to change.
-         int firstUpper = 0;
-         for (; firstUpper < length; firstUpper++)
-         {
-             if ((uint)(pSource[firstUpper] - 'A') <= (uint)('Z' - 'A'))
-             {
-                 break;
-             }
-         }
-         if (firstUpper >= length)
-         {
-             // Nothing to convert, so no allocation is needed.
-             return s;
-         }
-         string result = string.FastAllocateString(length);
-         fixed (char* pResult = result)
-         {
-             // The prefix before the first uppercase letter is copied as-is.
-             int index = 0;
-             for (; index < firstUpper; index++)
-             {
-                 pResult[index] = pSource[index];
-             }
-             // This character is known to be 'A'-'Z', so setting bit 0x20 lower-cases it.
-             pResult[index] = (char)(pSource[index] | 0x20);
-             for (index++; index < length; index++)
-             {
-                 pResult[index] = ToLowerAsciiInvariant(pSource[index]);
-             }
-         }
-         return result;
-     }
- }
- // Writes the ASCII-lower-cased form of each source character to the same index in destination.
- // destination must be at least as long as source (debug-asserted).
- internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
- {
-     Debug.Assert(destination.Length >= source.Length);
-     int index = 0;
-     while (index < source.Length)
-     {
-         destination[index] = ToLowerAsciiInvariant(source[index]);
-         index++;
-     }
- }
- // Upper-cases the ASCII letters 'a'-'z' in s and leaves every other character untouched.
- // Returns string.Empty for an empty input and the original instance when s contains no ASCII
- // lowercase letter; otherwise returns a newly allocated string of the same length.
- private static unsafe string ToUpperAsciiInvariant(string s)
- {
-     int length = s.Length;
-     if (length == 0)
-     {
-         return string.Empty;
-     }
-     fixed (char* pSource = s)
-     {
-         // Find the first character that actually needs to change.
-         int firstLower = 0;
-         for (; firstLower < length; firstLower++)
-         {
-             if ((uint)(pSource[firstLower] - 'a') <= (uint)('z' - 'a'))
-             {
-                 break;
-             }
-         }
-         if (firstLower >= length)
-         {
-             // Nothing to convert, so no allocation is needed.
-             return s;
-         }
-         string result = string.FastAllocateString(length);
-         fixed (char* pResult = result)
-         {
-             // The prefix before the first lowercase letter is copied as-is.
-             int index = 0;
-             for (; index < firstLower; index++)
-             {
-                 pResult[index] = pSource[index];
-             }
-             // This character is known to be 'a'-'z', so clearing bit 0x20 upper-cases it.
-             pResult[index] = (char)(pSource[index] & ~0x20);
-             for (index++; index < length; index++)
-             {
-                 pResult[index] = ToUpperAsciiInvariant(pSource[index]);
-             }
-         }
-         return result;
-     }
- }
- // Writes the ASCII-upper-cased form of each source character to the same index in destination.
- // destination must be at least as long as source (debug-asserted).
- internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
- {
-     Debug.Assert(destination.Length >= source.Length);
-     int index = 0;
-     while (index < source.Length)
-     {
-         destination[index] = ToUpperAsciiInvariant(source[index]);
-         index++;
-     }
- }
- // Returns the lowercase form of c when c is an ASCII letter 'A'-'Z'; any other character is returned unchanged.
- private static char ToLowerAsciiInvariant(char c)
- {
-     bool isAsciiUpper = c >= 'A' && c <= 'Z';
-     // ASCII upper and lower case letters differ only in bit 0x20.
-     return isAsciiUpper ? (char)(c | 0x20) : c;
- }
- ////////////////////////////////////////////////////////////////////////
- //
- //  ToUpper
- //
- //  Converts the character or string to upper case.  Certain locales
- //  have different casing semantics from the file systems in Win32.
- //
- ////////////////////////////////////////////////////////////////////////
- public virtual char ToUpper(char c)
- {
-     // The ASCII-only conversion is used in invariant globalization mode, or for an ASCII
-     // character when this culture cases ASCII the same way the invariant culture does.
-     bool useAsciiInvariant = GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);
-     return useAsciiInvariant ? ToUpperAsciiInvariant(c) : ChangeCase(c, toUpper: true);
- }
- // Returns str converted to upper case. Throws ArgumentNullException when str is null.
- // In invariant globalization mode only ASCII letters are converted.
- public virtual string ToUpper(string str)
- {
-     if (str == null)
-     {
-         throw new ArgumentNullException(nameof(str));
-     }
-     return GlobalizationMode.Invariant
-         ? ToUpperAsciiInvariant(str)
-         : ChangeCaseCommon<ToUpperConversion>(str);
- }
- // Returns the uppercase form of c when c is an ASCII letter 'a'-'z'; any other character is returned unchanged.
- internal static char ToUpperAsciiInvariant(char c)
- {
-     bool isAsciiLower = c >= 'a' && c <= 'z';
-     // ASCII upper and lower case letters differ only in bit 0x20.
-     return isAsciiLower ? (char)(c & ~0x20) : c;
- }
- // True when c lies in the ASCII range U+0000..U+007F.
- private static bool IsAscii(char c) => c <= '\u007F';
- // Lazily computed flag consulted before taking the ASCII fast paths.
- // The first read runs PopulateIsAsciiCasingSameAsInvariant(); later reads use the cached Tristate.
- private bool IsAsciiCasingSameAsInvariant
- {
-     [MethodImpl(MethodImplOptions.AggressiveInlining)]
-     get
-     {
-         if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
-         {
-             PopulateIsAsciiCasingSameAsInvariant();
-         }
-         Tristate state = _isAsciiCasingSameAsInvariant;
-         Debug.Assert(state == Tristate.True || state == Tristate.False);
-         return state == Tristate.True;
-     }
- }
- // Computes and caches the flag behind IsAsciiCasingSameAsInvariant: it is True when the culture named by
- // _textInfoName compares the lowercase and uppercase ASCII alphabets as equal under CompareOptions.IgnoreCase.
- [MethodImpl(MethodImplOptions.NoInlining)]
- private void PopulateIsAsciiCasingSameAsInvariant()
- {
-     CompareInfo comparer = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo;
-     int comparison = comparer.Compare("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", CompareOptions.IgnoreCase);
-     _isAsciiCasingSameAsInvariant = (comparison == 0) ? Tristate.True : Tristate.False;
- }
- // IsRightToLeft
- //
- // Returns true if the dominant direction of text and UI, such as the relative position of buttons
- // and scroll bars, is right-to-left. The value comes straight from the backing culture data.
- //
- public bool IsRightToLeft
- {
-     get { return _cultureData.IsRightToLeft; }
- }
- ////////////////////////////////////////////////////////////////////////
- //
- //  Equals
- //
- //  Implements Object.Equals().  Returns true when obj is a TextInfo
- //  whose CultureName equals the CultureName of the current instance;
- //  returns false for null or for any other type.
- //
- ////////////////////////////////////////////////////////////////////////
- public override bool Equals(object obj)
- {
-     return obj is TextInfo other && CultureName.Equals(other.CultureName);
- }
- ////////////////////////////////////////////////////////////////////////
- //
- //  GetHashCode
- //
- //  Implements Object.GetHashCode().  Returns the hash code of
- //  CultureName, so TextInfo A and B have the same hash code whenever
- //  A.Equals(B) is true.
- //
- ////////////////////////////////////////////////////////////////////////
- public override int GetHashCode() => CultureName.GetHashCode();
- ////////////////////////////////////////////////////////////////////////
- //
- //  ToString
- //
- //  Implements Object.ToString().  Returns "TextInfo - " followed by
- //  the culture name taken from the backing culture data.
- //
- ////////////////////////////////////////////////////////////////////////
- public override string ToString()
- {
-     string cultureName = _cultureData.CultureName;
-     return string.Concat("TextInfo - ", cultureName);
- }
- //
- // Titlecasing:
- // -----------
- // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
- // and the rest of the letters are lowercase. The choice of which words to titlecase in headings
- // and titles is dependent on language and local conventions. For example, "The Merry Wives of Windsor"
- // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
- // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
- // are not titlecased. In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
- //
- // Moreover, the determination of what actually constitutes a word is language dependent, and this can
- // influence which letter or letters of a "word" are uppercased when titlecasing strings. For example
- // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
- //
- // What this implementation does:
- //  - Throws ArgumentNullException when str is null and returns str itself when it is empty.
- //  - Titlecases the first letter of each word, then lowercases the rest of the word only if the word
- //    contains at least one lowercase letter; a word without any lowercase letter is copied unchanged.
- //  - When CultureName starts with "nl-", a word-initial "ij" in any casing is emitted as "IJ".
- //  - Characters that are not part of a word are appended unchanged.
- //
- public unsafe string ToTitleCase(string str)
- {
- if (str == null)
- {
- throw new ArgumentNullException(nameof(str));
- }
- if (str.Length == 0)
- {
- return str;
- }
- StringBuilder result = new StringBuilder();
- string lowercaseData = null; // lowercased copy of the whole input, created only when a word first needs it
- // Store if the current culture is Dutch (special case)
- bool isDutchCulture = CultureName.StartsWith("nl-", StringComparison.OrdinalIgnoreCase);
- for (int i = 0; i < str.Length; i++)
- {
- UnicodeCategory charType;
- int charLen;
- charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
- if (char.CheckLetter(charType))
- {
- // Special case to check for Dutch specific titlecasing with "IJ" characters
- // at the beginning of a word
- if (isDutchCulture && i < str.Length - 1 && (str[i] == 'i' || str[i] == 'I') && (str[i+1] == 'j' || str[i+1] == 'J'))
- {
- result.Append("IJ");
- i += 2;
- }
- else
- {
- // Do the titlecasing for the first character of the word.
- i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
- }
- //
- // Convert the characters until the end of this word
- // to lowercase.
- //
- int lowercaseStart = i;
- //
- // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
- // This is in line with Word 2000 behavior of titlecasing.
- //
- bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);
- // Use a loop to find all of the other letters following this letter.
- while (i < str.Length)
- {
- charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
- if (IsLetterCategory(charType))
- {
- if (charType == UnicodeCategory.LowercaseLetter)
- {
- hasLowerCase = true;
- }
- i += charLen;
- }
- else if (str[i] == '\'')
- {
- // An apostrophe stays inside the word: flush what has been scanned so far (including the apostrophe).
- i++;
- if (hasLowerCase)
- {
- if (lowercaseData == null)
- {
- lowercaseData = ToLower(str);
- }
- result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
- }
- else
- {
- result.Append(str, lowercaseStart, i - lowercaseStart);
- }
- lowercaseStart = i;
- hasLowerCase = true; // whatever follows the apostrophe in this word will be lowercased
- }
- else if (!IsWordSeparator(charType))
- {
- // This category is considered to be part of the word.
- // This is any category that is marked as false in c_wordSeparatorMask.
- i+= charLen;
- }
- else
- {
- // A word separator. Break out of the loop.
- break;
- }
- }
- int count = i - lowercaseStart;
- if (count > 0)
- {
- if (hasLowerCase)
- {
- if (lowercaseData == null)
- {
- lowercaseData = ToLower(str);
- }
- result.Append(lowercaseData, lowercaseStart, count);
- }
- else
- {
- result.Append(str, lowercaseStart, count);
- }
- }
- if (i < str.Length)
- {
- // not a letter, just append it
- i = AddNonLetter(ref result, ref str, i, charLen);
- }
- }
- else
- {
- // not a letter, just append it
- i = AddNonLetter(ref result, ref str, i, charLen);
- }
- }
- return result.ToString();
- }
- // Appends the character at inputIndex to result unchanged. When charLen is 2 (a surrogate pair) both
- // code units are appended. Returns the index of the last code unit that was appended.
- private static int AddNonLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
- {
-     Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
-     if (charLen == 2)
-     {
-         // Surrogate pair: emit the first code unit here; the second one is emitted below.
-         result.Append(input[inputIndex]);
-         inputIndex++;
-     }
-     result.Append(input[inputIndex]);
-     return inputIndex;
- }
- // Appends the titlecase form of the letter at inputIndex to result and returns the index of the last
- // code unit consumed. A surrogate pair (charLen == 2) is upper-cased as a pair, or copied unchanged in
- // invariant globalization mode. The Latin digraph letters listed below map to their titlecase form;
- // every other single character goes through ToUpper.
- private int AddTitlecaseLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
- {
-     Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
-     if (charLen != 2)
-     {
-         char letter = input[inputIndex];
-         char titlecased;
-         //
-         // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
-         if (letter >= (char)0x01C4 && letter <= (char)0x01C6)
-         {
-             titlecased = (char)0x01C5; // DZ / Dz / dz with Caron -> Dz with Caron
-         }
-         else if (letter >= (char)0x01C7 && letter <= (char)0x01C9)
-         {
-             titlecased = (char)0x01C8; // LJ / Lj / lj -> Lj
-         }
-         else if (letter >= (char)0x01CA && letter <= (char)0x01CC)
-         {
-             titlecased = (char)0x01CB; // NJ / Nj / nj -> Nj
-         }
-         else if (letter >= (char)0x01F1 && letter <= (char)0x01F3)
-         {
-             titlecased = (char)0x01F2; // DZ / Dz / dz -> Dz
-         }
-         else
-         {
-             titlecased = ToUpper(letter);
-         }
-         result.Append(titlecased);
-         return inputIndex;
-     }
-     // for surrogate pairs do a ToUpper operation on the substring
-     ReadOnlySpan<char> pair = input.AsSpan(inputIndex, 2);
-     if (GlobalizationMode.Invariant)
-     {
-         result.Append(pair); // surrogate pair in invariant mode, so changing case is a nop
-     }
-     else
-     {
-         Span<char> upperPair = stackalloc char[2];
-         ChangeCaseToUpper(pair, upperPair);
-         result.Append(upperPair);
-     }
-     return inputIndex + 1;
- }
- //
- // Used in ToTitleCase():
- // When we find a starting letter, the following mask decides if a category should be
- // considered as word separator or not. Bit N is set when the UnicodeCategory whose numeric
- // value is N ends a word; every category that is not listed here is treated as part of the word.
- //
- private const int c_wordSeparatorMask =
-     (1 << (int)UnicodeCategory.SpaceSeparator) |
-     (1 << (int)UnicodeCategory.LineSeparator) |
-     (1 << (int)UnicodeCategory.ParagraphSeparator) |
-     (1 << (int)UnicodeCategory.Control) |
-     (1 << (int)UnicodeCategory.Format) |
-     (1 << (int)UnicodeCategory.ConnectorPunctuation) |
-     (1 << (int)UnicodeCategory.DashPunctuation) |
-     (1 << (int)UnicodeCategory.OpenPunctuation) |
-     (1 << (int)UnicodeCategory.ClosePunctuation) |
-     (1 << (int)UnicodeCategory.InitialQuotePunctuation) |
-     (1 << (int)UnicodeCategory.FinalQuotePunctuation) |
-     (1 << (int)UnicodeCategory.OtherPunctuation) |
-     (1 << (int)UnicodeCategory.MathSymbol) |
-     (1 << (int)UnicodeCategory.CurrencySymbol) |
-     (1 << (int)UnicodeCategory.ModifierSymbol) |
-     (1 << (int)UnicodeCategory.OtherSymbol);
- // Returns true when the given category has its bit set in c_wordSeparatorMask.
- private static bool IsWordSeparator(UnicodeCategory category)
- {
-     int categoryBit = 1 << (int)category;
-     return (c_wordSeparatorMask & categoryBit) != 0;
- }
- // Returns true for the five letter categories (uppercase, lowercase, titlecase, modifier, other) and false otherwise.
- private static bool IsLetterCategory(UnicodeCategory uc)
- {
-     switch (uc)
-     {
-         case UnicodeCategory.UppercaseLetter:
-         case UnicodeCategory.LowercaseLetter:
-         case UnicodeCategory.TitlecaseLetter:
-         case UnicodeCategory.ModifierLetter:
-         case UnicodeCategory.OtherLetter:
-             return true;
-         default:
-             return false;
-     }
- }
- // A dummy struct that is used for 'ToUpper' in generic parameters.
- // It carries no data: ChangeCaseCommon<TConversion> compares typeof(TConversion) against this type
- // to decide that it should convert to upper case.
- private readonly struct ToUpperConversion { }
- // A dummy struct that is used for 'ToLower' in generic parameters.
- // It carries no data: ChangeCaseCommon<TConversion> converts to lower case when TConversion is this type.
- private readonly struct ToLowerConversion { }
- }
- }
|