TextInfo.cs 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. ////////////////////////////////////////////////////////////////////////////
  5. //
  6. //
  7. // Purpose: This Class defines behaviors specific to a writing system.
  8. // A writing system is the collection of scripts and
  9. // orthographic rules required to represent a language as text.
  10. //
  11. //
  12. ////////////////////////////////////////////////////////////////////////////
  13. using System.Diagnostics;
  14. using System.Runtime.CompilerServices;
  15. using System.Runtime.InteropServices;
  16. using System.Runtime.Serialization;
  17. using System.Text;
  18. using Internal.Runtime.CompilerServices;
  19. #if BIT64
  20. using nuint = System.UInt64;
  21. using nint = System.Int64;
  22. #else // BIT64
  23. using nuint = System.UInt32;
  24. using nint = System.Int32;
  25. #endif // BIT64
  26. namespace System.Globalization
  27. {
  28. public partial class TextInfo : ICloneable, IDeserializationCallback
  29. {
  // Three-state flag used to cache a lazily computed boolean
  // (see _isAsciiCasingSameAsInvariant). The zero value means
  // "not computed yet", so a default-initialized field starts out uncached.
  private enum Tristate : byte
  {
      NotInitialized = 0,
      False = 1,
      True = 2
  }
  // Cached list separator; filled lazily from _cultureData.SLIST by the ListSeparator getter
  // and overwritten by the ListSeparator setter.
  private string _listSeparator;
  // When true, VerifyWritable() throws and the instance cannot be modified.
  private bool _isReadOnly = false;
  /* _cultureName is the name of the creating culture.
     _cultureData is the data that backs this class.
     _textInfoName is the actual name of the textInfo (from cultureData.STEXTINFO)
     In the desktop, when we call the sorting dll, it doesn't
     know how to resolve custom locale names to sort ids so we have to have already resolved this.
  */
  private readonly string _cultureName; // Name of the culture that created this text info
  private readonly CultureData _cultureData; // Data record for the culture that made us, not for this textinfo
  private readonly string _textInfoName; // Name of the text info we're using (ie: _cultureData.STEXTINFO)
  // Cached answer for IsAsciiCasingSameAsInvariant; computed on first use.
  private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized;
  // Backing storage for the Invariant property; stays null until first requested.
  internal static volatile TextInfo s_Invariant;

  // Invariant text info: created on demand from CultureData.Invariant and then reused.
  internal static TextInfo Invariant
  {
      get
      {
          if (s_Invariant == null)
          {
              s_Invariant = new TextInfo(CultureData.Invariant);
          }

          return s_Invariant;
      }
  }
  59. //////////////////////////////////////////////////////////////////////////
  60. ////
  61. //// TextInfo Constructors
  62. ////
  63. //// Implements CultureInfo.TextInfo.
  64. ////
  65. //////////////////////////////////////////////////////////////////////////
  // Builds a TextInfo on top of the supplied culture data record.
  internal TextInfo(CultureData cultureData)
  {
      // The culture data is the primary data source; the two names are copied out of it.
      _cultureData = cultureData;
      _cultureName = cultureData.CultureName;
      _textInfoName = cultureData.STEXTINFO;

      FinishInitialization();
  }
  // Deserialization callbacks are not supported here; the call always throws.
  void IDeserializationCallback.OnDeserialization(object sender) =>
      throw new PlatformNotSupportedException();
  // Code page values, each read straight from the backing culture data.
  public virtual int ANSICodePage
  {
      get { return _cultureData.IDEFAULTANSICODEPAGE; }
  }

  public virtual int OEMCodePage
  {
      get { return _cultureData.IDEFAULTOEMCODEPAGE; }
  }

  public virtual int MacCodePage
  {
      get { return _cultureData.IDEFAULTMACCODEPAGE; }
  }

  public virtual int EBCDICCodePage
  {
      get { return _cultureData.IDEFAULTEBCDICCODEPAGE; }
  }

  // Just use the LCID from our text info name
  public int LCID
  {
      get
      {
          CultureInfo textInfoCulture = CultureInfo.GetCultureInfo(_textInfoName);
          return textInfoCulture.LCID;
      }
  }

  // Note: this is the text info name, not the name of the creating culture.
  public string CultureName
  {
      get { return _textInfoName; }
  }

  public bool IsReadOnly
  {
      get { return _isReadOnly; }
  }
  86. //////////////////////////////////////////////////////////////////////////
  87. ////
  88. //// Clone
  89. ////
  90. //// Is the implementation of ICloneable.
  91. ////
  92. //////////////////////////////////////////////////////////////////////////
  // Returns a shallow copy of this instance; the copy is always writable.
  public virtual object Clone()
  {
      TextInfo copy = (TextInfo)MemberwiseClone();
      copy.SetReadOnlyState(false);
      return copy;
  }
  99. ////////////////////////////////////////////////////////////////////////
  100. //
  101. // ReadOnly
  102. //
  103. // Create a cloned readonly instance or return the input one if it is
  104. // readonly.
  105. //
  106. ////////////////////////////////////////////////////////////////////////
  // Returns textInfo itself when it is already read-only; otherwise returns a
  // shallow copy marked read-only. Throws ArgumentNullException for a null argument.
  public static TextInfo ReadOnly(TextInfo textInfo)
  {
      if (textInfo == null)
      {
          throw new ArgumentNullException(nameof(textInfo));
      }

      if (!textInfo.IsReadOnly)
      {
          TextInfo readOnlyCopy = (TextInfo)textInfo.MemberwiseClone();
          readOnlyCopy.SetReadOnlyState(true);
          return readOnlyCopy;
      }

      return textInfo;
  }
  // Throws InvalidOperationException when this instance has been marked read-only.
  private void VerifyWritable()
  {
      if (!_isReadOnly)
      {
          return;
      }

      throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
  }
  // Sets the read-only flag that VerifyWritable() checks.
  internal void SetReadOnlyState(bool readOnly) => _isReadOnly = readOnly;
  126. ////////////////////////////////////////////////////////////////////////
  127. //
  128. // ListSeparator
  129. //
  130. // Returns the string used to separate items in a list.
  131. //
  132. ////////////////////////////////////////////////////////////////////////
  // The string used to separate items in a list.
  // get: returns the cached value, loading it from the culture data on first use.
  // set: rejects null (ArgumentNullException), then rejects read-only instances
  //      (InvalidOperationException via VerifyWritable), then stores the value.
  public virtual string ListSeparator
  {
      get
      {
          return _listSeparator ?? (_listSeparator = _cultureData.SLIST);
      }
      set
      {
          if (value is null)
          {
              throw new ArgumentNullException(nameof(value), SR.ArgumentNull_String);
          }

          VerifyWritable();
          _listSeparator = value;
      }
  }
  153. ////////////////////////////////////////////////////////////////////////
  154. //
  155. // ToLower
  156. //
  157. // Converts the character or string to lower case. Certain locales
  158. // have different casing semantics from the file systems in Win32.
  159. //
  160. ////////////////////////////////////////////////////////////////////////
  // Lowercases a single character. ASCII-only managed casing is used in invariant
  // globalization mode, or for ASCII input when this culture cases ASCII like the
  // invariant culture; everything else goes through ChangeCase.
  public virtual char ToLower(char c)
  {
      bool canUseAsciiPath = GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

      return canUseAsciiPath
          ? ToLowerAsciiInvariant(c)
          : ChangeCase(c, toUpper: false);
  }
  // Lowercases a string. Throws ArgumentNullException when str is null.
  public virtual string ToLower(string str)
  {
      if (str == null)
      {
          throw new ArgumentNullException(nameof(str));
      }

      return GlobalizationMode.Invariant
          ? ToLowerAsciiInvariant(str)
          : ChangeCaseCommon<ToLowerConversion>(str);
  }
  // Single-character wrapper over the pointer-based ChangeCase overload.
  // Must not be called in invariant globalization mode.
  private unsafe char ChangeCase(char c, bool toUpper)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      char converted = '\0';
      ChangeCase(&c, 1, &converted, 1, toUpper);
      return converted;
  }
  // Lowercases source into destination; destination must be at least as long as source.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  internal void ChangeCaseToLower(ReadOnlySpan<char> source, Span<char> destination)
  {
      Debug.Assert(destination.Length >= source.Length);

      ref char firstSource = ref MemoryMarshal.GetReference(source);
      ref char firstDestination = ref MemoryMarshal.GetReference(destination);
      ChangeCaseCommon<ToLowerConversion>(ref firstSource, ref firstDestination, source.Length);
  }
  // Uppercases source into destination; destination must be at least as long as source.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  internal void ChangeCaseToUpper(ReadOnlySpan<char> source, Span<char> destination)
  {
      Debug.Assert(destination.Length >= source.Length);

      ref char firstSource = ref MemoryMarshal.GetReference(source);
      ref char firstDestination = ref MemoryMarshal.GetReference(destination);
      ChangeCaseCommon<ToUpperConversion>(ref firstSource, ref firstDestination, source.Length);
  }
  // Span front end for the ref-based ChangeCaseCommon; the direction is selected by TConversion.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char> destination) where TConversion : struct
  {
      Debug.Assert(destination.Length >= source.Length);

      ref char firstSource = ref MemoryMarshal.GetReference(source);
      ref char firstDestination = ref MemoryMarshal.GetReference(destination);
      ChangeCaseCommon<TConversion>(ref firstSource, ref firstDestination, source.Length);
  }
  // Converts charCount chars starting at 'source' and writes them to 'destination'.
  // TConversion must be ToUpperConversion or ToLowerConversion and selects the direction.
  //
  // When this culture cases ASCII like the invariant culture, ASCII data is converted in
  // managed code (four chars per iteration, then a 2-char and a 1-char tail). As soon as a
  // non-ASCII char is seen, the remaining chars are handed to the pointer-based ChangeCase
  // overload. When the culture's ASCII casing differs, the whole buffer goes to ChangeCase.
  //
  // Must not be called in invariant globalization mode; charCount must be non-negative.
  private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
  {
      Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
      bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(charCount >= 0);
      if (charCount == 0)
      {
          goto Return;
      }
      fixed (char* pSource = &source)
      fixed (char* pDestination = &destination)
      {
          nuint currIdx = 0; // in chars
          if (IsAsciiCasingSameAsInvariant)
          {
              // Read 4 chars (two 32-bit integers) at a time
              if (charCount >= 4)
              {
                  nuint lastIndexWhereCanReadFourChars = (uint)charCount - 4;
                  do
                  {
                      // This is a mostly branchless case change routine. Generally speaking, we assume that the majority
                      // of input is ASCII, so the 'if' checks below should normally evaluate to false. However, within
                      // the ASCII data, we expect that characters of either case might be about equally distributed, so
                      // we want the case change operation itself to be branchless. This gives optimal performance in the
                      // common case. We also expect that developers aren't passing very long (16+ character) strings into
                      // this method, so we won't bother vectorizing until data shows us that it's worthwhile to do so.
                      uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                      if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                      {
                          goto NonAscii;
                      }
                      tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                      Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);
                      // Second pair of the 4-char group.
                      tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx + 2);
                      if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                      {
                          // The first pair of this group was already written above, so the
                          // fallback must start two chars further on.
                          goto NonAsciiSkipTwoChars;
                      }
                      tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                      Unsafe.WriteUnaligned<uint>(pDestination + currIdx + 2, tempValue);
                      currIdx += 4;
                  } while (currIdx <= lastIndexWhereCanReadFourChars);
                  // At this point, there are fewer than 4 characters remaining to convert.
                  Debug.Assert((uint)charCount - currIdx < 4);
              }
              // If there are 2 or 3 characters left to convert, we'll convert 2 of them now.
              // (currIdx is a multiple of 4 here, so bit 1 of charCount reflects the remainder.)
              if ((charCount & 2) != 0)
              {
                  uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                  if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                  {
                      goto NonAscii;
                  }
                  tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                  Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);
                  currIdx += 2;
              }
              // If there's a single character left to convert, do it now.
              if ((charCount & 1) != 0)
              {
                  uint tempValue = pSource[currIdx];
                  if (tempValue > 0x7Fu)
                  {
                      goto NonAscii;
                  }
                  tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                  pDestination[currIdx] = (char)tempValue;
              }
              // And we're finished!
              goto Return;
              // If we reached this point, we found non-ASCII data.
              // Fall back down the p/invoke code path.
          NonAsciiSkipTwoChars:
              currIdx += 2;
          NonAscii:
              Debug.Assert(currIdx < (uint)charCount, "We somehow read past the end of the buffer.");
              // Only the chars from currIdx onward still need converting.
              charCount -= (int)currIdx;
          }
          // We encountered non-ASCII data and therefore can't perform invariant case conversion; or the requested culture
          // has a case conversion that's different from the invariant culture, even for ASCII data (e.g., tr-TR converts
          // 'i' (U+0069) to Latin Capital Letter I With Dot Above (U+0130)).
          ChangeCase(pSource + currIdx, charCount, pDestination + currIdx, charCount, toUpper);
      }
  Return:
      return;
  }
  // Returns 'source' converted to upper or lower case (selected by TConversion, which must be
  // ToUpperConversion or ToLowerConversion).
  //
  // Returns string.Empty for an empty input, and returns 'source' itself (no allocation) when
  // the whole string is ASCII and already in the requested case. Otherwise a new string of the
  // same length is allocated, the already-scanned prefix is copied verbatim, and the remainder
  // is converted either by the span-based managed path (ASCII char that needs changing) or by
  // the pointer-based ChangeCase overload (non-ASCII data, or a culture whose ASCII casing
  // differs from invariant).
  //
  // Must not be called in invariant globalization mode; 'source' must not be null.
  private unsafe string ChangeCaseCommon<TConversion>(string source) where TConversion : struct
  {
      Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
      bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source != null);
      // If the string is empty, we're done.
      if (source.Length == 0)
      {
          return string.Empty;
      }
      fixed (char* pSource = source)
      {
          nuint currIdx = 0; // in chars
          // If this culture's casing for ASCII is the same as invariant, try to take
          // a fast path that'll work in managed code and ASCII rather than calling out
          // to the OS for culture-aware casing.
          if (IsAsciiCasingSameAsInvariant)
          {
              // Read 2 chars (one 32-bit integer) at a time
              // This loop only scans; nothing is written until a char that must change is found.
              if (source.Length >= 2)
              {
                  nuint lastIndexWhereCanReadTwoChars = (uint)source.Length - 2;
                  do
                  {
                      // See the comments in ChangeCaseCommon<TConversion>(ROS<char>, Span<char>) for a full explanation of the below code.
                      uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                      if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                      {
                          goto NotAscii;
                      }
                      if ((toUpper) ? Utf16Utility.UInt32ContainsAnyLowercaseAsciiChar(tempValue) : Utf16Utility.UInt32ContainsAnyUppercaseAsciiChar(tempValue))
                      {
                          goto AsciiMustChangeCase;
                      }
                      currIdx += 2;
                  } while (currIdx <= lastIndexWhereCanReadTwoChars);
              }
              // If there's a single character left to convert, do it now.
              if ((source.Length & 1) != 0)
              {
                  uint tempValue = pSource[currIdx];
                  if (tempValue > 0x7Fu)
                  {
                      goto NotAscii;
                  }
                  // Unsigned range check: true when the char is a letter of the opposite case.
                  if ((toUpper) ? ((tempValue - 'a') <= (uint)('z' - 'a')) : ((tempValue - 'A') <= (uint)('Z' - 'A')))
                  {
                      goto AsciiMustChangeCase;
                  }
              }
              // We got through all characters without finding anything that needed to change - done!
              return source;
          AsciiMustChangeCase:
              {
                  // We reached ASCII data that requires a case change.
                  // This will necessarily allocate a new string, but let's try to stay within the managed (non-localization tables)
                  // conversion code path if we can.
                  string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count
                  // copy existing known-good data into the result
                  Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
                  source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);
                  // and re-run the fast span-based logic over the remainder of the data
                  ChangeCaseCommon<TConversion>(source.AsSpan((int)currIdx), resultSpan.Slice((int)currIdx));
                  return result;
              }
          }
      NotAscii:
          {
              // We reached non-ASCII data *or* the requested culture doesn't map ASCII data the same way as the invariant culture.
              // In either case we need to fall back to the localization tables.
              // (In the second case currIdx is still 0, so the whole string is converted below.)
              string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count
              if (currIdx > 0)
              {
                  // copy existing known-good data into the result
                  Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
                  source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);
              }
              // and run the culture-aware logic over the remainder of the data
              fixed (char* pResult = result)
              {
                  ChangeCase(pSource + currIdx, source.Length - (int)currIdx, pResult + currIdx, result.Length - (int)currIdx, toUpper);
              }
              return result;
          }
      }
  }
  // Lowercases only the ASCII letters 'A'-'Z' of s; every other char is copied unchanged.
  // Returns string.Empty for an empty input and s itself when it contains no 'A'-'Z'.
  internal static unsafe string ToLowerAsciiInvariant(string s)
  {
      int length = s.Length;
      if (length == 0)
      {
          return string.Empty;
      }

      fixed (char* src = s)
      {
          // Locate the first char that actually needs to change.
          int firstUpper = 0;
          for (; firstUpper < length; firstUpper++)
          {
              if ((uint)(src[firstUpper] - 'A') <= (uint)('Z' - 'A'))
              {
                  break;
              }
          }

          if (firstUpper >= length)
          {
              // Nothing to change: hand back the original instance.
              return s;
          }

          string lowered = string.FastAllocateString(length);
          fixed (char* dst = lowered)
          {
              // Prefix before the first uppercase letter is copied verbatim.
              int pos = 0;
              while (pos < firstUpper)
              {
                  dst[pos] = src[pos];
                  pos++;
              }

              // This char is known to be 'A'-'Z', so set the case bit directly.
              dst[pos] = (char)(src[pos] | 0x20);

              for (pos++; pos < length; pos++)
              {
                  dst[pos] = ToLowerAsciiInvariant(src[pos]);
              }
          }

          return lowered;
      }
  }
  // Writes the ASCII-lowercased form of each source char to the same index of destination.
  internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
  {
      Debug.Assert(destination.Length >= source.Length);

      int index = 0;
      foreach (char current in source)
      {
          destination[index] = ToLowerAsciiInvariant(current);
          index++;
      }
  }
  // Uppercases only the ASCII letters 'a'-'z' of s; every other char is copied unchanged.
  // Returns string.Empty for an empty input and s itself when it contains no 'a'-'z'.
  private static unsafe string ToUpperAsciiInvariant(string s)
  {
      int length = s.Length;
      if (length == 0)
      {
          return string.Empty;
      }

      fixed (char* src = s)
      {
          // Locate the first char that actually needs to change.
          int firstLower = 0;
          for (; firstLower < length; firstLower++)
          {
              if ((uint)(src[firstLower] - 'a') <= (uint)('z' - 'a'))
              {
                  break;
              }
          }

          if (firstLower >= length)
          {
              // Nothing to change: hand back the original instance.
              return s;
          }

          string raised = string.FastAllocateString(length);
          fixed (char* dst = raised)
          {
              // Prefix before the first lowercase letter is copied verbatim.
              int pos = 0;
              while (pos < firstLower)
              {
                  dst[pos] = src[pos];
                  pos++;
              }

              // This char is known to be 'a'-'z', so clear the case bit directly.
              dst[pos] = (char)(src[pos] & ~0x20);

              for (pos++; pos < length; pos++)
              {
                  dst[pos] = ToUpperAsciiInvariant(src[pos]);
              }
          }

          return raised;
      }
  }
  // Writes the ASCII-uppercased form of each source char to the same index of destination.
  internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
  {
      Debug.Assert(destination.Length >= source.Length);

      int index = 0;
      foreach (char current in source)
      {
          destination[index] = ToUpperAsciiInvariant(current);
          index++;
      }
  }
  // Maps 'A'-'Z' to 'a'-'z'; returns any other char unchanged.
  private static char ToLowerAsciiInvariant(char c)
  {
      // A single unsigned comparison covers both ends of the 'A'..'Z' range.
      bool isUpperAscii = (uint)(c - 'A') <= (uint)('Z' - 'A');
      return isUpperAscii ? (char)(c | 0x20) : c;
  }
  480. ////////////////////////////////////////////////////////////////////////
  481. //
  482. // ToUpper
  483. //
  484. // Converts the character or string to upper case. Certain locales
  485. // have different casing semantics from the file systems in Win32.
  486. //
  487. ////////////////////////////////////////////////////////////////////////
  // Uppercases a single character. ASCII-only managed casing is used in invariant
  // globalization mode, or for ASCII input when this culture cases ASCII like the
  // invariant culture; everything else goes through ChangeCase.
  public virtual char ToUpper(char c)
  {
      bool canUseAsciiPath = GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

      return canUseAsciiPath
          ? ToUpperAsciiInvariant(c)
          : ChangeCase(c, toUpper: true);
  }
  // Uppercases a string. Throws ArgumentNullException when str is null.
  public virtual string ToUpper(string str)
  {
      if (str == null)
      {
          throw new ArgumentNullException(nameof(str));
      }

      return GlobalizationMode.Invariant
          ? ToUpperAsciiInvariant(str)
          : ChangeCaseCommon<ToUpperConversion>(str);
  }
  // Maps 'a'-'z' to 'A'-'Z'; returns any other char unchanged.
  internal static char ToUpperAsciiInvariant(char c)
  {
      // A single unsigned comparison covers both ends of the 'a'..'z' range.
      bool isLowerAscii = (uint)(c - 'a') <= (uint)('z' - 'a');
      return isLowerAscii ? (char)(c & ~0x20) : c;
  }
  // True when c lies in the 7-bit ASCII range (U+0000..U+007F).
  private static bool IsAscii(char c) => c <= 0x7F;
  // Whether this culture changes the case of ASCII letters the same way the invariant
  // culture does. Computed on first access and cached in _isAsciiCasingSameAsInvariant.
  private bool IsAsciiCasingSameAsInvariant
  {
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      get
      {
          if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
          {
              // Slow path is kept out of line so the getter stays small.
              PopulateIsAsciiCasingSameAsInvariant();
          }

          Debug.Assert(_isAsciiCasingSameAsInvariant == Tristate.True || _isAsciiCasingSameAsInvariant == Tristate.False);

          return _isAsciiCasingSameAsInvariant == Tristate.True;
      }
  }
  // Computes and caches the IsAsciiCasingSameAsInvariant answer by asking the text-info
  // culture's CompareInfo whether the lowercase and uppercase ASCII alphabets compare
  // equal when case is ignored.
  [MethodImpl(MethodImplOptions.NoInlining)]
  private void PopulateIsAsciiCasingSameAsInvariant()
  {
      CompareInfo comparer = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo;
      int order = comparer.Compare("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", CompareOptions.IgnoreCase);

      if (order == 0)
      {
          _isAsciiCasingSameAsInvariant = Tristate.True;
      }
      else
      {
          _isAsciiCasingSameAsInvariant = Tristate.False;
      }
  }
  536. // IsRightToLeft
  537. //
  538. // Returns true if the dominant direction of text and UI, such as the relative position of buttons and scroll bars, is right-to-left.
  539. //
  public bool IsRightToLeft
  {
      // Forwarded directly from the backing culture data.
      get { return _cultureData.IsRightToLeft; }
  }
  541. ////////////////////////////////////////////////////////////////////////
  542. //
  543. // Equals
  544. //
  545. // Implements Object.Equals(). Returns a boolean indicating whether
  546. // or not object is a TextInfo with the same CultureName as the current instance.
  547. //
  548. ////////////////////////////////////////////////////////////////////////
  // Two TextInfo instances are equal when their CultureName strings are equal;
  // null and objects of any other type are never equal.
  public override bool Equals(object obj)
  {
      return obj is TextInfo other && CultureName.Equals(other.CultureName);
  }
  558. ////////////////////////////////////////////////////////////////////////
  559. //
  560. // GetHashCode
  561. //
  562. // Implements Object.GetHashCode(). Returns the hash code for the
  563. // TextInfo. The hash code is guaranteed to be the same for TextInfo A
  564. // and B where A.Equals(B) is true.
  565. //
  566. ////////////////////////////////////////////////////////////////////////
  // Hashes the same CultureName string that Equals compares.
  public override int GetHashCode() => CultureName.GetHashCode();
  571. ////////////////////////////////////////////////////////////////////////
  572. //
  573. // ToString
  574. //
  575. // Implements Object.ToString(). Returns a string describing the
  576. // TextInfo.
  577. //
  578. ////////////////////////////////////////////////////////////////////////
  // Produces "TextInfo - " followed by the culture name from the backing culture data.
  public override string ToString() => string.Concat("TextInfo - ", _cultureData.CultureName);
  583. //
  584. // Titlecasing:
  585. // -----------
  586. // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
  587. // and the rest of the letters are lowercase. The choice of which words to titlecase in headings
  588. // and titles is dependent on language and local conventions. For example, "The Merry Wives of Windsor"
  589. // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
  590. // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
  591. // are not titlecased. In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
  592. //
  593. // Moreover, the determination of what actually constitutes a word is language dependent, and this can
  594. // influence which letter or letters of a "word" are uppercased when titlecasing strings. For example
  595. // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
  596. //
  // Converts str to title case: the first letter of each word is titlecased and, unless the
  // word contains no lowercase letters at all (treated as an acronym), the rest of the word
  // is lowercased. Non-letter characters are copied through unchanged.
  //
  // str     - the string to convert.
  // returns - the title-cased string; str itself when it is empty.
  // throws  - ArgumentNullException when str is null.
  public unsafe string ToTitleCase(string str)
  {
      if (str == null)
      {
          throw new ArgumentNullException(nameof(str));
      }
      if (str.Length == 0)
      {
          return str;
      }
      StringBuilder result = new StringBuilder();
      // Lowercased copy of the whole input; created only when a word actually needs lowercasing.
      string lowercaseData = null;
      // Store if the current culture is Dutch (special case)
      bool isDutchCulture = CultureName.StartsWith("nl-", StringComparison.OrdinalIgnoreCase);
      for (int i = 0; i < str.Length; i++)
      {
          UnicodeCategory charType;
          int charLen;
          charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
          if (char.CheckLetter(charType))
          {
              // Special case to check for Dutch specific titlecasing with "IJ" characters
              // at the beginning of a word
              if (isDutchCulture && i < str.Length - 1 && (str[i] == 'i' || str[i] == 'I') && (str[i+1] == 'j' || str[i+1] == 'J'))
              {
                  result.Append("IJ");
                  i += 2;
              }
              else
              {
                  // Do the titlecasing for the first character of the word.
                  i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
              }
              //
              // Convert the characters until the end of this word
              // to lowercase.
              //
              int lowercaseStart = i;
              //
              // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
              // This is in line with Word 2000 behavior of titlecasing.
              //
              bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);
              // Use a loop to find all of the other letters following this letter.
              while (i < str.Length)
              {
                  charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
                  if (IsLetterCategory(charType))
                  {
                      if (charType == UnicodeCategory.LowercaseLetter)
                      {
                          hasLowerCase = true;
                      }
                      i += charLen;
                  }
                  else if (str[i] == '\'')
                  {
                      // An apostrophe stays inside the word: flush everything up to and including it.
                      i++;
                      if (hasLowerCase)
                      {
                          if (lowercaseData == null)
                          {
                              lowercaseData = ToLower(str);
                          }
                          result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
                      }
                      else
                      {
                          result.Append(str, lowercaseStart, i - lowercaseStart);
                      }
                      lowercaseStart = i;
                      // From here on, the rest of the word is always taken from the lowercased copy.
                      hasLowerCase = true;
                  }
                  else if (!IsWordSeparator(charType))
                  {
                      // This category is considered to be part of the word.
                      // This is any category that is marked as false in c_wordSeparatorMask.
                      i+= charLen;
                  }
                  else
                  {
                      // A word separator. Break out of the loop.
                      break;
                  }
              }
              // Flush whatever part of the word has not been appended yet.
              int count = i - lowercaseStart;
              if (count > 0)
              {
                  if (hasLowerCase)
                  {
                      if (lowercaseData == null)
                      {
                          lowercaseData = ToLower(str);
                      }
                      result.Append(lowercaseData, lowercaseStart, count);
                  }
                  else
                  {
                      result.Append(str, lowercaseStart, count);
                  }
              }
              if (i < str.Length)
              {
                  // not a letter, just append it
                  // (charLen still describes the separator at index i that ended the loop above)
                  i = AddNonLetter(ref result, ref str, i, charLen);
              }
          }
          else
          {
              // not a letter, just append it
              i = AddNonLetter(ref result, ref str, i, charLen);
          }
      }
      return result.ToString();
  }
  // Appends the character at inputIndex (one or two UTF-16 code units, per charLen)
  // to result unchanged. Returns the index of the last code unit that was appended.
  private static int AddNonLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
  {
      Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");

      int lastIndex = inputIndex;
      if (charLen == 2)
      {
          // Surrogate pair: emit the first half here, the second half below.
          result.Append(input[lastIndex]);
          lastIndex++;
      }

      result.Append(input[lastIndex]);
      return lastIndex;
  }
  // Appends the titlecase form of the letter at inputIndex to result.
  // Returns the index of the last code unit consumed (inputIndex + 1 for a surrogate pair).
  private int AddTitlecaseLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
  {
      Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");

      if (charLen == 2)
      {
          // for surrogate pairs do a ToUpper operation on the substring
          ReadOnlySpan<char> pair = input.AsSpan(inputIndex, 2);
          if (GlobalizationMode.Invariant)
          {
              result.Append(pair); // surrogate pair in invariant mode, so changing case is a nop
          }
          else
          {
              Span<char> upperPair = stackalloc char[2];
              ChangeCaseToUpper(pair, upperPair);
              result.Append(upperPair);
          }

          return inputIndex + 1;
      }

      char letter = input[inputIndex];
      char titlecased;

      //
      // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
      // Each digraph comes as an (upper, title, lower) triple of consecutive code points,
      // and all three map to the middle (titlecase) one.
      //
      if (letter >= (char)0x01C4 && letter <= (char)0x01C6)
      {
          titlecased = (char)0x01C5; // DZ / Dz / dz with Caron -> Dz with Caron
      }
      else if (letter >= (char)0x01C7 && letter <= (char)0x01C9)
      {
          titlecased = (char)0x01C8; // LJ / Lj / lj -> Lj
      }
      else if (letter >= (char)0x01CA && letter <= (char)0x01CC)
      {
          titlecased = (char)0x01CB; // NJ / Nj / nj -> Nj
      }
      else if (letter >= (char)0x01F1 && letter <= (char)0x01F3)
      {
          titlecased = (char)0x01F2; // DZ / Dz / dz -> Dz
      }
      else
      {
          titlecased = ToUpper(letter);
      }

      result.Append(titlecased);
      return inputIndex;
  }
  //
  // Used in ToTitleCase():
  // When we find a starting letter, the following bit mask decides if a category should be
  // considered as word separator or not. Bit N is set when the UnicodeCategory whose numeric
  // value is N counts as a separator; every category not listed here is part of a word.
  //
  private const int c_wordSeparatorMask =
      (1 << (int)UnicodeCategory.SpaceSeparator) |
      (1 << (int)UnicodeCategory.LineSeparator) |
      (1 << (int)UnicodeCategory.ParagraphSeparator) |
      (1 << (int)UnicodeCategory.Control) |
      (1 << (int)UnicodeCategory.Format) |
      (1 << (int)UnicodeCategory.ConnectorPunctuation) |
      (1 << (int)UnicodeCategory.DashPunctuation) |
      (1 << (int)UnicodeCategory.OpenPunctuation) |
      (1 << (int)UnicodeCategory.ClosePunctuation) |
      (1 << (int)UnicodeCategory.InitialQuotePunctuation) |
      (1 << (int)UnicodeCategory.FinalQuotePunctuation) |
      (1 << (int)UnicodeCategory.OtherPunctuation) |
      (1 << (int)UnicodeCategory.MathSymbol) |
      (1 << (int)UnicodeCategory.CurrencySymbol) |
      (1 << (int)UnicodeCategory.ModifierSymbol) |
      (1 << (int)UnicodeCategory.OtherSymbol);

  // True when the given category is flagged as a word separator in c_wordSeparatorMask.
  private static bool IsWordSeparator(UnicodeCategory category) =>
      ((c_wordSeparatorMask >> (int)category) & 1) != 0;
  // True for the five Unicode letter categories (upper, lower, title, modifier, other).
  private static bool IsLetterCategory(UnicodeCategory uc)
  {
      switch (uc)
      {
          case UnicodeCategory.UppercaseLetter:
          case UnicodeCategory.LowercaseLetter:
          case UnicodeCategory.TitlecaseLetter:
          case UnicodeCategory.ModifierLetter:
          case UnicodeCategory.OtherLetter:
              return true;

          default:
              return false;
      }
  }
  // A dummy struct that is used for 'ToUpper' in generic parameters.
  // It carries no data; ChangeCaseCommon<TConversion> only inspects typeof(TConversion)
  // to pick the conversion direction.
  private readonly struct ToUpperConversion { }
  // A dummy struct that is used for 'ToLower' in generic parameters.
  // It carries no data; see ToUpperConversion for how it is consumed.
  private readonly struct ToLowerConversion { }
  831. }
  832. }