CompareInfo.Unix.cs 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Buffers;
  5. using System.Collections.Generic;
  6. using System.Diagnostics;
  7. using System.Runtime.CompilerServices;
  8. using System.Runtime.InteropServices;
  9. using System.Security;
  10. using System.Threading;
  11. using Internal.Runtime.CompilerServices;
  12. namespace System.Globalization
  13. {
  14. public partial class CompareInfo
  15. {
  // Handle passed to the Interop.Globalization collation calls. InitSort obtains it from
  // SortHandleCache; it is left unassigned when GlobalizationMode.Invariant is set.
  [NonSerialized] private IntPtr _sortHandle;

  // Assigned by InitSort: true in invariant mode, or when the sort name is "en-US" or empty.
  // The culture-sensitive routines only attempt their ASCII fast paths when this is true.
  [NonSerialized] private bool _isAsciiEqualityOrdinal;
  /// <summary>
  /// Records the sort name of <paramref name="culture"/> and initializes the per-instance
  /// state used by the comparison routines in this file.
  /// </summary>
  /// <param name="culture">Culture whose <c>SortName</c> is stored in <c>_sortName</c>.</param>
  private void InitSort(CultureInfo culture)
  {
      _sortName = culture.SortName;

      // Invariant mode: no sort handle is requested at all.
      if (GlobalizationMode.Invariant)
      {
          _isAsciiEqualityOrdinal = true;
          return;
      }

      bool isEnUsOrEmpty = _sortName == "en-US" || _sortName == "";
      _isAsciiEqualityOrdinal = isEnUsOrEmpty;
      _sortHandle = SortHandleCache.GetCachedSortHandle(_sortName);
  }
  /// <summary>
  /// Finds the first ordinal occurrence of <paramref name="value"/> inside the window of
  /// <paramref name="source"/> that starts at <paramref name="startIndex"/> and spans
  /// <paramref name="count"/> characters.
  /// </summary>
  /// <param name="source">String to search; must not be null.</param>
  /// <param name="value">String to look for; must not be null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> where the window begins.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="ignoreCase">When true the search is delegated to the native
  /// <c>IndexOfOrdinalIgnoreCase</c>; otherwise characters are compared one by one here.</param>
  /// <returns>
  /// The index (relative to the start of <paramref name="source"/>) of the first match,
  /// <paramref name="startIndex"/> when <paramref name="value"/> is empty, or -1 when nothing matches.
  /// </returns>
  internal static unsafe int IndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source != null);
      Debug.Assert(value != null);

      int needleLength = value.Length;
      if (needleLength == 0)
      {
          return startIndex;
      }

      if (count < needleLength)
      {
          return -1;
      }

      if (ignoreCase)
      {
          fixed (char* pSource = source)
          {
              // The native call reports a position relative to the window it was handed.
              int relative = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false);
              if (relative == -1)
              {
                  return -1;
              }

              return startIndex + relative;
          }
      }

      // Last position at which the whole value still fits inside the window.
      int lastCandidate = startIndex + (count - needleLength);
      int candidate = startIndex;
      while (candidate <= lastCandidate)
      {
          int matched = 0;
          while (matched < needleLength && source[candidate + matched] == value[matched])
          {
              matched++;
          }

          if (matched == needleLength)
          {
              return candidate;
          }

          candidate++;
      }

      return -1;
  }
  /// <summary>
  /// Ordinal search for <paramref name="value"/> within <paramref name="source"/>, scanning either
  /// from the front (first occurrence) or from the back (last occurrence).
  /// </summary>
  /// <param name="source">Span to search; asserted non-empty.</param>
  /// <param name="value">Span to look for; asserted non-empty.</param>
  /// <param name="ignoreCase">When true the search is delegated to the native
  /// <c>IndexOfOrdinalIgnoreCase</c> with <c>findLast</c> set to <c>!fromBeginning</c>.</param>
  /// <param name="fromBeginning">True to scan left to right, false to scan right to left.</param>
  /// <returns>The index of the match found, or -1 when there is none.</returns>
  internal static unsafe int IndexOfOrdinalCore(ReadOnlySpan<char> source, ReadOnlySpan<char> value, bool ignoreCase, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source.Length != 0);
      Debug.Assert(value.Length != 0);

      if (source.Length < value.Length)
      {
          return -1;
      }

      if (ignoreCase)
      {
          fixed (char* pSource = &MemoryMarshal.GetReference(source))
          fixed (char* pValue = &MemoryMarshal.GetReference(value))
          {
              return Interop.Globalization.IndexOfOrdinalIgnoreCase(pValue, value.Length, pSource, source.Length, findLast: !fromBeginning);
          }
      }

      // Highest index at which the value can still fit in the source.
      int lastStart = source.Length - value.Length;

      // Forward: 0 .. lastStart, stopping one past the end.
      // Backward: lastStart .. 0, stopping at -1.
      int first = fromBeginning ? 0 : lastStart;
      int stop = fromBeginning ? lastStart + 1 : -1;
      int step = fromBeginning ? 1 : -1;

      for (int pos = first; pos != stop; pos += step)
      {
          int matched = 0;
          while (matched < value.Length && source[pos + matched] == value[matched])
          {
              matched++;
          }

          if (matched == value.Length)
          {
              return pos;
          }
      }

      return -1;
  }
  /// <summary>
  /// Finds the last ordinal occurrence of <paramref name="value"/> in the window of
  /// <paramref name="source"/> that ends at <paramref name="startIndex"/> and extends
  /// <paramref name="count"/> characters to the left.
  /// </summary>
  /// <param name="source">String to search; must not be null.</param>
  /// <param name="value">String to look for; must not be null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> from which the backward search starts.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="ignoreCase">When true the search is delegated to the native
  /// <c>IndexOfOrdinalIgnoreCase</c> with <c>findLast: true</c>.</param>
  /// <returns>
  /// The index (relative to the start of <paramref name="source"/>) of the last match,
  /// <paramref name="startIndex"/> when <paramref name="value"/> is empty, or -1 when nothing matches.
  /// </returns>
  internal static unsafe int LastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source != null);
      Debug.Assert(value != null);

      int needleLength = value.Length;
      if (needleLength == 0)
      {
          return startIndex;
      }

      if (count < needleLength)
      {
          return -1;
      }

      // Leftmost index of the window: the position that is count characters away from startIndex.
      int windowStart = startIndex - count + 1;

      if (ignoreCase)
      {
          fixed (char* pSource = source)
          {
              // The native call reports a position relative to the window it was handed.
              int relative = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + windowStart, count, findLast: true);
              if (relative == -1)
              {
                  return -1;
              }

              return windowStart + relative;
          }
      }

      // Start at the rightmost position where the value still ends at startIndex and walk left.
      int candidate = startIndex - needleLength + 1;
      while (candidate >= windowStart)
      {
          int matched = 0;
          while (matched < needleLength && source[candidate + matched] == value[matched])
          {
              matched++;
          }

          if (matched == needleLength)
          {
              return candidate;
          }

          candidate--;
      }

      return -1;
  }
  /// <summary>
  /// Pins both character buffers and forwards them to the native
  /// <c>CompareStringOrdinalIgnoreCase</c>.
  /// </summary>
  /// <param name="string1">Reference to the first character of the first buffer.</param>
  /// <param name="count1">Number of characters in the first buffer.</param>
  /// <param name="string2">Reference to the first character of the second buffer.</param>
  /// <param name="count2">Number of characters in the second buffer.</param>
  /// <returns>The value returned by the native comparison.</returns>
  private static unsafe int CompareStringOrdinalIgnoreCase(ref char string1, int count1, ref char string2, int count2)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      fixed (char* pFirst = &string1)
      fixed (char* pSecond = &string2)
      {
          int comparison = Interop.Globalization.CompareStringOrdinalIgnoreCase(pFirst, count1, pSecond, count2);
          return comparison;
      }
  }
  // TODO https://github.com/dotnet/coreclr/issues/13827:
  // Ideally this overload would not exist and callers would use the two-span overload,
  // but because of the issue above that currently adds significant overhead.
  /// <summary>
  /// Culture-sensitive comparison of a span against a string through the native
  /// <c>CompareString</c>, using this instance's sort handle.
  /// </summary>
  /// <param name="string1">First operand.</param>
  /// <param name="string2">Second operand; must not be null.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>The value returned by the native comparison.</returns>
  private unsafe int CompareString(ReadOnlySpan<char> string1, string string2, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(string2 != null);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      fixed (char* pLeft = &MemoryMarshal.GetReference(string1))
      fixed (char* pRight = &string2.GetRawStringData())
      {
          int comparison = Interop.Globalization.CompareString(_sortHandle, pLeft, string1.Length, pRight, string2.Length, options);
          return comparison;
      }
  }
  /// <summary>
  /// Culture-sensitive comparison of two spans through the native <c>CompareString</c>,
  /// using this instance's sort handle.
  /// </summary>
  /// <param name="string1">First operand.</param>
  /// <param name="string2">Second operand.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>The value returned by the native comparison.</returns>
  private unsafe int CompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      fixed (char* pLeft = &MemoryMarshal.GetReference(string1))
      fixed (char* pRight = &MemoryMarshal.GetReference(string2))
      {
          int comparison = Interop.Globalization.CompareString(_sortHandle, pLeft, string1.Length, pRight, string2.Length, options);
          return comparison;
      }
  }
  /// <summary>
  /// Culture-sensitive search for the first occurrence of <paramref name="target"/> inside the
  /// window of <paramref name="source"/> described by <paramref name="startIndex"/> and
  /// <paramref name="count"/>.
  /// </summary>
  /// <param name="source">String to search; asserted non-null and non-empty.</param>
  /// <param name="target">String to look for; must not be null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> where the window begins.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <param name="matchLengthPtr">Optional out location for the length of the match. On the CORECLR
  /// fast path it is written (with <c>target.Length</c>) only when a match is found and the pointer
  /// is non-null; otherwise it is handed to the native call.</param>
  /// <returns>The index of the match relative to the start of <paramref name="source"/>, or -1.</returns>
  internal unsafe int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(target != null);
      Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);
      Debug.Assert((options & CompareOptions.Ordinal) == 0);

#if CORECLR
      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort())
      {
          int ordinalIndex = IndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
          if (ordinalIndex != -1 && matchLengthPtr != null)
          {
              *matchLengthPtr = target.Length;
          }

          return ordinalIndex;
      }
#endif

      fixed (char* pSource = source)
      fixed (char* pTarget = target)
      {
          // The native result is relative to the window, so translate it back.
          int relative = Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource + startIndex, count, options, matchLengthPtr);
          if (relative == -1)
          {
              return -1;
          }

          return relative + startIndex;
      }
  }
  // For now, this method is only called from Span APIs with either options == CompareOptions.None or CompareOptions.IgnoreCase
  /// <summary>
  /// Culture-sensitive search for <paramref name="target"/> in <paramref name="source"/>, returning
  /// the first or the last occurrence depending on <paramref name="fromBeginning"/>.
  /// </summary>
  /// <param name="source">Span to search; asserted non-empty.</param>
  /// <param name="target">Span to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options.</param>
  /// <param name="matchLengthPtr">Optional out location for the match length. Note that the native
  /// <c>LastIndexOf</c> call used for backward searches does not receive it.</param>
  /// <param name="fromBeginning">True to find the first occurrence, false to find the last.</param>
  /// <returns>The index of the match, or -1 when there is none.</returns>
  internal unsafe int IndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source.Length != 0);
      Debug.Assert(target.Length != 0);

      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
      {
          bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;
          return ignoreCase
              ? IndexOfOrdinalIgnoreCaseHelper(source, target, options, matchLengthPtr, fromBeginning)
              : IndexOfOrdinalHelper(source, target, options, matchLengthPtr, fromBeginning);
      }

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          return fromBeginning
              ? Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr)
              : Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Case-insensitive counterpart of IndexOfOrdinalHelper. The two are kept as separate methods
  /// because a merged version would prevent the JIT from optimizing the ignore-case path away.
  /// Compares plain ASCII characters directly (folding 'a'..'z' to upper case) and hands the whole
  /// search to the native layer as soon as a character that cannot be handled here is seen.
  /// </summary>
  /// <param name="source">Span to search; asserted non-empty.</param>
  /// <param name="target">Span to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options forwarded to the native call on fallback.</param>
  /// <param name="matchLengthPtr">Optional out location; receives <c>target.Length</c> when the fast
  /// path finds a match. The native <c>LastIndexOf</c> fallback does not receive it.</param>
  /// <param name="fromBeginning">True to find the first occurrence, false to find the last.</param>
  /// <returns>The index of the match, or -1 when there is none.</returns>
  private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!target.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          if (target.Length > source.Length)
          {
              goto InteropCall;
          }

          // The fast path is only valid when every target character is below 0x80 and not
          // flagged in s_highCharTable (table defined elsewhere in the class).
          for (int t = 0; t < target.Length; t++)
          {
              char candidate = pTarget[t];
              if (candidate >= 0x80 || s_highCharTable[candidate])
              {
                  goto InteropCall;
              }
          }

          // Highest index at which the target can still fit in the source.
          int lastStart = source.Length - target.Length;

          // Forward: 0 .. lastStart, stopping one past the end.
          // Backward: lastStart .. 0, stopping at -1.
          int first = fromBeginning ? 0 : lastStart;
          int stop = fromBeginning ? lastStart + 1 : -1;
          int step = fromBeginning ? 1 : -1;

          for (int pos = first; pos != stop; pos += step)
          {
              int matched = 0;
              while (matched < target.Length)
              {
                  char sourceChar = pSource[pos + matched];
                  char targetChar = pTarget[matched];

                  bool identicalPlainAscii = sourceChar == targetChar && sourceChar < 0x80 && !s_highCharTable[sourceChar];
                  if (!identicalPlainAscii)
                  {
                      // Fold both characters to upper case; the unsigned range test needs
                      // only a single comparison per character.
                      if ((uint)(sourceChar - 'a') <= ('z' - 'a'))
                      {
                          sourceChar = (char)(sourceChar - 0x20);
                      }

                      if ((uint)(targetChar - 'a') <= ('z' - 'a'))
                      {
                          targetChar = (char)(targetChar - 0x20);
                      }

                      if (sourceChar >= 0x80 || s_highCharTable[sourceChar])
                      {
                          goto InteropCall;
                      }

                      if (sourceChar != targetChar)
                      {
                          break;
                      }
                  }

                  matched++;
              }

              if (matched == target.Length)
              {
                  if (matchLengthPtr != null)
                  {
                      *matchLengthPtr = target.Length;
                  }

                  return pos;
              }
          }

          return -1;

      InteropCall:
          return fromBeginning
              ? Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr)
              : Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Case-sensitive fast path for culture-sensitive searches: compares plain ASCII characters
  /// directly and hands the whole search to the native layer as soon as a character that cannot
  /// be handled here is seen in either span.
  /// </summary>
  /// <param name="source">Span to search; asserted non-empty.</param>
  /// <param name="target">Span to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options forwarded to the native call on fallback.</param>
  /// <param name="matchLengthPtr">Optional out location; receives <c>target.Length</c> when the fast
  /// path finds a match. The native <c>LastIndexOf</c> fallback does not receive it.</param>
  /// <param name="fromBeginning">True to find the first occurrence, false to find the last.</param>
  /// <returns>The index of the match, or -1 when there is none.</returns>
  private unsafe int IndexOfOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!target.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          if (target.Length > source.Length)
          {
              goto InteropCall;
          }

          // The fast path is only valid when every target character is below 0x80 and not
          // flagged in s_highCharTable (table defined elsewhere in the class).
          for (int t = 0; t < target.Length; t++)
          {
              char candidate = pTarget[t];
              if (candidate >= 0x80 || s_highCharTable[candidate])
              {
                  goto InteropCall;
              }
          }

          // Highest index at which the target can still fit in the source.
          int lastStart = source.Length - target.Length;

          // Forward: 0 .. lastStart, stopping one past the end.
          // Backward: lastStart .. 0, stopping at -1.
          int first = fromBeginning ? 0 : lastStart;
          int stop = fromBeginning ? lastStart + 1 : -1;
          int step = fromBeginning ? 1 : -1;

          for (int pos = first; pos != stop; pos += step)
          {
              int matched = 0;
              while (matched < target.Length)
              {
                  char sourceChar = pSource[pos + matched];
                  if (sourceChar >= 0x80 || s_highCharTable[sourceChar])
                  {
                      goto InteropCall;
                  }

                  if (sourceChar != pTarget[matched])
                  {
                      break;
                  }

                  matched++;
              }

              if (matched == target.Length)
              {
                  if (matchLengthPtr != null)
                  {
                      *matchLengthPtr = target.Length;
                  }

                  return pos;
              }
          }

          return -1;

      InteropCall:
          return fromBeginning
              ? Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr)
              : Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Culture-sensitive search for the last occurrence of <paramref name="target"/> in the window
  /// of <paramref name="source"/> that ends at <paramref name="startIndex"/> and extends
  /// <paramref name="count"/> characters to the left.
  /// </summary>
  /// <param name="source">String to search; asserted non-null and non-empty.</param>
  /// <param name="target">String to look for; must not be null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> from which the backward search starts.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="options">Comparison options; must not include OrdinalIgnoreCase.
  /// <c>CompareOptions.Ordinal</c> is routed to LastIndexOfOrdinalCore.</param>
  /// <returns>
  /// The index of the match relative to the start of <paramref name="source"/>,
  /// <paramref name="startIndex"/> when <paramref name="target"/> is empty, or -1.
  /// </returns>
  private unsafe int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(target != null);
      Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);

      if (target.Length == 0)
      {
          return startIndex;
      }

      if (options == CompareOptions.Ordinal)
      {
          return LastIndexOfOrdinalCore(source, target, startIndex, count, ignoreCase: false);
      }

#if CORECLR
      bool asciiFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort();
      if (asciiFastPath)
      {
          return LastIndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
      }
#endif

      // startIndex is where the backward search begins; windowStart is the index in source of
      // the character that lies count characters away from it, i.e. the left edge of the window.
      int windowStart = startIndex - count + 1;

      fixed (char* pSource = source)
      fixed (char* pTarget = target)
      {
          // The native result is relative to the window, so translate it back.
          int relative = Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource + windowStart, count, options);
          if (relative == -1)
          {
              return -1;
          }

          return relative + windowStart;
      }
  }
  /// <summary>
  /// Culture-sensitive test of whether <paramref name="source"/> begins with <paramref name="prefix"/>.
  /// </summary>
  /// <param name="source">String to examine; asserted non-null and non-empty.</param>
  /// <param name="prefix">Prefix to look for; asserted non-null and non-empty.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>True when the prefix matches, false otherwise.</returns>
  private bool StartsWith(string source, string prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(!string.IsNullOrEmpty(prefix));
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

#if CORECLR
      bool asciiFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && prefix.IsFastSort();
      if (asciiFastPath)
      {
          return IsPrefix(source, prefix, GetOrdinalCompareOptions(options));
      }
#endif

      bool nativeResult = Interop.Globalization.StartsWith(_sortHandle, prefix, prefix.Length, source, source.Length, options);
      return nativeResult;
  }
  /// <summary>
  /// Culture-sensitive test of whether <paramref name="source"/> begins with <paramref name="prefix"/>.
  /// Uses the ASCII helper methods when this instance and <paramref name="options"/> allow it,
  /// otherwise calls the native <c>StartsWith</c> directly.
  /// </summary>
  /// <param name="source">Span to examine; asserted non-empty.</param>
  /// <param name="prefix">Prefix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>True when the prefix matches, false otherwise.</returns>
  private unsafe bool StartsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      bool asciiFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options);
      if (!asciiFastPath)
      {
          fixed (char* pSource = &MemoryMarshal.GetReference(source))
          fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
          {
              return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options);
          }
      }

      // On the fast path a prefix longer than the source is rejected up front.
      if (source.Length < prefix.Length)
      {
          return false;
      }

      bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;
      return ignoreCase
          ? StartsWithOrdinalIgnoreCaseHelper(source, prefix, options)
          : StartsWithOrdinalHelper(source, prefix, options);
  }
  /// <summary>
  /// Case-insensitive ASCII fast path for StartsWith. Walks <paramref name="source"/> and
  /// <paramref name="prefix"/> in lock step while both current characters are below 0x80 and not
  /// flagged in s_highCharTable (table defined elsewhere in the class), folding 'a'..'z' to upper
  /// case before comparing. As soon as a character outside that set is met, the decision is
  /// delegated to the native <c>StartsWith</c>.
  /// </summary>
  /// <remarks>
  /// The fallback passes the complete <paramref name="source"/> and <paramref name="prefix"/>,
  /// exactly as the non-fast-path caller does. Handing the native layer only a source window clipped
  /// to the remaining prefix length would cut off source characters that belong to the match when the
  /// two sides spell the same text with a different number of UTF-16 code units (for example a
  /// precomposed letter in the prefix against base letter + combining mark in the source).
  /// </remarks>
  /// <param name="source">Span to examine; asserted non-empty and at least as long as the prefix.</param>
  /// <param name="prefix">Prefix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options forwarded to the native call on fallback.</param>
  /// <returns>True when the prefix matches, false otherwise.</returns>
  private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= prefix.Length);

      int length = prefix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(prefix))
      {
          char* a = ap;
          char* b = bp;

          // "length != 0" is tested first so the pointers are never dereferenced past the prefix.
          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              int charA = *a;
              int charB = *b;

              if (charA != charB)
              {
                  // Uppercase both chars - the unsigned range test needs just one compare per char.
                  if ((uint)(charA - 'a') <= (uint)('z' - 'a'))
                  {
                      charA -= 0x20;
                  }

                  if ((uint)(charB - 'a') <= (uint)('z' - 'a'))
                  {
                      charB -= 0x20;
                  }

                  if (charA != charB)
                  {
                      return false;
                  }
              }

              // Next char
              a++;
              b++;
              length--;
          }

          if (length == 0)
          {
              return true;
          }

          // A character the fast path cannot judge was reached: let the native layer decide on
          // the full inputs so no part of the source is hidden from it.
          return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// Case-sensitive ASCII fast path for StartsWith. Walks <paramref name="source"/> and
  /// <paramref name="prefix"/> in lock step while both current characters are below 0x80 and not
  /// flagged in s_highCharTable (table defined elsewhere in the class). As soon as a character
  /// outside that set is met, the decision is delegated to the native <c>StartsWith</c>.
  /// </summary>
  /// <remarks>
  /// The fallback passes the complete <paramref name="source"/> and <paramref name="prefix"/>,
  /// exactly as the non-fast-path caller does. Handing the native layer only a source window clipped
  /// to the remaining prefix length would cut off source characters that belong to the match when the
  /// two sides spell the same text with a different number of UTF-16 code units (for example a
  /// precomposed letter in the prefix against base letter + combining mark in the source).
  /// </remarks>
  /// <param name="source">Span to examine; asserted non-empty and at least as long as the prefix.</param>
  /// <param name="prefix">Prefix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options forwarded to the native call on fallback.</param>
  /// <returns>True when the prefix matches, false otherwise.</returns>
  private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= prefix.Length);

      int length = prefix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(prefix))
      {
          char* a = ap;
          char* b = bp;

          // "length != 0" is tested first so the pointers are never dereferenced past the prefix.
          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              if (*a != *b)
              {
                  return false;
              }

              // Next char
              a++;
              b++;
              length--;
          }

          if (length == 0)
          {
              return true;
          }

          // A character the fast path cannot judge was reached: let the native layer decide on
          // the full inputs so no part of the source is hidden from it.
          return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// Culture-sensitive test of whether <paramref name="source"/> ends with <paramref name="suffix"/>.
  /// </summary>
  /// <param name="source">String to examine; asserted non-null and non-empty.</param>
  /// <param name="suffix">Suffix to look for; asserted non-null and non-empty.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>True when the suffix matches, false otherwise.</returns>
  private bool EndsWith(string source, string suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(!string.IsNullOrEmpty(suffix));
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

#if CORECLR
      bool asciiFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && suffix.IsFastSort();
      if (asciiFastPath)
      {
          return IsSuffix(source, suffix, GetOrdinalCompareOptions(options));
      }
#endif

      bool nativeResult = Interop.Globalization.EndsWith(_sortHandle, suffix, suffix.Length, source, source.Length, options);
      return nativeResult;
  }
  /// <summary>
  /// Culture-sensitive test of whether <paramref name="source"/> ends with <paramref name="suffix"/>.
  /// Uses the ASCII helper methods when this instance and <paramref name="options"/> allow it,
  /// otherwise calls the native <c>EndsWith</c> directly.
  /// </summary>
  /// <param name="source">Span to examine; asserted non-empty.</param>
  /// <param name="suffix">Suffix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>True when the suffix matches, false otherwise.</returns>
  private unsafe bool EndsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      bool asciiFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options);
      if (!asciiFastPath)
      {
          fixed (char* pSource = &MemoryMarshal.GetReference(source))
          fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
          {
              return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options);
          }
      }

      // On the fast path a suffix longer than the source is rejected up front.
      if (source.Length < suffix.Length)
      {
          return false;
      }

      bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;
      return ignoreCase
          ? EndsWithOrdinalIgnoreCaseHelper(source, suffix, options)
          : EndsWithOrdinalHelper(source, suffix, options);
  }
  /// <summary>
  /// Case-insensitive ASCII fast path for EndsWith. Walks <paramref name="source"/> and
  /// <paramref name="suffix"/> backwards in lock step while both current characters are below 0x80
  /// and not flagged in s_highCharTable (table defined elsewhere in the class), folding 'a'..'z' to
  /// upper case before comparing. As soon as a character outside that set is met, the decision is
  /// delegated to the native <c>EndsWith</c>.
  /// </summary>
  /// <remarks>
  /// The fallback passes the complete <paramref name="source"/> and <paramref name="suffix"/>,
  /// exactly as the non-fast-path caller does. Handing the native layer only a source window clipped
  /// to the remaining suffix length would cut off source characters that belong to the match when the
  /// two sides spell the same text with a different number of UTF-16 code units (for example a
  /// precomposed letter in the suffix against base letter + combining mark in the source).
  /// </remarks>
  /// <param name="source">Span to examine; asserted non-empty and at least as long as the suffix.</param>
  /// <param name="suffix">Suffix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options forwarded to the native call on fallback.</param>
  /// <returns>True when the suffix matches, false otherwise.</returns>
  private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= suffix.Length);

      int length = suffix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(suffix))
      {
          // Start at the last character of each span and move towards the front.
          char* a = ap + source.Length - 1;
          char* b = bp + suffix.Length - 1;

          // "length != 0" is tested first so the pointers are never dereferenced before the suffix.
          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              int charA = *a;
              int charB = *b;

              if (charA != charB)
              {
                  // Uppercase both chars - the unsigned range test needs just one compare per char.
                  if ((uint)(charA - 'a') <= (uint)('z' - 'a'))
                  {
                      charA -= 0x20;
                  }

                  if ((uint)(charB - 'a') <= (uint)('z' - 'a'))
                  {
                      charB -= 0x20;
                  }

                  if (charA != charB)
                  {
                      return false;
                  }
              }

              // Previous char
              a--;
              b--;
              length--;
          }

          if (length == 0)
          {
              return true;
          }

          // A character the fast path cannot judge was reached: let the native layer decide on
          // the full inputs so no part of the source is hidden from it.
          return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// Case-sensitive ASCII fast path for EndsWith. Walks <paramref name="source"/> and
  /// <paramref name="suffix"/> backwards in lock step while both current characters are below 0x80
  /// and not flagged in s_highCharTable (table defined elsewhere in the class). As soon as a
  /// character outside that set is met, the decision is delegated to the native <c>EndsWith</c>.
  /// </summary>
  /// <remarks>
  /// The fallback passes the complete <paramref name="source"/> and <paramref name="suffix"/>,
  /// exactly as the non-fast-path caller does. Handing the native layer only a source window clipped
  /// to the remaining suffix length would cut off source characters that belong to the match when the
  /// two sides spell the same text with a different number of UTF-16 code units (for example a
  /// precomposed letter in the suffix against base letter + combining mark in the source).
  /// </remarks>
  /// <param name="source">Span to examine; asserted non-empty and at least as long as the suffix.</param>
  /// <param name="suffix">Suffix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options forwarded to the native call on fallback.</param>
  /// <returns>True when the suffix matches, false otherwise.</returns>
  private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= suffix.Length);

      int length = suffix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(suffix))
      {
          // Start at the last character of each span and move towards the front.
          char* a = ap + source.Length - 1;
          char* b = bp + suffix.Length - 1;

          // "length != 0" is tested first so the pointers are never dereferenced before the suffix.
          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              if (*a != *b)
              {
                  return false;
              }

              // Previous char
              a--;
              b--;
              length--;
          }

          if (length == 0)
          {
              return true;
          }

          // A character the fast path cannot judge was reached: let the native layer decide on
          // the full inputs so no part of the source is hidden from it.
          return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// Builds a <see cref="SortKey"/> for <paramref name="source"/> by asking the native layer for
  /// the key size and then for the key bytes.
  /// </summary>
  /// <param name="source">String to create the key for.</param>
  /// <param name="options">Comparison options; none of the flags in
  /// <c>ValidSortkeyCtorMaskOffFlags</c> may be set.</param>
  /// <returns>A sort key wrapping the key bytes (an empty array for an empty string).</returns>
  /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
  /// <exception cref="ArgumentException"><paramref name="options"/> contains a disallowed flag, or
  /// the second native call reports a different length than the first.</exception>
  private unsafe SortKey CreateSortKey(string source, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      if (source == null)
      {
          throw new ArgumentNullException(nameof(source));
      }

      if ((options & ValidSortkeyCtorMaskOffFlags) != 0)
      {
          throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options));
      }

      byte[] keyData;
      if (source.Length != 0)
      {
          fixed (char* pSource = source)
          {
              // First call with a null buffer only asks how many bytes the key needs.
              int requiredLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options);
              keyData = new byte[requiredLength];

              fixed (byte* pKey = keyData)
              {
                  // Second call fills the buffer and must report the same length.
                  int writtenLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pKey, requiredLength, options);
                  if (writtenLength != requiredLength)
                  {
                      throw new ArgumentException(SR.Arg_ExternalException);
                  }
              }
          }
      }
      else
      {
          keyData = Array.Empty<byte>();
      }

      return new SortKey(Name, source, options, keyData);
  }
  /// <summary>
  /// Checks whether every code point in the buffer is sortable: no unpaired surrogates and no
  /// code point whose Unicode category is PrivateUse or OtherNotAssigned.
  /// </summary>
  /// <param name="text">Pointer to the first UTF-16 code unit.</param>
  /// <param name="length">Number of code units to examine.</param>
  /// <returns>True when all code points pass the checks (including for an empty buffer).</returns>
  private static unsafe bool IsSortable(char* text, int length)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      for (int i = 0; i < length; i++)
      {
          char current = text[i];
          UnicodeCategory category;

          if (char.IsHighSurrogate(current))
          {
              // A high surrogate must be followed by a low surrogate inside the buffer.
              bool hasPartner = i != length - 1 && char.IsLowSurrogate(text[i + 1]);
              if (!hasPartner)
              {
                  return false; // unpaired surrogate
              }

              category = CharUnicodeInfo.GetUnicodeCategory(char.ConvertToUtf32(current, text[i + 1]));
              i++; // the pair occupies two code units
          }
          else if (char.IsLowSurrogate(current))
          {
              return false; // unpaired surrogate
          }
          else
          {
              category = CharUnicodeInfo.GetUnicodeCategory(current);
          }

          if (category == UnicodeCategory.PrivateUse || category == UnicodeCategory.OtherNotAssigned)
          {
              return false;
          }
      }

      return true;
  }
  687. // -----------------------------
  688. // ---- PAL layer ends here ----
  689. // -----------------------------
  /// <summary>
  /// Computes a culture-sensitive hash code for <paramref name="source"/> by generating its sort
  /// key through the native layer and hashing the key bytes with Marvin.
  /// </summary>
  /// <remarks>
  /// Any buffer rented from <see cref="ArrayPool{T}.Shared"/> is handed back in a <c>finally</c>
  /// block, so it is returned to the pool on the exception path as well as on success.
  /// </remarks>
  /// <param name="source">Text to hash; an empty span hashes to 0.</param>
  /// <param name="options">Comparison options; must not include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>The hash of the sort key, or 0 for an empty <paramref name="source"/>.</returns>
  /// <exception cref="ArgumentException">The native layer reported a key length of 0, or a length
  /// larger than the buffer even after the buffer was resized.</exception>
  internal unsafe int GetHashCodeOfStringCore(ReadOnlySpan<char> source, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      if (source.Length == 0)
      {
          return 0;
      }

      // according to ICU User Guide the performance of ucol_getSortKey is worse when it is called with null output buffer
      // the solution is to try to fill the sort key in a temporary buffer of size equal 4 x string length
      // 1MB is the biggest array that can be rented from ArrayPool.Shared without memory allocation
      int sortKeyLength = (source.Length > 1024 * 1024 / 4) ? 0 : 4 * source.Length;

      byte[]? borrowedArray = null;
      Span<byte> sortKey = sortKeyLength <= 1024
          ? stackalloc byte[1024]
          : (borrowedArray = ArrayPool<byte>.Shared.Rent(sortKeyLength));

      try
      {
          fixed (char* pSource = &MemoryMarshal.GetReference(source))
          {
              fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey))
              {
                  sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options);
              }

              if (sortKeyLength > sortKey.Length) // slow path for big strings
              {
                  if (borrowedArray != null)
                  {
                      ArrayPool<byte>.Shared.Return(borrowedArray);

                      // Cleared so the finally block cannot return the same array twice if Rent throws.
                      borrowedArray = null;
                  }

                  sortKey = (borrowedArray = ArrayPool<byte>.Shared.Rent(sortKeyLength));

                  fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey))
                  {
                      sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options);
                  }
              }
          }

          if (sortKeyLength == 0 || sortKeyLength > sortKey.Length) // internal error (0) or a bug (2nd call failed) in ucol_getSortKey
          {
              throw new ArgumentException(SR.Arg_ExternalException);
          }

          return Marvin.ComputeHash32(sortKey.Slice(0, sortKeyLength), Marvin.DefaultSeed);
      }
      finally
      {
          if (borrowedArray != null)
          {
              ArrayPool<byte>.Shared.Return(borrowedArray);
          }
      }
  }
  /// <summary>
  /// Picks the ordinal counterpart of <paramref name="options"/>: OrdinalIgnoreCase when the
  /// IgnoreCase flag is set, Ordinal otherwise. All other flags are disregarded.
  /// </summary>
  private static CompareOptions GetOrdinalCompareOptions(CompareOptions options)
  {
      bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;

      return ignoreCase
          ? CompareOptions.OrdinalIgnoreCase
          : CompareOptions.Ordinal;
  }
  /// <summary>
  /// Returns true unless <paramref name="options"/> contains IgnoreSymbols.
  /// </summary>
  // Unlike the other Ignore options, IgnoreSymbols impacts ASCII characters (e.g. ').
  private static bool CanUseAsciiOrdinalForOptions(CompareOptions options) =>
      (options & CompareOptions.IgnoreSymbols) != CompareOptions.IgnoreSymbols;
  /// <summary>
  /// Builds a <see cref="SortVersion"/> from the version number reported by the native layer for
  /// this instance's sort handle. The accompanying Guid carries that version number in its first
  /// field and the four bytes of LCID (most significant first) in its last four bytes; every
  /// field in between is zero.
  /// </summary>
  private SortVersion GetSortVersion()
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      int sortVersion = Interop.Globalization.GetSortVersion(_sortHandle);
      int lcid = LCID;

      // Split the LCID into bytes, high byte first.
      byte lcidByte3 = (byte)(lcid >> 24);
      byte lcidByte2 = (byte)((lcid & 0x00FF0000) >> 16);
      byte lcidByte1 = (byte)((lcid & 0x0000FF00) >> 8);
      byte lcidByte0 = (byte)(lcid & 0xFF);

      Guid sortId = new Guid(sortVersion, 0, 0, 0, 0, 0, 0, lcidByte3, lcidByte2, lcidByte1, lcidByte0);

      return new SortVersion(sortVersion, lcid, sortId);
  }
  /// <summary>
  /// Process-wide cache mapping a sort name to the native sort handle opened for it.
  /// Handles placed in the cache are never closed by this class.
  /// </summary>
  private static class SortHandleCache
  {
      // in most scenarios there is a limited number of cultures with limited number of sort options
      // so caching the sort handles and not freeing them is OK, see https://github.com/dotnet/coreclr/pull/25117 for more
      private static readonly Dictionary<string, IntPtr> s_sortNameToSortHandleCache = new Dictionary<string, IntPtr>();

      /// <summary>
      /// Returns the handle cached for <paramref name="sortName"/>, opening and caching a new one on a miss.
      /// The lookup, native open and insertion all happen under a single lock on the cache dictionary.
      /// </summary>
      /// <exception cref="OutOfMemoryException">The native call reported OutOfMemory.</exception>
      /// <exception cref="ExternalException">The native call reported any other non-success result.</exception>
      internal static IntPtr GetCachedSortHandle(string sortName)
      {
          lock (s_sortNameToSortHandleCache)
          {
              if (s_sortNameToSortHandleCache.TryGetValue(sortName, out IntPtr cachedHandle))
              {
                  return cachedHandle;
              }

              Interop.Globalization.ResultCode status = Interop.Globalization.GetSortHandle(sortName, out IntPtr newHandle);

              if (status == Interop.Globalization.ResultCode.OutOfMemory)
              {
                  throw new OutOfMemoryException();
              }

              if (status != Interop.Globalization.ResultCode.Success)
              {
                  throw new ExternalException(SR.Arg_ExternalException);
              }

              try
              {
                  s_sortNameToSortHandleCache.Add(sortName, newHandle);
              }
              catch
              {
                  // The handle never made it into the cache, so close it here before propagating.
                  Interop.Globalization.CloseSortHandle(newHandle);
                  throw;
              }

              return newHandle;
          }
      }
  }
  // See https://github.com/dotnet/coreclr/blob/master/src/utilcode/util_nodependencies.cpp#L970
  //
  // One flag per ASCII code point (index == code point, 0x00..0x7F), laid out eight per row.
  // NOTE(review): presumably a true entry means the character cannot be handled by the ASCII
  // ordinal fast paths and must go through the native collation call - confirm against the callers.
  //
  // Entries that are true: every control character except TAB (0x09), plus ' (0x27), - (0x2D) and DEL (0x7F).
  // 0x0B and 0x0C are true to agree with the referenced native table and with the other
  // control characters; they were previously false here.
  private static readonly bool[] s_highCharTable = new bool[0x80]
  {
      /* 0x00 - 0x07: control characters */
      true, true, true, true, true, true, true, true,
      /* 0x08 - 0x0F: only TAB (0x09) is false */
      true, false, true, true, true, true, true, true,
      /* 0x10 - 0x17: control characters */
      true, true, true, true, true, true, true, true,
      /* 0x18 - 0x1F: control characters */
      true, true, true, true, true, true, true, true,
      /* 0x20 - 0x27: space ! " # $ % & '  (only ' is true) */
      false, false, false, false, false, false, false, true,
      /* 0x28 - 0x2F: ( ) * + , - . /  (only - is true) */
      false, false, false, false, false, true, false, false,
      /* 0x30 - 0x37: 0 1 2 3 4 5 6 7 */
      false, false, false, false, false, false, false, false,
      /* 0x38 - 0x3F: 8 9 : ; < = > ? */
      false, false, false, false, false, false, false, false,
      /* 0x40 - 0x47: @ A B C D E F G */
      false, false, false, false, false, false, false, false,
      /* 0x48 - 0x4F: H I J K L M N O */
      false, false, false, false, false, false, false, false,
      /* 0x50 - 0x57: P Q R S T U V W */
      false, false, false, false, false, false, false, false,
      /* 0x58 - 0x5F: X Y Z [ \ ] ^ _ */
      false, false, false, false, false, false, false, false,
      /* 0x60 - 0x67: ` a b c d e f g */
      false, false, false, false, false, false, false, false,
      /* 0x68 - 0x6F: h i j k l m n o */
      false, false, false, false, false, false, false, false,
      /* 0x70 - 0x77: p q r s t u v w */
      false, false, false, false, false, false, false, false,
      /* 0x78 - 0x7F: x y z { | } ~ DEL  (only DEL is true) */
      false, false, false, false, false, false, false, true,
  };
  924. }
  925. }