CompareInfo.Unix.cs 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Buffers;
  5. using System.Collections.Generic;
  6. using System.Diagnostics;
  7. using System.Runtime.CompilerServices;
  8. using System.Runtime.InteropServices;
  9. using System.Security;
  10. using System.Threading;
  11. using Internal.Runtime.CompilerServices;
  12. namespace System.Globalization
  13. {
  14. public partial class CompareInfo
  15. {
  /// <summary>
  /// Handle obtained from <c>SortHandleCache.GetCachedSortHandle</c> in <see cref="InitSort"/>;
  /// passed as the first argument to the <c>Interop.Globalization</c> collation calls in this file.
  /// Not assigned when <c>GlobalizationMode.Invariant</c> is set.
  /// </summary>
  [NonSerialized] private IntPtr _sortHandle;

  /// <summary>
  /// Set by <see cref="InitSort"/>: true in invariant mode, or when the sort name is empty,
  /// exactly "en", or starts with "en-". Gates the managed ASCII fast paths in this file.
  /// </summary>
  [NonSerialized] private bool _isAsciiEqualityOrdinal;
  /// <summary>
  /// Captures the culture's sort name and initializes the state used by the comparison routines:
  /// the ASCII-fast-path flag and, outside invariant mode, the cached sort handle.
  /// </summary>
  /// <param name="culture">Culture whose <c>SortName</c> selects the collation.</param>
  private void InitSort(CultureInfo culture)
  {
      string sortName = culture.SortName;
      _sortName = sortName;

      if (GlobalizationMode.Invariant)
      {
          // Invariant mode: no sort handle is requested.
          _isAsciiEqualityOrdinal = true;
          return;
      }

      // The checks below are a hand-inlined form of
      //     sortName == "" || sortName == "en" || sortName.StartsWith("en-", StringComparison.Ordinal)
      // written with raw char comparisons to avoid potential implementation cycles within globalization.
      bool isEmptyName = sortName.Length == 0;
      bool isEnglishName = sortName.Length >= 2
          && sortName[0] == 'e'
          && sortName[1] == 'n'
          && (sortName.Length == 2 || sortName[2] == '-');

      _isAsciiEqualityOrdinal = isEmptyName || isEnglishName;
      _sortHandle = SortHandleCache.GetCachedSortHandle(sortName);
  }
  /// <summary>
  /// Finds the first ordinal occurrence of <paramref name="value"/> inside the window of
  /// <paramref name="source"/> that starts at <paramref name="startIndex"/> and spans
  /// <paramref name="count"/> characters.
  /// </summary>
  /// <param name="source">String to search; must not be null.</param>
  /// <param name="value">String to look for; must not be null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> where the window begins.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="ignoreCase">When true the search is delegated to the native ordinal-ignore-case routine.</param>
  /// <returns>
  /// The index into <paramref name="source"/> of the match, <paramref name="startIndex"/> when
  /// <paramref name="value"/> is empty, or -1 when there is no match.
  /// </returns>
  internal static unsafe int IndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source != null);
      Debug.Assert(value != null);

      int valueLength = value.Length;

      if (valueLength == 0)
      {
          return startIndex;
      }

      if (count < valueLength)
      {
          return -1;
      }

      if (ignoreCase)
      {
          fixed (char* pSource = source)
          {
              // The native call reports an index relative to the start of the window.
              int relative = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, valueLength, pSource + startIndex, count, findLast: false);
              if (relative == -1)
              {
                  return -1;
              }

              return startIndex + relative;
          }
      }

      // Last position at which the whole value still fits inside the window.
      int lastCandidate = startIndex + (count - valueLength);
      int candidate = startIndex;

      while (candidate <= lastCandidate)
      {
          int matched = 0;
          while (matched < valueLength && source[candidate + matched] == value[matched])
          {
              matched++;
          }

          if (matched == valueLength)
          {
              return candidate;
          }

          candidate++;
      }

      return -1;
  }
  /// <summary>
  /// Ordinal search of <paramref name="value"/> within <paramref name="source"/>, scanning either
  /// from the beginning (first match) or from the end (last match).
  /// </summary>
  /// <param name="source">Span to search; asserted non-empty.</param>
  /// <param name="value">Span to look for; asserted non-empty.</param>
  /// <param name="ignoreCase">When true the search is delegated to the native ordinal-ignore-case routine.</param>
  /// <param name="fromBeginning">True to scan left to right, false to scan right to left.</param>
  /// <returns>The index of the match in <paramref name="source"/>, or -1 when there is none.</returns>
  internal static unsafe int IndexOfOrdinalCore(ReadOnlySpan<char> source, ReadOnlySpan<char> value, bool ignoreCase, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source.Length != 0);
      Debug.Assert(value.Length != 0);

      if (source.Length < value.Length)
      {
          return -1;
      }

      if (ignoreCase)
      {
          fixed (char* pSource = &MemoryMarshal.GetReference(source))
          fixed (char* pValue = &MemoryMarshal.GetReference(value))
          {
              return Interop.Globalization.IndexOfOrdinalIgnoreCase(pValue, value.Length, pSource, source.Length, findLast: !fromBeginning);
          }
      }

      // Highest start position at which value still fits inside source.
      int lastStart = source.Length - value.Length;

      // Forward: 0 .. lastStart, stopping at lastStart + 1.
      // Backward: lastStart .. 0, stopping at -1.
      int first = fromBeginning ? 0 : lastStart;
      int stop = fromBeginning ? lastStart + 1 : -1;
      int step = fromBeginning ? 1 : -1;

      for (int position = first; position != stop; position += step)
      {
          int matched = 0;
          while (matched < value.Length && source[position + matched] == value[matched])
          {
              matched++;
          }

          if (matched == value.Length)
          {
              return position;
          }
      }

      return -1;
  }
  /// <summary>
  /// Finds the last ordinal occurrence of <paramref name="value"/> inside the window of
  /// <paramref name="source"/> that ends at <paramref name="startIndex"/> and spans
  /// <paramref name="count"/> characters going backwards.
  /// </summary>
  /// <param name="source">String to search; must not be null.</param>
  /// <param name="value">String to look for; must not be null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> from which the backward search starts.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="ignoreCase">When true the search is delegated to the native ordinal-ignore-case routine.</param>
  /// <returns>
  /// The index into <paramref name="source"/> of the match, <paramref name="startIndex"/> when
  /// <paramref name="value"/> is empty, or -1 when there is no match.
  /// </returns>
  internal static unsafe int LastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source != null);
      Debug.Assert(value != null);

      int valueLength = value.Length;

      if (valueLength == 0)
      {
          return startIndex;
      }

      if (count < valueLength)
      {
          return -1;
      }

      // Leftmost index of the window: the position that is count characters
      // back from (and including) startIndex.
      int windowStart = startIndex - count + 1;

      if (ignoreCase)
      {
          fixed (char* pSource = source)
          {
              // The native call reports an index relative to the start of the window.
              int relative = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, valueLength, pSource + windowStart, count, findLast: true);
              if (relative == -1)
              {
                  return -1;
              }

              return windowStart + relative;
          }
      }

      // Rightmost position at which value still ends at or before startIndex.
      int candidate = startIndex - valueLength + 1;

      while (candidate >= windowStart)
      {
          int matched = 0;
          while (matched < valueLength && source[candidate + matched] == value[matched])
          {
              matched++;
          }

          if (matched == valueLength)
          {
              return candidate;
          }

          candidate--;
      }

      return -1;
  }
  /// <summary>
  /// Pins both character references and forwards them, with their lengths, to the native
  /// ordinal-ignore-case comparison.
  /// </summary>
  /// <param name="string1">Reference to the first character of the left operand.</param>
  /// <param name="count1">Length passed for the left operand.</param>
  /// <param name="string2">Reference to the first character of the right operand.</param>
  /// <param name="count2">Length passed for the right operand.</param>
  /// <returns>The value returned by <c>Interop.Globalization.CompareStringOrdinalIgnoreCase</c>.</returns>
  private static unsafe int CompareStringOrdinalIgnoreCase(ref char string1, int count1, ref char string2, int count2)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      int comparison;
      fixed (char* pLeft = &string1, pRight = &string2)
      {
          comparison = Interop.Globalization.CompareStringOrdinalIgnoreCase(pLeft, count1, pRight, count2);
      }

      return comparison;
  }
  171. // TODO https://github.com/dotnet/coreclr/issues/13827:
  172. // This method shouldn't be necessary, as we should be able to just use the overload
  173. // that takes two spans. But due to this issue, that's adding significant overhead.
  /// <summary>
  /// Compares a span with a string through the native collation call, using this instance's sort handle.
  /// </summary>
  /// <param name="string1">Left operand.</param>
  /// <param name="string2">Right operand; must not be null.</param>
  /// <param name="options">Comparison options; asserted not to include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>The value returned by <c>Interop.Globalization.CompareString</c>.</returns>
  private unsafe int CompareString(ReadOnlySpan<char> string1, string string2, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(string2 != null);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      int comparison;
      fixed (char* pLeft = &MemoryMarshal.GetReference(string1), pRight = &string2.GetRawStringData())
      {
          comparison = Interop.Globalization.CompareString(_sortHandle, pLeft, string1.Length, pRight, string2.Length, options);
      }

      return comparison;
  }
  /// <summary>
  /// Compares two spans through the native collation call, using this instance's sort handle.
  /// </summary>
  /// <param name="string1">Left operand.</param>
  /// <param name="string2">Right operand.</param>
  /// <param name="options">Comparison options; asserted not to include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>The value returned by <c>Interop.Globalization.CompareString</c>.</returns>
  private unsafe int CompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      int comparison;
      fixed (char* pLeft = &MemoryMarshal.GetReference(string1), pRight = &MemoryMarshal.GetReference(string2))
      {
          comparison = Interop.Globalization.CompareString(_sortHandle, pLeft, string1.Length, pRight, string2.Length, options);
      }

      return comparison;
  }
  /// <summary>
  /// Culture-sensitive forward search of <paramref name="target"/> in the window of
  /// <paramref name="source"/> described by <paramref name="startIndex"/> and <paramref name="count"/>.
  /// Uses the managed ASCII fast path when this instance and <paramref name="options"/> allow it,
  /// otherwise calls the native <c>IndexOf</c>.
  /// </summary>
  /// <param name="source">String to search; asserted non-null and non-empty.</param>
  /// <param name="target">String to look for; asserted non-null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> where the window begins.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="options">Comparison options; asserted not to include Ordinal or OrdinalIgnoreCase.</param>
  /// <param name="matchLengthPtr">Forwarded to the helper or native call so it can report the match length.</param>
  /// <returns>The index into <paramref name="source"/> of the match, or -1 when there is none.</returns>
  internal unsafe int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(target != null);
      Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);
      Debug.Assert((options & CompareOptions.Ordinal) == 0);

      // Both branches produce an index relative to the start of the window.
      int relative;

      bool useAsciiFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options);
      if (!useAsciiFastPath)
      {
          fixed (char* pSource = source)
          fixed (char* pTarget = target)
          {
              relative = Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource + startIndex, count, options, matchLengthPtr);
          }
      }
      else if ((options & CompareOptions.IgnoreCase) != 0)
      {
          relative = IndexOfOrdinalIgnoreCaseHelper(source.AsSpan(startIndex, count), target.AsSpan(), options, matchLengthPtr, fromBeginning: true);
      }
      else
      {
          relative = IndexOfOrdinalHelper(source.AsSpan(startIndex, count), target.AsSpan(), options, matchLengthPtr, fromBeginning: true);
      }

      if (relative == -1)
      {
          return -1;
      }

      return relative + startIndex;
  }
  220. // For now, this method is only called from Span APIs with either options == CompareOptions.None or CompareOptions.IgnoreCase
  /// <summary>
  /// Culture-sensitive search of <paramref name="target"/> in <paramref name="source"/>, forward or
  /// backward. Uses the managed ASCII fast path when this instance and <paramref name="options"/>
  /// allow it, otherwise calls the native <c>IndexOf</c> / <c>LastIndexOf</c>.
  /// </summary>
  /// <param name="source">Span to search; asserted non-empty.</param>
  /// <param name="target">Span to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options.</param>
  /// <param name="matchLengthPtr">
  /// Forwarded to the helpers and to the native <c>IndexOf</c>; the native <c>LastIndexOf</c> call does not take it.
  /// </param>
  /// <param name="fromBeginning">True to find the first match, false to find the last.</param>
  /// <returns>The index of the match in <paramref name="source"/>, or -1 when there is none.</returns>
  internal unsafe int IndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source.Length != 0);
      Debug.Assert(target.Length != 0);

      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
      {
          bool ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
          return ignoreCase
              ? IndexOfOrdinalIgnoreCaseHelper(source, target, options, matchLengthPtr, fromBeginning)
              : IndexOfOrdinalHelper(source, target, options, matchLengthPtr, fromBeginning);
      }

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          return fromBeginning
              ? Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr)
              : Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Case-insensitive twin of <see cref="IndexOfOrdinalHelper"/>. The two are kept separate because
  /// the JIT would not be able to optimize the ignore-case path away in a merged method.
  /// Compares char by char with ASCII upper-casing; as soon as a char that is &gt;= 0x80 or flagged in
  /// <c>HighCharTable</c> is seen, the whole search is redone by the native
  /// <c>IndexOf</c> / <c>LastIndexOf</c>.
  /// </summary>
  /// <param name="source">Span to search.</param>
  /// <param name="target">Span to look for; asserted non-empty.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <param name="matchLengthPtr">
  /// When non-null, receives <c>target.Length</c> on a managed match; also forwarded to the native <c>IndexOf</c>.
  /// </param>
  /// <param name="fromBeginning">True to find the first match, false to find the last.</param>
  /// <returns>The index of the match in <paramref name="source"/>, or -1 when there is none.</returns>
  private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!target.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          // The target must consist only of chars the fast path can handle.
          for (int t = 0; t < target.Length; t++)
          {
              char candidate = pTarget[t];
              if (candidate >= 0x80 || HighCharTable[candidate])
              {
                  goto InteropCall;
              }
          }

          if (target.Length > source.Length)
          {
              // The target is longer than the source: report "not found" only if the
              // source has no char that requires the native call.
              for (int s = 0; s < source.Length; s++)
              {
                  char candidate = pSource[s];
                  if (candidate >= 0x80 || HighCharTable[candidate])
                  {
                      goto InteropCall;
                  }
              }

              return -1;
          }

          // Highest start position at which target still fits inside source.
          int lastStart = source.Length - target.Length;

          // Forward: 0 .. lastStart, stopping at lastStart + 1.
          // Backward: lastStart .. 0, stopping at -1.
          int first = fromBeginning ? 0 : lastStart;
          int stop = fromBeginning ? lastStart + 1 : -1;
          int step = fromBeginning ? 1 : -1;

          for (int start = first; start != stop; start += step)
          {
              int sourcePos = start;
              bool mismatch = false;

              for (int targetPos = 0; targetPos < target.Length; targetPos++, sourcePos++)
              {
                  char sourceChar = pSource[sourcePos];
                  char targetChar = pTarget[targetPos];

                  if (sourceChar >= 0x80 || HighCharTable[sourceChar])
                  {
                      goto InteropCall;
                  }

                  if (sourceChar == targetChar)
                  {
                      continue;
                  }

                  // Upper-case both chars; the unsigned subtraction needs only one compare per char.
                  if ((uint)(sourceChar - 'a') <= ('z' - 'a'))
                  {
                      sourceChar = (char)(sourceChar - 0x20);
                  }

                  if ((uint)(targetChar - 'a') <= ('z' - 'a'))
                  {
                      targetChar = (char)(targetChar - 0x20);
                  }

                  if (sourceChar == targetChar)
                  {
                      continue;
                  }

                  // The match may be affected by a special character: make sure the char
                  // following the mismatch is regular ASCII before giving up on this position.
                  if (sourcePos < source.Length - 1 && pSource[sourcePos + 1] >= 0x80)
                  {
                      goto InteropCall;
                  }

                  mismatch = true;
                  break;
              }

              if (mismatch)
              {
                  continue;
              }

              // The match may be affected by a special character: make sure the char
              // right after the matched range is regular ASCII.
              if (sourcePos < source.Length && pSource[sourcePos] >= 0x80)
              {
                  goto InteropCall;
              }

              if (matchLengthPtr != null)
              {
                  *matchLengthPtr = target.Length;
              }

              return start;
          }

          return -1;

      InteropCall:
          if (fromBeginning)
          {
              return Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr);
          }
          else
          {
              return Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options);
          }
      }
  }
  /// <summary>
  /// Managed fast path for a case-sensitive search. Compares char by char; as soon as a char that is
  /// &gt;= 0x80 or flagged in <c>HighCharTable</c> is seen, the whole search is redone by the native
  /// <c>IndexOf</c> / <c>LastIndexOf</c>.
  /// </summary>
  /// <param name="source">Span to search.</param>
  /// <param name="target">Span to look for; asserted non-empty.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <param name="matchLengthPtr">
  /// When non-null, receives <c>target.Length</c> on a managed match; also forwarded to the native <c>IndexOf</c>.
  /// </param>
  /// <param name="fromBeginning">True to find the first match, false to find the last.</param>
  /// <returns>The index of the match in <paramref name="source"/>, or -1 when there is none.</returns>
  private unsafe int IndexOfOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!target.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          // The target must consist only of chars the fast path can handle.
          for (int t = 0; t < target.Length; t++)
          {
              char candidate = pTarget[t];
              if (candidate >= 0x80 || HighCharTable[candidate])
              {
                  goto InteropCall;
              }
          }

          if (target.Length > source.Length)
          {
              // The target is longer than the source: report "not found" only if the
              // source has no char that requires the native call.
              for (int s = 0; s < source.Length; s++)
              {
                  char candidate = pSource[s];
                  if (candidate >= 0x80 || HighCharTable[candidate])
                  {
                      goto InteropCall;
                  }
              }

              return -1;
          }

          // Highest start position at which target still fits inside source.
          int lastStart = source.Length - target.Length;

          // Forward: 0 .. lastStart, stopping at lastStart + 1.
          // Backward: lastStart .. 0, stopping at -1.
          int first = fromBeginning ? 0 : lastStart;
          int stop = fromBeginning ? lastStart + 1 : -1;
          int step = fromBeginning ? 1 : -1;

          for (int start = first; start != stop; start += step)
          {
              int sourcePos = start;
              bool mismatch = false;

              for (int targetPos = 0; targetPos < target.Length; targetPos++, sourcePos++)
              {
                  char sourceChar = pSource[sourcePos];
                  char targetChar = pTarget[targetPos];

                  if (sourceChar >= 0x80 || HighCharTable[sourceChar])
                  {
                      goto InteropCall;
                  }

                  if (sourceChar == targetChar)
                  {
                      continue;
                  }

                  // The match may be affected by a special character: make sure the char
                  // following the mismatch is regular ASCII before giving up on this position.
                  if (sourcePos < source.Length - 1 && pSource[sourcePos + 1] >= 0x80)
                  {
                      goto InteropCall;
                  }

                  mismatch = true;
                  break;
              }

              if (mismatch)
              {
                  continue;
              }

              // The match may be affected by a special character: make sure the char
              // right after the matched range is regular ASCII.
              if (sourcePos < source.Length && pSource[sourcePos] >= 0x80)
              {
                  goto InteropCall;
              }

              if (matchLengthPtr != null)
              {
                  *matchLengthPtr = target.Length;
              }

              return start;
          }

          return -1;

      InteropCall:
          if (fromBeginning)
          {
              return Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr);
          }
          else
          {
              return Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options);
          }
      }
  }
  /// <summary>
  /// Backward search of <paramref name="target"/> in the window of <paramref name="source"/> that
  /// ends at <paramref name="startIndex"/> and spans <paramref name="count"/> characters.
  /// </summary>
  /// <param name="source">String to search; asserted non-null and non-empty.</param>
  /// <param name="target">String to look for; asserted non-null.</param>
  /// <param name="startIndex">Index in <paramref name="source"/> from which the backward search starts.</param>
  /// <param name="count">Number of characters in the window.</param>
  /// <param name="options">
  /// Comparison options; asserted not to include OrdinalIgnoreCase. Exactly
  /// <see cref="CompareOptions.Ordinal"/> is routed to <see cref="LastIndexOfOrdinalCore"/>.
  /// </param>
  /// <returns>
  /// The index into <paramref name="source"/> of the match, <paramref name="startIndex"/> when
  /// <paramref name="target"/> is empty, or -1 when there is no match.
  /// </returns>
  private unsafe int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(target != null);
      Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);

      if (target.Length == 0)
      {
          return startIndex;
      }

      if (options == CompareOptions.Ordinal)
      {
          return LastIndexOfOrdinalCore(source, target, startIndex, count, ignoreCase: false);
      }

      // Leftmost index of the window: the position that is count characters
      // back from (and including) startIndex.
      int windowStart = startIndex - count + 1;

      // Every branch below produces an index relative to windowStart.
      int relative;

      bool useAsciiFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options);
      if (!useAsciiFastPath)
      {
          fixed (char* pSource = source)
          fixed (char* pTarget = target)
          {
              relative = Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource + windowStart, count, options);
          }
      }
      else if ((options & CompareOptions.IgnoreCase) != 0)
      {
          relative = IndexOfOrdinalIgnoreCaseHelper(source.AsSpan(windowStart, count), target.AsSpan(), options, matchLengthPtr: null, fromBeginning: false);
      }
      else
      {
          relative = IndexOfOrdinalHelper(source.AsSpan(windowStart, count), target.AsSpan(), options, matchLengthPtr: null, fromBeginning: false);
      }

      if (relative == -1)
      {
          return -1;
      }

      return relative + windowStart;
  }
  /// <summary>
  /// Culture-sensitive prefix test. Uses the managed ASCII fast path when this instance and
  /// <paramref name="options"/> allow it, otherwise calls the native <c>StartsWith</c>.
  /// </summary>
  /// <param name="source">Span to test; asserted non-empty.</param>
  /// <param name="prefix">Prefix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options; asserted not to include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>True when <paramref name="source"/> starts with <paramref name="prefix"/>.</returns>
  private unsafe bool StartsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
      {
          bool ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
          return ignoreCase
              ? StartsWithOrdinalIgnoreCaseHelper(source, prefix, options)
              : StartsWithOrdinalHelper(source, prefix, options);
      }

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
      {
          return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Managed fast path for a case-insensitive prefix test. Walks both spans from the front with ASCII
  /// upper-casing; as soon as a char that is &gt;= 0x80 or flagged in <c>HighCharTable</c> is seen, the
  /// decision is handed to the native <c>StartsWith</c>.
  /// </summary>
  /// <param name="source">Span to test; asserted non-empty.</param>
  /// <param name="prefix">Prefix to look for; asserted non-empty.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> starts with <paramref name="prefix"/>.</returns>
  private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      // Number of leading positions present in both spans.
      int common = Math.Min(source.Length, prefix.Length);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
      {
          for (int i = 0; i < common; i++)
          {
              int sourceChar = pSource[i];
              int prefixChar = pPrefix[i];

              if (sourceChar >= 0x80 || prefixChar >= 0x80 || HighCharTable[sourceChar] || HighCharTable[prefixChar])
              {
                  goto InteropCall;
              }

              if (sourceChar == prefixChar)
              {
                  continue;
              }

              // Upper-case both chars; the unsigned subtraction needs only one compare per char.
              if ((uint)(sourceChar - 'a') <= (uint)('z' - 'a'))
              {
                  sourceChar -= 0x20;
              }

              if ((uint)(prefixChar - 'a') <= (uint)('z' - 'a'))
              {
                  prefixChar -= 0x20;
              }

              if (sourceChar == prefixChar)
              {
                  continue;
              }

              // The match may be affected by a special character: make sure the char following
              // the mismatch, on either side, is regular ASCII before answering "no".
              if (i < source.Length - 1 && pSource[i + 1] >= 0x80)
              {
                  goto InteropCall;
              }

              if (i < prefix.Length - 1 && pPrefix[i + 1] >= 0x80)
              {
                  goto InteropCall;
              }

              return false;
          }

          // All common positions matched. The match may still be affected by a special
          // character, so look at the first char past the common part of the longer span.
          if (source.Length < prefix.Length)
          {
              if (pPrefix[common] >= 0x80)
              {
                  goto InteropCall;
              }

              return false;
          }

          if (source.Length > prefix.Length && pSource[common] >= 0x80)
          {
              goto InteropCall;
          }

          return true;

      InteropCall:
          return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Managed fast path for a case-sensitive prefix test. Walks both spans from the front; as soon as
  /// a char that is &gt;= 0x80 or flagged in <c>HighCharTable</c> is seen, the decision is handed to the
  /// native <c>StartsWith</c>.
  /// </summary>
  /// <param name="source">Span to test; asserted non-empty.</param>
  /// <param name="prefix">Prefix to look for; asserted non-empty.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> starts with <paramref name="prefix"/>.</returns>
  private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      // Number of leading positions present in both spans.
      int common = Math.Min(source.Length, prefix.Length);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
      {
          for (int i = 0; i < common; i++)
          {
              int sourceChar = pSource[i];
              int prefixChar = pPrefix[i];

              if (sourceChar >= 0x80 || prefixChar >= 0x80 || HighCharTable[sourceChar] || HighCharTable[prefixChar])
              {
                  goto InteropCall;
              }

              if (sourceChar == prefixChar)
              {
                  continue;
              }

              // The match may be affected by a special character: make sure the char following
              // the mismatch, on either side, is regular ASCII before answering "no".
              if (i < source.Length - 1 && pSource[i + 1] >= 0x80)
              {
                  goto InteropCall;
              }

              if (i < prefix.Length - 1 && pPrefix[i + 1] >= 0x80)
              {
                  goto InteropCall;
              }

              return false;
          }

          // All common positions matched. The match may still be affected by a special
          // character, so look at the first char past the common part of the longer span.
          if (source.Length < prefix.Length)
          {
              if (pPrefix[common] >= 0x80)
              {
                  goto InteropCall;
              }

              return false;
          }

          if (source.Length > prefix.Length && pSource[common] >= 0x80)
          {
              goto InteropCall;
          }

          return true;

      InteropCall:
          return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Culture-sensitive suffix test. Uses the managed ASCII fast path when this instance and
  /// <paramref name="options"/> allow it, otherwise calls the native <c>EndsWith</c>.
  /// </summary>
  /// <param name="source">Span to test; asserted non-empty.</param>
  /// <param name="suffix">Suffix to look for; asserted non-empty.</param>
  /// <param name="options">Comparison options; asserted not to include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>True when <paramref name="source"/> ends with <paramref name="suffix"/>.</returns>
  private unsafe bool EndsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
      {
          bool ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
          return ignoreCase
              ? EndsWithOrdinalIgnoreCaseHelper(source, suffix, options)
              : EndsWithOrdinalHelper(source, suffix, options);
      }

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
      {
          return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Managed fast path for a case-insensitive suffix test. Walks both spans from the back with ASCII
  /// upper-casing; as soon as a char that is &gt;= 0x80 or flagged in <c>HighCharTable</c> is seen, the
  /// decision is handed to the native <c>EndsWith</c>.
  /// </summary>
  /// <param name="source">Span to test; asserted non-empty.</param>
  /// <param name="suffix">Suffix to look for; asserted non-empty.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> ends with <paramref name="suffix"/>.</returns>
  private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      // Number of trailing positions present in both spans.
      int common = Math.Min(source.Length, suffix.Length);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
      {
          // fromEnd counts positions back from the last char of each span (1 = last char).
          for (int fromEnd = 1; fromEnd <= common; fromEnd++)
          {
              int sourceChar = pSource[source.Length - fromEnd];
              int suffixChar = pSuffix[suffix.Length - fromEnd];

              if (sourceChar >= 0x80 || suffixChar >= 0x80 || HighCharTable[sourceChar] || HighCharTable[suffixChar])
              {
                  goto InteropCall;
              }

              if (sourceChar == suffixChar)
              {
                  continue;
              }

              // Upper-case both chars; the unsigned subtraction needs only one compare per char.
              if ((uint)(sourceChar - 'a') <= (uint)('z' - 'a'))
              {
                  sourceChar -= 0x20;
              }

              if ((uint)(suffixChar - 'a') <= (uint)('z' - 'a'))
              {
                  suffixChar -= 0x20;
              }

              if (sourceChar != suffixChar)
              {
                  return false;
              }
          }

          // Every common position matched; it is a suffix only if the whole suffix was consumed.
          return source.Length >= suffix.Length;

      InteropCall:
          return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Managed fast path for a case-sensitive suffix test. Walks both spans from the back; as soon as a
  /// char that is &gt;= 0x80 or flagged in <c>HighCharTable</c> is seen, the decision is handed to the
  /// native <c>EndsWith</c>.
  /// </summary>
  /// <param name="source">Span to test; asserted non-empty.</param>
  /// <param name="suffix">Suffix to look for; asserted non-empty.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> ends with <paramref name="suffix"/>.</returns>
  private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      // Number of trailing positions present in both spans.
      int common = Math.Min(source.Length, suffix.Length);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
      {
          // fromEnd counts positions back from the last char of each span (1 = last char).
          for (int fromEnd = 1; fromEnd <= common; fromEnd++)
          {
              int sourceChar = pSource[source.Length - fromEnd];
              int suffixChar = pSuffix[suffix.Length - fromEnd];

              if (sourceChar >= 0x80 || suffixChar >= 0x80 || HighCharTable[sourceChar] || HighCharTable[suffixChar])
              {
                  goto InteropCall;
              }

              if (sourceChar != suffixChar)
              {
                  return false;
              }
          }

          // Every common position matched; it is a suffix only if the whole suffix was consumed.
          return source.Length >= suffix.Length;

      InteropCall:
          return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Builds a <see cref="SortKey"/> for <paramref name="source"/> by asking the native layer for the
  /// key size first and then filling an exactly sized buffer.
  /// </summary>
  /// <param name="source">String to create the key for.</param>
  /// <param name="options">Comparison options; must not contain any bit from <c>ValidSortkeyCtorMaskOffFlags</c>.</param>
  /// <returns>A sort key whose data is empty when <paramref name="source"/> is empty.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
  /// <exception cref="ArgumentException">
  /// <paramref name="options"/> contains an invalid flag, or the second native call reports a
  /// length different from the first one.
  /// </exception>
  private unsafe SortKey CreateSortKey(string source, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      if (source is null)
      {
          throw new ArgumentNullException(nameof(source));
      }

      if ((options & ValidSortkeyCtorMaskOffFlags) != 0)
      {
          throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options));
      }

      byte[] keyData = Array.Empty<byte>();

      if (source.Length != 0)
      {
          fixed (char* pSource = source)
          {
              // First call with a null buffer only reports the required size.
              int requiredLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options);
              keyData = new byte[requiredLength];

              fixed (byte* pSortKey = keyData)
              {
                  int writtenLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, requiredLength, options);
                  if (writtenLength != requiredLength)
                  {
                      throw new ArgumentException(SR.Arg_ExternalException);
                  }
              }
          }
      }

      return new SortKey(Name, source, options, keyData);
  }
  /// <summary>
  /// Checks that the buffer contains no unpaired surrogates and no code points whose Unicode
  /// category is <see cref="UnicodeCategory.PrivateUse"/> or <see cref="UnicodeCategory.OtherNotAssigned"/>.
  /// </summary>
  /// <param name="text">Pointer to the first UTF-16 code unit.</param>
  /// <param name="length">Number of code units to inspect.</param>
  /// <returns>True when every code point passes the checks (including when <paramref name="length"/> is 0).</returns>
  private static unsafe bool IsSortable(char *text, int length)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      for (int position = 0; position < length;)
      {
          char current = text[position];
          UnicodeCategory category;
          int consumed;

          if (char.IsHighSurrogate(current))
          {
              // A high surrogate must be immediately followed by a low surrogate.
              if (position == length - 1 || !char.IsLowSurrogate(text[position + 1]))
              {
                  return false;
              }

              category = CharUnicodeInfo.GetUnicodeCategory(char.ConvertToUtf32(current, text[position + 1]));
              consumed = 2;
          }
          else if (char.IsLowSurrogate(current))
          {
              // A low surrogate without a preceding high surrogate is unpaired.
              return false;
          }
          else
          {
              category = CharUnicodeInfo.GetUnicodeCategory(current);
              consumed = 1;
          }

          if (category == UnicodeCategory.PrivateUse || category == UnicodeCategory.OtherNotAssigned)
          {
              return false;
          }

          position += consumed;
      }

      return true;
  }
  730. // -----------------------------
  731. // ---- PAL layer ends here ----
  732. // -----------------------------
  /// <summary>
  /// Computes a hash code for <paramref name="source"/> by hashing the sort key that the native
  /// layer produces for it under <paramref name="options"/>.
  /// </summary>
  /// <param name="source">Text to hash.</param>
  /// <param name="options">Comparison options; asserted not to include Ordinal or OrdinalIgnoreCase.</param>
  /// <returns>0 for an empty span, otherwise the Marvin hash of the sort key bytes.</returns>
  /// <exception cref="ArgumentException">
  /// The native call reported a sort key length of 0, or the second attempt still did not fit the buffer.
  /// </exception>
  /// <remarks>
  /// Any buffer rented from <see cref="ArrayPool{T}.Shared"/> is returned in a finally block, so it
  /// goes back to the pool on the exception paths as well as on success.
  /// </remarks>
  internal unsafe int GetHashCodeOfStringCore(ReadOnlySpan<char> source, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      if (source.Length == 0)
      {
          return 0;
      }

      // according to ICU User Guide the performance of ucol_getSortKey is worse when it is called with null output buffer
      // the solution is to try to fill the sort key in a temporary buffer of size equal 4 x string length
      // 1MB is the biggest array that can be rented from ArrayPool.Shared without memory allocation
      int sortKeyLength = (source.Length > 1024 * 1024 / 4) ? 0 : 4 * source.Length;

      byte[]? borrowedArray = null;
      Span<byte> sortKey = sortKeyLength <= 1024
          ? stackalloc byte[1024]
          : (borrowedArray = ArrayPool<byte>.Shared.Rent(sortKeyLength));

      try
      {
          fixed (char* pSource = &MemoryMarshal.GetReference(source))
          {
              fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey))
              {
                  sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options);
              }

              if (sortKeyLength > sortKey.Length) // slow path for big strings
              {
                  if (borrowedArray != null)
                  {
                      ArrayPool<byte>.Shared.Return(borrowedArray);

                      // Clear the reference so the finally block cannot return the same
                      // array a second time if the Rent call below throws.
                      borrowedArray = null;
                  }

                  sortKey = (borrowedArray = ArrayPool<byte>.Shared.Rent(sortKeyLength));

                  fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey))
                  {
                      sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKey.Length, options);
                  }
              }
          }

          if (sortKeyLength == 0 || sortKeyLength > sortKey.Length) // internal error (0) or a bug (2nd call failed) in ucol_getSortKey
          {
              throw new ArgumentException(SR.Arg_ExternalException);
          }

          return Marvin.ComputeHash32(sortKey.Slice(0, sortKeyLength), Marvin.DefaultSeed);
      }
      finally
      {
          if (borrowedArray != null)
          {
              ArrayPool<byte>.Shared.Return(borrowedArray);
          }
      }
  }
  // Picks the ordinal comparison mode that corresponds to `options`:
  // OrdinalIgnoreCase when the IgnoreCase flag is set, Ordinal otherwise.
  // All other flags in `options` are disregarded.
  private static CompareOptions GetOrdinalCompareOptions(CompareOptions options)
  {
      bool ignoreCase = (options & CompareOptions.IgnoreCase) != 0;

      return ignoreCase
          ? CompareOptions.OrdinalIgnoreCase
          : CompareOptions.Ordinal;
  }
  // Returns true when `options` does not contain IgnoreSymbols.
  // Unlike the other Ignore options, IgnoreSymbols impacts ASCII characters (e.g. ').
  private static bool CanUseAsciiOrdinalForOptions(CompareOptions options)
  {
      bool ignoresSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
      return !ignoresSymbols;
  }
  // Builds the SortVersion for this instance from the version number reported by
  // Interop.Globalization.GetSortVersion for the current sort handle and from LCID.
  // The accompanying Guid carries the sort version in its first (int) component and
  // the four bytes of LCID, most significant first, in its last four byte components;
  // every component in between is zero.
  private SortVersion GetSortVersion()
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      int sortVersion = Interop.Globalization.GetSortVersion(_sortHandle);
      int lcid = LCID;

      byte lcidByte3 = (byte) (lcid >> 24);
      byte lcidByte2 = (byte) ((lcid & 0x00FF0000) >> 16);
      byte lcidByte1 = (byte) ((lcid & 0x0000FF00) >> 8);
      byte lcidByte0 = (byte) (lcid & 0xFF);

      var versionId = new Guid(sortVersion, 0, 0, 0, 0, 0, 0, lcidByte3, lcidByte2, lcidByte1, lcidByte0);

      return new SortVersion(sortVersion, lcid, versionId);
  }
  // Process-wide cache that maps a sort name to its native sort handle.
  private static class SortHandleCache
  {
      // in most scenarios there is a limited number of cultures with limited number of sort options
      // so caching the sort handles and not freeing them is OK, see https://github.com/dotnet/coreclr/pull/25117 for more
      private static readonly Dictionary<string, IntPtr> s_sortNameToSortHandleCache = new Dictionary<string, IntPtr>();

      // Returns the handle cached for `sortName`, opening and caching a new one via
      // Interop.Globalization.GetSortHandle on a miss. The lookup and the insertion
      // both happen while holding a lock on the dictionary.
      // Throws OutOfMemoryException when the native call reports OutOfMemory and
      // ExternalException for any other non-success result code.
      internal static IntPtr GetCachedSortHandle(string sortName)
      {
          lock (s_sortNameToSortHandleCache)
          {
              if (s_sortNameToSortHandleCache.TryGetValue(sortName, out IntPtr handle))
              {
                  return handle;
              }

              Interop.Globalization.ResultCode status = Interop.Globalization.GetSortHandle(sortName, out handle);

              if (status == Interop.Globalization.ResultCode.OutOfMemory)
              {
                  throw new OutOfMemoryException();
              }

              if (status != Interop.Globalization.ResultCode.Success)
              {
                  throw new ExternalException(SR.Arg_ExternalException);
              }

              try
              {
                  s_sortNameToSortHandleCache.Add(sortName, handle);
              }
              catch
              {
                  // The handle could not be recorded, so close it before propagating the failure.
                  Interop.Globalization.CloseSortHandle(handle);
                  throw;
              }

              return handle;
          }
      }
  }
  // Lookup table with one flag per code point in the range 0x00-0x7F.
  // The flag is true for:
  //   * the control characters 0x00-0x1F, except 0x09, 0x0B and 0x0C,
  //   * 0x27 ('), 0x2D (-),
  //   * 0x7F (DEL),
  // and false for every other entry.
  // NOTE(review): presumably a true entry marks an ASCII character that callers must
  // not treat with plain ordinal handling - confirm against the call sites.
  private static ReadOnlySpan<bool> HighCharTable => new bool[0x80]
  {
      true,  true,  true,  true,  true,  true,  true,  true,  // 0x00-0x07
      true,  false, true,  false, false, true,  true,  true,  // 0x08-0x0F (0x09, 0x0B, 0x0C are false)
      true,  true,  true,  true,  true,  true,  true,  true,  // 0x10-0x17
      true,  true,  true,  true,  true,  true,  true,  true,  // 0x18-0x1F
      false, false, false, false, false, false, false, true,  // 0x20-0x27: space ! " # $ % & '
      false, false, false, false, false, true,  false, false, // 0x28-0x2F: ( ) * + , - . /
      false, false, false, false, false, false, false, false, // 0x30-0x37: 0 1 2 3 4 5 6 7
      false, false, false, false, false, false, false, false, // 0x38-0x3F: 8 9 : ; < = > ?
      false, false, false, false, false, false, false, false, // 0x40-0x47: @ A B C D E F G
      false, false, false, false, false, false, false, false, // 0x48-0x4F: H I J K L M N O
      false, false, false, false, false, false, false, false, // 0x50-0x57: P Q R S T U V W
      false, false, false, false, false, false, false, false, // 0x58-0x5F: X Y Z [ \ ] ^ _
      false, false, false, false, false, false, false, false, // 0x60-0x67: ` a b c d e f g
      false, false, false, false, false, false, false, false, // 0x68-0x6F: h i j k l m n o
      false, false, false, false, false, false, false, false, // 0x70-0x77: p q r s t u v w
      false, false, false, false, false, false, false, true,  // 0x78-0x7F: x y z { | } ~ DEL
  };
  966. }
  967. }