CompareInfo.Unix.cs 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Buffers;
  5. using System.Diagnostics;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.InteropServices;
  8. using System.Security;
  9. using Internal.Runtime.CompilerServices;
  10. namespace System.Globalization
  11. {
  12. public partial class CompareInfo
  13. {
  /// <summary>
  /// Native sort handle filled in by Interop.Globalization.GetSortHandle during InitSort.
  /// InitSort does not assign it when GlobalizationMode.Invariant is set.
  /// </summary>
  [NonSerialized] private Interop.Globalization.SafeSortHandle _sortHandle;

  /// <summary>
  /// Set by InitSort: true in invariant mode, or when the sort name is "en-US" or empty.
  /// The comparison routines in this file check it before taking their ASCII fast paths.
  /// </summary>
  [NonSerialized] private bool _isAsciiEqualityOrdinal;
  /// <summary>
  /// Records the culture's sort name and, unless globalization-invariant mode is active,
  /// obtains the native sort handle for that name.
  /// </summary>
  /// <param name="culture">Culture whose SortName selects the collation.</param>
  /// <exception cref="OutOfMemoryException">The native call reported OutOfMemory.</exception>
  /// <exception cref="ExternalException">The native call failed for any other reason.</exception>
  private void InitSort(CultureInfo culture)
  {
      _sortName = culture.SortName;

      // Invariant mode never touches the native layer; everything is treated as ordinal.
      if (GlobalizationMode.Invariant)
      {
          _isAsciiEqualityOrdinal = true;
          return;
      }

      byte[] utf8SortName = GetNullTerminatedUtf8String(_sortName);
      Interop.Globalization.ResultCode status = Interop.Globalization.GetSortHandle(utf8SortName, out _sortHandle);

      if (status != Interop.Globalization.ResultCode.Success)
      {
          // Release whatever handle object came back before reporting the failure.
          _sortHandle.Dispose();

          if (status == Interop.Globalization.ResultCode.OutOfMemory)
          {
              throw new OutOfMemoryException();
          }

          throw new ExternalException(SR.Arg_ExternalException);
      }

      bool isEnglishUs = _sortName == "en-US";
      _isAsciiEqualityOrdinal = isEnglishUs || _sortName == "";
  }
  /// <summary>
  /// Finds the first ordinal occurrence of <paramref name="value"/> inside the window of
  /// <paramref name="count"/> characters of <paramref name="source"/> that begins at
  /// <paramref name="startIndex"/>.
  /// </summary>
  /// <returns>
  /// The index into <paramref name="source"/> of the match, <paramref name="startIndex"/> when
  /// <paramref name="value"/> is empty, or -1 when nothing matches.
  /// </returns>
  internal static unsafe int IndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source != null);
      Debug.Assert(value != null);

      int needleLength = value.Length;

      if (needleLength == 0)
      {
          return startIndex;
      }

      if (count < needleLength)
      {
          return -1;
      }

      if (ignoreCase)
      {
          fixed (char* pSource = source)
          {
              // The native routine reports an offset relative to the pointer it was given.
              int relative = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false);
              if (relative == -1)
              {
                  return -1;
              }

              return startIndex + relative;
          }
      }

      // Last position at which the whole value still fits inside the window.
      int lastCandidate = startIndex + (count - needleLength);

      for (int candidate = startIndex; candidate <= lastCandidate; candidate++)
      {
          int matched = 0;
          while (matched < needleLength && source[candidate + matched] == value[matched])
          {
              matched++;
          }

          if (matched == needleLength)
          {
              return candidate;
          }
      }

      return -1;
  }
  /// <summary>
  /// Ordinal search of <paramref name="value"/> within <paramref name="source"/>, scanning from
  /// the start or from the end depending on <paramref name="fromBeginning"/>.
  /// </summary>
  /// <returns>Index of the first match in scan order, or -1 when there is none.</returns>
  internal static unsafe int IndexOfOrdinalCore(ReadOnlySpan<char> source, ReadOnlySpan<char> value, bool ignoreCase, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source.Length != 0);
      Debug.Assert(value.Length != 0);

      int sourceLength = source.Length;
      int valueLength = value.Length;

      if (sourceLength < valueLength)
      {
          return -1;
      }

      if (ignoreCase)
      {
          bool searchBackwards = !fromBeginning;

          fixed (char* pSource = &MemoryMarshal.GetReference(source))
          fixed (char* pValue = &MemoryMarshal.GetReference(value))
          {
              return Interop.Globalization.IndexOfOrdinalIgnoreCase(pValue, value.Length, pSource, source.Length, findLast: searchBackwards);
          }
      }

      // Highest index at which value can still fit inside source.
      int lastStart = sourceLength - valueLength;

      // 'stop' is one step past the final candidate in the chosen direction.
      int first = fromBeginning ? 0 : lastStart;
      int stop = fromBeginning ? lastStart + 1 : -1;
      int step = fromBeginning ? 1 : -1;

      for (int candidate = first; candidate != stop; candidate += step)
      {
          int matched = 0;
          while (matched < valueLength && source[candidate + matched] == value[matched])
          {
              matched++;
          }

          if (matched == valueLength)
          {
              return candidate;
          }
      }

      return -1;
  }
  /// <summary>
  /// Finds the last ordinal occurrence of <paramref name="value"/> in the window of
  /// <paramref name="count"/> characters of <paramref name="source"/> that ends at
  /// <paramref name="startIndex"/>.
  /// </summary>
  /// <returns>
  /// The index into <paramref name="source"/> of the match, <paramref name="startIndex"/> when
  /// <paramref name="value"/> is empty, or -1 when nothing matches.
  /// </returns>
  internal static unsafe int LastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source != null);
      Debug.Assert(value != null);

      int needleLength = value.Length;

      if (needleLength == 0)
      {
          return startIndex;
      }

      if (count < needleLength)
      {
          return -1;
      }

      // The search runs backwards from startIndex; windowStart is the left edge of the
      // count-character window that ends at startIndex.
      int windowStart = startIndex - count + 1;

      if (ignoreCase)
      {
          fixed (char* pSource = source)
          {
              // The native result is relative to the window's left edge.
              int relative = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + windowStart, count, findLast: true);
              if (relative == -1)
              {
                  return -1;
              }

              return windowStart + relative;
          }
      }

      for (int candidate = startIndex - needleLength + 1; candidate >= windowStart; candidate--)
      {
          int matched = 0;
          while (matched < needleLength && source[candidate + matched] == value[matched])
          {
              matched++;
          }

          if (matched == needleLength)
          {
              return candidate;
          }
      }

      return -1;
  }
  /// <summary>
  /// Pins both character references and forwards them, with their lengths, to the native
  /// ordinal-ignore-case comparison.
  /// </summary>
  private static unsafe int CompareStringOrdinalIgnoreCase(ref char string1, int count1, ref char string2, int count2)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      fixed (char* pFirst = &string1, pSecond = &string2)
      {
          int comparison = Interop.Globalization.CompareStringOrdinalIgnoreCase(pFirst, count1, pSecond, count2);
          return comparison;
      }
  }
  // TODO https://github.com/dotnet/coreclr/issues/13827:
  // Ideally this overload would not exist and callers would use the two-span overload,
  // but because of the issue above that route adds significant overhead.
  /// <summary>
  /// Compares a span against a string with the native collator bound to this instance's sort handle.
  /// </summary>
  private unsafe int CompareString(ReadOnlySpan<char> string1, string string2, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(string2 != null);

      // Ordinal flavours must not reach this method.
      const CompareOptions OrdinalFlags = CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase;
      Debug.Assert((options & OrdinalFlags) == 0);

      fixed (char* pLeft = &MemoryMarshal.GetReference(string1), pRight = &string2.GetRawStringData())
      {
          int leftLength = string1.Length;
          int rightLength = string2.Length;
          return Interop.Globalization.CompareString(_sortHandle, pLeft, leftLength, pRight, rightLength, options);
      }
  }
  /// <summary>
  /// Compares two spans with the native collator bound to this instance's sort handle.
  /// </summary>
  private unsafe int CompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      // Ordinal flavours must not reach this method.
      const CompareOptions OrdinalFlags = CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase;
      Debug.Assert((options & OrdinalFlags) == 0);

      fixed (char* pLeft = &MemoryMarshal.GetReference(string1), pRight = &MemoryMarshal.GetReference(string2))
      {
          int leftLength = string1.Length;
          int rightLength = string2.Length;
          return Interop.Globalization.CompareString(_sortHandle, pLeft, leftLength, pRight, rightLength, options);
      }
  }
  /// <summary>
  /// Culture-sensitive forward search for <paramref name="target"/> in the
  /// <paramref name="count"/>-character window of <paramref name="source"/> starting at
  /// <paramref name="startIndex"/>.
  /// </summary>
  /// <param name="matchLengthPtr">
  /// Optional; on the CORECLR fast path it receives target.Length when a match is found,
  /// otherwise it is passed through to the native call.
  /// </param>
  /// <returns>Index into <paramref name="source"/> of the match, or -1.</returns>
  internal unsafe int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(target != null);
      Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);
      Debug.Assert((options & CompareOptions.Ordinal) == 0);

#if CORECLR
      bool useOrdinalFastPath = _isAsciiEqualityOrdinal
          && CanUseAsciiOrdinalForOptions(options)
          && source.IsFastSort()
          && target.IsFastSort();

      if (useOrdinalFastPath)
      {
          int ordinalIndex = IndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
          if (ordinalIndex != -1 && matchLengthPtr != null)
          {
              *matchLengthPtr = target.Length;
          }

          return ordinalIndex;
      }
#endif

      fixed (char* pSource = source)
      fixed (char* pTarget = target)
      {
          // The native result is relative to pSource + startIndex.
          int relative = Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource + startIndex, count, options, matchLengthPtr);
          if (relative == -1)
          {
              return -1;
          }

          return relative + startIndex;
      }
  }
  // For now, this method is only called from Span APIs with either options == CompareOptions.None or CompareOptions.IgnoreCase
  /// <summary>
  /// Span-based culture-sensitive search. Uses the ASCII helpers when this instance and the
  /// options allow it, otherwise calls the native IndexOf / LastIndexOf directly.
  /// </summary>
  internal unsafe int IndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(source.Length != 0);
      Debug.Assert(target.Length != 0);

      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
      {
          bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;
          return ignoreCase
              ? IndexOfOrdinalIgnoreCaseHelper(source, target, options, matchLengthPtr, fromBeginning)
              : IndexOfOrdinalHelper(source, target, options, matchLengthPtr, fromBeginning);
      }

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          // Note: the LastIndexOf interop overload used here takes no match-length pointer.
          return fromBeginning
              ? Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr)
              : Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// Case-insensitive twin of IndexOfOrdinalHelper. The two are kept separate on purpose:
  /// merged, the JIT could not optimize the ignore-case path away.
  /// Scans with plain ASCII comparison and switches to the native collator as soon as a
  /// character is 0x80 or above, or is flagged in s_highCharTable.
  /// </summary>
  /// <returns>Index of the match in scan order, or -1; or whatever the native call returns.</returns>
  private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!target.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          int sourceLength = source.Length;
          int targetLength = target.Length;

          if (targetLength > sourceLength)
          {
              goto InteropCall;
          }

          // Any special character in the target rules out the fast path entirely.
          for (int t = 0; t < targetLength; t++)
          {
              char c = pTarget[t];
              if (c >= 0x80 || s_highCharTable[c])
              {
                  goto InteropCall;
              }
          }

          // Highest index at which target still fits; 'stop' is one step past the last candidate.
          int lastStart = sourceLength - targetLength;
          int first = fromBeginning ? 0 : lastStart;
          int stop = fromBeginning ? lastStart + 1 : -1;
          int step = fromBeginning ? 1 : -1;

          for (int candidate = first; candidate != stop; candidate += step)
          {
              int matched = 0;
              while (matched < targetLength)
              {
                  char sourceChar = pSource[candidate + matched];
                  char targetChar = pTarget[matched];

                  bool identicalPlainAscii = sourceChar == targetChar && sourceChar < 0x80 && !s_highCharTable[sourceChar];
                  if (!identicalPlainAscii)
                  {
                      // uppercase both chars - one range compare per char is enough
                      if ((uint)(sourceChar - 'a') <= ('z' - 'a'))
                      {
                          sourceChar = (char)(sourceChar - 0x20);
                      }

                      if ((uint)(targetChar - 'a') <= ('z' - 'a'))
                      {
                          targetChar = (char)(targetChar - 0x20);
                      }

                      if (sourceChar >= 0x80 || s_highCharTable[sourceChar])
                      {
                          goto InteropCall;
                      }

                      if (sourceChar != targetChar)
                      {
                          break;
                      }
                  }

                  matched++;
              }

              if (matched == targetLength)
              {
                  if (matchLengthPtr != null)
                  {
                      *matchLengthPtr = targetLength;
                  }

                  return candidate;
              }
          }

          return -1;

      InteropCall:
          return fromBeginning
              ? Interop.Globalization.IndexOf(_sortHandle, pTarget, targetLength, pSource, sourceLength, options, matchLengthPtr)
              : Interop.Globalization.LastIndexOf(_sortHandle, pTarget, targetLength, pSource, sourceLength, options);
      }
  }
  /// <summary>
  /// Case-sensitive ASCII fast path for IndexOf / LastIndexOf over spans.
  /// Scans with plain character comparison and switches to the native collator as soon as a
  /// character is 0x80 or above, or is flagged in s_highCharTable.
  /// </summary>
  /// <returns>Index of the match in scan order, or -1; or whatever the native call returns.</returns>
  private unsafe int IndexOfOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!target.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pTarget = &MemoryMarshal.GetReference(target))
      {
          int sourceLength = source.Length;
          int targetLength = target.Length;

          if (targetLength > sourceLength)
          {
              goto InteropCall;
          }

          // Any special character in the target rules out the fast path entirely.
          for (int t = 0; t < targetLength; t++)
          {
              char c = pTarget[t];
              if (c >= 0x80 || s_highCharTable[c])
              {
                  goto InteropCall;
              }
          }

          // Highest index at which target still fits; 'stop' is one step past the last candidate.
          int lastStart = sourceLength - targetLength;
          int first = fromBeginning ? 0 : lastStart;
          int stop = fromBeginning ? lastStart + 1 : -1;
          int step = fromBeginning ? 1 : -1;

          for (int candidate = first; candidate != stop; candidate += step)
          {
              int matched = 0;
              while (matched < targetLength)
              {
                  char sourceChar = pSource[candidate + matched];

                  // A special source character is checked before equality, as in the original order.
                  if (sourceChar >= 0x80 || s_highCharTable[sourceChar])
                  {
                      goto InteropCall;
                  }

                  if (sourceChar != pTarget[matched])
                  {
                      break;
                  }

                  matched++;
              }

              if (matched == targetLength)
              {
                  if (matchLengthPtr != null)
                  {
                      *matchLengthPtr = targetLength;
                  }

                  return candidate;
              }
          }

          return -1;

      InteropCall:
          return fromBeginning
              ? Interop.Globalization.IndexOf(_sortHandle, pTarget, targetLength, pSource, sourceLength, options, matchLengthPtr)
              : Interop.Globalization.LastIndexOf(_sortHandle, pTarget, targetLength, pSource, sourceLength, options);
      }
  }
  /// <summary>
  /// Culture-sensitive backward search for <paramref name="target"/> in the
  /// <paramref name="count"/>-character window of <paramref name="source"/> that ends at
  /// <paramref name="startIndex"/>.
  /// </summary>
  /// <returns>
  /// Index into <paramref name="source"/> of the match, <paramref name="startIndex"/> for an
  /// empty target, or -1.
  /// </returns>
  private unsafe int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(target != null);
      Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);

      if (target.Length == 0)
      {
          return startIndex;
      }

      if (options == CompareOptions.Ordinal)
      {
          return LastIndexOfOrdinalCore(source, target, startIndex, count, ignoreCase: false);
      }

#if CORECLR
      bool useOrdinalFastPath = _isAsciiEqualityOrdinal
          && CanUseAsciiOrdinalForOptions(options)
          && source.IsFastSort()
          && target.IsFastSort();

      if (useOrdinalFastPath)
      {
          return LastIndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
      }
#endif

      // The search runs backwards from startIndex; windowStart is the left edge of the
      // count-character window that ends at startIndex.
      int windowStart = startIndex - count + 1;

      fixed (char* pSource = source)
      fixed (char* pTarget = target)
      {
          // The native result is relative to the window's left edge.
          int relative = Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource + windowStart, count, options);
          if (relative == -1)
          {
              return -1;
          }

          return relative + windowStart;
      }
  }
  /// <summary>
  /// Culture-sensitive prefix test for strings. Under CORECLR, fast-sort inputs are routed
  /// through the ordinal IsPrefix path; everything else goes to the native collator.
  /// </summary>
  private bool StartsWith(string source, string prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(!string.IsNullOrEmpty(prefix));

      // Ordinal flavours must not reach this method.
      const CompareOptions OrdinalFlags = CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase;
      Debug.Assert((options & OrdinalFlags) == 0);

#if CORECLR
      bool useOrdinalFastPath = _isAsciiEqualityOrdinal
          && CanUseAsciiOrdinalForOptions(options)
          && source.IsFastSort()
          && prefix.IsFastSort();

      if (useOrdinalFastPath)
      {
          return IsPrefix(source, prefix, GetOrdinalCompareOptions(options));
      }
#endif

      bool startsWithPrefix = Interop.Globalization.StartsWith(_sortHandle, prefix, prefix.Length, source, source.Length, options);
      return startsWithPrefix;
  }
  /// <summary>
  /// Culture-sensitive prefix test for spans. Uses the ASCII helpers when this instance and the
  /// options allow it and the source is at least as long as the prefix; otherwise asks the
  /// native collator.
  /// </summary>
  /// <param name="source">Non-empty text to examine.</param>
  /// <param name="prefix">Non-empty candidate prefix.</param>
  /// <param name="options">Linguistic compare options; the ordinal flavours are not allowed here.</param>
  /// <returns>True when <paramref name="source"/> starts with <paramref name="prefix"/>.</returns>
  private unsafe bool StartsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      // A source with fewer UTF-16 units than the prefix is NOT automatically a mismatch under
      // linguistic comparison: a precomposed character equals its longer decomposed form, and
      // ignorable characters add length without adding weight. Such inputs therefore go to the
      // native collator instead of being answered with 'false' here. This also matches the
      // IndexOf helpers, which defer to the native call when the target is longer than the source.
      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.Length >= prefix.Length)
      {
          if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase)
          {
              return StartsWithOrdinalIgnoreCaseHelper(source, prefix, options);
          }

          return StartsWithOrdinalHelper(source, prefix, options);
      }

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
      {
          return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// ASCII, case-insensitive fast path for StartsWith. Walks both spans from the front while
  /// every character is below 0x80 and not flagged in s_highCharTable; if a special character
  /// is met before the prefix is exhausted, the native collator decides.
  /// </summary>
  /// <param name="source">Non-empty text, at least as long as <paramref name="prefix"/>.</param>
  /// <param name="prefix">Non-empty candidate prefix.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> starts with <paramref name="prefix"/>.</returns>
  private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= prefix.Length);

      int length = prefix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(prefix))
      {
          char* a = ap;
          char* b = bp;

          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              int charA = *a;
              int charB = *b;

              if (charA == charB)
              {
                  a++; b++;
                  length--;
                  continue;
              }

              // uppercase both chars - notice that we need just one compare per char
              if ((uint)(charA - 'a') <= (uint)('z' - 'a')) charA -= 0x20;
              if ((uint)(charB - 'a') <= (uint)('z' - 'a')) charB -= 0x20;

              if (charA != charB)
                  return false;

              // Next char
              a++; b++;
              length--;
          }

          if (length == 0) return true;

          // Fall back to the native collator with the COMPLETE strings. Previously the source
          // was cut to the remaining prefix length, which dropped source characters that can
          // be needed for a linguistic match (for example a base letter followed by a
          // combining mark matching a single precomposed prefix character).
          return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// ASCII, case-sensitive fast path for StartsWith. Walks both spans from the front while
  /// every character is below 0x80 and not flagged in s_highCharTable; if a special character
  /// is met before the prefix is exhausted, the native collator decides.
  /// </summary>
  /// <param name="source">Non-empty text, at least as long as <paramref name="prefix"/>.</param>
  /// <param name="prefix">Non-empty candidate prefix.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> starts with <paramref name="prefix"/>.</returns>
  private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!prefix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= prefix.Length);

      int length = prefix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(prefix))
      {
          char* a = ap;
          char* b = bp;

          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              int charA = *a;
              int charB = *b;

              if (charA != charB)
                  return false;

              // Next char
              a++; b++;
              length--;
          }

          if (length == 0) return true;

          // Fall back to the native collator with the COMPLETE strings. Previously the source
          // was cut to the remaining prefix length, which dropped source characters that can
          // be needed for a linguistic match (for example a base letter followed by a
          // combining mark matching a single precomposed prefix character).
          return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// Culture-sensitive suffix test for strings. Under CORECLR, fast-sort inputs are routed
  /// through the ordinal IsSuffix path; everything else goes to the native collator.
  /// </summary>
  private bool EndsWith(string source, string suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!string.IsNullOrEmpty(source));
      Debug.Assert(!string.IsNullOrEmpty(suffix));

      // Ordinal flavours must not reach this method.
      const CompareOptions OrdinalFlags = CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase;
      Debug.Assert((options & OrdinalFlags) == 0);

#if CORECLR
      bool useOrdinalFastPath = _isAsciiEqualityOrdinal
          && CanUseAsciiOrdinalForOptions(options)
          && source.IsFastSort()
          && suffix.IsFastSort();

      if (useOrdinalFastPath)
      {
          return IsSuffix(source, suffix, GetOrdinalCompareOptions(options));
      }
#endif

      bool endsWithSuffix = Interop.Globalization.EndsWith(_sortHandle, suffix, suffix.Length, source, source.Length, options);
      return endsWithSuffix;
  }
  /// <summary>
  /// Culture-sensitive suffix test for spans. Uses the ASCII helpers when this instance and the
  /// options allow it and the source is at least as long as the suffix; otherwise asks the
  /// native collator.
  /// </summary>
  /// <param name="source">Non-empty text to examine.</param>
  /// <param name="suffix">Non-empty candidate suffix.</param>
  /// <param name="options">Linguistic compare options; the ordinal flavours are not allowed here.</param>
  /// <returns>True when <paramref name="source"/> ends with <paramref name="suffix"/>.</returns>
  private unsafe bool EndsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

      // A source with fewer UTF-16 units than the suffix is NOT automatically a mismatch under
      // linguistic comparison: a precomposed character equals its longer decomposed form, and
      // ignorable characters add length without adding weight. Such inputs therefore go to the
      // native collator instead of being answered with 'false' here. This also matches the
      // IndexOf helpers, which defer to the native call when the target is longer than the source.
      if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.Length >= suffix.Length)
      {
          if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase)
          {
              return EndsWithOrdinalIgnoreCaseHelper(source, suffix, options);
          }

          return EndsWithOrdinalHelper(source, suffix, options);
      }

      fixed (char* pSource = &MemoryMarshal.GetReference(source))
      fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
      {
          return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options);
      }
  }
  /// <summary>
  /// ASCII, case-insensitive fast path for EndsWith. Walks both spans from the back while
  /// every character is below 0x80 and not flagged in s_highCharTable; if a special character
  /// is met before the suffix is exhausted, the native collator decides.
  /// </summary>
  /// <param name="source">Non-empty text, at least as long as <paramref name="suffix"/>.</param>
  /// <param name="suffix">Non-empty candidate suffix.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> ends with <paramref name="suffix"/>.</returns>
  private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= suffix.Length);

      int length = suffix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(suffix))
      {
          // Start at the last character of each span and move towards the front.
          char* a = ap + source.Length - 1;
          char* b = bp + suffix.Length - 1;

          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              int charA = *a;
              int charB = *b;

              if (charA == charB)
              {
                  a--; b--;
                  length--;
                  continue;
              }

              // uppercase both chars - notice that we need just one compare per char
              if ((uint)(charA - 'a') <= (uint)('z' - 'a')) charA -= 0x20;
              if ((uint)(charB - 'a') <= (uint)('z' - 'a')) charB -= 0x20;

              if (charA != charB)
                  return false;

              // Next char
              a--; b--;
              length--;
          }

          if (length == 0) return true;

          // Fall back to the native collator with the COMPLETE strings. Previously only the
          // last 'length' source characters before the cursor were passed, which dropped
          // earlier source characters that can be needed for a linguistic match (for example
          // a base letter preceding a combining mark, matching one precomposed suffix character).
          return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// ASCII, case-sensitive fast path for EndsWith. Walks both spans from the back while
  /// every character is below 0x80 and not flagged in s_highCharTable; if a special character
  /// is met before the suffix is exhausted, the native collator decides.
  /// </summary>
  /// <param name="source">Non-empty text, at least as long as <paramref name="suffix"/>.</param>
  /// <param name="suffix">Non-empty candidate suffix.</param>
  /// <param name="options">Options forwarded to the native call on fallback.</param>
  /// <returns>True when <paramref name="source"/> ends with <paramref name="suffix"/>.</returns>
  private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);
      Debug.Assert(!source.IsEmpty);
      Debug.Assert(!suffix.IsEmpty);
      Debug.Assert(_isAsciiEqualityOrdinal);
      Debug.Assert(source.Length >= suffix.Length);

      int length = suffix.Length;

      fixed (char* ap = &MemoryMarshal.GetReference(source))
      fixed (char* bp = &MemoryMarshal.GetReference(suffix))
      {
          // Start at the last character of each span and move towards the front.
          char* a = ap + source.Length - 1;
          char* b = bp + suffix.Length - 1;

          while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
          {
              int charA = *a;
              int charB = *b;

              if (charA != charB)
                  return false;

              // Next char
              a--; b--;
              length--;
          }

          if (length == 0) return true;

          // Fall back to the native collator with the COMPLETE strings. Previously only the
          // last 'length' source characters before the cursor were passed, which dropped
          // earlier source characters that can be needed for a linguistic match (for example
          // a base letter preceding a combining mark, matching one precomposed suffix character).
          return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options);
      }
  }
  /// <summary>
  /// Builds a SortKey for <paramref name="source"/> by asking the native layer first for the
  /// key length and then for the key bytes.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
  /// <exception cref="ArgumentException">
  /// <paramref name="options"/> contains a flag in ValidSortkeyCtorMaskOffFlags, or the second
  /// native call returned a length different from the first.
  /// </exception>
  private unsafe SortKey CreateSortKey(string source, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      if (source == null)
      {
          throw new ArgumentNullException(nameof(source));
      }

      if ((options & ValidSortkeyCtorMaskOffFlags) != 0)
      {
          throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options));
      }

      // An empty string gets an empty key without calling into native code.
      byte[] keyData = Array.Empty<Byte>();

      if (source.Length != 0)
      {
          fixed (char* pSource = source)
          {
              // First call: null buffer, returns the required length.
              int requiredLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options);
              keyData = new byte[requiredLength];

              fixed (byte* pSortKey = keyData)
              {
                  // Second call: fill the buffer; the reported length must agree with the first call.
                  int writtenLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, requiredLength, options);
                  if (writtenLength != requiredLength)
                  {
                      throw new ArgumentException(SR.Arg_ExternalException);
                  }
              }
          }
      }

      return new SortKey(Name, source, options, keyData);
  }
  /// <summary>
  /// Reports whether every code point in the buffer is sortable: no unpaired surrogates and no
  /// code point whose Unicode category is PrivateUse or OtherNotAssigned.
  /// </summary>
  /// <param name="text">Pointer to the first UTF-16 code unit.</param>
  /// <param name="length">Number of code units to examine.</param>
  private static unsafe bool IsSortable(char *text, int length)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      for (int position = 0; position < length; )
      {
          char current = text[position];
          UnicodeCategory category;
          int consumed;

          if (char.IsHighSurrogate(current))
          {
              // A high surrogate must be followed immediately by a low surrogate.
              if (position == length - 1 || !char.IsLowSurrogate(text[position + 1]))
              {
                  return false; // unpaired surrogate
              }

              category = CharUnicodeInfo.GetUnicodeCategory(char.ConvertToUtf32(current, text[position + 1]));
              consumed = 2;
          }
          else if (char.IsLowSurrogate(current))
          {
              return false; // unpaired surrogate
          }
          else
          {
              category = CharUnicodeInfo.GetUnicodeCategory(current);
              consumed = 1;
          }

          if (category == UnicodeCategory.PrivateUse || category == UnicodeCategory.OtherNotAssigned)
          {
              return false;
          }

          position += consumed;
      }

      return true;
  }
  692. // -----------------------------
  693. // ---- PAL layer ends here ----
  694. // -----------------------------
  /// <summary>
  /// Computes a culture-sensitive hash code: obtains the native sort key for
  /// <paramref name="source"/> and hashes its bytes with Marvin using the default seed.
  /// </summary>
  /// <returns>0 for an empty span; otherwise the Marvin hash of the sort key.</returns>
  /// <exception cref="ArgumentException">
  /// The second native call returned a length different from the first.
  /// </exception>
  internal unsafe int GetHashCodeOfStringCore(ReadOnlySpan<char> source, CompareOptions options)
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      // Ordinal flavours must not reach this method.
      const CompareOptions OrdinalFlags = CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase;
      Debug.Assert((options & OrdinalFlags) == 0);

      if (source.Length == 0)
      {
          return 0;
      }

      fixed (char* pSource = source)
      {
          // First call: null buffer, returns the required key length.
          int keyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options);

          // Keys of up to 512 bytes use a stack buffer; longer ones borrow from the shared pool.
          byte[] rented = null;
          Span<byte> keyBuffer = keyLength <= 512 ?
              stackalloc byte[512] :
              (rented = ArrayPool<byte>.Shared.Rent(keyLength));

          fixed (byte* pKey = &MemoryMarshal.GetReference(keyBuffer))
          {
              int writtenLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pKey, keyLength, options);
              if (writtenLength != keyLength)
              {
                  throw new ArgumentException(SR.Arg_ExternalException);
              }
          }

          // Hash only the bytes that belong to the key; the buffer may be larger.
          int hash = Marvin.ComputeHash32(keyBuffer.Slice(0, keyLength), Marvin.DefaultSeed);

          if (rented != null)
          {
              ArrayPool<byte>.Shared.Return(rented);
          }

          return hash;
      }
  }
  /// <summary>
  /// Maps linguistic options onto the matching ordinal option: OrdinalIgnoreCase when the
  /// IgnoreCase flag is present, Ordinal otherwise.
  /// </summary>
  private static CompareOptions GetOrdinalCompareOptions(CompareOptions options)
  {
      bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;
      return ignoreCase ? CompareOptions.OrdinalIgnoreCase : CompareOptions.Ordinal;
  }
  /// <summary>
  /// Tells whether the ASCII ordinal fast paths may be used with the given options.
  /// </summary>
  private static bool CanUseAsciiOrdinalForOptions(CompareOptions options)
  {
      // IgnoreSymbols is the one Ignore* option that changes how ASCII characters compare
      // (e.g. '), so its presence disqualifies the fast path.
      CompareOptions symbolsFlag = options & CompareOptions.IgnoreSymbols;
      return symbolsFlag != CompareOptions.IgnoreSymbols;
  }
  // Encodes s as UTF-8 into a newly allocated array whose final byte is 0.
  private static byte[] GetNullTerminatedUtf8String(string s)
  {
      System.Text.Encoding utf8 = System.Text.Encoding.UTF8;
      int encodedLength = utf8.GetByteCount(s);

      // One slot more than the payload: new arrays are zero-filled, so the last byte is the terminator.
      byte[] result = new byte[encodedLength + 1];

      int written = utf8.GetBytes(s, 0, s.Length, result, 0);
      Debug.Assert(written == encodedLength);

      return result;
  }
  // Builds the SortVersion for this instance from the version number reported by
  // Interop.Globalization.GetSortVersion and from LCID.
  private SortVersion GetSortVersion()
  {
      Debug.Assert(!GlobalizationMode.Invariant);

      int version = Interop.Globalization.GetSortVersion(_sortHandle);
      int lcid = LCID;

      // The Guid carries the version in its first component and the four bytes of the LCID,
      // most significant first, in its last four bytes; everything in between is zero.
      Guid sortId = new Guid(version, 0, 0, 0, 0, 0, 0,
          (byte)(lcid >> 24),
          (byte)((lcid & 0x00FF0000) >> 16),
          (byte)((lcid & 0x0000FF00) >> 8),
          (byte)(lcid & 0xFF));

      return new SortVersion(version, lcid, sortId);
  }
  // See https://github.com/dotnet/coreclr/blob/master/src/utilcode/util_nodependencies.cpp#L970
  //
  // One flag per ASCII code point, indexed 0x00 through 0x7F, eight entries per row.
  // NOTE(review): presumably a true entry marks a character that the ASCII ordinal fast paths
  // must not handle themselves - confirm against the call sites.
  //
  // 0x0B and 0x0C used to be false, unlike every other C0 control character apart from TAB (0x09).
  // They are now true, which is believed to match the native table referenced above - confirm.
  private static readonly bool[] s_highCharTable = new bool[0x80]
  {
      // 0x00-0x07: NUL and control characters
      true,  true,  true,  true,  true,  true,  true,  true,
      // 0x08-0x0F: control characters; only TAB (0x09) is false
      true,  false, true,  true,  true,  true,  true,  true,
      // 0x10-0x17: control characters
      true,  true,  true,  true,  true,  true,  true,  true,
      // 0x18-0x1F: control characters
      true,  true,  true,  true,  true,  true,  true,  true,
      // 0x20-0x27: space ! " # $ % & and apostrophe; only the apostrophe (0x27) is true
      false, false, false, false, false, false, false, true,
      // 0x28-0x2F: ( ) * + , - . and slash; only the hyphen (0x2D) is true
      false, false, false, false, false, true,  false, false,
      // 0x30-0x37: digits 0 to 7
      false, false, false, false, false, false, false, false,
      // 0x38-0x3F: 8 9 : ; < = > ?
      false, false, false, false, false, false, false, false,
      // 0x40-0x47: @ and A to G
      false, false, false, false, false, false, false, false,
      // 0x48-0x4F: H to O
      false, false, false, false, false, false, false, false,
      // 0x50-0x57: P to W
      false, false, false, false, false, false, false, false,
      // 0x58-0x5F: X Y Z [ backslash ] ^ _
      false, false, false, false, false, false, false, false,
      // 0x60-0x67: backtick and a to g
      false, false, false, false, false, false, false, false,
      // 0x68-0x6F: h to o
      false, false, false, false, false, false, false, false,
      // 0x70-0x77: p to w
      false, false, false, false, false, false, false, false,
      // 0x78-0x7F: x y z { | } ~ and DEL; only DEL (0x7F) is true
      false, false, false, false, false, false, false, true,
  };
  893. }
  894. }