String.Searching.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Globalization;
  5. using System.Runtime.InteropServices;
  6. using Internal.Runtime.CompilerServices;
  7. namespace System
  8. {
  9. public partial class String
  10. {
  11. public bool Contains(string value)
  12. {
  13. if (value == null)
  14. ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value);
  15. return SpanHelpers.IndexOf(
  16. ref _firstChar,
  17. Length,
  18. ref value._firstChar,
  19. value.Length) >= 0;
  20. }
  21. public bool Contains(string value, StringComparison comparisonType)
  22. {
  23. return (IndexOf(value, comparisonType) >= 0);
  24. }
  25. public bool Contains(char value) => SpanHelpers.Contains(ref _firstChar, value, Length);
  26. public bool Contains(char value, StringComparison comparisonType)
  27. {
  28. return IndexOf(value, comparisonType) != -1;
  29. }
  30. // Returns the index of the first occurrence of a specified character in the current instance.
  31. // The search starts at startIndex and runs thorough the next count characters.
  32. //
  33. public int IndexOf(char value) => SpanHelpers.IndexOf(ref _firstChar, value, Length);
  34. public int IndexOf(char value, int startIndex)
  35. {
  36. return IndexOf(value, startIndex, this.Length - startIndex);
  37. }
  38. public int IndexOf(char value, StringComparison comparisonType)
  39. {
  40. switch (comparisonType)
  41. {
  42. case StringComparison.CurrentCulture:
  43. case StringComparison.CurrentCultureIgnoreCase:
  44. return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, GetCaseCompareOfComparisonCulture(comparisonType));
  45. case StringComparison.InvariantCulture:
  46. case StringComparison.InvariantCultureIgnoreCase:
  47. return CompareInfo.Invariant.IndexOf(this, value, GetCaseCompareOfComparisonCulture(comparisonType));
  48. case StringComparison.Ordinal:
  49. return IndexOf(value);
  50. case StringComparison.OrdinalIgnoreCase:
  51. return CompareInfo.Invariant.IndexOf(this, value, CompareOptions.OrdinalIgnoreCase);
  52. default:
  53. throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType));
  54. }
  55. }
  56. public unsafe int IndexOf(char value, int startIndex, int count)
  57. {
  58. if ((uint)startIndex > (uint)Length)
  59. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  60. if ((uint)count > (uint)(Length - startIndex))
  61. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  62. int result = SpanHelpers.IndexOf(ref Unsafe.Add(ref _firstChar, startIndex), value, count);
  63. return result == -1 ? result : result + startIndex;
  64. }
  65. // Returns the index of the first occurrence of any specified character in the current instance.
  66. // The search starts at startIndex and runs to startIndex + count - 1.
  67. //
  68. public int IndexOfAny(char[] anyOf)
  69. {
  70. return IndexOfAny(anyOf, 0, this.Length);
  71. }
  72. public int IndexOfAny(char[] anyOf, int startIndex)
  73. {
  74. return IndexOfAny(anyOf, startIndex, this.Length - startIndex);
  75. }
  76. public int IndexOfAny(char[] anyOf, int startIndex, int count)
  77. {
  78. if (anyOf == null)
  79. throw new ArgumentNullException(nameof(anyOf));
  80. if ((uint)startIndex > (uint)Length)
  81. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  82. if ((uint)count > (uint)(Length - startIndex))
  83. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  84. if (anyOf.Length > 0 && anyOf.Length <= 5)
  85. {
  86. // The ReadOnlySpan.IndexOfAny extension is vectorized for values of 1 - 5 in length
  87. var result = new ReadOnlySpan<char>(ref Unsafe.Add(ref _firstChar, startIndex), count).IndexOfAny(anyOf);
  88. return result == -1 ? result : result + startIndex;
  89. }
  90. else if (anyOf.Length > 5)
  91. {
  92. // Use Probabilistic Map
  93. return IndexOfCharArray(anyOf, startIndex, count);
  94. }
  95. else // anyOf.Length == 0
  96. {
  97. return -1;
  98. }
  99. }
  100. private unsafe int IndexOfCharArray(char[] anyOf, int startIndex, int count)
  101. {
  102. // use probabilistic map, see InitializeProbabilisticMap
  103. ProbabilisticMap map = default;
  104. uint* charMap = (uint*)&map;
  105. InitializeProbabilisticMap(charMap, anyOf);
  106. fixed (char* pChars = &_firstChar)
  107. {
  108. char* pCh = pChars + startIndex;
  109. while (count > 0)
  110. {
  111. int thisChar = *pCh;
  112. if (IsCharBitSet(charMap, (byte)thisChar) &&
  113. IsCharBitSet(charMap, (byte)(thisChar >> 8)) &&
  114. ArrayContains((char)thisChar, anyOf))
  115. {
  116. return (int)(pCh - pChars);
  117. }
  118. count--;
  119. pCh++;
  120. }
  121. return -1;
  122. }
  123. }
  124. private const int PROBABILISTICMAP_BLOCK_INDEX_MASK = 0x7;
  125. private const int PROBABILISTICMAP_BLOCK_INDEX_SHIFT = 0x3;
  126. private const int PROBABILISTICMAP_SIZE = 0x8;
  127. // A probabilistic map is an optimization that is used in IndexOfAny/
  128. // LastIndexOfAny methods. The idea is to create a bit map of the characters we
  129. // are searching for and use this map as a "cheap" check to decide if the
  130. // current character in the string exists in the array of input characters.
  131. // There are 256 bits in the map, with each character mapped to 2 bits. Every
  132. // character is divided into 2 bytes, and then every byte is mapped to 1 bit.
  133. // The character map is an array of 8 integers acting as map blocks. The 3 lsb
  134. // in each byte in the character is used to index into this map to get the
  135. // right block, the value of the remaining 5 msb are used as the bit position
  136. // inside this block.
  137. private static unsafe void InitializeProbabilisticMap(uint* charMap, ReadOnlySpan<char> anyOf)
  138. {
  139. bool hasAscii = false;
  140. uint* charMapLocal = charMap; // https://github.com/dotnet/coreclr/issues/14264
  141. for (int i = 0; i < anyOf.Length; ++i)
  142. {
  143. int c = anyOf[i];
  144. // Map low bit
  145. SetCharBit(charMapLocal, (byte)c);
  146. // Map high bit
  147. c >>= 8;
  148. if (c == 0)
  149. {
  150. hasAscii = true;
  151. }
  152. else
  153. {
  154. SetCharBit(charMapLocal, (byte)c);
  155. }
  156. }
  157. if (hasAscii)
  158. {
  159. // Common to search for ASCII symbols. Just set the high value once.
  160. charMapLocal[0] |= 1u;
  161. }
  162. }
  163. private static bool ArrayContains(char searchChar, char[] anyOf)
  164. {
  165. for (int i = 0; i < anyOf.Length; i++)
  166. {
  167. if (anyOf[i] == searchChar)
  168. return true;
  169. }
  170. return false;
  171. }
  172. private static unsafe bool IsCharBitSet(uint* charMap, byte value)
  173. {
  174. return (charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] & (1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT))) != 0;
  175. }
  176. private static unsafe void SetCharBit(uint* charMap, byte value)
  177. {
  178. charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] |= 1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);
  179. }
  180. public int IndexOf(string value)
  181. {
  182. return IndexOf(value, StringComparison.CurrentCulture);
  183. }
  184. public int IndexOf(string value, int startIndex)
  185. {
  186. return IndexOf(value, startIndex, StringComparison.CurrentCulture);
  187. }
  188. public int IndexOf(string value, int startIndex, int count)
  189. {
  190. if (startIndex < 0 || startIndex > this.Length)
  191. {
  192. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  193. }
  194. if (count < 0 || count > this.Length - startIndex)
  195. {
  196. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  197. }
  198. return IndexOf(value, startIndex, count, StringComparison.CurrentCulture);
  199. }
  200. public int IndexOf(string value, StringComparison comparisonType)
  201. {
  202. return IndexOf(value, 0, this.Length, comparisonType);
  203. }
  204. public int IndexOf(string value, int startIndex, StringComparison comparisonType)
  205. {
  206. return IndexOf(value, startIndex, this.Length - startIndex, comparisonType);
  207. }
  208. public int IndexOf(string value, int startIndex, int count, StringComparison comparisonType)
  209. {
  210. // Validate inputs
  211. if (value == null)
  212. throw new ArgumentNullException(nameof(value));
  213. if (startIndex < 0 || startIndex > this.Length)
  214. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  215. if (count < 0 || startIndex > this.Length - count)
  216. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  217. if (comparisonType == StringComparison.Ordinal)
  218. {
  219. var result = SpanHelpers.IndexOf(
  220. ref Unsafe.Add(ref this._firstChar, startIndex),
  221. count,
  222. ref value._firstChar,
  223. value.Length);
  224. return (result >= 0 ? startIndex : 0) + result;
  225. }
  226. switch (comparisonType)
  227. {
  228. case StringComparison.CurrentCulture:
  229. case StringComparison.CurrentCultureIgnoreCase:
  230. return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  231. case StringComparison.InvariantCulture:
  232. case StringComparison.InvariantCultureIgnoreCase:
  233. return CompareInfo.Invariant.IndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  234. case StringComparison.OrdinalIgnoreCase:
  235. return CompareInfo.Invariant.IndexOfOrdinal(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None);
  236. default:
  237. throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType));
  238. }
  239. }
  240. // Returns the index of the last occurrence of a specified character in the current instance.
  241. // The search starts at startIndex and runs backwards to startIndex - count + 1.
  242. // The character at position startIndex is included in the search. startIndex is the larger
  243. // index within the string.
  244. //
  245. public int LastIndexOf(char value) => SpanHelpers.LastIndexOf(ref _firstChar, value, Length);
  246. public int LastIndexOf(char value, int startIndex)
  247. {
  248. return LastIndexOf(value, startIndex, startIndex + 1);
  249. }
  250. public unsafe int LastIndexOf(char value, int startIndex, int count)
  251. {
  252. if (Length == 0)
  253. return -1;
  254. if ((uint)startIndex >= (uint)Length)
  255. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  256. if ((uint)count > (uint)startIndex + 1)
  257. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  258. int startSearchAt = startIndex + 1 - count;
  259. int result = SpanHelpers.LastIndexOf(ref Unsafe.Add(ref _firstChar, startSearchAt), value, count);
  260. return result == -1 ? result : result + startSearchAt;
  261. }
  262. // Returns the index of the last occurrence of any specified character in the current instance.
  263. // The search starts at startIndex and runs backwards to startIndex - count + 1.
  264. // The character at position startIndex is included in the search. startIndex is the larger
  265. // index within the string.
  266. //
  267. public int LastIndexOfAny(char[] anyOf)
  268. {
  269. return LastIndexOfAny(anyOf, this.Length - 1, this.Length);
  270. }
  271. public int LastIndexOfAny(char[] anyOf, int startIndex)
  272. {
  273. return LastIndexOfAny(anyOf, startIndex, startIndex + 1);
  274. }
  275. public unsafe int LastIndexOfAny(char[] anyOf, int startIndex, int count)
  276. {
  277. if (anyOf == null)
  278. throw new ArgumentNullException(nameof(anyOf));
  279. if (Length == 0)
  280. return -1;
  281. if ((uint)startIndex >= (uint)Length)
  282. {
  283. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  284. }
  285. if ((count < 0) || ((count - 1) > startIndex))
  286. {
  287. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  288. }
  289. if (anyOf.Length > 1)
  290. {
  291. return LastIndexOfCharArray(anyOf, startIndex, count);
  292. }
  293. else if (anyOf.Length == 1)
  294. {
  295. return LastIndexOf(anyOf[0], startIndex, count);
  296. }
  297. else // anyOf.Length == 0
  298. {
  299. return -1;
  300. }
  301. }
  302. private unsafe int LastIndexOfCharArray(char[] anyOf, int startIndex, int count)
  303. {
  304. // use probabilistic map, see InitializeProbabilisticMap
  305. ProbabilisticMap map = default;
  306. uint* charMap = (uint*)&map;
  307. InitializeProbabilisticMap(charMap, anyOf);
  308. fixed (char* pChars = &_firstChar)
  309. {
  310. char* pCh = pChars + startIndex;
  311. while (count > 0)
  312. {
  313. int thisChar = *pCh;
  314. if (IsCharBitSet(charMap, (byte)thisChar) &&
  315. IsCharBitSet(charMap, (byte)(thisChar >> 8)) &&
  316. ArrayContains((char)thisChar, anyOf))
  317. {
  318. return (int)(pCh - pChars);
  319. }
  320. count--;
  321. pCh--;
  322. }
  323. return -1;
  324. }
  325. }
  326. // Returns the index of the last occurrence of any character in value in the current instance.
  327. // The search starts at startIndex and runs backwards to startIndex - count + 1.
  328. // The character at position startIndex is included in the search. startIndex is the larger
  329. // index within the string.
  330. //
  331. public int LastIndexOf(string value)
  332. {
  333. return LastIndexOf(value, this.Length - 1, this.Length, StringComparison.CurrentCulture);
  334. }
  335. public int LastIndexOf(string value, int startIndex)
  336. {
  337. return LastIndexOf(value, startIndex, startIndex + 1, StringComparison.CurrentCulture);
  338. }
  339. public int LastIndexOf(string value, int startIndex, int count)
  340. {
  341. if (count < 0)
  342. {
  343. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  344. }
  345. return LastIndexOf(value, startIndex, count, StringComparison.CurrentCulture);
  346. }
  347. public int LastIndexOf(string value, StringComparison comparisonType)
  348. {
  349. return LastIndexOf(value, this.Length - 1, this.Length, comparisonType);
  350. }
  351. public int LastIndexOf(string value, int startIndex, StringComparison comparisonType)
  352. {
  353. return LastIndexOf(value, startIndex, startIndex + 1, comparisonType);
  354. }
  355. public int LastIndexOf(string value, int startIndex, int count, StringComparison comparisonType)
  356. {
  357. if (value == null)
  358. throw new ArgumentNullException(nameof(value));
  359. // Special case for 0 length input strings
  360. if (this.Length == 0 && (startIndex == -1 || startIndex == 0))
  361. return (value.Length == 0) ? 0 : -1;
  362. // Now after handling empty strings, make sure we're not out of range
  363. if (startIndex < 0 || startIndex > this.Length)
  364. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  365. // Make sure that we allow startIndex == this.Length
  366. if (startIndex == this.Length)
  367. {
  368. startIndex--;
  369. if (count > 0)
  370. count--;
  371. }
  372. // 2nd half of this also catches when startIndex == MAXINT, so MAXINT - 0 + 1 == -1, which is < 0.
  373. if (count < 0 || startIndex - count + 1 < 0)
  374. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  375. // If we are looking for nothing, just return startIndex
  376. if (value.Length == 0)
  377. return startIndex;
  378. switch (comparisonType)
  379. {
  380. case StringComparison.CurrentCulture:
  381. case StringComparison.CurrentCultureIgnoreCase:
  382. return CultureInfo.CurrentCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  383. case StringComparison.InvariantCulture:
  384. case StringComparison.InvariantCultureIgnoreCase:
  385. return CompareInfo.Invariant.LastIndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  386. case StringComparison.Ordinal:
  387. case StringComparison.OrdinalIgnoreCase:
  388. return CompareInfo.Invariant.LastIndexOfOrdinal(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None);
  389. default:
  390. throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType));
  391. }
  392. }
  393. [StructLayout(LayoutKind.Explicit, Size = PROBABILISTICMAP_SIZE * sizeof(uint))]
  394. private struct ProbabilisticMap { }
  395. }
  396. }