String.Searching.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Globalization;
  5. using System.Numerics;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.InteropServices;
  8. using Internal.Runtime.CompilerServices;
  9. namespace System
  10. {
  11. public partial class String
  12. {
  13. public bool Contains(string value)
  14. {
  15. return (IndexOf(value, StringComparison.Ordinal) >= 0);
  16. }
  17. public bool Contains(string value, StringComparison comparisonType)
  18. {
  19. return (IndexOf(value, comparisonType) >= 0);
  20. }
  21. public bool Contains(char value) => SpanHelpers.Contains(ref _firstChar, value, Length);
  22. public bool Contains(char value, StringComparison comparisonType)
  23. {
  24. return IndexOf(value, comparisonType) != -1;
  25. }
  26. // Returns the index of the first occurrence of a specified character in the current instance.
  27. // The search starts at startIndex and runs thorough the next count characters.
  28. //
  29. public int IndexOf(char value) => SpanHelpers.IndexOf(ref _firstChar, value, Length);
  30. public int IndexOf(char value, int startIndex)
  31. {
  32. return IndexOf(value, startIndex, this.Length - startIndex);
  33. }
  34. public int IndexOf(char value, StringComparison comparisonType)
  35. {
  36. switch (comparisonType)
  37. {
  38. case StringComparison.CurrentCulture:
  39. case StringComparison.CurrentCultureIgnoreCase:
  40. return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, GetCaseCompareOfComparisonCulture(comparisonType));
  41. case StringComparison.InvariantCulture:
  42. case StringComparison.InvariantCultureIgnoreCase:
  43. return CompareInfo.Invariant.IndexOf(this, value, GetCaseCompareOfComparisonCulture(comparisonType));
  44. case StringComparison.Ordinal:
  45. return IndexOf(value);
  46. case StringComparison.OrdinalIgnoreCase:
  47. return CompareInfo.Invariant.IndexOf(this, value, CompareOptions.OrdinalIgnoreCase);
  48. default:
  49. throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType));
  50. }
  51. }
  52. public unsafe int IndexOf(char value, int startIndex, int count)
  53. {
  54. if ((uint)startIndex > (uint)Length)
  55. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  56. if ((uint)count > (uint)(Length - startIndex))
  57. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  58. int result = SpanHelpers.IndexOf(ref Unsafe.Add(ref _firstChar, startIndex), value, count);
  59. return result == -1 ? result : result + startIndex;
  60. }
  61. // Returns the index of the first occurrence of any specified character in the current instance.
  62. // The search starts at startIndex and runs to startIndex + count - 1.
  63. //
  64. public int IndexOfAny(char[] anyOf)
  65. {
  66. return IndexOfAny(anyOf, 0, this.Length);
  67. }
  68. public int IndexOfAny(char[] anyOf, int startIndex)
  69. {
  70. return IndexOfAny(anyOf, startIndex, this.Length - startIndex);
  71. }
  72. public int IndexOfAny(char[] anyOf, int startIndex, int count)
  73. {
  74. if (anyOf == null)
  75. throw new ArgumentNullException(nameof(anyOf));
  76. if ((uint)startIndex > (uint)Length)
  77. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  78. if ((uint)count > (uint)(Length - startIndex))
  79. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  80. if (anyOf.Length > 0 && anyOf.Length <= 5)
  81. {
  82. // The ReadOnlySpan.IndexOfAny extension is vectorized for values of 1 - 5 in length
  83. var result = new ReadOnlySpan<char>(ref Unsafe.Add(ref _firstChar, startIndex), count).IndexOfAny(anyOf);
  84. return result == -1 ? result : result + startIndex;
  85. }
  86. else if (anyOf.Length > 5)
  87. {
  88. // Use Probabilistic Map
  89. return IndexOfCharArray(anyOf, startIndex, count);
  90. }
  91. else // anyOf.Length == 0
  92. {
  93. return -1;
  94. }
  95. }
  96. private unsafe int IndexOfCharArray(char[] anyOf, int startIndex, int count)
  97. {
  98. // use probabilistic map, see InitializeProbabilisticMap
  99. ProbabilisticMap map = default;
  100. uint* charMap = (uint*)&map;
  101. InitializeProbabilisticMap(charMap, anyOf);
  102. fixed (char* pChars = &_firstChar)
  103. {
  104. char* pCh = pChars + startIndex;
  105. while (count > 0)
  106. {
  107. int thisChar = *pCh;
  108. if (IsCharBitSet(charMap, (byte)thisChar) &&
  109. IsCharBitSet(charMap, (byte)(thisChar >> 8)) &&
  110. ArrayContains((char)thisChar, anyOf))
  111. {
  112. return (int)(pCh - pChars);
  113. }
  114. count--;
  115. pCh++;
  116. }
  117. return -1;
  118. }
  119. }
  120. private const int PROBABILISTICMAP_BLOCK_INDEX_MASK = 0x7;
  121. private const int PROBABILISTICMAP_BLOCK_INDEX_SHIFT = 0x3;
  122. private const int PROBABILISTICMAP_SIZE = 0x8;
  123. // A probabilistic map is an optimization that is used in IndexOfAny/
  124. // LastIndexOfAny methods. The idea is to create a bit map of the characters we
  125. // are searching for and use this map as a "cheap" check to decide if the
  126. // current character in the string exists in the array of input characters.
  127. // There are 256 bits in the map, with each character mapped to 2 bits. Every
  128. // character is divided into 2 bytes, and then every byte is mapped to 1 bit.
  129. // The character map is an array of 8 integers acting as map blocks. The 3 lsb
  130. // in each byte in the character is used to index into this map to get the
  131. // right block, the value of the remaining 5 msb are used as the bit position
  132. // inside this block.
  133. private static unsafe void InitializeProbabilisticMap(uint* charMap, ReadOnlySpan<char> anyOf)
  134. {
  135. bool hasAscii = false;
  136. uint* charMapLocal = charMap; // https://github.com/dotnet/coreclr/issues/14264
  137. for (int i = 0; i < anyOf.Length; ++i)
  138. {
  139. int c = anyOf[i];
  140. // Map low bit
  141. SetCharBit(charMapLocal, (byte)c);
  142. // Map high bit
  143. c >>= 8;
  144. if (c == 0)
  145. {
  146. hasAscii = true;
  147. }
  148. else
  149. {
  150. SetCharBit(charMapLocal, (byte)c);
  151. }
  152. }
  153. if (hasAscii)
  154. {
  155. // Common to search for ASCII symbols. Just set the high value once.
  156. charMapLocal[0] |= 1u;
  157. }
  158. }
  159. private static bool ArrayContains(char searchChar, char[] anyOf)
  160. {
  161. for (int i = 0; i < anyOf.Length; i++)
  162. {
  163. if (anyOf[i] == searchChar)
  164. return true;
  165. }
  166. return false;
  167. }
  168. private static unsafe bool IsCharBitSet(uint* charMap, byte value)
  169. {
  170. return (charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] & (1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT))) != 0;
  171. }
  172. private static unsafe void SetCharBit(uint* charMap, byte value)
  173. {
  174. charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] |= 1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);
  175. }
  176. public int IndexOf(string value)
  177. {
  178. return IndexOf(value, StringComparison.CurrentCulture);
  179. }
  180. public int IndexOf(string value, int startIndex)
  181. {
  182. return IndexOf(value, startIndex, StringComparison.CurrentCulture);
  183. }
  184. public int IndexOf(string value, int startIndex, int count)
  185. {
  186. if (startIndex < 0 || startIndex > this.Length)
  187. {
  188. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  189. }
  190. if (count < 0 || count > this.Length - startIndex)
  191. {
  192. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  193. }
  194. return IndexOf(value, startIndex, count, StringComparison.CurrentCulture);
  195. }
  196. public int IndexOf(string value, StringComparison comparisonType)
  197. {
  198. return IndexOf(value, 0, this.Length, comparisonType);
  199. }
  200. public int IndexOf(string value, int startIndex, StringComparison comparisonType)
  201. {
  202. return IndexOf(value, startIndex, this.Length - startIndex, comparisonType);
  203. }
  204. public int IndexOf(string value, int startIndex, int count, StringComparison comparisonType)
  205. {
  206. // Validate inputs
  207. if (value == null)
  208. throw new ArgumentNullException(nameof(value));
  209. if (startIndex < 0 || startIndex > this.Length)
  210. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  211. if (count < 0 || startIndex > this.Length - count)
  212. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  213. if (comparisonType == StringComparison.Ordinal)
  214. {
  215. var result = SpanHelpers.IndexOf(
  216. ref Unsafe.Add(ref this._firstChar, startIndex),
  217. count,
  218. ref value._firstChar,
  219. value.Length);
  220. return (result >= 0 ? startIndex : 0) + result;
  221. }
  222. switch (comparisonType)
  223. {
  224. case StringComparison.CurrentCulture:
  225. case StringComparison.CurrentCultureIgnoreCase:
  226. return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  227. case StringComparison.InvariantCulture:
  228. case StringComparison.InvariantCultureIgnoreCase:
  229. return CompareInfo.Invariant.IndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  230. case StringComparison.OrdinalIgnoreCase:
  231. return CompareInfo.Invariant.IndexOfOrdinal(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None);
  232. default:
  233. throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType));
  234. }
  235. }
  236. // Returns the index of the last occurrence of a specified character in the current instance.
  237. // The search starts at startIndex and runs backwards to startIndex - count + 1.
  238. // The character at position startIndex is included in the search. startIndex is the larger
  239. // index within the string.
  240. //
  241. public int LastIndexOf(char value) => SpanHelpers.LastIndexOf(ref _firstChar, value, Length);
  242. public int LastIndexOf(char value, int startIndex)
  243. {
  244. return LastIndexOf(value, startIndex, startIndex + 1);
  245. }
  246. public unsafe int LastIndexOf(char value, int startIndex, int count)
  247. {
  248. if (Length == 0)
  249. return -1;
  250. if ((uint)startIndex >= (uint)Length)
  251. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  252. if ((uint)count > (uint)startIndex + 1)
  253. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  254. int startSearchAt = startIndex + 1 - count;
  255. int result = SpanHelpers.LastIndexOf(ref Unsafe.Add(ref _firstChar, startSearchAt), value, count);
  256. return result == -1 ? result : result + startSearchAt;
  257. }
  258. // Returns the index of the last occurrence of any specified character in the current instance.
  259. // The search starts at startIndex and runs backwards to startIndex - count + 1.
  260. // The character at position startIndex is included in the search. startIndex is the larger
  261. // index within the string.
  262. //
  263. public int LastIndexOfAny(char[] anyOf)
  264. {
  265. return LastIndexOfAny(anyOf, this.Length - 1, this.Length);
  266. }
  267. public int LastIndexOfAny(char[] anyOf, int startIndex)
  268. {
  269. return LastIndexOfAny(anyOf, startIndex, startIndex + 1);
  270. }
  271. public unsafe int LastIndexOfAny(char[] anyOf, int startIndex, int count)
  272. {
  273. if (anyOf == null)
  274. throw new ArgumentNullException(nameof(anyOf));
  275. if (Length == 0)
  276. return -1;
  277. if ((uint)startIndex >= (uint)Length)
  278. {
  279. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  280. }
  281. if ((count < 0) || ((count - 1) > startIndex))
  282. {
  283. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  284. }
  285. if (anyOf.Length > 1)
  286. {
  287. return LastIndexOfCharArray(anyOf, startIndex, count);
  288. }
  289. else if (anyOf.Length == 1)
  290. {
  291. return LastIndexOf(anyOf[0], startIndex, count);
  292. }
  293. else // anyOf.Length == 0
  294. {
  295. return -1;
  296. }
  297. }
  298. private unsafe int LastIndexOfCharArray(char[] anyOf, int startIndex, int count)
  299. {
  300. // use probabilistic map, see InitializeProbabilisticMap
  301. ProbabilisticMap map = default;
  302. uint* charMap = (uint*)&map;
  303. InitializeProbabilisticMap(charMap, anyOf);
  304. fixed (char* pChars = &_firstChar)
  305. {
  306. char* pCh = pChars + startIndex;
  307. while (count > 0)
  308. {
  309. int thisChar = *pCh;
  310. if (IsCharBitSet(charMap, (byte)thisChar) &&
  311. IsCharBitSet(charMap, (byte)(thisChar >> 8)) &&
  312. ArrayContains((char)thisChar, anyOf))
  313. {
  314. return (int)(pCh - pChars);
  315. }
  316. count--;
  317. pCh--;
  318. }
  319. return -1;
  320. }
  321. }
  322. // Returns the index of the last occurrence of any character in value in the current instance.
  323. // The search starts at startIndex and runs backwards to startIndex - count + 1.
  324. // The character at position startIndex is included in the search. startIndex is the larger
  325. // index within the string.
  326. //
  327. public int LastIndexOf(string value)
  328. {
  329. return LastIndexOf(value, this.Length - 1, this.Length, StringComparison.CurrentCulture);
  330. }
  331. public int LastIndexOf(string value, int startIndex)
  332. {
  333. return LastIndexOf(value, startIndex, startIndex + 1, StringComparison.CurrentCulture);
  334. }
  335. public int LastIndexOf(string value, int startIndex, int count)
  336. {
  337. if (count < 0)
  338. {
  339. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  340. }
  341. return LastIndexOf(value, startIndex, count, StringComparison.CurrentCulture);
  342. }
  343. public int LastIndexOf(string value, StringComparison comparisonType)
  344. {
  345. return LastIndexOf(value, this.Length - 1, this.Length, comparisonType);
  346. }
  347. public int LastIndexOf(string value, int startIndex, StringComparison comparisonType)
  348. {
  349. return LastIndexOf(value, startIndex, startIndex + 1, comparisonType);
  350. }
  351. public int LastIndexOf(string value, int startIndex, int count, StringComparison comparisonType)
  352. {
  353. if (value == null)
  354. throw new ArgumentNullException(nameof(value));
  355. // Special case for 0 length input strings
  356. if (this.Length == 0 && (startIndex == -1 || startIndex == 0))
  357. return (value.Length == 0) ? 0 : -1;
  358. // Now after handling empty strings, make sure we're not out of range
  359. if (startIndex < 0 || startIndex > this.Length)
  360. throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
  361. // Make sure that we allow startIndex == this.Length
  362. if (startIndex == this.Length)
  363. {
  364. startIndex--;
  365. if (count > 0)
  366. count--;
  367. }
  368. // 2nd half of this also catches when startIndex == MAXINT, so MAXINT - 0 + 1 == -1, which is < 0.
  369. if (count < 0 || startIndex - count + 1 < 0)
  370. throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
  371. // If we are looking for nothing, just return startIndex
  372. if (value.Length == 0)
  373. return startIndex;
  374. switch (comparisonType)
  375. {
  376. case StringComparison.CurrentCulture:
  377. case StringComparison.CurrentCultureIgnoreCase:
  378. return CultureInfo.CurrentCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  379. case StringComparison.InvariantCulture:
  380. case StringComparison.InvariantCultureIgnoreCase:
  381. return CompareInfo.Invariant.LastIndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType));
  382. case StringComparison.Ordinal:
  383. case StringComparison.OrdinalIgnoreCase:
  384. return CompareInfo.Invariant.LastIndexOfOrdinal(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None);
  385. default:
  386. throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType));
  387. }
  388. }
  389. [StructLayout(LayoutKind.Explicit, Size = PROBABILISTICMAP_SIZE * sizeof(uint))]
  390. private struct ProbabilisticMap { }
  391. }
  392. }