SpanHelpers.Byte.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using Internal.Runtime.CompilerServices;

#if BIT64
using nuint = System.UInt64;
#else
using nuint = System.UInt32;
#endif // BIT64

namespace System
{
    internal static partial class SpanHelpers // .Byte
    {
        public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
        {
            Debug.Assert(searchSpaceLength >= 0);
            Debug.Assert(valueLength >= 0);

            if (valueLength == 0)
                return 0; // A zero-length sequence is always treated as "found" at the start of the search space.

            byte valueHead = value;
            ref byte valueTail = ref Unsafe.Add(ref value, 1);
            int valueTailLength = valueLength - 1;

            int remainingSearchSpaceLength = searchSpaceLength - valueTailLength;
            int offset = 0;
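            // Illustrative worked example: searching for "abc" in "abXabc" first finds
            // 'a' at index 0, but the tail "bc" does not match "bX", so the search
            // space is narrowed by one and rescanned; the next 'a' is at index 3,
            // where the tail matches, so 3 is returned.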
            while (remainingSearchSpaceLength > 0)
            {
                // Do a quick search for the first element of "value".
                int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, offset), valueHead, remainingSearchSpaceLength);
                if (relativeIndex == -1)
                    break;

                remainingSearchSpaceLength -= relativeIndex;
                offset += relativeIndex;

                if (remainingSearchSpaceLength <= 0)
                    break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there.

                // Found the first element of "value". See if the tail matches.
                if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, valueTailLength))
                    return offset; // The tail matched. Return a successful find.

                remainingSearchSpaceLength--;
                offset++;
            }
            return -1;
        }

        public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
        {
            Debug.Assert(searchSpaceLength >= 0);
            Debug.Assert(valueLength >= 0);

            if (valueLength == 0)
                return 0; // A zero-length sequence is always treated as "found" at the start of the search space.

            int offset = -1;
            for (int i = 0; i < valueLength; i++)
            {
                var tempIndex = IndexOf(ref searchSpace, Unsafe.Add(ref value, i), searchSpaceLength);
                if ((uint)tempIndex < (uint)offset)
                {
                    offset = tempIndex;
                    // Reduce the search space, because we don't care about occurrences of a
                    // search value at or after the index of a previously found value.
                    searchSpaceLength = tempIndex;

                    if (offset == 0)
                        break;
                }
            }
            return offset;
        }

        public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
        {
            Debug.Assert(searchSpaceLength >= 0);
            Debug.Assert(valueLength >= 0);

            if (valueLength == 0)
                return 0; // A zero-length sequence is always treated as "found" at the start of the search space.

            int offset = -1;
            for (int i = 0; i < valueLength; i++)
            {
                var tempIndex = LastIndexOf(ref searchSpace, Unsafe.Add(ref value, i), searchSpaceLength);
                if (tempIndex > offset)
                    offset = tempIndex;
            }
            return offset;
        }

        // Adapted from IndexOf(...)
        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        public static unsafe bool Contains(ref byte searchSpace, byte value, int length)
        {
            Debug.Assert(length >= 0);

            uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)length;

            if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
            {
                nLength = UnalignedByteCountVector(ref searchSpace);
            }
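
            // When vectorization applies, nLength was set above to the number of bytes
            // before the next Vector-sized alignment boundary; the scalar loops below
            // consume just those bytes, so the vector loop that follows reads from an
            // aligned address.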
        SequentialScan:
            while ((byte*)nLength >= (byte*)8)
            {
                nLength -= 8;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
                {
                    goto Found;
                }

                offset += 8;
            }

            if ((byte*)nLength >= (byte*)4)
            {
                nLength -= 4;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) ||
                    uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
                {
                    goto Found;
                }

                offset += 4;
            }

            while ((byte*)nLength > (byte*)0)
            {
                nLength -= 1;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
                    goto Found;

                offset += 1;
            }

            if (Vector.IsHardwareAccelerated && ((int)(byte*)offset < length))
            {
                nLength = (IntPtr)((length - (int)(byte*)offset) & ~(Vector<byte>.Count - 1));

                Vector<byte> values = new Vector<byte>(value);

                while ((byte*)nLength > (byte*)offset)
                {
                    var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
                    if (Vector<byte>.Zero.Equals(matches))
                    {
                        offset += Vector<byte>.Count;
                        continue;
                    }

                    goto Found;
                }

                if ((int)(byte*)offset < length)
                {
                    nLength = (IntPtr)(length - (int)(byte*)offset);
                    goto SequentialScan;
                }
            }

            return false;

        Found:
            return true;
        }

        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        public static unsafe int IndexOf(ref byte searchSpace, byte value, int length)
        {
            Debug.Assert(length >= 0);

            uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)length;

            if (Avx2.IsSupported || Sse2.IsSupported)
            {
                // Avx2 branch also operates on Sse2 sizes, so check is combined.
                if (length >= Vector128<byte>.Count * 2)
                {
                    nLength = UnalignedByteCountVector128(ref searchSpace);
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if (length >= Vector<byte>.Count * 2)
                {
                    nLength = UnalignedByteCountVector(ref searchSpace);
                }
            }

        SequentialScan:
            while ((byte*)nLength >= (byte*)8)
            {
                nLength -= 8;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
                    goto Found;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
                    goto Found1;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
                    goto Found2;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
                    goto Found3;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4))
                    goto Found4;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5))
                    goto Found5;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6))
                    goto Found6;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
                    goto Found7;

                offset += 8;
            }

            if ((byte*)nLength >= (byte*)4)
            {
                nLength -= 4;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
                    goto Found;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
                    goto Found1;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
                    goto Found2;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
                    goto Found3;

                offset += 4;
            }

            while ((byte*)nLength > (byte*)0)
            {
                nLength -= 1;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
                    goto Found;

                offset += 1;
            }
            // We get past SequentialScan only if IsHardwareAccelerated or an intrinsic's IsSupported is true,
            // and the remaining length is greater than the Vector length. However, we still keep the redundant
            // check here so the JIT can see that the code below is unreachable and eliminate it when the
            // platform does not have hardware acceleration. After processing the Vector lengths we return to
            // SequentialScan to finish any remainder.
            if (Avx2.IsSupported)
            {
                if ((int)(byte*)offset < length)
                {
                    if ((((nuint)Unsafe.AsPointer(ref searchSpace) + (nuint)offset) & (nuint)(Vector256<byte>.Count - 1)) != 0)
                    {
                        // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches
                        // with no upper bound e.g. String.strlen.
                        // Start with a check on Vector128 to align to Vector256, before moving to processing Vector256.
                        // This ensures we do not fault across memory pages while searching for an end of string.
                        Vector128<byte> values = Vector128.Create(value);
                        Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                        // Same method as below
                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search));
                        if (matches == 0)
                        {
                            // Zero flags set so no matches
                            offset += Vector128<byte>.Count;
                        }
                        else
                        {
                            // Find bitflag offset of first match and add to current offset
                            return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                        }
                    }

                    nLength = GetByteVector256SpanLength(offset, length);
                    if ((byte*)nLength > (byte*)offset)
                    {
                        Vector256<byte> values = Vector256.Create(value);
                        do
                        {
                            Vector256<byte> search = LoadVector256(ref searchSpace, offset);
                            int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search));
                            // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                            // So the bit position in 'matches' corresponds to the element offset.
                            if (matches == 0)
                            {
                                // Zero flags set so no matches
                                offset += Vector256<byte>.Count;
                                continue;
                            }

                            // Find bitflag offset of first match and add to current offset
                            return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                        } while ((byte*)nLength > (byte*)offset);
                    }

                    nLength = GetByteVector128SpanLength(offset, length);
                    if ((byte*)nLength > (byte*)offset)
                    {
                        Vector128<byte> values = Vector128.Create(value);
                        Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                        // Same method as above
                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search));
                        if (matches == 0)
                        {
                            // Zero flags set so no matches
                            offset += Vector128<byte>.Count;
                        }
                        else
                        {
                            // Find bitflag offset of first match and add to current offset
                            return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                        }
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }
            else if (Sse2.IsSupported)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVector128SpanLength(offset, length);

                    Vector128<byte> values = Vector128.Create(value);
                    while ((byte*)nLength > (byte*)offset)
                    {
                        Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                        // Same method as above
                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search));
                        if (matches == 0)
                        {
                            // Zero flags set so no matches
                            offset += Vector128<byte>.Count;
                            continue;
                        }

                        // Find bitflag offset of first match and add to current offset
                        return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVectorSpanLength(offset, length);

                    Vector<byte> values = new Vector<byte>(value);
                    while ((byte*)nLength > (byte*)offset)
                    {
                        var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
                        if (Vector<byte>.Zero.Equals(matches))
                        {
                            offset += Vector<byte>.Count;
                            continue;
                        }

                        // Find offset of first match and add to current offset
                        return (int)(byte*)offset + LocateFirstFoundByte(matches);
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }

            return -1;

        Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            return (int)(byte*)offset;
        Found1:
            return (int)(byte*)(offset + 1);
        Found2:
            return (int)(byte*)(offset + 2);
        Found3:
            return (int)(byte*)(offset + 3);
        Found4:
            return (int)(byte*)(offset + 4);
        Found5:
            return (int)(byte*)(offset + 5);
        Found6:
            return (int)(byte*)(offset + 6);
        Found7:
            return (int)(byte*)(offset + 7);
        }

        public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
        {
            Debug.Assert(searchSpaceLength >= 0);
            Debug.Assert(valueLength >= 0);

            if (valueLength == 0)
                return 0; // A zero-length sequence is always treated as "found" at the start of the search space.

            byte valueHead = value;
            ref byte valueTail = ref Unsafe.Add(ref value, 1);
            int valueTailLength = valueLength - 1;

            int offset = 0;
            for (; ; )
            {
                Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength".
                int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength;
                if (remainingSearchSpaceLength <= 0)
                    break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there.

                // Do a quick search for the first element of "value".
                int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength);
                if (relativeIndex == -1)
                    break;

                // Found the first element of "value". See if the tail matches.
                if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, valueTailLength))
                    return relativeIndex; // The tail matched. Return a successful find.

                offset += remainingSearchSpaceLength - relativeIndex;
            }
            return -1;
        }

        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        public static unsafe int LastIndexOf(ref byte searchSpace, byte value, int length)
        {
            Debug.Assert(length >= 0);

            uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)length;

            if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
            {
                nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length);
            }
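
            // The scan below walks backward from the end: 'offset' starts at 'length' and is
            // decremented before each read. When vectorization applies, nLength was set above
            // to the count of trailing bytes past the last Vector-aligned address, so once the
            // scalar loops consume them the backward vector loop reads from aligned addresses.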
        SequentialScan:
            while ((byte*)nLength >= (byte*)8)
            {
                nLength -= 8;
                offset -= 8;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
                    goto Found7;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6))
                    goto Found6;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5))
                    goto Found5;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4))
                    goto Found4;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
                    goto Found3;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
                    goto Found2;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
                    goto Found1;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
                    goto Found;
            }

            if ((byte*)nLength >= (byte*)4)
            {
                nLength -= 4;
                offset -= 4;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
                    goto Found3;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
                    goto Found2;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
                    goto Found1;
                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
                    goto Found;
            }

            while ((byte*)nLength > (byte*)0)
            {
                nLength -= 1;
                offset -= 1;

                if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
                    goto Found;
            }

            if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0))
            {
                nLength = (IntPtr)((int)(byte*)offset & ~(Vector<byte>.Count - 1));

                Vector<byte> values = new Vector<byte>(value);

                while ((byte*)nLength > (byte*)(Vector<byte>.Count - 1))
                {
                    var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset - Vector<byte>.Count));
                    if (Vector<byte>.Zero.Equals(matches))
                    {
                        offset -= Vector<byte>.Count;
                        nLength -= Vector<byte>.Count;
                        continue;
                    }

                    // Find offset of last match and add to current offset
                    return (int)(offset) - Vector<byte>.Count + LocateLastFoundByte(matches);
                }

                if ((byte*)offset > (byte*)0)
                {
                    nLength = offset;
                    goto SequentialScan;
                }
            }

            return -1;

        Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            return (int)(byte*)offset;
        Found1:
            return (int)(byte*)(offset + 1);
        Found2:
            return (int)(byte*)(offset + 2);
        Found3:
            return (int)(byte*)(offset + 3);
        Found4:
            return (int)(byte*)(offset + 4);
        Found5:
            return (int)(byte*)(offset + 5);
        Found6:
            return (int)(byte*)(offset + 6);
        Found7:
            return (int)(byte*)(offset + 7);
        }

        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int length)
        {
            Debug.Assert(length >= 0);

            uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)length;

            if (Avx2.IsSupported || Sse2.IsSupported)
            {
                // Avx2 branch also operates on Sse2 sizes, so check is combined.
                if (length >= Vector128<byte>.Count * 2)
                {
                    nLength = UnalignedByteCountVector128(ref searchSpace);
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if (length >= Vector<byte>.Count * 2)
                {
                    nLength = UnalignedByteCountVector(ref searchSpace);
                }
            }
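
            // In the unrolled loops below each position is loaded once into 'lookUp' and
            // compared against both candidate values as uints, which likely keeps the scan
            // to one memory read per byte rather than one read per candidate value.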
        SequentialScan:
            uint lookUp;
            while ((byte*)nLength >= (byte*)8)
            {
                nLength -= 8;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found3;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found4;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found5;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found6;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found7;

                offset += 8;
            }

            if ((byte*)nLength >= (byte*)4)
            {
                nLength -= 4;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found3;

                offset += 4;
            }

            while ((byte*)nLength > (byte*)0)
            {
                nLength -= 1;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found;

                offset += 1;
            }

            if (Avx2.IsSupported)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVector256SpanLength(offset, length);
                    if ((byte*)nLength > (byte*)offset)
                    {
                        Vector256<byte> values0 = Vector256.Create(value0);
                        Vector256<byte> values1 = Vector256.Create(value1);
                        do
                        {
                            Vector256<byte> search = LoadVector256(ref searchSpace, offset);
                            // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                            // So the bit position in 'matches' corresponds to the element offset.
                            int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search));
                            // Bitwise Or to combine the flagged matches for the second value to our match flags
                            matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search));
                            if (matches == 0)
                            {
                                // Zero flags set so no matches
                                offset += Vector256<byte>.Count;
                                continue;
                            }

                            // Find bitflag offset of first match and add to current offset
                            return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                        } while ((byte*)nLength > (byte*)offset);
                    }

                    nLength = GetByteVector128SpanLength(offset, length);
                    if ((byte*)nLength > (byte*)offset)
                    {
                        Vector128<byte> values0 = Vector128.Create(value0);
                        Vector128<byte> values1 = Vector128.Create(value1);
                        Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                        // Same method as above
                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                        matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                        if (matches == 0)
                        {
                            // Zero flags set so no matches
                            offset += Vector128<byte>.Count;
                        }
                        else
                        {
                            // Find bitflag offset of first match and add to current offset
                            return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                        }
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }
            else if (Sse2.IsSupported)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVector128SpanLength(offset, length);

                    Vector128<byte> values0 = Vector128.Create(value0);
                    Vector128<byte> values1 = Vector128.Create(value1);
                    while ((byte*)nLength > (byte*)offset)
                    {
                        Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                        // Same method as above
                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                        matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                        if (matches == 0)
                        {
                            // Zero flags set so no matches
                            offset += Vector128<byte>.Count;
                            continue;
                        }

                        // Find bitflag offset of first match and add to current offset
                        return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVectorSpanLength(offset, length);

                    Vector<byte> values0 = new Vector<byte>(value0);
                    Vector<byte> values1 = new Vector<byte>(value1);
                    while ((byte*)nLength > (byte*)offset)
                    {
                        Vector<byte> search = LoadVector(ref searchSpace, offset);
                        var matches = Vector.BitwiseOr(
                                        Vector.Equals(search, values0),
                                        Vector.Equals(search, values1));
                        if (Vector<byte>.Zero.Equals(matches))
                        {
                            offset += Vector<byte>.Count;
                            continue;
                        }

                        // Find offset of first match and add to current offset
                        return (int)(byte*)offset + LocateFirstFoundByte(matches);
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }

            return -1;

        Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            return (int)(byte*)offset;
        Found1:
            return (int)(byte*)(offset + 1);
        Found2:
            return (int)(byte*)(offset + 2);
        Found3:
            return (int)(byte*)(offset + 3);
        Found4:
            return (int)(byte*)(offset + 4);
        Found5:
            return (int)(byte*)(offset + 5);
        Found6:
            return (int)(byte*)(offset + 6);
        Found7:
            return (int)(byte*)(offset + 7);
        }

        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byte value2, int length)
        {
            Debug.Assert(length >= 0);

            uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            uint uValue1 = value1;
            uint uValue2 = value2;
            IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)length;

            if (Avx2.IsSupported || Sse2.IsSupported)
            {
                // Avx2 branch also operates on Sse2 sizes, so check is combined.
                if (length >= Vector128<byte>.Count * 2)
                {
                    nLength = UnalignedByteCountVector128(ref searchSpace);
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if (length >= Vector<byte>.Count * 2)
                {
                    nLength = UnalignedByteCountVector(ref searchSpace);
                }
            }

        SequentialScan:
            uint lookUp;
            while ((byte*)nLength >= (byte*)8)
            {
                nLength -= 8;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found3;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found4;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found5;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found6;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found7;

                offset += 8;
            }

            if ((byte*)nLength >= (byte*)4)
            {
                nLength -= 4;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found3;

                offset += 4;
            }

            while ((byte*)nLength > (byte*)0)
            {
                nLength -= 1;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found;

                offset += 1;
            }

            if (Avx2.IsSupported)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVector256SpanLength(offset, length);
                    if ((byte*)nLength > (byte*)offset)
                    {
                        Vector256<byte> values0 = Vector256.Create(value0);
                        Vector256<byte> values1 = Vector256.Create(value1);
                        Vector256<byte> values2 = Vector256.Create(value2);
                        do
                        {
                            Vector256<byte> search = LoadVector256(ref searchSpace, offset);
                            // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                            // So the bit position in 'matches' corresponds to the element offset.
                            int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search));
                            // Bitwise Or to combine the flagged matches for the second value to our match flags
                            matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search));
                            // Bitwise Or to combine the flagged matches for the third value to our match flags
                            matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search));
                            if (matches == 0)
                            {
                                // Zero flags set so no matches
                                offset += Vector256<byte>.Count;
                                continue;
                            }

                            // Find bitflag offset of first match and add to current offset
                            return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                        } while ((byte*)nLength > (byte*)offset);
                    }

                    nLength = GetByteVector128SpanLength(offset, length);
                    if ((byte*)nLength > (byte*)offset)
                    {
                        Vector128<byte> values0 = Vector128.Create(value0);
                        Vector128<byte> values1 = Vector128.Create(value1);
                        Vector128<byte> values2 = Vector128.Create(value2);
                        Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                        // Same method as above
                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                        matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                        matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search));
                        if (matches == 0)
                        {
                            // Zero flags set so no matches
                            offset += Vector128<byte>.Count;
                        }
                        else
                        {
                            // Find bitflag offset of first match and add to current offset
                            return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                        }
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }
            else if (Sse2.IsSupported)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVector128SpanLength(offset, length);

                    Vector128<byte> values0 = Vector128.Create(value0);
                    Vector128<byte> values1 = Vector128.Create(value1);
                    Vector128<byte> values2 = Vector128.Create(value2);
                    while ((byte*)nLength > (byte*)offset)
                    {
                        Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                        // Same method as above
                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                        matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                        matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search));
                        if (matches == 0)
                        {
                            // Zero flags set so no matches
                            offset += Vector128<byte>.Count;
                            continue;
                        }

                        // Find bitflag offset of first match and add to current offset
                        return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if ((int)(byte*)offset < length)
                {
                    nLength = GetByteVectorSpanLength(offset, length);

                    Vector<byte> values0 = new Vector<byte>(value0);
                    Vector<byte> values1 = new Vector<byte>(value1);
                    Vector<byte> values2 = new Vector<byte>(value2);
                    while ((byte*)nLength > (byte*)offset)
                    {
                        Vector<byte> search = LoadVector(ref searchSpace, offset);
                        var matches = Vector.BitwiseOr(
                                        Vector.BitwiseOr(
                                            Vector.Equals(search, values0),
                                            Vector.Equals(search, values1)),
                                        Vector.Equals(search, values2));
                        if (Vector<byte>.Zero.Equals(matches))
                        {
                            offset += Vector<byte>.Count;
                            continue;
                        }

                        // Find offset of first match and add to current offset
                        return (int)(byte*)offset + LocateFirstFoundByte(matches);
                    }

                    if ((int)(byte*)offset < length)
                    {
                        nLength = (IntPtr)(length - (int)(byte*)offset);
                        goto SequentialScan;
                    }
                }
            }

            return -1;

        Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            return (int)(byte*)offset;
        Found1:
            return (int)(byte*)(offset + 1);
        Found2:
            return (int)(byte*)(offset + 2);
        Found3:
            return (int)(byte*)(offset + 3);
        Found4:
            return (int)(byte*)(offset + 4);
        Found5:
            return (int)(byte*)(offset + 5);
        Found6:
            return (int)(byte*)(offset + 6);
        Found7:
            return (int)(byte*)(offset + 7);
        }

        public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte value1, int length)
        {
            Debug.Assert(length >= 0);

            uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            uint uValue1 = value1;
            IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)length;

            if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
            {
                nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length);
            }

        SequentialScan:
            uint lookUp;
            while ((byte*)nLength >= (byte*)8)
            {
                nLength -= 8;
                offset -= 8;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found7;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found6;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found5;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found4;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found3;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found;
            }

            if ((byte*)nLength >= (byte*)4)
            {
                nLength -= 4;
                offset -= 4;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found3;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found;
            }

            while ((byte*)nLength > (byte*)0)
            {
                nLength -= 1;
                offset -= 1;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp)
                    goto Found;
            }

            if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0))
            {
                nLength = (IntPtr)((int)(byte*)offset & ~(Vector<byte>.Count - 1));

                Vector<byte> values0 = new Vector<byte>(value0);
                Vector<byte> values1 = new Vector<byte>(value1);

                while ((byte*)nLength > (byte*)(Vector<byte>.Count - 1))
                {
                    Vector<byte> search = LoadVector(ref searchSpace, offset - Vector<byte>.Count);
                    var matches = Vector.BitwiseOr(
                                    Vector.Equals(search, values0),
                                    Vector.Equals(search, values1));
                    if (Vector<byte>.Zero.Equals(matches))
                    {
                        offset -= Vector<byte>.Count;
                        nLength -= Vector<byte>.Count;
                        continue;
                    }

                    // Find offset of last match and add to current offset
                    return (int)(offset) - Vector<byte>.Count + LocateLastFoundByte(matches);
                }

                if ((byte*)offset > (byte*)0)
                {
                    nLength = offset;
                    goto SequentialScan;
                }
            }

            return -1;

        Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            return (int)(byte*)offset;
        Found1:
            return (int)(byte*)(offset + 1);
        Found2:
            return (int)(byte*)(offset + 2);
        Found3:
            return (int)(byte*)(offset + 3);
        Found4:
            return (int)(byte*)(offset + 4);
        Found5:
            return (int)(byte*)(offset + 5);
        Found6:
            return (int)(byte*)(offset + 6);
        Found7:
            return (int)(byte*)(offset + 7);
        }

        public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte value1, byte value2, int length)
        {
            Debug.Assert(length >= 0);

            uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
            uint uValue1 = value1;
            uint uValue2 = value2;
            IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)length;

            if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
            {
                nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length);
            }

        SequentialScan:
            uint lookUp;
            while ((byte*)nLength >= (byte*)8)
            {
                nLength -= 8;
                offset -= 8;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found7;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found6;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found5;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found4;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found3;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found;
            }

            if ((byte*)nLength >= (byte*)4)
            {
                nLength -= 4;
                offset -= 4;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found3;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found2;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found1;
                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found;
            }

            while ((byte*)nLength > (byte*)0)
            {
                nLength -= 1;
                offset -= 1;

                lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
                if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
                    goto Found;
            }

            if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0))
            {
                nLength = (IntPtr)((int)(byte*)offset & ~(Vector<byte>.Count - 1));

                Vector<byte> values0 = new Vector<byte>(value0);
                Vector<byte> values1 = new Vector<byte>(value1);
                Vector<byte> values2 = new Vector<byte>(value2);

                while ((byte*)nLength > (byte*)(Vector<byte>.Count - 1))
                {
                    Vector<byte> search = LoadVector(ref searchSpace, offset - Vector<byte>.Count);
                    var matches = Vector.BitwiseOr(
                                    Vector.BitwiseOr(
                                        Vector.Equals(search, values0),
                                        Vector.Equals(search, values1)),
                                    Vector.Equals(search, values2));
                    if (Vector<byte>.Zero.Equals(matches))
                    {
                        offset -= Vector<byte>.Count;
                        nLength -= Vector<byte>.Count;
                        continue;
                    }

                    // Find offset of last match and add to current offset
                    return (int)(offset) - Vector<byte>.Count + LocateLastFoundByte(matches);
                }

                if ((byte*)offset > (byte*)0)
                {
                    nLength = offset;
                    goto SequentialScan;
                }
            }

            return -1;

        Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            return (int)(byte*)offset;
        Found1:
            return (int)(byte*)(offset + 1);
        Found2:
            return (int)(byte*)(offset + 2);
        Found3:
            return (int)(byte*)(offset + 3);
        Found4:
            return (int)(byte*)(offset + 4);
        Found5:
            return (int)(byte*)(offset + 5);
        Found6:
            return (int)(byte*)(offset + 6);
        Found7:
            return (int)(byte*)(offset + 7);
        }

        // Optimized byte-based SequenceEquals. The "length" parameter for this one is declared as a nuint rather
        // than int, as we also use it for types other than byte where the length can exceed 2GB once scaled by sizeof(T).
        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint length)
        {
            if (Unsafe.AreSame(ref first, ref second))
                goto Equal;

            IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)(void*)length;

            if (Vector.IsHardwareAccelerated && (byte*)nLength >= (byte*)Vector<byte>.Count)
            {
                nLength -= Vector<byte>.Count;
                while ((byte*)nLength > (byte*)offset)
                {
                    if (LoadVector(ref first, offset) != LoadVector(ref second, offset))
                    {
                        goto NotEqual;
                    }
                    offset += Vector<byte>.Count;
                }
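                // The final compare below reloads the last Vector-sized block, starting at
                // (length - Vector<byte>.Count). That read may overlap bytes already compared,
                // which avoids a scalar tail loop and is safe because length >= Vector<byte>.Count
                // on this path.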
                return LoadVector(ref first, nLength) == LoadVector(ref second, nLength);
            }

            if ((byte*)nLength >= (byte*)sizeof(UIntPtr))
            {
                nLength -= sizeof(UIntPtr);
                while ((byte*)nLength > (byte*)offset)
                {
                    if (LoadUIntPtr(ref first, offset) != LoadUIntPtr(ref second, offset))
                    {
                        goto NotEqual;
                    }
                    offset += sizeof(UIntPtr);
                }
                return LoadUIntPtr(ref first, nLength) == LoadUIntPtr(ref second, nLength);
            }

            while ((byte*)nLength > (byte*)offset)
            {
                if (Unsafe.AddByteOffset(ref first, offset) != Unsafe.AddByteOffset(ref second, offset))
                    goto NotEqual;
                offset += 1;
            }

        Equal:
            return true;

        NotEqual: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            return false;
        }

        // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int LocateFirstFoundByte(Vector<byte> match)
        {
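            // 'match' holds 0xFF in every byte position that matched and 0x00 elsewhere.
            // Reinterpret it as ulong lanes, find the first non-zero lane, then locate the
            // matching byte within that lane; i * 8 converts the lane index to a byte index.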
            var vector64 = Vector.AsVectorUInt64(match);
            ulong candidate = 0;
            int i = 0;
            // Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
            for (; i < Vector<ulong>.Count; i++)
            {
                candidate = vector64[i];
                if (candidate != 0)
                {
                    break;
                }
            }

            // Single LEA instruction with jitted const (using function result)
            return i * 8 + LocateFirstFoundByte(candidate);
        }

        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref byte second, int secondLength)
        {
            Debug.Assert(firstLength >= 0);
            Debug.Assert(secondLength >= 0);

            if (Unsafe.AreSame(ref first, ref second))
                goto Equal;

            IntPtr minLength = (IntPtr)((firstLength < secondLength) ? firstLength : secondLength);

            IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
            IntPtr nLength = (IntPtr)(void*)minLength;

            if (Avx2.IsSupported)
            {
                if ((byte*)nLength >= (byte*)Vector256<byte>.Count)
                {
                    nLength -= Vector256<byte>.Count;
                    uint matches;
                    while ((byte*)nLength > (byte*)offset)
                    {
                        matches = (uint)Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset)));
                        // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                        // So the bit position in 'matches' corresponds to the element offset.
                        // 32 elements in Vector256<byte> so we compare to uint.MaxValue to check if everything matched
                        if (matches == uint.MaxValue)
                        {
                            // All matched
                            offset += Vector256<byte>.Count;
                            continue;
                        }

                        goto Difference;
                    }
                    // Move to Vector length from end for final compare
                    offset = nLength;
                    // Same method as above
                    matches = (uint)Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset)));
                    if (matches == uint.MaxValue)
                    {
                        // All matched
                        goto Equal;
                    }
                Difference:
                    // Invert matches to find differences
                    uint differences = ~matches;
                    // Find bitflag offset of first difference and add to current offset
                    offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences));

                    int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
                    Debug.Assert(result != 0);

                    return result;
                }

                if ((byte*)nLength >= (byte*)Vector128<byte>.Count)
                {
                    nLength -= Vector128<byte>.Count;
                    uint matches;
                    if ((byte*)nLength > (byte*)offset)
                    {
                        matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
                        // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                        // So the bit position in 'matches' corresponds to the element offset.
                        // 16 elements in Vector128<byte> so we compare to ushort.MaxValue to check if everything matched
                        if (matches == ushort.MaxValue)
                        {
                            // All matched
                            offset += Vector128<byte>.Count;
                        }
                        else
                        {
                            goto Difference;
                        }
                    }
                    // Move to Vector length from end for final compare
                    offset = nLength;
                    // Same method as above
                    matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
                    if (matches == ushort.MaxValue)
                    {
                        // All matched
                        goto Equal;
                    }
                Difference:
                    // Invert matches to find differences
                    uint differences = ~matches;
                    // Find bitflag offset of first difference and add to current offset
                    offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences));

                    int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
                    Debug.Assert(result != 0);

                    return result;
                }
            }
            else if (Sse2.IsSupported)
            {
                if ((byte*)nLength >= (byte*)Vector128<byte>.Count)
                {
                    nLength -= Vector128<byte>.Count;
                    uint matches;
                    while ((byte*)nLength > (byte*)offset)
                    {
                        matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
                        // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                        // So the bit position in 'matches' corresponds to the element offset.
                        // 16 elements in Vector128<byte> so we compare to ushort.MaxValue to check if everything matched
                        if (matches == ushort.MaxValue)
                        {
                            // All matched
                            offset += Vector128<byte>.Count;
                            continue;
                        }

                        goto Difference;
                    }
                    // Move to Vector length from end for final compare
                    offset = nLength;
                    // Same method as above
                    matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
                    if (matches == ushort.MaxValue)
                    {
                        // All matched
                        goto Equal;
                    }
                Difference:
                    // Invert matches to find differences
                    uint differences = ~matches;
                    // Find bitflag offset of first difference and add to current offset
                    offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences));

                    int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
                    Debug.Assert(result != 0);

                    return result;
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if ((byte*)nLength > (byte*)Vector<byte>.Count)
                {
                    nLength -= Vector<byte>.Count;
                    while ((byte*)nLength > (byte*)offset)
                    {
                        if (LoadVector(ref first, offset) != LoadVector(ref second, offset))
                        {
                            goto BytewiseCheck;
                        }
                        offset += Vector<byte>.Count;
                    }
                    goto BytewiseCheck;
                }
            }

            if ((byte*)nLength > (byte*)sizeof(UIntPtr))
            {
                nLength -= sizeof(UIntPtr);
                while ((byte*)nLength > (byte*)offset)
                {
                    if (LoadUIntPtr(ref first, offset) != LoadUIntPtr(ref second, offset))
                    {
                        goto BytewiseCheck;
                    }
                    offset += sizeof(UIntPtr);
                }
            }

        BytewiseCheck: // Workaround for https://github.com/dotnet/coreclr/issues/13549
            while ((byte*)minLength > (byte*)offset)
            {
                int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
                if (result != 0)
                    return result;
                offset += 1;
            }

        Equal:
            return firstLength - secondLength;
        }

        // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int LocateLastFoundByte(Vector<byte> match)
        {
            var vector64 = Vector.AsVectorUInt64(match);
            ulong candidate = 0;
            int i = Vector<ulong>.Count - 1;
            // Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
            for (; i >= 0; i--)
            {
                candidate = vector64[i];
                if (candidate != 0)
                {
                    break;
                }
            }

            // Single LEA instruction with jitted const (using function result)
            return i * 8 + LocateLastFoundByte(candidate);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int LocateFirstFoundByte(ulong match)
        {
            if (Bmi1.X64.IsSupported)
            {
                return (int)(Bmi1.X64.TrailingZeroCount(match) >> 3);
            }
            else
            {
                // Flag least significant power of two bit
                var powerOfTwoFlag = match ^ (match - 1);
                // Shift all powers of two into the high byte and extract
                return (int)((powerOfTwoFlag * XorPowerOfTwoToHighByte) >> 57);
            }
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int LocateLastFoundByte(ulong match)
        {
            return 7 - (BitOps.LeadingZeroCount(match) >> 3);
        }

        private const ulong XorPowerOfTwoToHighByte = (0x07ul |
                                                       0x06ul << 8 |
                                                       0x05ul << 16 |
                                                       0x04ul << 24 |
                                                       0x03ul << 32 |
                                                       0x02ul << 40 |
                                                       0x01ul << 48) + 1;
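
        // Worked example for the multiply-shift trick in LocateFirstFoundByte(ulong):
        // if 'match' is 0x0000_0000_00FF_0000 (a match in byte 2), then
        // match ^ (match - 1) yields 0x0001_FFFF (bits 0..16 set). Multiplying by
        // XorPowerOfTwoToHighByte (0x0001_0203_0405_0608) accumulates the per-byte
        // index sums so that bits 57..63 of the product hold the matching byte's
        // index (2 here), which the >> 57 extracts.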

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe UIntPtr LoadUIntPtr(ref byte start, IntPtr offset)
            => Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.AddByteOffset(ref start, offset));

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe Vector<byte> LoadVector(ref byte start, IntPtr offset)
            => Unsafe.ReadUnaligned<Vector<byte>>(ref Unsafe.AddByteOffset(ref start, offset));

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe Vector128<byte> LoadVector128(ref byte start, IntPtr offset)
            => Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.AddByteOffset(ref start, offset));

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe Vector256<byte> LoadVector256(ref byte start, IntPtr offset)
            => Unsafe.ReadUnaligned<Vector256<byte>>(ref Unsafe.AddByteOffset(ref start, offset));

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe IntPtr GetByteVectorSpanLength(IntPtr offset, int length)
            => (IntPtr)((length - (int)(byte*)offset) & ~(Vector<byte>.Count - 1));

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe IntPtr GetByteVector128SpanLength(IntPtr offset, int length)
            => (IntPtr)((length - (int)(byte*)offset) & ~(Vector128<byte>.Count - 1));

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe IntPtr GetByteVector256SpanLength(IntPtr offset, int length)
            => (IntPtr)((length - (int)(byte*)offset) & ~(Vector256<byte>.Count - 1));

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe IntPtr UnalignedByteCountVector(ref byte searchSpace)
        {
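            // Returns the number of bytes from 'searchSpace' to the next Vector<byte>-aligned
            // address (0 if already aligned). E.g. with a 32-byte Vector and an address that is
            // 5 mod 32, this returns 27: the scalar prologue consumes those bytes so the
            // vector loop starts aligned.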
            int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector<byte>.Count - 1);
            return (IntPtr)((Vector<byte>.Count - unaligned) & (Vector<byte>.Count - 1));
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe IntPtr UnalignedByteCountVector128(ref byte searchSpace)
        {
            int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector128<byte>.Count - 1);
            return (IntPtr)((Vector128<byte>.Count - unaligned) & (Vector128<byte>.Count - 1));
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe IntPtr UnalignedByteCountVectorFromEnd(ref byte searchSpace, int length)
        {
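            // Backward-scan counterpart of UnalignedByteCountVector: the returned count n
            // satisfies (address + length - n) % Vector<byte>.Count == 0, so after the scalar
            // loops consume those n trailing bytes, a backward vector loop reads from
            // Vector-aligned addresses.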
            int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector<byte>.Count - 1);
            return (IntPtr)(((length & (Vector<byte>.Count - 1)) + unaligned) & (Vector<byte>.Count - 1));
        }
    }
}