// SpanHelpers.Byte.cs — byte-specialized search helpers for Span<byte>/ReadOnlySpan<byte>.
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Runtime.CompilerServices;
  6. using System.Numerics;
  7. using System.Runtime.Intrinsics;
  8. using System.Runtime.Intrinsics.X86;
  9. using Internal.Runtime.CompilerServices;
  10. #if BIT64
  11. using nuint = System.UInt64;
  12. #else
  13. using nuint = System.UInt32;
  14. #endif // BIT64
  15. namespace System
  16. {
  17. internal static partial class SpanHelpers // .Byte
  18. {
  19. public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
  20. {
  21. Debug.Assert(searchSpaceLength >= 0);
  22. Debug.Assert(valueLength >= 0);
  23. if (valueLength == 0)
  24. return 0; // A zero-length sequence is always treated as "found" at the start of the search space.
  25. byte valueHead = value;
  26. ref byte valueTail = ref Unsafe.Add(ref value, 1);
  27. int valueTailLength = valueLength - 1;
  28. int remainingSearchSpaceLength = searchSpaceLength - valueTailLength;
  29. int offset = 0;
  30. while (remainingSearchSpaceLength > 0)
  31. {
  32. // Do a quick search for the first element of "value".
  33. int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, offset), valueHead, remainingSearchSpaceLength);
  34. if (relativeIndex == -1)
  35. break;
  36. remainingSearchSpaceLength -= relativeIndex;
  37. offset += relativeIndex;
  38. if (remainingSearchSpaceLength <= 0)
  39. break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there.
  40. // Found the first element of "value". See if the tail matches.
  41. if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, valueTailLength))
  42. return offset; // The tail matched. Return a successful find.
  43. remainingSearchSpaceLength--;
  44. offset++;
  45. }
  46. return -1;
  47. }
  48. public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
  49. {
  50. Debug.Assert(searchSpaceLength >= 0);
  51. Debug.Assert(valueLength >= 0);
  52. if (valueLength == 0)
  53. return 0; // A zero-length sequence is always treated as "found" at the start of the search space.
  54. int offset = -1;
  55. for (int i = 0; i < valueLength; i++)
  56. {
  57. var tempIndex = IndexOf(ref searchSpace, Unsafe.Add(ref value, i), searchSpaceLength);
  58. if ((uint)tempIndex < (uint)offset)
  59. {
  60. offset = tempIndex;
  61. // Reduce space for search, cause we don't care if we find the search value after the index of a previously found value
  62. searchSpaceLength = tempIndex;
  63. if (offset == 0)
  64. break;
  65. }
  66. }
  67. return offset;
  68. }
  69. public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
  70. {
  71. Debug.Assert(searchSpaceLength >= 0);
  72. Debug.Assert(valueLength >= 0);
  73. if (valueLength == 0)
  74. return 0; // A zero-length sequence is always treated as "found" at the start of the search space.
  75. int offset = -1;
  76. for (int i = 0; i < valueLength; i++)
  77. {
  78. var tempIndex = LastIndexOf(ref searchSpace, Unsafe.Add(ref value, i), searchSpaceLength);
  79. if (tempIndex > offset)
  80. offset = tempIndex;
  81. }
  82. return offset;
  83. }
// Adapted from IndexOf(...)
/// <summary>
/// Returns true when <paramref name="value"/> occurs anywhere in the first
/// <paramref name="length"/> bytes starting at <paramref name="searchSpace"/>.
/// Scalar unrolled scan with an optional Vector&lt;byte&gt; fast path.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe bool Contains(ref byte searchSpace, byte value, int length)
{
    Debug.Assert(length >= 0);

    uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)length;

    if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
    {
        // Limit the scalar pass to the leading unaligned bytes; the vector
        // loop below resumes from wherever the scalar pass stops.
        nLength = UnalignedByteCountVector(ref searchSpace);
    }
SequentialScan:
    // Unrolled scalar scan: test 8 bytes per iteration, then 4, then singles.
    while ((byte*)nLength >= (byte*)8)
    {
        nLength -= 8;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
        {
            goto Found;
        }
        offset += 8;
    }

    if ((byte*)nLength >= (byte*)4)
    {
        nLength -= 4;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) ||
            uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
        {
            goto Found;
        }
        offset += 4;
    }

    while ((byte*)nLength > (byte*)0)
    {
        nLength -= 1;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
            goto Found;
        offset += 1;
    }

    if (Vector.IsHardwareAccelerated && ((int)(byte*)offset < length))
    {
        // Vectorized pass over the remaining bytes, rounded down to whole vectors.
        // NOTE(review): nLength here is compared directly against 'offset' as the
        // loop bound (mirrors IndexOf below) — confirm against the helper's contract.
        nLength = (IntPtr)((length - (int)(byte*)offset) & ~(Vector<byte>.Count - 1));
        Vector<byte> values = new Vector<byte>(value);
        while ((byte*)nLength > (byte*)offset)
        {
            var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
            if (Vector<byte>.Zero.Equals(matches))
            {
                // No lane matched in this vector; advance a whole vector.
                offset += Vector<byte>.Count;
                continue;
            }
            goto Found; // At least one lane matched; existence is all we need.
        }
        if ((int)(byte*)offset < length)
        {
            // Fewer than Vector<byte>.Count bytes remain — finish them scalar.
            nLength = (IntPtr)(length - (int)(byte*)offset);
            goto SequentialScan;
        }
    }
    return false;
Found:
    return true;
}
/// <summary>
/// Returns the index of the first occurrence of <paramref name="value"/> in the
/// first <paramref name="length"/> bytes at <paramref name="searchSpace"/>, or -1.
/// Uses AVX2, then SSE2, then Vector&lt;byte&gt; fast paths when available, with an
/// unrolled scalar scan for unaligned leading bytes and short tails.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe int IndexOf(ref byte searchSpace, byte value, int length)
{
    Debug.Assert(length >= 0);

    uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)length;

    if (Avx2.IsSupported || Sse2.IsSupported)
    {
        // Avx2 branch also operates on Sse2 sizes, so check is combined.
        if (length >= Vector128<byte>.Count * 2)
        {
            // Scalar-scan only the leading bytes before 128-bit alignment.
            nLength = UnalignedByteCountVector128(ref searchSpace);
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if (length >= Vector<byte>.Count * 2)
        {
            nLength = UnalignedByteCountVector(ref searchSpace);
        }
    }
SequentialScan:
    // Unrolled scalar scan of the next nLength bytes: 8 at a time, then 4, then 1.
    // Distinct FoundN labels return offset+N without recomputing in the hot loop.
    while ((byte*)nLength >= (byte*)8)
    {
        nLength -= 8;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
            goto Found;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
            goto Found1;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
            goto Found2;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
            goto Found3;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4))
            goto Found4;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5))
            goto Found5;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6))
            goto Found6;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
            goto Found7;
        offset += 8;
    }

    if ((byte*)nLength >= (byte*)4)
    {
        nLength -= 4;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
            goto Found;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
            goto Found1;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
            goto Found2;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
            goto Found3;
        offset += 4;
    }

    while ((byte*)nLength > (byte*)0)
    {
        nLength -= 1;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
            goto Found;
        offset += 1;
    }

    if (Avx2.IsSupported)
    {
        if ((int)(byte*)offset < length)
        {
            // 256-bit pass over as many whole Vector256 blocks as fit.
            nLength = GetByteVector256SpanLength(offset, length);
            if ((byte*)nLength > (byte*)offset)
            {
                Vector256<byte> values = Vector256.Create(value);
                do
                {
                    Vector256<byte> search = LoadVector256(ref searchSpace, offset);
                    int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search));
                    // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                    // So the bit position in 'matches' corresponds to the element offset.
                    if (matches == 0)
                    {
                        // Zero flags set so no matches
                        offset += Vector256<byte>.Count;
                        continue;
                    }
                    // Find bitflag offset of first match and add to current offset
                    return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                } while ((byte*)nLength > (byte*)offset);
            }

            // At most one 128-bit block can remain before the scalar tail.
            nLength = GetByteVector128SpanLength(offset, length);
            if ((byte*)nLength > (byte*)offset)
            {
                Vector128<byte> values = Vector128.Create(value);
                Vector128<byte> search = LoadVector128(ref searchSpace, offset);
                // Same method as above
                int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search));
                if (matches == 0)
                {
                    // Zero flags set so no matches
                    offset += Vector128<byte>.Count;
                }
                else
                {
                    // Find bitflag offset of first match and add to current offset
                    return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                }
            }
            if ((int)(byte*)offset < length)
            {
                // Sub-vector tail: finish with the scalar scan.
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    else if (Sse2.IsSupported)
    {
        if ((int)(byte*)offset < length)
        {
            nLength = GetByteVector128SpanLength(offset, length);
            Vector128<byte> values = Vector128.Create(value);
            while ((byte*)nLength > (byte*)offset)
            {
                Vector128<byte> search = LoadVector128(ref searchSpace, offset);
                // Same method as above
                int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search));
                if (matches == 0)
                {
                    // Zero flags set so no matches
                    offset += Vector128<byte>.Count;
                    continue;
                }
                // Find bitflag offset of first match and add to current offset
                return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
            }
            if ((int)(byte*)offset < length)
            {
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if ((int)(byte*)offset < length)
        {
            nLength = GetByteVectorSpanLength(offset, length);
            Vector<byte> values = new Vector<byte>(value);
            while ((byte*)nLength > (byte*)offset)
            {
                var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
                if (Vector<byte>.Zero.Equals(matches))
                {
                    offset += Vector<byte>.Count;
                    continue;
                }
                // Find offset of first match and add to current offset
                return (int)(byte*)offset + LocateFirstFoundByte(matches);
            }
            if ((int)(byte*)offset < length)
            {
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    return -1;
Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    return (int)(byte*)offset;
Found1:
    return (int)(byte*)(offset + 1);
Found2:
    return (int)(byte*)(offset + 2);
Found3:
    return (int)(byte*)(offset + 3);
Found4:
    return (int)(byte*)(offset + 4);
Found5:
    return (int)(byte*)(offset + 5);
Found6:
    return (int)(byte*)(offset + 6);
Found7:
    return (int)(byte*)(offset + 7);
}
/// <summary>
/// Searches backward for the last occurrence of the byte sequence starting at
/// <paramref name="value"/> within the search space.
/// </summary>
/// <returns>Index of the last match, or -1 when the sequence is absent.</returns>
public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength)
{
    Debug.Assert(searchSpaceLength >= 0);
    Debug.Assert(valueLength >= 0);

    if (valueLength == 0)
        return 0; // A zero-length sequence is always treated as "found" at the start of the search space.

    byte valueHead = value;
    ref byte valueTail = ref Unsafe.Add(ref value, 1);
    int valueTailLength = valueLength - 1;

    // 'offset' counts how many trailing bytes have already been ruled out;
    // the candidate window shrinks from the right after each failed match.
    int offset = 0;
    for (; ; )
    {
        Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength".
        int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength;
        if (remainingSearchSpaceLength <= 0)
            break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there.

        // Do a quick search for the first element of "value".
        int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength);
        if (relativeIndex == -1)
            break;

        // Found the first element of "value". See if the tail matches.
        if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, valueTailLength))
            return relativeIndex; // The tail matched. Return a successful find.

        // Mismatch: exclude this candidate and everything after it, then retry.
        offset += remainingSearchSpaceLength - relativeIndex;
    }
    return -1;
}
/// <summary>
/// Returns the index of the last occurrence of <paramref name="value"/> in the
/// first <paramref name="length"/> bytes at <paramref name="searchSpace"/>, or -1.
/// Scans from the end toward the start; 'offset' tracks the exclusive upper
/// bound of the unsearched region and 'nLength' the bytes left to scan scalar.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe int LastIndexOf(ref byte searchSpace, byte value, int length)
{
    Debug.Assert(length >= 0);

    uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)length;

    if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
    {
        // Scalar-scan only the trailing bytes past the last vector boundary;
        // the backward vector loop below covers the rest.
        nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length);
    }
SequentialScan:
    // Unrolled backward scalar scan: 8 bytes per iteration, highest offset first,
    // so the first hit found is the last occurrence.
    while ((byte*)nLength >= (byte*)8)
    {
        nLength -= 8;
        offset -= 8;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
            goto Found7;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6))
            goto Found6;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5))
            goto Found5;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4))
            goto Found4;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
            goto Found3;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
            goto Found2;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
            goto Found1;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
            goto Found;
    }

    if ((byte*)nLength >= (byte*)4)
    {
        nLength -= 4;
        offset -= 4;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
            goto Found3;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
            goto Found2;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
            goto Found1;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
            goto Found;
    }

    while ((byte*)nLength > (byte*)0)
    {
        nLength -= 1;
        offset -= 1;
        if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
            goto Found;
    }

    if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0))
    {
        // Vectorized backward pass over the whole vectors below 'offset'.
        nLength = (IntPtr)((int)(byte*)offset & ~(Vector<byte>.Count - 1));
        Vector<byte> values = new Vector<byte>(value);
        while ((byte*)nLength > (byte*)(Vector<byte>.Count - 1))
        {
            // Examine the vector that ends at 'offset'.
            var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset - Vector<byte>.Count));
            if (Vector<byte>.Zero.Equals(matches))
            {
                offset -= Vector<byte>.Count;
                nLength -= Vector<byte>.Count;
                continue;
            }
            // Find offset of first match and add to current offset
            return (int)(offset) - Vector<byte>.Count + LocateLastFoundByte(matches);
        }
        if ((byte*)offset > (byte*)0)
        {
            // Fewer than a full vector of bytes remain — finish them scalar.
            nLength = offset;
            goto SequentialScan;
        }
    }
    return -1;
Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    return (int)(byte*)offset;
Found1:
    return (int)(byte*)(offset + 1);
Found2:
    return (int)(byte*)(offset + 2);
Found3:
    return (int)(byte*)(offset + 3);
Found4:
    return (int)(byte*)(offset + 4);
Found5:
    return (int)(byte*)(offset + 5);
Found6:
    return (int)(byte*)(offset + 6);
Found7:
    return (int)(byte*)(offset + 7);
}
/// <summary>
/// Returns the index of the first occurrence of either <paramref name="value0"/>
/// or <paramref name="value1"/> in the first <paramref name="length"/> bytes at
/// <paramref name="searchSpace"/>, or -1. Mirrors the single-value IndexOf but
/// compares each position (or vector) against two target bytes.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int length)
{
    Debug.Assert(length >= 0);

    uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)length;

    if (Avx2.IsSupported || Sse2.IsSupported)
    {
        // Avx2 branch also operates on Sse2 sizes, so check is combined.
        if (length >= Vector128<byte>.Count * 2)
        {
            // Scalar-scan only the leading bytes before 128-bit alignment.
            nLength = UnalignedByteCountVector128(ref searchSpace);
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if (length >= Vector<byte>.Count * 2)
        {
            nLength = UnalignedByteCountVector(ref searchSpace);
        }
    }
SequentialScan:
    // Unrolled scalar scan: each position is read once and compared to both values.
    uint lookUp;
    while ((byte*)nLength >= (byte*)8)
    {
        nLength -= 8;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found3;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found4;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found5;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found6;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found7;
        offset += 8;
    }

    if ((byte*)nLength >= (byte*)4)
    {
        nLength -= 4;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found3;
        offset += 4;
    }

    while ((byte*)nLength > (byte*)0)
    {
        nLength -= 1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found;
        offset += 1;
    }

    if (Avx2.IsSupported)
    {
        if ((int)(byte*)offset < length)
        {
            // 256-bit pass over as many whole Vector256 blocks as fit.
            nLength = GetByteVector256SpanLength(offset, length);
            if ((byte*)nLength > (byte*)offset)
            {
                Vector256<byte> values0 = Vector256.Create(value0);
                Vector256<byte> values1 = Vector256.Create(value1);
                do
                {
                    Vector256<byte> search = LoadVector256(ref searchSpace, offset);
                    // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                    // So the bit position in 'matches' corresponds to the element offset.
                    int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search));
                    // Bitwise Or to combine the flagged matches for the second value to our match flags
                    matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search));
                    if (matches == 0)
                    {
                        // Zero flags set so no matches
                        offset += Vector256<byte>.Count;
                        continue;
                    }
                    // Find bitflag offset of first match and add to current offset
                    return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                } while ((byte*)nLength > (byte*)offset);
            }

            // At most one 128-bit block can remain before the scalar tail.
            nLength = GetByteVector128SpanLength(offset, length);
            if ((byte*)nLength > (byte*)offset)
            {
                Vector128<byte> values0 = Vector128.Create(value0);
                Vector128<byte> values1 = Vector128.Create(value1);
                Vector128<byte> search = LoadVector128(ref searchSpace, offset);
                // Same method as above
                int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                if (matches == 0)
                {
                    // Zero flags set so no matches
                    offset += Vector128<byte>.Count;
                }
                else
                {
                    // Find bitflag offset of first match and add to current offset
                    return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                }
            }
            if ((int)(byte*)offset < length)
            {
                // Sub-vector tail: finish with the scalar scan.
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    else if (Sse2.IsSupported)
    {
        if ((int)(byte*)offset < length)
        {
            nLength = GetByteVector128SpanLength(offset, length);
            Vector128<byte> values0 = Vector128.Create(value0);
            Vector128<byte> values1 = Vector128.Create(value1);
            while ((byte*)nLength > (byte*)offset)
            {
                Vector128<byte> search = LoadVector128(ref searchSpace, offset);
                // Same method as above
                int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                if (matches == 0)
                {
                    // Zero flags set so no matches
                    offset += Vector128<byte>.Count;
                    continue;
                }
                // Find bitflag offset of first match and add to current offset
                return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
            }
            if ((int)(byte*)offset < length)
            {
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if ((int)(byte*)offset < length)
        {
            nLength = GetByteVectorSpanLength(offset, length);
            Vector<byte> values0 = new Vector<byte>(value0);
            Vector<byte> values1 = new Vector<byte>(value1);
            while ((byte*)nLength > (byte*)offset)
            {
                Vector<byte> search = LoadVector(ref searchSpace, offset);
                // Combine the per-lane equality results for both target values.
                var matches = Vector.BitwiseOr(
                                Vector.Equals(search, values0),
                                Vector.Equals(search, values1));
                if (Vector<byte>.Zero.Equals(matches))
                {
                    offset += Vector<byte>.Count;
                    continue;
                }
                // Find offset of first match and add to current offset
                return (int)(byte*)offset + LocateFirstFoundByte(matches);
            }
            if ((int)(byte*)offset < length)
            {
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    return -1;
Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    return (int)(byte*)offset;
Found1:
    return (int)(byte*)(offset + 1);
Found2:
    return (int)(byte*)(offset + 2);
Found3:
    return (int)(byte*)(offset + 3);
Found4:
    return (int)(byte*)(offset + 4);
Found5:
    return (int)(byte*)(offset + 5);
Found6:
    return (int)(byte*)(offset + 6);
Found7:
    return (int)(byte*)(offset + 7);
}
/// <summary>
/// Returns the zero-based index of the first occurrence of <paramref name="value0"/>,
/// <paramref name="value1"/> or <paramref name="value2"/> within the first
/// <paramref name="length"/> bytes starting at <paramref name="searchSpace"/>,
/// or -1 when none of the three values occurs.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byte value2, int length)
{
    Debug.Assert(length >= 0);

    uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    uint uValue1 = value1;
    uint uValue2 = value2;
    IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)length;

    // When a vector path exists and the input is at least two vectors long, limit the
    // first scalar pass to the bytes preceding the first aligned vector boundary.
    if (Avx2.IsSupported || Sse2.IsSupported)
    {
        // Avx2 branch also operates on Sse2 sizes, so check is combined.
        if (length >= Vector128<byte>.Count * 2)
        {
            nLength = UnalignedByteCountVector128(ref searchSpace);
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if (length >= Vector<byte>.Count * 2)
        {
            nLength = UnalignedByteCountVector(ref searchSpace);
        }
    }
SequentialScan:
    // Scalar scan over the next nLength bytes, unrolled 8x, then 4x, then 1x.
    uint lookUp;
    while ((byte*)nLength >= (byte*)8)
    {
        nLength -= 8;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found3;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found4;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found5;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found6;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found7;

        offset += 8;
    }

    if ((byte*)nLength >= (byte*)4)
    {
        nLength -= 4;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found3;

        offset += 4;
    }

    while ((byte*)nLength > (byte*)0)
    {
        nLength -= 1;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found;

        offset += 1;
    }

    // Scalar prefix exhausted without a match; continue with the widest available
    // vector path over the remaining (now aligned) bytes.
    if (Avx2.IsSupported)
    {
        if ((int)(byte*)offset < length)
        {
            // 256-bit pass over as many whole Vector256 blocks as remain.
            nLength = GetByteVector256SpanLength(offset, length);
            if ((byte*)nLength > (byte*)offset)
            {
                Vector256<byte> values0 = Vector256.Create(value0);
                Vector256<byte> values1 = Vector256.Create(value1);
                Vector256<byte> values2 = Vector256.Create(value2);
                do
                {
                    Vector256<byte> search = LoadVector256(ref searchSpace, offset);
                    // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                    // So the bit position in 'matches' corresponds to the element offset.
                    int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search));
                    // Bitwise Or to combine the flagged matches for the second value to our match flags
                    matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search));
                    // Bitwise Or to combine the flagged matches for the third value to our match flags
                    matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search));
                    if (matches == 0)
                    {
                        // Zero flags set so no matches
                        offset += Vector256<byte>.Count;
                        continue;
                    }

                    // Find bitflag offset of first match and add to current offset
                    return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                } while ((byte*)nLength > (byte*)offset);
            }

            // At most one whole Vector128 block may remain: check it with SSE2.
            nLength = GetByteVector128SpanLength(offset, length);
            if ((byte*)nLength > (byte*)offset)
            {
                Vector128<byte> values0 = Vector128.Create(value0);
                Vector128<byte> values1 = Vector128.Create(value1);
                Vector128<byte> values2 = Vector128.Create(value2);
                Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                // Same method as above
                int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search));
                if (matches == 0)
                {
                    // Zero flags set so no matches
                    offset += Vector128<byte>.Count;
                }
                else
                {
                    // Find bitflag offset of first match and add to current offset
                    return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
                }
            }

            // Fewer than Vector128<byte>.Count bytes left: finish with the scalar scan.
            if ((int)(byte*)offset < length)
            {
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    else if (Sse2.IsSupported)
    {
        if ((int)(byte*)offset < length)
        {
            // 128-bit pass over as many whole Vector128 blocks as remain.
            nLength = GetByteVector128SpanLength(offset, length);

            Vector128<byte> values0 = Vector128.Create(value0);
            Vector128<byte> values1 = Vector128.Create(value1);
            Vector128<byte> values2 = Vector128.Create(value2);
            while ((byte*)nLength > (byte*)offset)
            {
                Vector128<byte> search = LoadVector128(ref searchSpace, offset);

                // Same method as above
                int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search));
                matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search));
                matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search));
                if (matches == 0)
                {
                    // Zero flags set so no matches
                    offset += Vector128<byte>.Count;
                    continue;
                }

                // Find bitflag offset of first match and add to current offset
                return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches);
            }

            // Remaining tail (shorter than one vector) goes back to the scalar scan.
            if ((int)(byte*)offset < length)
            {
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if ((int)(byte*)offset < length)
        {
            // Portable Vector<T> pass for platforms without Sse2/Avx2 intrinsics.
            nLength = GetByteVectorSpanLength(offset, length);

            Vector<byte> values0 = new Vector<byte>(value0);
            Vector<byte> values1 = new Vector<byte>(value1);
            Vector<byte> values2 = new Vector<byte>(value2);

            while ((byte*)nLength > (byte*)offset)
            {
                Vector<byte> search = LoadVector(ref searchSpace, offset);

                var matches = Vector.BitwiseOr(
                                Vector.BitwiseOr(
                                    Vector.Equals(search, values0),
                                    Vector.Equals(search, values1)),
                                Vector.Equals(search, values2));

                if (Vector<byte>.Zero.Equals(matches))
                {
                    offset += Vector<byte>.Count;
                    continue;
                }

                // Find offset of first match and add to current offset
                return (int)(byte*)offset + LocateFirstFoundByte(matches);
            }

            // Remaining tail (shorter than one vector) goes back to the scalar scan.
            if ((int)(byte*)offset < length)
            {
                nLength = (IntPtr)(length - (int)(byte*)offset);
                goto SequentialScan;
            }
        }
    }
    return -1;
Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    return (int)(byte*)offset;
Found1:
    return (int)(byte*)(offset + 1);
Found2:
    return (int)(byte*)(offset + 2);
Found3:
    return (int)(byte*)(offset + 3);
Found4:
    return (int)(byte*)(offset + 4);
Found5:
    return (int)(byte*)(offset + 5);
Found6:
    return (int)(byte*)(offset + 6);
Found7:
    return (int)(byte*)(offset + 7);
}
/// <summary>
/// Searches backwards for the last occurrence of <paramref name="value0"/> or
/// <paramref name="value1"/> within the first <paramref name="length"/> bytes of
/// <paramref name="searchSpace"/>. Returns its zero-based index, or -1 when neither occurs.
/// </summary>
public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte value1, int length)
{
    Debug.Assert(length >= 0);

    uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    uint uValue1 = value1;
    // 'offset' starts one past the last byte and is decremented before each read.
    IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)length;

    // When vectorisation is worthwhile, first scan only the unaligned tail scalar-wise so
    // the vector loop below ends on an aligned boundary.
    if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
    {
        nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length);
    }
SequentialScan:
    // Backwards scalar scan over the next nLength bytes, unrolled 8x, then 4x, then 1x.
    uint lookUp;
    while ((byte*)nLength >= (byte*)8)
    {
        nLength -= 8;
        offset -= 8;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found7;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found6;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found5;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found4;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found3;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found;
    }

    if ((byte*)nLength >= (byte*)4)
    {
        nLength -= 4;
        offset -= 4;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found3;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found;
    }

    while ((byte*)nLength > (byte*)0)
    {
        nLength -= 1;
        offset -= 1;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp)
            goto Found;
    }

    // Scalar tail exhausted; 'offset' bytes remain in front. Scan them a vector at a
    // time, moving backwards from 'offset'.
    if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0))
    {
        // Whole-vector portion of the remaining prefix (rounded down to a multiple of Count).
        nLength = (IntPtr)((int)(byte*)offset & ~(Vector<byte>.Count - 1));

        Vector<byte> values0 = new Vector<byte>(value0);
        Vector<byte> values1 = new Vector<byte>(value1);

        while ((byte*)nLength > (byte*)(Vector<byte>.Count - 1))
        {
            Vector<byte> search = LoadVector(ref searchSpace, offset - Vector<byte>.Count);
            var matches = Vector.BitwiseOr(
                            Vector.Equals(search, values0),
                            Vector.Equals(search, values1));
            if (Vector<byte>.Zero.Equals(matches))
            {
                offset -= Vector<byte>.Count;
                nLength -= Vector<byte>.Count;
                continue;
            }

            // Find offset of first match and add to current offset
            return (int)(offset) - Vector<byte>.Count + LocateLastFoundByte(matches);
        }

        // Leftover prefix (shorter than one vector) goes back to the scalar scan.
        if ((byte*)offset > (byte*)0)
        {
            nLength = offset;
            goto SequentialScan;
        }
    }
    return -1;
Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    return (int)(byte*)offset;
Found1:
    return (int)(byte*)(offset + 1);
Found2:
    return (int)(byte*)(offset + 2);
Found3:
    return (int)(byte*)(offset + 3);
Found4:
    return (int)(byte*)(offset + 4);
Found5:
    return (int)(byte*)(offset + 5);
Found6:
    return (int)(byte*)(offset + 6);
Found7:
    return (int)(byte*)(offset + 7);
}
/// <summary>
/// Searches backwards for the last occurrence of <paramref name="value0"/>,
/// <paramref name="value1"/> or <paramref name="value2"/> within the first
/// <paramref name="length"/> bytes of <paramref name="searchSpace"/>.
/// Returns its zero-based index, or -1 when none of the three values occurs.
/// </summary>
public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte value1, byte value2, int length)
{
    Debug.Assert(length >= 0);

    uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions
    uint uValue1 = value1;
    uint uValue2 = value2;
    // 'offset' starts one past the last byte and is decremented before each read.
    IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)length;

    // When vectorisation is worthwhile, first scan only the unaligned tail scalar-wise so
    // the vector loop below ends on an aligned boundary.
    if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
    {
        nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length);
    }
SequentialScan:
    // Backwards scalar scan over the next nLength bytes, unrolled 8x, then 4x, then 1x.
    uint lookUp;
    while ((byte*)nLength >= (byte*)8)
    {
        nLength -= 8;
        offset -= 8;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found7;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found6;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found5;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found4;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found3;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found;
    }

    if ((byte*)nLength >= (byte*)4)
    {
        nLength -= 4;
        offset -= 4;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found3;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found2;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found1;
        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found;
    }

    while ((byte*)nLength > (byte*)0)
    {
        nLength -= 1;
        offset -= 1;

        lookUp = Unsafe.AddByteOffset(ref searchSpace, offset);
        if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp)
            goto Found;
    }

    // Scalar tail exhausted; 'offset' bytes remain in front. Scan them a vector at a
    // time, moving backwards from 'offset'.
    if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0))
    {
        // Whole-vector portion of the remaining prefix (rounded down to a multiple of Count).
        nLength = (IntPtr)((int)(byte*)offset & ~(Vector<byte>.Count - 1));

        Vector<byte> values0 = new Vector<byte>(value0);
        Vector<byte> values1 = new Vector<byte>(value1);
        Vector<byte> values2 = new Vector<byte>(value2);

        while ((byte*)nLength > (byte*)(Vector<byte>.Count - 1))
        {
            Vector<byte> search = LoadVector(ref searchSpace, offset - Vector<byte>.Count);

            var matches = Vector.BitwiseOr(
                            Vector.BitwiseOr(
                                Vector.Equals(search, values0),
                                Vector.Equals(search, values1)),
                            Vector.Equals(search, values2));

            if (Vector<byte>.Zero.Equals(matches))
            {
                offset -= Vector<byte>.Count;
                nLength -= Vector<byte>.Count;
                continue;
            }

            // Find offset of first match and add to current offset
            return (int)(offset) - Vector<byte>.Count + LocateLastFoundByte(matches);
        }

        // Leftover prefix (shorter than one vector) goes back to the scalar scan.
        if ((byte*)offset > (byte*)0)
        {
            nLength = offset;
            goto SequentialScan;
        }
    }
    return -1;
Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    return (int)(byte*)offset;
Found1:
    return (int)(byte*)(offset + 1);
Found2:
    return (int)(byte*)(offset + 2);
Found3:
    return (int)(byte*)(offset + 3);
Found4:
    return (int)(byte*)(offset + 4);
Found5:
    return (int)(byte*)(offset + 5);
Found6:
    return (int)(byte*)(offset + 6);
Found7:
    return (int)(byte*)(offset + 7);
}
// Optimized byte-based SequenceEquals. The "length" parameter for this one is declared a nuint rather than int as we also use it for types other than byte
// where the length can exceed 2Gb once scaled by sizeof(T).
/// <summary>
/// Returns true when the <paramref name="length"/> bytes at <paramref name="first"/>
/// and <paramref name="second"/> are identical.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint length)
{
    // Same reference means same bytes: trivially equal without reading anything.
    if (Unsafe.AreSame(ref first, ref second))
        goto Equal;

    IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)(void*)length;

    if (Vector.IsHardwareAccelerated && (byte*)nLength >= (byte*)Vector<byte>.Count)
    {
        // Compare one vector at a time; the final comparison re-reads the last
        // Vector<byte>.Count bytes (possibly overlapping the previous block) so no
        // scalar tail loop is needed.
        nLength -= Vector<byte>.Count;
        while ((byte*)nLength > (byte*)offset)
        {
            if (LoadVector(ref first, offset) != LoadVector(ref second, offset))
            {
                goto NotEqual;
            }
            offset += Vector<byte>.Count;
        }
        return LoadVector(ref first, nLength) == LoadVector(ref second, nLength);
    }

    if ((byte*)nLength >= (byte*)sizeof(UIntPtr))
    {
        // Same overlapping-final-read technique with native-word sized loads.
        nLength -= sizeof(UIntPtr);
        while ((byte*)nLength > (byte*)offset)
        {
            if (LoadUIntPtr(ref first, offset) != LoadUIntPtr(ref second, offset))
            {
                goto NotEqual;
            }
            offset += sizeof(UIntPtr);
        }
        return LoadUIntPtr(ref first, nLength) == LoadUIntPtr(ref second, nLength);
    }

    // Fewer than sizeof(UIntPtr) bytes: plain byte-by-byte comparison.
    while ((byte*)nLength > (byte*)offset)
    {
        if (Unsafe.AddByteOffset(ref first, offset) != Unsafe.AddByteOffset(ref second, offset))
            goto NotEqual;
        offset += 1;
    }

Equal:
    return true;
NotEqual: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    return false;
}
  1157. // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
  1158. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1159. private static int LocateFirstFoundByte(Vector<byte> match)
  1160. {
  1161. var vector64 = Vector.AsVectorUInt64(match);
  1162. ulong candidate = 0;
  1163. int i = 0;
  1164. // Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
  1165. for (; i < Vector<ulong>.Count; i++)
  1166. {
  1167. candidate = vector64[i];
  1168. if (candidate != 0)
  1169. {
  1170. break;
  1171. }
  1172. }
  1173. // Single LEA instruction with jitted const (using function result)
  1174. return i * 8 + LocateFirstFoundByte(candidate);
  1175. }
/// <summary>
/// Lexicographically compares the byte sequences (<paramref name="first"/>,
/// <paramref name="firstLength"/>) and (<paramref name="second"/>, <paramref name="secondLength"/>).
/// Returns the comparison of the first differing byte, or (firstLength - secondLength)
/// when one sequence is a prefix of the other (0 when fully equal).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref byte second, int secondLength)
{
    Debug.Assert(firstLength >= 0);
    Debug.Assert(secondLength >= 0);

    // Same reference: only the lengths can differ.
    if (Unsafe.AreSame(ref first, ref second))
        goto Equal;

    // Only the common prefix needs byte comparison.
    IntPtr minLength = (IntPtr)((firstLength < secondLength) ? firstLength : secondLength);

    IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
    IntPtr nLength = (IntPtr)(void*)minLength;

    if (Avx2.IsSupported)
    {
        if ((byte*)nLength >= (byte*)Vector256<byte>.Count)
        {
            // Compare 32 bytes at a time; the final block is re-read at exactly
            // Vector256<byte>.Count bytes from the end (possibly overlapping).
            nLength -= Vector256<byte>.Count;
            uint matches;
            while ((byte*)nLength > (byte*)offset)
            {
                matches = (uint)Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset)));
                // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                // So the bit position in 'matches' corresponds to the element offset.

                // 32 elements in Vector256<byte> so we compare to uint.MaxValue to check if everything matched
                if (matches == uint.MaxValue)
                {
                    // All matched
                    offset += Vector256<byte>.Count;
                    continue;
                }

                goto Difference;
            }
            // Move to Vector length from end for final compare
            offset = nLength;
            // Same as method as above
            matches = (uint)Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset)));
            if (matches == uint.MaxValue)
            {
                // All matched
                goto Equal;
            }
        Difference:
            // Invert matches to find differences
            uint differences = ~matches;
            // Find bitflag offset of first difference and add to current offset
            offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences));

            int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
            Debug.Assert(result != 0);

            return result;
        }

        if ((byte*)nLength >= (byte*)Vector128<byte>.Count)
        {
            // Common prefix is 16..31 bytes: at most one full block plus an
            // overlapping final block, compared with SSE2.
            nLength -= Vector128<byte>.Count;
            uint matches;
            if ((byte*)nLength > (byte*)offset)
            {
                matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
                // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                // So the bit position in 'matches' corresponds to the element offset.

                // 16 elements in Vector128<byte> so we compare to ushort.MaxValue to check if everything matched
                if (matches == ushort.MaxValue)
                {
                    // All matched
                    offset += Vector128<byte>.Count;
                }
                else
                {
                    goto Difference;
                }
            }
            // Move to Vector length from end for final compare
            offset = nLength;
            // Same as method as above
            matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
            if (matches == ushort.MaxValue)
            {
                // All matched
                goto Equal;
            }
        Difference:
            // Invert matches to find differences
            uint differences = ~matches;
            // Find bitflag offset of first difference and add to current offset
            offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences));

            int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
            Debug.Assert(result != 0);

            return result;
        }
    }
    else if (Sse2.IsSupported)
    {
        if ((byte*)nLength >= (byte*)Vector128<byte>.Count)
        {
            // Compare 16 bytes at a time with an overlapping final block, as above.
            nLength -= Vector128<byte>.Count;
            uint matches;
            while ((byte*)nLength > (byte*)offset)
            {
                matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
                // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                // So the bit position in 'matches' corresponds to the element offset.

                // 16 elements in Vector128<byte> so we compare to ushort.MaxValue to check if everything matched
                if (matches == ushort.MaxValue)
                {
                    // All matched
                    offset += Vector128<byte>.Count;
                    continue;
                }

                goto Difference;
            }
            // Move to Vector length from end for final compare
            offset = nLength;
            // Same as method as above
            matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset)));
            if (matches == ushort.MaxValue)
            {
                // All matched
                goto Equal;
            }
        Difference:
            // Invert matches to find differences
            uint differences = ~matches;
            // Find bitflag offset of first difference and add to current offset
            offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences));

            int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
            Debug.Assert(result != 0);

            return result;
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if ((byte*)nLength > (byte*)Vector<byte>.Count)
        {
            // Portable vector pass: find the first differing vector, then fall
            // through to the bytewise loop to locate the exact byte.
            nLength -= Vector<byte>.Count;
            while ((byte*)nLength > (byte*)offset)
            {
                if (LoadVector(ref first, offset) != LoadVector(ref second, offset))
                {
                    goto BytewiseCheck;
                }
                offset += Vector<byte>.Count;
            }
            goto BytewiseCheck;
        }
    }

    if ((byte*)nLength > (byte*)sizeof(UIntPtr))
    {
        // Native-word sized scan to narrow down the differing region.
        nLength -= sizeof(UIntPtr);
        while ((byte*)nLength > (byte*)offset)
        {
            if (LoadUIntPtr(ref first, offset) != LoadUIntPtr(ref second, offset))
            {
                goto BytewiseCheck;
            }
            offset += sizeof(UIntPtr);
        }
    }

BytewiseCheck: // Workaround for https://github.com/dotnet/coreclr/issues/13549
    // Compare the remaining common prefix one byte at a time.
    while ((byte*)minLength > (byte*)offset)
    {
        int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset));
        if (result != 0)
            return result;
        offset += 1;
    }

Equal:
    // Common prefixes are equal: the longer sequence sorts later.
    return firstLength - secondLength;
}
  1341. // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
  1342. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1343. private static int LocateLastFoundByte(Vector<byte> match)
  1344. {
  1345. var vector64 = Vector.AsVectorUInt64(match);
  1346. ulong candidate = 0;
  1347. int i = Vector<ulong>.Count - 1;
  1348. // Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
  1349. for (; i >= 0; i--)
  1350. {
  1351. candidate = vector64[i];
  1352. if (candidate != 0)
  1353. {
  1354. break;
  1355. }
  1356. }
  1357. // Single LEA instruction with jitted const (using function result)
  1358. return i * 8 + LocateLastFoundByte(candidate);
  1359. }
  1360. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1361. private static int LocateFirstFoundByte(ulong match)
  1362. {
  1363. // TODO: Arm variants
  1364. if (Bmi1.X64.IsSupported)
  1365. {
  1366. return (int)(Bmi1.X64.TrailingZeroCount(match) >> 3);
  1367. }
  1368. else
  1369. {
  1370. // Flag least significant power of two bit
  1371. var powerOfTwoFlag = match ^ (match - 1);
  1372. // Shift all powers of two into the high byte and extract
  1373. return (int)((powerOfTwoFlag * XorPowerOfTwoToHighByte) >> 57);
  1374. }
  1375. }
  1376. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1377. private static int LocateLastFoundByte(ulong match)
  1378. {
  1379. // TODO: Arm variants
  1380. if (Lzcnt.X64.IsSupported)
  1381. {
  1382. return 7 - (int)(Lzcnt.X64.LeadingZeroCount(match) >> 3);
  1383. }
  1384. else
  1385. {
  1386. // Find the most significant byte that has its highest bit set
  1387. int index = 7;
  1388. while ((long)match > 0)
  1389. {
  1390. match = match << 8;
  1391. index--;
  1392. }
  1393. return index;
  1394. }
  1395. }
// Multiplier used by the non-BMI1 path of LocateFirstFoundByte(ulong): multiplying the
// isolated low-bit mask (match ^ (match - 1)) by this constant moves the byte index of
// the lowest set byte into the top bits of the product, which ">> 57" then extracts.
private const ulong XorPowerOfTwoToHighByte = (0x07ul |
                                               0x06ul << 8 |
                                               0x05ul << 16 |
                                               0x04ul << 24 |
                                               0x03ul << 32 |
                                               0x02ul << 40 |
                                               0x01ul << 48) + 1;
  1403. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1404. private static unsafe UIntPtr LoadUIntPtr(ref byte start, IntPtr offset)
  1405. => Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.AddByteOffset(ref start, offset));
  1406. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1407. private static unsafe Vector<byte> LoadVector(ref byte start, IntPtr offset)
  1408. => Unsafe.ReadUnaligned<Vector<byte>>(ref Unsafe.AddByteOffset(ref start, offset));
  1409. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1410. private static unsafe Vector128<byte> LoadVector128(ref byte start, IntPtr offset)
  1411. => Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.AddByteOffset(ref start, offset));
  1412. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1413. private static unsafe Vector256<byte> LoadVector256(ref byte start, IntPtr offset)
  1414. => Unsafe.ReadUnaligned<Vector256<byte>>(ref Unsafe.AddByteOffset(ref start, offset));
  1415. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1416. private static unsafe IntPtr GetByteVectorSpanLength(IntPtr offset, int length)
  1417. => (IntPtr)((length - (int)(byte*)offset) & ~(Vector<byte>.Count - 1));
  1418. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1419. private static unsafe IntPtr GetByteVector128SpanLength(IntPtr offset, int length)
  1420. => (IntPtr)((length - (int)(byte*)offset) & ~(Vector128<byte>.Count - 1));
  1421. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1422. private static unsafe IntPtr GetByteVector256SpanLength(IntPtr offset, int length)
  1423. => (IntPtr)((length - (int)(byte*)offset) & ~(Vector256<byte>.Count - 1));
  1424. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1425. private static unsafe IntPtr UnalignedByteCountVector(ref byte searchSpace)
  1426. {
  1427. int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector<byte>.Count - 1);
  1428. return (IntPtr)((Vector<byte>.Count - unaligned) & (Vector<byte>.Count - 1));
  1429. }
  1430. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1431. private static unsafe IntPtr UnalignedByteCountVector128(ref byte searchSpace)
  1432. {
  1433. int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector128<byte>.Count - 1);
  1434. return (IntPtr)((Vector128<byte>.Count - unaligned) & (Vector128<byte>.Count - 1));
  1435. }
  1436. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  1437. private static unsafe IntPtr UnalignedByteCountVectorFromEnd(ref byte searchSpace, int length)
  1438. {
  1439. int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector<byte>.Count - 1);
  1440. return (IntPtr)(((length & (Vector<byte>.Count - 1)) + unaligned) & (Vector<byte>.Count - 1));
  1441. }
  1442. }
  1443. }