SpanHelpers.cs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Runtime;
  6. using Internal.Runtime.CompilerServices;
  7. #if BIT64
  8. using nuint = System.UInt64;
  9. #else
  10. using nuint = System.UInt32;
  11. #endif
  12. namespace System
  13. {
  14. internal static partial class SpanHelpers
  15. {
  16. public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
  17. {
  18. if (byteLength == 0)
  19. return;
  20. #if CORECLR && (AMD64 || ARM64)
  21. if (byteLength > 4096)
  22. goto PInvoke;
  23. Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength);
  24. return;
  25. #else
  26. // TODO: Optimize other platforms to be on par with AMD64 CoreCLR
  27. // Note: It's important that this switch handles lengths at least up to 22.
  28. // See notes below near the main loop for why.
  29. // The switch will be very fast since it can be implemented using a jump
  30. // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info.
  31. switch (byteLength)
  32. {
  33. case 1:
  34. b = 0;
  35. return;
  36. case 2:
  37. Unsafe.As<byte, short>(ref b) = 0;
  38. return;
  39. case 3:
  40. Unsafe.As<byte, short>(ref b) = 0;
  41. Unsafe.Add<byte>(ref b, 2) = 0;
  42. return;
  43. case 4:
  44. Unsafe.As<byte, int>(ref b) = 0;
  45. return;
  46. case 5:
  47. Unsafe.As<byte, int>(ref b) = 0;
  48. Unsafe.Add<byte>(ref b, 4) = 0;
  49. return;
  50. case 6:
  51. Unsafe.As<byte, int>(ref b) = 0;
  52. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  53. return;
  54. case 7:
  55. Unsafe.As<byte, int>(ref b) = 0;
  56. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  57. Unsafe.Add<byte>(ref b, 6) = 0;
  58. return;
  59. case 8:
  60. #if BIT64
  61. Unsafe.As<byte, long>(ref b) = 0;
  62. #else
  63. Unsafe.As<byte, int>(ref b) = 0;
  64. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  65. #endif
  66. return;
  67. case 9:
  68. #if BIT64
  69. Unsafe.As<byte, long>(ref b) = 0;
  70. #else
  71. Unsafe.As<byte, int>(ref b) = 0;
  72. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  73. #endif
  74. Unsafe.Add<byte>(ref b, 8) = 0;
  75. return;
  76. case 10:
  77. #if BIT64
  78. Unsafe.As<byte, long>(ref b) = 0;
  79. #else
  80. Unsafe.As<byte, int>(ref b) = 0;
  81. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  82. #endif
  83. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  84. return;
  85. case 11:
  86. #if BIT64
  87. Unsafe.As<byte, long>(ref b) = 0;
  88. #else
  89. Unsafe.As<byte, int>(ref b) = 0;
  90. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  91. #endif
  92. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  93. Unsafe.Add<byte>(ref b, 10) = 0;
  94. return;
  95. case 12:
  96. #if BIT64
  97. Unsafe.As<byte, long>(ref b) = 0;
  98. #else
  99. Unsafe.As<byte, int>(ref b) = 0;
  100. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  101. #endif
  102. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  103. return;
  104. case 13:
  105. #if BIT64
  106. Unsafe.As<byte, long>(ref b) = 0;
  107. #else
  108. Unsafe.As<byte, int>(ref b) = 0;
  109. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  110. #endif
  111. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  112. Unsafe.Add<byte>(ref b, 12) = 0;
  113. return;
  114. case 14:
  115. #if BIT64
  116. Unsafe.As<byte, long>(ref b) = 0;
  117. #else
  118. Unsafe.As<byte, int>(ref b) = 0;
  119. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  120. #endif
  121. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  122. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  123. return;
  124. case 15:
  125. #if BIT64
  126. Unsafe.As<byte, long>(ref b) = 0;
  127. #else
  128. Unsafe.As<byte, int>(ref b) = 0;
  129. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  130. #endif
  131. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  132. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  133. Unsafe.Add<byte>(ref b, 14) = 0;
  134. return;
  135. case 16:
  136. #if BIT64
  137. Unsafe.As<byte, long>(ref b) = 0;
  138. Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  139. #else
  140. Unsafe.As<byte, int>(ref b) = 0;
  141. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  142. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  143. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  144. #endif
  145. return;
  146. case 17:
  147. #if BIT64
  148. Unsafe.As<byte, long>(ref b) = 0;
  149. Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  150. #else
  151. Unsafe.As<byte, int>(ref b) = 0;
  152. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  153. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  154. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  155. #endif
  156. Unsafe.Add<byte>(ref b, 16) = 0;
  157. return;
  158. case 18:
  159. #if BIT64
  160. Unsafe.As<byte, long>(ref b) = 0;
  161. Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  162. #else
  163. Unsafe.As<byte, int>(ref b) = 0;
  164. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  165. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  166. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  167. #endif
  168. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
  169. return;
  170. case 19:
  171. #if BIT64
  172. Unsafe.As<byte, long>(ref b) = 0;
  173. Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  174. #else
  175. Unsafe.As<byte, int>(ref b) = 0;
  176. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  177. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  178. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  179. #endif
  180. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
  181. Unsafe.Add<byte>(ref b, 18) = 0;
  182. return;
  183. case 20:
  184. #if BIT64
  185. Unsafe.As<byte, long>(ref b) = 0;
  186. Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  187. #else
  188. Unsafe.As<byte, int>(ref b) = 0;
  189. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  190. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  191. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  192. #endif
  193. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
  194. return;
  195. case 21:
  196. #if BIT64
  197. Unsafe.As<byte, long>(ref b) = 0;
  198. Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  199. #else
  200. Unsafe.As<byte, int>(ref b) = 0;
  201. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  202. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  203. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  204. #endif
  205. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
  206. Unsafe.Add<byte>(ref b, 20) = 0;
  207. return;
  208. case 22:
  209. #if BIT64
  210. Unsafe.As<byte, long>(ref b) = 0;
  211. Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  212. #else
  213. Unsafe.As<byte, int>(ref b) = 0;
  214. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
  215. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
  216. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
  217. #endif
  218. Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
  219. Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 20)) = 0;
  220. return;
  221. }
  222. // P/Invoke into the native version for large lengths
  223. if (byteLength >= 512) goto PInvoke;
  224. nuint i = 0; // byte offset at which we're copying
  225. if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0)
  226. {
  227. if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0)
  228. {
  229. b = 0;
  230. i += 1;
  231. if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0)
  232. goto IntAligned;
  233. }
  234. Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  235. i += 2;
  236. }
  237. IntAligned:
  238. // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If
  239. // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1
  240. // bytes to the next aligned address (respectively), so do nothing. On the other hand,
  241. // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until
  242. // we're aligned.
  243. // The thing 1, 2, 3, and 4 have in common that the others don't is that if you
  244. // subtract one from them, their 3rd lsb will not be set. Hence, the below check.
  245. if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0)
  246. {
  247. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  248. i += 4;
  249. }
  250. nuint end = byteLength - 16;
  251. byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop
  252. // We know due to the above switch-case that this loop will always run 1 iteration; max
  253. // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so
  254. // the switch handles lengths 0-22.
  255. Debug.Assert(end >= 7 && i <= end);
  256. // This is separated out into a different variable, so the i + 16 addition can be
  257. // performed at the start of the pipeline and the loop condition does not have
  258. // a dependency on the writes.
  259. nuint counter;
  260. do
  261. {
  262. counter = i + 16;
  263. // This loop looks very costly since there appear to be a bunch of temporary values
  264. // being created with the adds, but the jit (for x86 anyways) will convert each of
  265. // these to use memory addressing operands.
  266. // So the only cost is a bit of code size, which is made up for by the fact that
  267. // we save on writes to b.
  268. #if BIT64
  269. Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  270. Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
  271. #else
  272. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  273. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
  274. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
  275. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 12)) = 0;
  276. #endif
  277. i = counter;
  278. // See notes above for why this wasn't used instead
  279. // i += 16;
  280. }
  281. while (counter <= end);
  282. if ((byteLength & 8) != 0)
  283. {
  284. #if BIT64
  285. Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  286. #else
  287. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  288. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
  289. #endif
  290. i += 8;
  291. }
  292. if ((byteLength & 4) != 0)
  293. {
  294. Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  295. i += 4;
  296. }
  297. if ((byteLength & 2) != 0)
  298. {
  299. Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
  300. i += 2;
  301. }
  302. if ((byteLength & 1) != 0)
  303. {
  304. Unsafe.AddByteOffset<byte>(ref b, i) = 0;
  305. // We're not using i after this, so not needed
  306. // i += 1;
  307. }
  308. return;
  309. #endif
  310. PInvoke:
  311. RuntimeImports.RhZeroMemory(ref b, byteLength);
  312. }
  313. public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength)
  314. {
  315. Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary.");
  316. // First write backward 8 natural words at a time.
  317. // Writing backward allows us to get away with only simple modifications to the
  318. // mov instruction's base and index registers between loop iterations.
  319. for (; pointerSizeLength >= 8; pointerSizeLength -= 8)
  320. {
  321. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
  322. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
  323. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
  324. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -4) = default;
  325. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -5) = default;
  326. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -6) = default;
  327. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -7) = default;
  328. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -8) = default;
  329. }
  330. Debug.Assert(pointerSizeLength <= 7);
  331. // The logic below works by trying to minimize the number of branches taken for any
  332. // given range of lengths. For example, the lengths [ 4 .. 7 ] are handled by a single
  333. // branch, [ 2 .. 3 ] are handled by a single branch, and [ 1 ] is handled by a single
  334. // branch.
  335. //
  336. // We can write both forward and backward as a perf improvement. For example,
  337. // the lengths [ 4 .. 7 ] can be handled by zeroing out the first four natural
  338. // words and the last 3 natural words. In the best case (length = 7), there are
  339. // no overlapping writes. In the worst case (length = 4), there are three
  340. // overlapping writes near the middle of the buffer. In perf testing, the
  341. // penalty for performing duplicate writes is less expensive than the penalty
  342. // for complex branching.
  343. if (pointerSizeLength >= 4)
  344. {
  345. goto Write4To7;
  346. }
  347. else if (pointerSizeLength >= 2)
  348. {
  349. goto Write2To3;
  350. }
  351. else if (pointerSizeLength > 0)
  352. {
  353. goto Write1;
  354. }
  355. else
  356. {
  357. return; // nothing to write
  358. }
  359. Write4To7:
  360. Debug.Assert(pointerSizeLength >= 4);
  361. // Write first four and last three.
  362. Unsafe.Add(ref ip, 2) = default;
  363. Unsafe.Add(ref ip, 3) = default;
  364. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
  365. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
  366. Write2To3:
  367. Debug.Assert(pointerSizeLength >= 2);
  368. // Write first two and last one.
  369. Unsafe.Add(ref ip, 1) = default;
  370. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
  371. Write1:
  372. Debug.Assert(pointerSizeLength >= 1);
  373. // Write only element.
  374. ip = default;
  375. }
  376. }
  377. }