  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Runtime;
  6. using Internal.Runtime.CompilerServices;
  7. #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
  8. #if BIT64
  9. using nuint = System.UInt64;
  10. #else
  11. using nuint = System.UInt32;
  12. #endif
  13. namespace System
  14. {
  15. internal static partial class SpanHelpers
  16. {
        /// <summary>
        /// Sets <paramref name="byteLength"/> bytes starting at <paramref name="b"/> to zero.
        /// Intended for memory that contains no GC references; the companion
        /// <c>ClearWithReferences</c> in this class handles reference-containing memory.
        /// </summary>
        /// <param name="b">Reference to the first byte of the region to clear.</param>
        /// <param name="byteLength">Number of bytes to clear (may exceed int.MaxValue on 64-bit).</param>
        public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
        {
            if (byteLength == 0)
                return;

#if AMD64 || ARM64
            // The exact matrix on when ZeroMemory is faster than InitBlockUnaligned is very complex. The factors to consider include
            // type of hardware and memory alignment. This threshold was chosen as a good balance across different configurations.
            if (byteLength > 768)
                goto PInvoke;
            Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength);
            return;
#else
            // TODO: Optimize other platforms to be on par with AMD64 CoreCLR
            // Note: It's important that this switch handles lengths at least up to 22.
            // See notes below near the main loop for why.

            // The switch will be very fast since it can be implemented using a jump
            // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info.
            // Each case clears its exact length using the widest stores available
            // (long on 64-bit, int on 32-bit), finishing with short/byte stores for the tail.
            switch (byteLength)
            {
                case 1:
                    b = 0;
                    return;
                case 2:
                    Unsafe.As<byte, short>(ref b) = 0;
                    return;
                case 3:
                    Unsafe.As<byte, short>(ref b) = 0;
                    Unsafe.Add<byte>(ref b, 2) = 0;
                    return;
                case 4:
                    Unsafe.As<byte, int>(ref b) = 0;
                    return;
                case 5:
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.Add<byte>(ref b, 4) = 0;
                    return;
                case 6:
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    return;
                case 7:
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.Add<byte>(ref b, 6) = 0;
                    return;
                case 8:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    return;
                case 9:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.Add<byte>(ref b, 8) = 0;
                    return;
                case 10:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    return;
                case 11:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.Add<byte>(ref b, 10) = 0;
                    return;
                case 12:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    return;
                case 13:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.Add<byte>(ref b, 12) = 0;
                    return;
                case 14:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
                    return;
                case 15:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
                    Unsafe.Add<byte>(ref b, 14) = 0;
                    return;
                case 16:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    return;
                case 17:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.Add<byte>(ref b, 16) = 0;
                    return;
                case 18:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    return;
                case 19:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    Unsafe.Add<byte>(ref b, 18) = 0;
                    return;
                case 20:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    return;
                case 21:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    Unsafe.Add<byte>(ref b, 20) = 0;
                    return;
                case 22:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 20)) = 0;
                    return;
            }

            // P/Invoke into the native version for large lengths
            if (byteLength >= 512) goto PInvoke;

            nuint i = 0; // byte offset at which we're clearing

            // Align the destination to a 4-byte boundary, one byte and/or one short at a
            // time, so the bulk loop below operates on naturally aligned addresses.
            if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0)
            {
                if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0)
                {
                    b = 0;
                    i += 1;
                    // If the original address was odd with bit 1 set (addr % 4 == 3), the
                    // single byte write above already reached a 4-byte boundary.
                    if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0)
                        goto IntAligned;
                }
                Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 2;
            }

        IntAligned:

            // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If
            // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1
            // bytes to the next aligned address (respectively), so do nothing. On the other hand,
            // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until
            // we're aligned.
            // The thing 1, 2, 3, and 4 have in common that the others don't is that if you
            // subtract one from them, their 3rd lsb will not be set. Hence, the below check.
            if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0)
            {
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 4;
            }

            nuint end = byteLength - 16;
            byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop

            // We know due to the above switch-case that this loop will always run 1 iteration; max
            // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so
            // the switch handles lengths 0-22.
            Debug.Assert(end >= 7 && i <= end);

            // This is separated out into a different variable, so the i + 16 addition can be
            // performed at the start of the pipeline and the loop condition does not have
            // a dependency on the writes.
            nuint counter;

            do
            {
                counter = i + 16;

                // This loop looks very costly since there appear to be a bunch of temporary values
                // being created with the adds, but the jit (for x86 anyways) will convert each of
                // these to use memory addressing operands.

                // So the only cost is a bit of code size, which is made up for by the fact that
                // we save on writes to b.

#if BIT64
                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
#else
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 12)) = 0;
#endif

                i = counter;

                // See notes above for why this wasn't used instead
                // i += 16;
            }
            while (counter <= end);

            // Clear the remaining 0-15 bytes; each bit of the residual length gets its own
            // single store of the matching width.
            if ((byteLength & 8) != 0)
            {
#if BIT64
                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
#else
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
#endif
                i += 8;
            }
            if ((byteLength & 4) != 0)
            {
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 4;
            }
            if ((byteLength & 2) != 0)
            {
                Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 2;
            }
            if ((byteLength & 1) != 0)
            {
                Unsafe.AddByteOffset<byte>(ref b, i) = 0;
                // We're not using i after this, so not needed
                // i += 1;
            }

            return;
#endif

        PInvoke:
            Buffer._ZeroMemory(ref b, byteLength);
        }
  316. public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength)
  317. {
  318. Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary.");
  319. // First write backward 8 natural words at a time.
  320. // Writing backward allows us to get away with only simple modifications to the
  321. // mov instruction's base and index registers between loop iterations.
  322. for (; pointerSizeLength >= 8; pointerSizeLength -= 8)
  323. {
  324. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
  325. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
  326. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
  327. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -4) = default;
  328. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -5) = default;
  329. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -6) = default;
  330. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -7) = default;
  331. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -8) = default;
  332. }
  333. Debug.Assert(pointerSizeLength <= 7);
  334. // The logic below works by trying to minimize the number of branches taken for any
  335. // given range of lengths. For example, the lengths [ 4 .. 7 ] are handled by a single
  336. // branch, [ 2 .. 3 ] are handled by a single branch, and [ 1 ] is handled by a single
  337. // branch.
  338. //
  339. // We can write both forward and backward as a perf improvement. For example,
  340. // the lengths [ 4 .. 7 ] can be handled by zeroing out the first four natural
  341. // words and the last 3 natural words. In the best case (length = 7), there are
  342. // no overlapping writes. In the worst case (length = 4), there are three
  343. // overlapping writes near the middle of the buffer. In perf testing, the
  344. // penalty for performing duplicate writes is less expensive than the penalty
  345. // for complex branching.
  346. if (pointerSizeLength >= 4)
  347. {
  348. goto Write4To7;
  349. }
  350. else if (pointerSizeLength >= 2)
  351. {
  352. goto Write2To3;
  353. }
  354. else if (pointerSizeLength > 0)
  355. {
  356. goto Write1;
  357. }
  358. else
  359. {
  360. return; // nothing to write
  361. }
  362. Write4To7:
  363. Debug.Assert(pointerSizeLength >= 4);
  364. // Write first four and last three.
  365. Unsafe.Add(ref ip, 2) = default;
  366. Unsafe.Add(ref ip, 3) = default;
  367. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
  368. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
  369. Write2To3:
  370. Debug.Assert(pointerSizeLength >= 2);
  371. // Write first two and last one.
  372. Unsafe.Add(ref ip, 1) = default;
  373. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
  374. Write1:
  375. Debug.Assert(pointerSizeLength >= 1);
  376. // Write only element.
  377. ip = default;
  378. }
  379. }
  380. }