// SpanHelpers.cs
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Runtime;
  6. using Internal.Runtime.CompilerServices;
  7. #if BIT64
  8. using nuint = System.UInt64;
  9. #else
  10. using nuint = System.UInt32;
  11. #endif
  12. namespace System
  13. {
  14. internal static partial class SpanHelpers
  15. {
        /// <summary>
        /// Zeroes <paramref name="byteLength"/> bytes starting at the location referenced by
        /// <paramref name="b"/>. Intended for memory that contains no GC references
        /// (the sibling <c>ClearWithReferences</c> handles reference-containing memory):
        /// stores here are byte/word sized with no pointer-sized atomicity guarantee.
        /// </summary>
        /// <param name="b">Reference to the first byte of the region to clear.</param>
        /// <param name="byteLength">Number of bytes to clear; zero is a no-op.</param>
        public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
        {
            if (byteLength == 0)
                return;

#if !PROJECTN && (AMD64 || ARM64)
            // The exact matrix on when RhZeroMemory is faster than InitBlockUnaligned is very complex. The factors to consider include
            // type of hardware and memory alignment. This threshold was chosen as a good balance across different configurations.
            if (byteLength > 768)
                goto PInvoke;
            Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength);
            return;
#else
            // TODO: Optimize other platforms to be on par with AMD64 CoreCLR
            // Note: It's important that this switch handles lengths at least up to 22.
            // See notes below near the main loop for why.

            // The switch will be very fast since it can be implemented using a jump
            // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info.
            switch (byteLength)
            {
                case 1:
                    b = 0;
                    return;
                case 2:
                    Unsafe.As<byte, short>(ref b) = 0;
                    return;
                case 3:
                    Unsafe.As<byte, short>(ref b) = 0;
                    Unsafe.Add<byte>(ref b, 2) = 0;
                    return;
                case 4:
                    Unsafe.As<byte, int>(ref b) = 0;
                    return;
                case 5:
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.Add<byte>(ref b, 4) = 0;
                    return;
                case 6:
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    return;
                case 7:
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.Add<byte>(ref b, 6) = 0;
                    return;
                case 8:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    return;
                case 9:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.Add<byte>(ref b, 8) = 0;
                    return;
                case 10:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    return;
                case 11:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.Add<byte>(ref b, 10) = 0;
                    return;
                case 12:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    return;
                case 13:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.Add<byte>(ref b, 12) = 0;
                    return;
                case 14:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
                    return;
                case 15:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
                    Unsafe.Add<byte>(ref b, 14) = 0;
                    return;
                case 16:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    return;
                case 17:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.Add<byte>(ref b, 16) = 0;
                    return;
                case 18:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    return;
                case 19:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    Unsafe.Add<byte>(ref b, 18) = 0;
                    return;
                case 20:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    return;
                case 21:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    Unsafe.Add<byte>(ref b, 20) = 0;
                    return;
                case 22:
#if BIT64
                    Unsafe.As<byte, long>(ref b) = 0;
                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
                    Unsafe.As<byte, int>(ref b) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
                    Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
                    Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 20)) = 0;
                    return;
            }

            // P/Invoke into the native version for large lengths
            if (byteLength >= 512) goto PInvoke;

            nuint i = 0; // byte offset at which we're clearing

            // Align the destination to a 4-byte boundary first: clear a byte and/or a
            // short as required by the low bits of the starting address.
            if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0)
            {
                if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0)
                {
                    b = 0;
                    i += 1;
                    if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0)
                        goto IntAligned;
                }
                Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 2;
            }

            IntAligned:

            // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If
            // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1
            // bytes to the next aligned address (respectively), so do nothing. On the other hand,
            // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until
            // we're aligned.
            // The thing 1, 2, 3, and 4 have in common that the others don't is that if you
            // subtract one from them, their 3rd lsb will not be set. Hence, the below check.
            if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0)
            {
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 4;
            }

            nuint end = byteLength - 16;
            byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop

            // We know due to the above switch-case that this loop will always run 1 iteration; max
            // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so
            // the switch handles lengths 0-22.
            Debug.Assert(end >= 7 && i <= end);

            // This is separated out into a different variable, so the i + 16 addition can be
            // performed at the start of the pipeline and the loop condition does not have
            // a dependency on the writes.
            nuint counter;

            do
            {
                counter = i + 16;

                // This loop looks very costly since there appear to be a bunch of temporary values
                // being created with the adds, but the jit (for x86 anyways) will convert each of
                // these to use memory addressing operands.

                // So the only cost is a bit of code size, which is made up for by the fact that
                // we save on writes to b.

#if BIT64
                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
#else
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 12)) = 0;
#endif

                i = counter;

                // See notes above for why this wasn't used instead
                // i += 16;
            }
            while (counter <= end);

            // Tail: the low 4 bits of the (adjusted) byteLength tell us which of an
            // 8/4/2/1-byte write is still needed to finish the region.
            if ((byteLength & 8) != 0)
            {
#if BIT64
                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
#else
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
#endif
                i += 8;
            }
            if ((byteLength & 4) != 0)
            {
                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 4;
            }
            if ((byteLength & 2) != 0)
            {
                Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
                i += 2;
            }
            if ((byteLength & 1) != 0)
            {
                Unsafe.AddByteOffset<byte>(ref b, i) = 0;
                // We're not using i after this, so not needed
                // i += 1;
            }

            return;
#endif

            PInvoke:
            RuntimeImports.RhZeroMemory(ref b, byteLength);
        }
  315. public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength)
  316. {
  317. Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary.");
  318. // First write backward 8 natural words at a time.
  319. // Writing backward allows us to get away with only simple modifications to the
  320. // mov instruction's base and index registers between loop iterations.
  321. for (; pointerSizeLength >= 8; pointerSizeLength -= 8)
  322. {
  323. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
  324. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
  325. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
  326. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -4) = default;
  327. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -5) = default;
  328. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -6) = default;
  329. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -7) = default;
  330. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -8) = default;
  331. }
  332. Debug.Assert(pointerSizeLength <= 7);
  333. // The logic below works by trying to minimize the number of branches taken for any
  334. // given range of lengths. For example, the lengths [ 4 .. 7 ] are handled by a single
  335. // branch, [ 2 .. 3 ] are handled by a single branch, and [ 1 ] is handled by a single
  336. // branch.
  337. //
  338. // We can write both forward and backward as a perf improvement. For example,
  339. // the lengths [ 4 .. 7 ] can be handled by zeroing out the first four natural
  340. // words and the last 3 natural words. In the best case (length = 7), there are
  341. // no overlapping writes. In the worst case (length = 4), there are three
  342. // overlapping writes near the middle of the buffer. In perf testing, the
  343. // penalty for performing duplicate writes is less expensive than the penalty
  344. // for complex branching.
  345. if (pointerSizeLength >= 4)
  346. {
  347. goto Write4To7;
  348. }
  349. else if (pointerSizeLength >= 2)
  350. {
  351. goto Write2To3;
  352. }
  353. else if (pointerSizeLength > 0)
  354. {
  355. goto Write1;
  356. }
  357. else
  358. {
  359. return; // nothing to write
  360. }
  361. Write4To7:
  362. Debug.Assert(pointerSizeLength >= 4);
  363. // Write first four and last three.
  364. Unsafe.Add(ref ip, 2) = default;
  365. Unsafe.Add(ref ip, 3) = default;
  366. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
  367. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
  368. Write2To3:
  369. Debug.Assert(pointerSizeLength >= 2);
  370. // Write first two and last one.
  371. Unsafe.Add(ref ip, 1) = default;
  372. Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
  373. Write1:
  374. Debug.Assert(pointerSizeLength >= 1);
  375. // Write only element.
  376. ip = default;
  377. }
  378. }
  379. }