| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414 |
- // Licensed to the .NET Foundation under one or more agreements.
- // The .NET Foundation licenses this file to you under the MIT license.
- // See the LICENSE file in the project root for more information.
- using System.Diagnostics;
- using System.Runtime;
- using Internal.Runtime.CompilerServices;
- #if BIT64
- using nuint = System.UInt64;
- #else
- using nuint = System.UInt32;
- #endif
- namespace System
- {
- internal static partial class SpanHelpers
- {
- public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
- {
- if (byteLength == 0)
- return;
- #if CORECLR && (AMD64 || ARM64)
- if (byteLength > 4096)
- goto PInvoke;
- Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength);
- return;
- #else
- // TODO: Optimize other platforms to be on par with AMD64 CoreCLR
- // Note: It's important that this switch handles lengths at least up to 22.
- // See notes below near the main loop for why.
- // The switch will be very fast since it can be implemented using a jump
- // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info.
- switch (byteLength)
- {
- case 1:
- b = 0;
- return;
- case 2:
- Unsafe.As<byte, short>(ref b) = 0;
- return;
- case 3:
- Unsafe.As<byte, short>(ref b) = 0;
- Unsafe.Add<byte>(ref b, 2) = 0;
- return;
- case 4:
- Unsafe.As<byte, int>(ref b) = 0;
- return;
- case 5:
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.Add<byte>(ref b, 4) = 0;
- return;
- case 6:
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- return;
- case 7:
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.Add<byte>(ref b, 6) = 0;
- return;
- case 8:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- return;
- case 9:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- Unsafe.Add<byte>(ref b, 8) = 0;
- return;
- case 10:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- return;
- case 11:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.Add<byte>(ref b, 10) = 0;
- return;
- case 12:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- return;
- case 13:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.Add<byte>(ref b, 12) = 0;
- return;
- case 14:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- return;
- case 15:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- #endif
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- Unsafe.Add<byte>(ref b, 14) = 0;
- return;
- case 16:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- #endif
- return;
- case 17:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- #endif
- Unsafe.Add<byte>(ref b, 16) = 0;
- return;
- case 18:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- #endif
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
- return;
- case 19:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- #endif
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
- Unsafe.Add<byte>(ref b, 18) = 0;
- return;
- case 20:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- #endif
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
- return;
- case 21:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- #endif
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
- Unsafe.Add<byte>(ref b, 20) = 0;
- return;
- case 22:
- #if BIT64
- Unsafe.As<byte, long>(ref b) = 0;
- Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref b) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
- #endif
- Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
- Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 20)) = 0;
- return;
- }
- // P/Invoke into the native version for large lengths
- if (byteLength >= 512) goto PInvoke;
- nuint i = 0; // byte offset at which we're copying
- if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0)
- {
- if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0)
- {
- b = 0;
- i += 1;
- if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0)
- goto IntAligned;
- }
- Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- i += 2;
- }
- IntAligned:
- // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If
- // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1
- // bytes to the next aligned address (respectively), so do nothing. On the other hand,
- // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until
- // we're aligned.
- // The thing 1, 2, 3, and 4 have in common that the others don't is that if you
- // subtract one from them, their 3rd lsb will not be set. Hence, the below check.
- if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0)
- {
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- i += 4;
- }
- nuint end = byteLength - 16;
- byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop
- // We know due to the above switch-case that this loop will always run 1 iteration; max
- // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so
- // the switch handles lengths 0-22.
- Debug.Assert(end >= 7 && i <= end);
- // This is separated out into a different variable, so the i + 16 addition can be
- // performed at the start of the pipeline and the loop condition does not have
- // a dependency on the writes.
- nuint counter;
- do
- {
- counter = i + 16;
- // This loop looks very costly since there appear to be a bunch of temporary values
- // being created with the adds, but the jit (for x86 anyways) will convert each of
- // these to use memory addressing operands.
- // So the only cost is a bit of code size, which is made up for by the fact that
- // we save on writes to b.
- #if BIT64
- Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
- #else
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 12)) = 0;
- #endif
- i = counter;
- // See notes above for why this wasn't used instead
- // i += 16;
- }
- while (counter <= end);
- if ((byteLength & 8) != 0)
- {
- #if BIT64
- Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- #else
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
- #endif
- i += 8;
- }
- if ((byteLength & 4) != 0)
- {
- Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- i += 4;
- }
- if ((byteLength & 2) != 0)
- {
- Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
- i += 2;
- }
- if ((byteLength & 1) != 0)
- {
- Unsafe.AddByteOffset<byte>(ref b, i) = 0;
- // We're not using i after this, so not needed
- // i += 1;
- }
- return;
- #endif
- PInvoke:
- RuntimeImports.RhZeroMemory(ref b, byteLength);
- }
- public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength)
- {
- Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary.");
- // First write backward 8 natural words at a time.
- // Writing backward allows us to get away with only simple modifications to the
- // mov instruction's base and index registers between loop iterations.
- for (; pointerSizeLength >= 8; pointerSizeLength -= 8)
- {
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -4) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -5) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -6) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -7) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -8) = default;
- }
- Debug.Assert(pointerSizeLength <= 7);
- // The logic below works by trying to minimize the number of branches taken for any
- // given range of lengths. For example, the lengths [ 4 .. 7 ] are handled by a single
- // branch, [ 2 .. 3 ] are handled by a single branch, and [ 1 ] is handled by a single
- // branch.
- //
- // We can write both forward and backward as a perf improvement. For example,
- // the lengths [ 4 .. 7 ] can be handled by zeroing out the first four natural
- // words and the last 3 natural words. In the best case (length = 7), there are
- // no overlapping writes. In the worst case (length = 4), there are three
- // overlapping writes near the middle of the buffer. In perf testing, the
- // penalty for performing duplicate writes is less expensive than the penalty
- // for complex branching.
- if (pointerSizeLength >= 4)
- {
- goto Write4To7;
- }
- else if (pointerSizeLength >= 2)
- {
- goto Write2To3;
- }
- else if (pointerSizeLength > 0)
- {
- goto Write1;
- }
- else
- {
- return; // nothing to write
- }
- Write4To7:
- Debug.Assert(pointerSizeLength >= 4);
- // Write first four and last three.
- Unsafe.Add(ref ip, 2) = default;
- Unsafe.Add(ref ip, 3) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default;
- Write2To3:
- Debug.Assert(pointerSizeLength >= 2);
- // Write first two and last one.
- Unsafe.Add(ref ip, 1) = default;
- Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default;
- Write1:
- Debug.Assert(pointerSizeLength >= 1);
- // Write only element.
- ip = default;
- }
- }
- }
|