Marvin.cs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Numerics;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.InteropServices;
  8. using Internal.Runtime.CompilerServices;
  9. #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
  10. #if BIT64
  11. using nuint = System.UInt64;
  12. #else
  13. using nuint = System.UInt32;
  14. #endif
  15. namespace System
  16. {
  17. internal static partial class Marvin
  18. {
  19. /// <summary>
  20. /// Compute a Marvin hash and collapse it into a 32-bit hash.
  21. /// </summary>
  22. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  23. public static int ComputeHash32(ReadOnlySpan<byte> data, ulong seed) => ComputeHash32(ref MemoryMarshal.GetReference(data), (uint)data.Length, (uint)seed, (uint)(seed >> 32));
  24. /// <summary>
  25. /// Compute a Marvin hash and collapse it into a 32-bit hash.
  26. /// </summary>
  27. public static int ComputeHash32(ref byte data, uint count, uint p0, uint p1)
  28. {
  29. // Control flow of this method generally flows top-to-bottom, trying to
  30. // minimize the number of branches taken for large (>= 8 bytes, 4 chars) inputs.
  31. // If small inputs (< 8 bytes, 4 chars) are given, this jumps to a "small inputs"
  32. // handler at the end of the method.
  33. if (count < 8)
  34. {
  35. // We can't run the main loop, but we might still have 4 or more bytes available to us.
  36. // If so, jump to the 4 .. 7 bytes logic immediately after the main loop.
  37. if (count >= 4)
  38. {
  39. goto Between4And7BytesRemain;
  40. }
  41. else
  42. {
  43. goto InputTooSmallToEnterMainLoop;
  44. }
  45. }
  46. // Main loop - read 8 bytes at a time.
  47. // The block function is unrolled 2x in this loop.
  48. uint loopCount = count / 8;
  49. Debug.Assert(loopCount > 0, "Shouldn't reach this code path for small inputs.");
  50. do
  51. {
  52. // Most x86 processors have two dispatch ports for reads, so we can read 2x 32-bit
  53. // values in parallel. We opt for this instead of a single 64-bit read since the
  54. // typical use case for Marvin32 is computing String hash codes, and the particular
  55. // layout of String instances means the starting data is never 8-byte aligned when
  56. // running in a 64-bit process.
  57. p0 += Unsafe.ReadUnaligned<uint>(ref data);
  58. uint nextUInt32 = Unsafe.ReadUnaligned<uint>(ref Unsafe.AddByteOffset(ref data, 4));
  59. // One block round for each of the 32-bit integers we just read, 2x rounds total.
  60. Block(ref p0, ref p1);
  61. p0 += nextUInt32;
  62. Block(ref p0, ref p1);
  63. // Bump the data reference pointer and decrement the loop count.
  64. // Decrementing by 1 every time and comparing against zero allows the JIT to produce
  65. // better codegen compared to a standard 'for' loop with an incrementing counter.
  66. // Requires https://github.com/dotnet/coreclr/issues/7566 to be addressed first
  67. // before we can realize the full benefits of this.
  68. data = ref Unsafe.AddByteOffset(ref data, 8);
  69. } while (--loopCount > 0);
  70. // n.b. We've not been updating the original 'count' parameter, so its actual value is
  71. // still the original data length. However, we can still rely on its least significant
  72. // 3 bits to tell us how much data remains (0 .. 7 bytes) after the loop above is
  73. // completed.
  74. if ((count & 0b_0100) == 0)
  75. {
  76. goto DoFinalPartialRead;
  77. }
  78. Between4And7BytesRemain:
  79. // If after finishing the main loop we still have 4 or more leftover bytes, or if we had
  80. // 4 .. 7 bytes to begin with and couldn't enter the loop in the first place, we need to
  81. // consume 4 bytes immediately and send them through one round of the block function.
  82. Debug.Assert(count >= 4, "Only should've gotten here if the original count was >= 4.");
  83. p0 += Unsafe.ReadUnaligned<uint>(ref data);
  84. Block(ref p0, ref p1);
  85. DoFinalPartialRead:
  86. // Finally, we have 0 .. 3 bytes leftover. Since we know the original data length was at
  87. // least 4 bytes (smaller lengths are handled at the end of this routine), we can safely
  88. // read the 4 bytes at the end of the buffer without reading past the beginning of the
  89. // original buffer. This necessarily means the data we're about to read will overlap with
  90. // some data we've already processed, but we can handle that below.
  91. Debug.Assert(count >= 4, "Only should've gotten here if the original count was >= 4.");
  92. // Read the last 4 bytes of the buffer.
  93. uint partialResult = Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref Unsafe.AddByteOffset(ref data, (nuint)count & 7), -4));
  94. // The 'partialResult' local above contains any data we have yet to read, plus some number
  95. // of bytes which we've already read from the buffer. An example of this is given below
  96. // for little-endian architectures. In this table, AA BB CC are the bytes which we still
  97. // need to consume, and ## are bytes which we want to throw away since we've already
  98. // consumed them as part of a previous read.
  99. //
  100. // (partialResult contains) (we want it to contain)
  101. // count mod 4 = 0 -> [ ## ## ## ## | ] -> 0x####_#### -> 0x0000_0080
  102. // count mod 4 = 1 -> [ ## ## ## ## | AA ] -> 0xAA##_#### -> 0x0000_80AA
  103. // count mod 4 = 2 -> [ ## ## ## ## | AA BB ] -> 0xBBAA_#### -> 0x0080_BBAA
  104. // count mod 4 = 3 -> [ ## ## ## ## | AA BB CC ] -> 0xCCBB_AA## -> 0x80CC_BBAA
  105. count = ~count << 3;
  106. if (BitConverter.IsLittleEndian)
  107. {
  108. partialResult >>= 8; // make some room for the 0x80 byte
  109. partialResult |= 0x8000_0000u; // put the 0x80 byte at the beginning
  110. partialResult >>= (int)count & 0x1F; // shift out all previously consumed bytes
  111. }
  112. else
  113. {
  114. partialResult <<= 8; // make some room for the 0x80 byte
  115. partialResult |= 0x80u; // put the 0x80 byte at the end
  116. partialResult <<= (int)count & 0x1F; // shift out all previously consumed bytes
  117. }
  118. DoFinalRoundsAndReturn:
  119. // Now that we've computed the final partial result, merge it in and run two rounds of
  120. // the block function to finish out the Marvin algorithm.
  121. p0 += partialResult;
  122. Block(ref p0, ref p1);
  123. Block(ref p0, ref p1);
  124. return (int)(p1 ^ p0);
  125. InputTooSmallToEnterMainLoop:
  126. // We had only 0 .. 3 bytes to begin with, so we can't perform any 32-bit reads.
  127. // This means that we're going to be building up the final result right away and
  128. // will only ever run two rounds total of the block function. Let's initialize
  129. // the partial result to "no data".
  130. if (BitConverter.IsLittleEndian)
  131. {
  132. partialResult = 0x80u;
  133. }
  134. else
  135. {
  136. partialResult = 0x80000000u;
  137. }
  138. if ((count & 0b_0001) != 0)
  139. {
  140. // If the buffer is 1 or 3 bytes in length, let's read a single byte now
  141. // and merge it into our partial result. This will result in partialResult
  142. // having one of the two values below, where AA BB CC are the buffer bytes.
  143. //
  144. // (little-endian / big-endian)
  145. // [ AA ] -> 0x0000_80AA / 0xAA80_0000
  146. // [ AA BB CC ] -> 0x0000_80CC / 0xCC80_0000
  147. partialResult = Unsafe.AddByteOffset(ref data, (nuint)count & 2);
  148. if (BitConverter.IsLittleEndian)
  149. {
  150. partialResult |= 0x8000;
  151. }
  152. else
  153. {
  154. partialResult <<= 24;
  155. partialResult |= 0x800000u;
  156. }
  157. }
  158. if ((count & 0b_0010) != 0)
  159. {
  160. // If the buffer is 2 or 3 bytes in length, let's read a single ushort now
  161. // and merge it into the partial result. This will result in partialResult
  162. // having one of the two values below, where AA BB CC are the buffer bytes.
  163. //
  164. // (little-endian / big-endian)
  165. // [ AA BB ] -> 0x0080_BBAA / 0xAABB_8000
  166. // [ AA BB CC ] -> 0x80CC_BBAA / 0xAABB_CC80 (carried over from above)
  167. if (BitConverter.IsLittleEndian)
  168. {
  169. partialResult <<= 16;
  170. partialResult |= (uint)Unsafe.ReadUnaligned<ushort>(ref data);
  171. }
  172. else
  173. {
  174. partialResult |= (uint)Unsafe.ReadUnaligned<ushort>(ref data);
  175. partialResult = BitOperations.RotateLeft(partialResult, 16);
  176. }
  177. }
  178. // Everything is consumed! Go perform the final rounds and return.
  179. goto DoFinalRoundsAndReturn;
  180. }
  181. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  182. private static void Block(ref uint rp0, ref uint rp1)
  183. {
  184. uint p0 = rp0;
  185. uint p1 = rp1;
  186. p1 ^= p0;
  187. p0 = BitOperations.RotateLeft(p0, 20);
  188. p0 += p1;
  189. p1 = BitOperations.RotateLeft(p1, 9);
  190. p1 ^= p0;
  191. p0 = BitOperations.RotateLeft(p0, 27);
  192. p0 += p1;
  193. p1 = BitOperations.RotateLeft(p1, 19);
  194. rp0 = p0;
  195. rp1 = p1;
  196. }
  197. public static ulong DefaultSeed { get; } = GenerateSeed();
  198. private static unsafe ulong GenerateSeed()
  199. {
  200. ulong seed;
  201. Interop.GetRandomBytes((byte*)&seed, sizeof(ulong));
  202. return seed;
  203. }
  204. }
  205. }