Browse Source

Added MurmurHash3.

Бранимир Караџић 2 years ago
parent
commit
ac1401faad
4 changed files with 335 additions and 88 deletions
  1. 33 3
      include/bx/hash.h
  2. 24 2
      include/bx/inline/hash.inl
  3. 149 68
      src/hash.cpp
  4. 129 15
      tests/hash_test.cpp

+ 33 - 3
include/bx/hash.h

@@ -76,7 +76,7 @@ namespace bx
 		uint32_t m_hash;
 	};
 
-	/// 32-bit multiply and rotate hash.
+	/// 32-bit non-cryptographic multiply and rotate hash.
 	class HashMurmur2A
 	{
 	public:
@@ -101,9 +101,39 @@ namespace bx
 
 	private:
 		uint32_t m_hash;
-		uint32_t m_tail;
-		uint32_t m_count;
 		uint32_t m_size;
+		uint8_t  m_tail[4];
+		uint8_t  m_count;
+	};
+
+	/// 32-bit non-cryptographic multiply and rotate hash (MurmurHash3, 32-bit result). Usage: begin(), one or more add() calls, then end().
+	class HashMurmur3
+	{
+	public:
+		/// Starts a new hash: stores `_seed` as the initial state and clears the byte count and pending-tail count.
+		void begin(uint32_t _seed = 0);
+
+		/// Feeds `_len` bytes starting at `_data` into the hash. May be called repeatedly before end().
+		void add(const void* _data, int32_t _len);
+
+		/// Feeds a C string; the pointer is wrapped in a StringView and forwarded.
+		void add(const char* _data);
+
+		/// Feeds the bytes referenced by the view (getPtr() / getLength() ).
+		void add(const StringView& _data);
+
+		/// Feeds the sizeof(Ty) bytes of `_data` as they are stored in memory.
+		template<typename Ty>
+		void add(const Ty& _data);
+
+		/// Mixes in any pending tail bytes and the total byte count, applies the final bit mixing, and returns the hash.
+		uint32_t end();
+
+	private:
+		uint32_t m_hash;    // Running hash state; starts as the seed.
+		uint32_t m_size;    // Total number of bytes passed to add() since begin().
+		uint8_t  m_tail[4]; // Bytes received so far that do not yet form a full 4-byte block.
+		uint8_t  m_count;   // Number of valid bytes currently held in m_tail.
 	};
 
 	///

+ 24 - 2
include/bx/inline/hash.inl

@@ -73,9 +73,8 @@ namespace bx
 	inline void HashMurmur2A::begin(uint32_t _seed)
 	{
 		m_hash  = _seed;
-		m_tail  = 0;
-		m_count = 0;
 		m_size  = 0;
+		m_count = 0; // m_tail is now a byte array and is not cleared; only its first m_count bytes are treated as valid.
 	}
 
 	inline void HashMurmur2A::add(const char* _data)
@@ -94,6 +93,29 @@ namespace bx
 		add(&_data, sizeof(Ty) );
 	}
 
+	inline void HashMurmur3::begin(uint32_t _seed) // Starts a new hash computation.
+	{
+		m_hash  = _seed; // The seed is the initial hash state.
+		m_size  = 0;     // No bytes consumed yet.
+		m_count = 0;     // No pending tail bytes; m_tail itself is left untouched.
+	}
+
+	inline void HashMurmur3::add(const char* _data) // Hashes a C string by wrapping it in a StringView.
+	{
+		return add(StringView(_data) );
+	}
+
+	inline void HashMurmur3::add(const StringView& _data) // Hashes getLength() bytes starting at getPtr().
+	{
+		return add(_data.getPtr(), _data.getLength() );
+	}
+
+	template<typename Ty>
+	inline void HashMurmur3::add(const Ty& _data) // Hashes the sizeof(Ty) bytes of `_data` as stored in memory.
+	{
+		add(&_data, sizeof(Ty) );
+	}
+
 	template<typename HashT>
 	inline uint32_t hash(const void* _data, uint32_t _size)
 	{

+ 149 - 68
src/hash.cpp

@@ -147,84 +147,52 @@ void HashCrc32::add(const void* _data, int32_t _len)
 	m_hash = hash;
 }
 
-struct HashMurmur2APod
+BX_FORCE_INLINE uint32_t readAligned(const uint8_t* _data) // Loads 4 bytes at _data as one uint32_t in the CPU's native byte order.
 {
-	uint32_t m_hash;
-	uint32_t m_tail;
-	uint32_t m_count;
-	uint32_t m_size;
-};
-BX_STATIC_ASSERT(sizeof(HashMurmur2A) == sizeof(HashMurmur2APod) );
+	return *(uint32_t*)_data; // Direct load through a casted pointer; addData only selects this reader when isAligned(data, 4) holds.
+}
 
-BX_FORCE_INLINE void mmix(uint32_t& _h, uint32_t& _k)
+BX_FORCE_INLINE uint32_t readUnaligned(const uint8_t* _data) // Builds a uint32_t one byte at a time, with _data[0] as the least significant byte.
 {
-	constexpr uint32_t kMurmurMul = 0x5bd1e995;
-	constexpr uint32_t kMurmurRightShift = 24;
-
-	_k *= kMurmurMul;
-	_k ^= _k >> kMurmurRightShift;
-	_k *= kMurmurMul;
-	_h *= kMurmurMul;
-	_h ^= _k;
+	return 0
+		| _data[3]<<24
+		| _data[2]<<16
+		| _data[1]<<8
+		| _data[0]
+		; // NOTE(review): byte order here is always little-endian, whereas readAligned is native-endian; on a big-endian CPU the two readers would disagree - confirm this is intended.
 }
 
-static void mixTail(HashMurmur2APod& _self, const uint8_t*& _data, int32_t& _len)
+namespace
 {
-	while (_len
-	&&  ( (_len<4) || _self.m_count)
-		)
-	{
-		_self.m_tail |= (*_data++) << (_self.m_count * 8);
 
-		_self.m_count++;
+template<typename Ty>
+void mixTail(Ty& _self, const uint8_t*& _data, int32_t& _len) // Buffers bytes into _self.m_tail; _data and _len are updated in place for the caller.
+{
+	while (0 != _len
+	&&    (0 < _self.m_count || 4 > _len) ) // Keep consuming while a partial block is pending or fewer than 4 input bytes remain.
+	{
+		_self.m_tail[_self.m_count++] = *_data++;
 		_len--;
 
-		if (_self.m_count == 4)
+		if (4 == _self.m_count) // A complete 4-byte block has been collected.
 		{
-			mmix(_self.m_hash, _self.m_tail);
-			_self.m_tail  = 0;
+			uint32_t kk = *( (uint32_t*)&_self.m_tail[0]); // NOTE(review): reads the byte buffer through a uint32_t pointer, i.e. in native byte order - confirm this matches the data readers on all targets.
+			_self.mix(kk);
 			_self.m_count = 0;
 		}
 	}
 }
 
-BX_FORCE_INLINE uint32_t readAligned(const uint8_t* _data)
-{
-	return *(uint32_t*)_data;
-}
-
-BX_FORCE_INLINE uint32_t readUnaligned(const uint8_t* _data)
-{
-	if (BX_ENABLED(BX_CPU_ENDIAN_BIG) )
-	{
-		return 0
-			| _data[0]<<24
-			| _data[1]<<16
-			| _data[2]<<8
-			| _data[3]
-			;
-	}
-	else
-	{
-		return 0
-			| _data[0]
-			| _data[1]<<8
-			| _data[2]<<16
-			| _data[3]<<24
-			;
-	}
-}
-
 typedef uint32_t (*ReadDataFn)(const uint8_t* _data);
 
-template<ReadDataFn FnT>
-static void addData(HashMurmur2APod& _self, const uint8_t* _data, int32_t _len)
+template<typename Ty, ReadDataFn FnT>
+void addData(Ty& _self, const uint8_t* _data, int32_t _len)
 {
 	while (_len >= 4)
 	{
 		uint32_t kk = FnT(_data);
 
-		mmix(_self.m_hash, kk);
+		_self.mix(kk);
 
 		_data += 4;
 		_len  -= 4;
@@ -233,36 +201,149 @@ static void addData(HashMurmur2APod& _self, const uint8_t* _data, int32_t _len)
 	mixTail(_self, _data, _len);
 }
 
-void HashMurmur2A::add(const void* _data, int32_t _len)
+template<typename SelfT, typename ThisT>
+void addData(ThisT* _this, const void* _data, int32_t _len) // Shared add() implementation; SelfT is the Pod struct that supplies mix().
 {
-	HashMurmur2APod& self = *(HashMurmur2APod*)this;
+	SelfT& self = *(SelfT*)_this; // View the public hash object through its Pod counterpart to reach the state fields.
 
 	const uint8_t* data = (const uint8_t*)_data;
 
-	m_size += _len;
+	self.m_size += _len; // Accumulate the total byte count across calls.
 	mixTail(self, data, _len); // Finish any partial block left over from a previous call (or buffer a short input).
 
 	if (BX_UNLIKELY(!isAligned(data, 4) ) )
 	{
-		addData<readUnaligned>(self, data, _len);
+		addData<SelfT, readUnaligned>(self, data, _len); // Remaining pointer is not 4-byte aligned: use the byte-wise reader.
 		return;
 	}
 
-	addData<readAligned>(self, data, _len);
+	addData<SelfT, readAligned>(self, data, _len); // Aligned pointer: use the direct 32-bit load.
+}
+
+template<typename SelfT, typename ThisT>
+uint32_t finalize(ThisT* _this) // Shared end() implementation: runs SelfT::finalize() and returns the resulting m_hash.
+{
+	SelfT& self = *(SelfT*)_this; // View the public hash object through its Pod counterpart.
+	self.finalize(); // Modifies the state in place.
+
+	return self.m_hash;
+}
+
+} // namespace
+
+struct HashMurmur2APod // Plain struct with the same fields as HashMurmur2A, carrying the MurmurHash2A mixing steps.
+{
+	uint32_t m_hash;    // Running hash state.
+	uint32_t m_size;    // Total bytes added so far.
+	uint8_t  m_tail[4]; // Pending bytes of an incomplete 4-byte block.
+	uint8_t  m_count;   // Number of valid bytes in m_tail.
+
+	static constexpr uint32_t kMurmur2AMul = 0x5bd1e995;
+
+	BX_FORCE_INLINE void mix(uint32_t& _k) // Mixes one 32-bit block into m_hash; _k is taken by reference and is modified.
+	{
+		_k *= kMurmur2AMul;
+		_k ^= _k >> 24;
+		_k *= kMurmur2AMul;
+
+		m_hash *= kMurmur2AMul;
+		m_hash ^= _k;
+	}
+
+	void finalize() // Mixes the pending tail word and the total size, then applies the closing shift/multiply sequence.
+	{
+		uint32_t kk = 0;
+
+		switch (m_count)
+		{
+			case  3: kk |= m_tail[2] << 16; BX_FALLTHROUGH;
+			case  2: kk |= m_tail[1] <<  8; BX_FALLTHROUGH;
+			case  1: kk |= m_tail[0];       BX_FALLTHROUGH;
+			case  0: mix(kk); break; // The tail word is mixed even when no bytes are pending (kk == 0).
+			default: BX_ASSERT(false, "Bug, m_count can't be %d (expected < 4).", m_count); BX_UNREACHABLE;
+		}
+
+		mix(m_size); // m_size is passed by reference, so its value is overwritten by this call.
+
+		m_hash ^= m_hash >> 13;
+		m_hash *= kMurmur2AMul;
+		m_hash ^= m_hash >> 15;
+	}
+};
+BX_STATIC_ASSERT(sizeof(HashMurmur2A) == sizeof(HashMurmur2APod) ); // Size check backing the pointer cast between the class and this Pod.
+
+void HashMurmur2A::add(const void* _data, int32_t _len) // Feeds _len bytes into the hash via the shared addData helper.
+{
+	addData<HashMurmur2APod>(this, _data, _len);
 }
 
 uint32_t HashMurmur2A::end()
 {
-	constexpr uint32_t kMurmurMul = 0x5bd1e995;
+	return finalize<HashMurmur2APod>(this); // Finalization now lives in HashMurmur2APod::finalize().
+}
+
+struct HashMurmur3Pod // Plain struct with the same fields as HashMurmur3, carrying the MurmurHash3 mixing steps.
+{
+	uint32_t m_hash;    // Running hash state.
+	uint32_t m_size;    // Total bytes added so far.
+	uint8_t  m_tail[4]; // Pending bytes of an incomplete 4-byte block.
+	uint8_t  m_count;   // Number of valid bytes in m_tail.
 
-	mmix(m_hash, m_tail);
-	mmix(m_hash, m_size);
+	static constexpr uint32_t kMurmur3Mul1 = 0xcc9e2d51; // Block multiplier applied before the rotate in mix1.
+	static constexpr uint32_t kMurmur3Mul2 = 0x1b873593; // Block multiplier applied after the rotate in mix1.
+	static constexpr uint32_t kMurmur3Mul3 = 0x85ebca6b; // First multiplier of the final mixing in finalize().
+	static constexpr uint32_t kMurmur3Mul4 = 0xc2b2ae35; // Second multiplier of the final mixing in finalize().
+	static constexpr uint32_t kMurmur3Add  = 0xe6546b64; // Constant added to the state after every full block.
 
-	m_hash ^= m_hash >> 13;
-	m_hash *= kMurmurMul;
-	m_hash ^= m_hash >> 15;
+	BX_FORCE_INLINE void mix1(uint32_t _k) // Scrambles one block (multiply, uint32_rol by 15, multiply) and XORs it into m_hash.
+	{
+		_k *= kMurmur3Mul1;
+		_k  = uint32_rol(_k, 15);
+		_k *= kMurmur3Mul2;
+
+		m_hash ^= _k;
+	}
+
+	BX_FORCE_INLINE void mix(uint32_t _k) // Full per-block step: mix1, then uint32_rol of the state by 13 and m_hash*5 + kMurmur3Add.
+	{
+		mix1(_k);
+
+		m_hash = uint32_rol(m_hash, 13);
+		m_hash = m_hash*5 + kMurmur3Add;
+	}
+
+	void finalize() // Folds in the pending tail and the total size, then applies the final shift/multiply mixing.
+	{
+		uint32_t kk = 0;
+
+		switch (m_count)
+		{
+			case  3: kk |= m_tail[2] << 16; BX_FALLTHROUGH;
+			case  2: kk |= m_tail[1] <<  8; BX_FALLTHROUGH;
+			case  1: kk |= m_tail[0]; mix1(kk); break; // Tail bytes get mix1 only, without the rotate/add step of mix().
+			case  0: break; // Nothing pending: no tail word is mixed.
+			default: BX_ASSERT(false, "Bug, m_count can't be %d (expected < 4).", m_count); BX_UNREACHABLE;
+		}
+
+		m_hash ^= m_size; // Fold in the total byte count.
+
+		m_hash ^= m_hash >> 16;
+		m_hash *= kMurmur3Mul3;
+		m_hash ^= m_hash >> 13;
+		m_hash *= kMurmur3Mul4;
+		m_hash ^= m_hash >> 16;
+	}
+};
+BX_STATIC_ASSERT(sizeof(HashMurmur3) == sizeof(HashMurmur3Pod) ); // Size check backing the pointer cast between the class and this Pod.
 
-	return m_hash;
+void HashMurmur3::add(const void* _data, int32_t _len) // Feeds _len bytes into the hash via the shared addData helper.
+{
+	addData<HashMurmur3Pod>(this, _data, _len);
+}
+
+uint32_t HashMurmur3::end() // Finalizes the state through HashMurmur3Pod::finalize() and returns the 32-bit hash.
+{
+	return finalize<HashMurmur3Pod>(this);
 }
 
 } // namespace bx

+ 129 - 15
tests/hash_test.cpp

@@ -32,22 +32,23 @@ struct HashTest
 	uint32_t crc32[bx::HashCrc32::Count];
 	uint32_t adler32;
 	uint32_t murmur2a;
+	uint32_t murmur3;
 	const char* input;
 };
 
 const HashTest s_hashTest[] =
 {
-	//  Crc32                               | Adler32   | Murmur2A  | Input
-	//  Ieee        Castagnoli  Koopman     |           |           |
-	{ { 0,          0,          0          }, 1,          0,          ""       },
-	{ { 0xe8b7be43, 0xc1d04330, 0x0da2aa8a }, 0x00620062, 0x0803888b, "a"      },
-	{ { 0x9e83486d, 0xe2a22936, 0x31ec935a }, 0x012600c4, 0x618515af, "ab"     },
-	{ { 0xc340daab, 0x49e1b6e3, 0x945a1e78 }, 0x06060205, 0x94e3dc4d, "abvgd"  },
-	{ { 0x07642fe2, 0x45a04162, 0x3d4bf72d }, 0x020a00d6, 0xe602fc07, "1389"   },
-	{ { 0x26d75737, 0xb73d7b80, 0xd524eb40 }, 0x04530139, 0x58d37863, "555333" },
+	//  Crc32                               | Adler32   | Murmur2A  | Murmur3   |  Input
+	//  Ieee        Castagnoli  Koopman     |           |           |           |
+	{ { 0,          0,          0          }, 1,          0,                   0,  ""       },
+	{ { 0xe8b7be43, 0xc1d04330, 0x0da2aa8a }, 0x00620062, 0x0803888b, 0x3c2569b2,  "a"      },
+	{ { 0x9e83486d, 0xe2a22936, 0x31ec935a }, 0x012600c4, 0x618515af, 0x9bbfd75f,  "ab"     },
+	{ { 0xc340daab, 0x49e1b6e3, 0x945a1e78 }, 0x06060205, 0x94e3dc4d, 0x1e661875,  "abvgd"  },
+	{ { 0x07642fe2, 0x45a04162, 0x3d4bf72d }, 0x020a00d6, 0xe602fc07, 0x7af40d31,  "1389"   },
+	{ { 0x26d75737, 0xb73d7b80, 0xd524eb40 }, 0x04530139, 0x58d37863, 0x0c090160,  "555333" },
 };
 
-TEST_CASE("HashCrc32", "")
+TEST_CASE("HashCrc32", "[hash]")
 {
 #if 0
 	makeCrcTable(0xedb88320);
@@ -71,7 +72,7 @@ TEST_CASE("HashCrc32", "")
 	}
 }
 
-TEST_CASE("HashAdler32", "")
+TEST_CASE("HashAdler32", "[hash]")
 {
 	for (uint32_t ii = 0; ii < BX_COUNTOF(s_hashTest); ++ii)
 	{
@@ -84,6 +85,9 @@ TEST_CASE("HashAdler32", "")
 	}
 }
 
+namespace
+{
+
 /*-----------------------------------------------------------------------------
 // MurmurHash2A, by Austin Appleby
 //
@@ -96,8 +100,6 @@ TEST_CASE("HashAdler32", "")
 // more amenable to incremental implementations.
 */
 
-#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
 uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
 {
 	const uint32_t m = 0x5bd1e995;
@@ -108,6 +110,8 @@ uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
 
 	uint32_t h = seed;
 
+#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
 	while(len >= 4)
 	{
 		uint32_t k = *(uint32_t*)data;
@@ -130,6 +134,8 @@ uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
 	mmix(h,t);
 	mmix(h,l);
 
+#undef mmix
+
 	h ^= h >> 13;
 	h *= m;
 	h ^= h >> 15;
@@ -137,7 +143,9 @@ uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
 	return h;
 }
 
-TEST_CASE("HashMurmur2A", "")
+} // namespace
+
+TEST_CASE("HashMurmur2A", "[hash]")
 {
 	uint32_t seed = 0;
 
@@ -154,11 +162,117 @@ TEST_CASE("HashMurmur2A", "")
 	}
 }
 
-TEST_CASE("HashMurmur2A-Separate-Add", "")
+TEST_CASE("HashMurmur2A-Separate-Add", "[hash]")
 {
 	bx::HashMurmur2A hash;
 	hash.begin();
 	hash.add("0123456789");
 	hash.add("abvgd012345");
-	REQUIRE(MurmurHash2A("0123456789abvgd012345", 21) == hash.end() );
+	hash.add("1389");
+	hash.add("555333");
+	REQUIRE(MurmurHash2A("0123456789abvgd0123451389555333", 31) == hash.end() ); // Chunks of 10, 11, 4 and 6 bytes must hash like the 31-byte concatenation.
+}
+
+namespace
+{
+
+BX_FORCE_INLINE uint32_t fmix32 ( uint32_t h ) // Final 32-bit mixing step of the reference MurmurHash3 below.
+{
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r ) // Rotates x left by r bits; r must be in 1..31, since r == 0 or 32 would shift by the full width.
+{
+	return (x << r) | (x >> (32 - r));
+}
+
+uint32_t MurmurHash3_x86_32(const void * key, int len, uint32_t seed) // One-shot reference implementation the tests compare bx::HashMurmur3 against.
+{
+	const uint8_t * data = (const uint8_t*)key;
+	const int nblocks = len / 4;
+
+	uint32_t h1 = seed;
+
+	const uint32_t c1 = 0xcc9e2d51;
+	const uint32_t c2 = 0x1b873593;
+
+	//----------
+	// body
+
+	const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); // Points just past the last full block; indexed with negative i below.
+
+	for(int i = -nblocks; i; i++)
+	{
+		uint32_t k1 = blocks[i];
+
+		k1 *= c1;
+		k1 = rotl32(k1,15);
+		k1 *= c2;
+
+		h1 ^= k1;
+		h1 = rotl32(h1,13);
+		h1 = h1*5+0xe6546b64;
+	}
+
+	//----------
+	// tail
+
+	const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+	uint32_t k1 = 0;
+
+	switch(len & 3) // 0..3 leftover bytes after the full blocks.
+	{
+		case 3: k1 ^= tail[2] << 16; BX_FALLTHROUGH;
+		case 2: k1 ^= tail[1] << 8;  BX_FALLTHROUGH;
+		case 1: k1 ^= tail[0];
+				k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1;
+	};
+
+	//----------
+	// finalization
+
+	h1 ^= len;
+
+	h1 = fmix32(h1);
+
+	return h1;
+}
+
+} // namespace
+
+TEST_CASE("HashMurmur3", "[hash]")
+{
+	uint32_t seed = 0;
+
+	for (uint32_t ii = 0; ii < BX_COUNTOF(s_hashTest); ++ii)
+	{
+		const HashTest& test = s_hashTest[ii];
+
+		bx::HashMurmur3 hash;
+		hash.begin(seed);
+		hash.add(test.input, bx::strLen(test.input) );
+		const uint32_t result = hash.end();
+		const uint32_t sanity = MurmurHash3_x86_32(test.input, bx::strLen(test.input), seed); // Same input through the reference implementation.
+
+		REQUIRE(test.murmur3 == result); // bx implementation matches the table value.
+		REQUIRE(test.murmur3 == sanity); // The table value itself matches the reference implementation.
+	}
+}
+
+TEST_CASE("HashMurmur3-Separate-Add", "[hash]")
+{
+	bx::HashMurmur3 hash;
+	hash.begin();
+	hash.add("0123456789");
+	hash.add("abvgd012345");
+	hash.add("1389");
+	hash.add("555333");
+	REQUIRE(MurmurHash3_x86_32("0123456789abvgd0123451389555333", 31, 0) == hash.end() ); // Chunks of 10, 11, 4 and 6 bytes must hash like the 31-byte concatenation.
 }