Browse Source

Hash cleanup.

Бранимир Караџић 3 years ago
parent
commit
0314d3b018
4 changed files with 240 additions and 177 deletions
  1. 49 45
      include/bx/hash.h
  2. 48 130
      include/bx/inline/hash.inl
  3. 133 1
      src/hash.cpp
  4. 10 1
      tests/hash_test.cpp

+ 49 - 45
include/bx/hash.h

@@ -11,95 +11,99 @@
 
 
 namespace bx
 namespace bx
 {
 {
-	/// MurmurHash2 was written by Austin Appleby, and is placed in the public
-	/// domain. The author hereby disclaims copyright to this source code.
-	///
-	class HashMurmur2A
+	/// 32-bit Adler checksum hash.
+	class HashAdler32
 	{
 	{
 	public:
 	public:
 		///
 		///
-		void begin(uint32_t _seed = 0);
+		void begin();
+
+		///
+		void add(const void* _data, int32_t _len);
+
+		///
+		void add(const char* _data);
 
 
 		///
 		///
-		void add(const void* _data, int _len);
+		void add(const StringView& _data);
 
 
 		///
 		///
 		template<typename Ty>
 		template<typename Ty>
-		void add(Ty _value);
+		void add(const Ty& _data);
 
 
 		///
 		///
 		uint32_t end();
 		uint32_t end();
 
 
 	private:
 	private:
-		///
-		void addAligned(const void* _data, int _len);
+		uint32_t m_a;
+		uint32_t m_b;
+	};
 
 
-		///
-		void addUnaligned(const void* _data, int _len);
+	/// 32-bit cyclic redundancy checksum hash.
+	class HashCrc32
+	{
+	public:
+		enum Enum
+		{
+			Ieee,       //!< 0xedb88320
+			Castagnoli, //!< 0x82f63b78
+			Koopman,    //!< 0xeb31d82e
 
 
-		///
-		static void readUnaligned(const void* _data, uint32_t& _out);
+			Count
+		};
 
 
 		///
 		///
-		void mixTail(const uint8_t*& _data, int& _len);
+		void begin(Enum _type = Ieee);
 
 
-		uint32_t m_hash;
-		uint32_t m_tail;
-		uint32_t m_count;
-		uint32_t m_size;
-	};
+		///
+		void add(const void* _data, int32_t _len);
 
 
-	///
-	class HashAdler32
-	{
-	public:
 		///
 		///
-		void begin();
+		void add(const char* _data);
 
 
 		///
 		///
-		void add(const void* _data, int _len);
+		void add(const StringView& _data);
 
 
 		///
 		///
 		template<typename Ty>
 		template<typename Ty>
-		void add(Ty _value);
+		void add(const Ty& _data);
 
 
 		///
 		///
 		uint32_t end();
 		uint32_t end();
 
 
 	private:
 	private:
-		uint32_t m_a;
-		uint32_t m_b;
+		const uint32_t* m_table;
+		uint32_t m_hash;
 	};
 	};
 
 
-	///
-	class HashCrc32
+	/// 32-bit multiply and rotate hash.
+	class HashMurmur2A
 	{
 	{
 	public:
 	public:
-		enum Enum
-		{
-			Ieee,       //!< 0xedb88320
-			Castagnoli, //!< 0x82f63b78
-			Koopman,    //!< 0xeb31d82e
+		///
+		void begin(uint32_t _seed = 0);
 
 
-			Count
-		};
+		///
+		void add(const void* _data, int32_t _len);
 
 
 		///
 		///
-		void begin(Enum _type = Ieee);
+		void add(const char* _data);
 
 
 		///
 		///
-		void add(const void* _data, int _len);
+		void add(const StringView& _data);
 
 
 		///
 		///
 		template<typename Ty>
 		template<typename Ty>
-		void add(Ty _value);
+		void add(const Ty& _data);
 
 
 		///
 		///
 		uint32_t end();
 		uint32_t end();
 
 
 	private:
 	private:
-		const uint32_t* m_table;
 		uint32_t m_hash;
 		uint32_t m_hash;
+		uint32_t m_tail;
+		uint32_t m_count;
+		uint32_t m_size;
 	};
 	};
 
 
 	///
 	///
@@ -107,16 +111,16 @@ namespace bx
 	uint32_t hash(const void* _data, uint32_t _size);
 	uint32_t hash(const void* _data, uint32_t _size);
 
 
 	///
 	///
-	template<typename HashT, typename Ty>
-	uint32_t hash(const Ty& _data);
+	template<typename HashT>
+	uint32_t hash(const char* _data);
 
 
 	///
 	///
 	template<typename HashT>
 	template<typename HashT>
 	uint32_t hash(const StringView& _data);
 	uint32_t hash(const StringView& _data);
 
 
 	///
 	///
-	template<typename HashT>
-	uint32_t hash(const char* _data);
+	template<typename HashT, typename Ty>
+	uint32_t hash(const Ty& _data);
 
 
 } // namespace bx
 } // namespace bx
 
 

+ 48 - 130
include/bx/inline/hash.inl

@@ -9,171 +9,89 @@
 
 
 namespace bx
 namespace bx
 {
 {
-#define MURMUR_M 0x5bd1e995
-#define MURMUR_R 24
-#define mmix(_h, _k) { _k *= MURMUR_M; _k ^= _k >> MURMUR_R; _k *= MURMUR_M; _h *= MURMUR_M; _h ^= _k; }
-
-	inline void HashMurmur2A::begin(uint32_t _seed)
+	inline void HashAdler32::begin()
 	{
 	{
-		m_hash = _seed;
-		m_tail = 0;
-		m_count = 0;
-		m_size = 0;
+		m_a = 1;
+		m_b = 0;
 	}
 	}
 
 
-	inline void HashMurmur2A::add(const void* _data, int _len)
+	inline void HashAdler32::add(const void* _data, int32_t _len)
 	{
 	{
-		if (BX_UNLIKELY(!isAligned(_data, 4) ) )
-		{
-			addUnaligned(_data, _len);
-			return;
-		}
-
-		addAligned(_data, _len);
-	}
+		constexpr uint32_t kModAdler = 65521;
 
 
-	inline void HashMurmur2A::addAligned(const void* _data, int _len)
-	{
 		const uint8_t* data = (const uint8_t*)_data;
 		const uint8_t* data = (const uint8_t*)_data;
-		m_size += _len;
-
-		mixTail(data, _len);
-
-		while(_len >= 4)
+		for (; _len != 0; --_len)
 		{
 		{
-			uint32_t kk = *(uint32_t*)data;
-
-			mmix(m_hash, kk);
-
-			data += 4;
-			_len -= 4;
+			m_a = (m_a + *data++) % kModAdler;
+			m_b = (m_b + m_a    ) % kModAdler;
 		}
 		}
-
-		mixTail(data, _len);
 	}
 	}
 
 
-	inline void HashMurmur2A::addUnaligned(const void* _data, int _len)
+	inline void HashAdler32::add(const char* _data)
 	{
 	{
-		const uint8_t* data = (const uint8_t*)_data;
-		m_size += _len;
-
-		mixTail(data, _len);
-
-		while(_len >= 4)
-		{
-			uint32_t kk;
-			readUnaligned(data, kk);
-
-			mmix(m_hash, kk);
-
-			data += 4;
-			_len -= 4;
-		}
+		return add(StringView(_data) );
+	}
 
 
-		mixTail(data, _len);
+	inline void HashAdler32::add(const StringView& _data)
+	{
+		return add(_data.getPtr(), _data.getLength() );
 	}
 	}
 
 
 	template<typename Ty>
 	template<typename Ty>
-	inline void HashMurmur2A::add(Ty _value)
+	inline void HashAdler32::add(const Ty& _data)
 	{
 	{
-		add(&_value, sizeof(Ty) );
+		add(&_data, sizeof(Ty) );
 	}
 	}
 
 
-	inline uint32_t HashMurmur2A::end()
+	inline uint32_t HashAdler32::end()
 	{
 	{
-		mmix(m_hash, m_tail);
-		mmix(m_hash, m_size);
-
-		m_hash ^= m_hash >> 13;
-		m_hash *= MURMUR_M;
-		m_hash ^= m_hash >> 15;
-
-		return m_hash;
+		return m_a | (m_b<<16);
 	}
 	}
 
 
-	inline void HashMurmur2A::readUnaligned(const void* _data, uint32_t& _out)
+	inline void HashCrc32::add(const char* _data)
 	{
 	{
-		const uint8_t* data = (const uint8_t*)_data;
-		if (BX_ENABLED(BX_CPU_ENDIAN_BIG) )
-		{
-			_out = 0
-				| data[0]<<24
-				| data[1]<<16
-				| data[2]<<8
-				| data[3]
-				;
-		}
-		else
-		{
-			_out = 0
-				| data[0]
-				| data[1]<<8
-				| data[2]<<16
-				| data[3]<<24
-				;
-		}
+		return add(StringView(_data) );
 	}
 	}
 
 
-	inline void HashMurmur2A::mixTail(const uint8_t*& _data, int& _len)
+	inline void HashCrc32::add(const StringView& _data)
 	{
 	{
-		while( _len && ((_len<4) || m_count) )
-		{
-			m_tail |= (*_data++) << (m_count * 8);
-
-			m_count++;
-			_len--;
-
-			if(m_count == 4)
-			{
-				mmix(m_hash, m_tail);
-				m_tail = 0;
-				m_count = 0;
-			}
-		}
+		return add(_data.getPtr(), _data.getLength() );
 	}
 	}
 
 
-#undef MURMUR_M
-#undef MURMUR_R
-#undef mmix
-
-	inline void HashAdler32::begin()
+	template<typename Ty>
+	inline void HashCrc32::add(const Ty& _data)
 	{
 	{
-		m_a = 1;
-		m_b = 0;
+		add(&_data, sizeof(Ty) );
 	}
 	}
 
 
-	inline void HashAdler32::add(const void* _data, int _len)
+	inline uint32_t HashCrc32::end()
 	{
 	{
-		const uint32_t kModAdler = 65521;
-		const uint8_t* data = (const uint8_t*)_data;
-		for (; _len != 0; --_len)
-		{
-			m_a = (m_a + *data++) % kModAdler;
-			m_b = (m_b + m_a    ) % kModAdler;
-		}
+		m_hash ^= UINT32_MAX;
+		return m_hash;
 	}
 	}
 
 
-	template<typename Ty>
-	inline void HashAdler32::add(Ty _value)
+	inline void HashMurmur2A::begin(uint32_t _seed)
 	{
 	{
-		add(&_value, sizeof(Ty) );
+		m_hash  = _seed;
+		m_tail  = 0;
+		m_count = 0;
+		m_size  = 0;
 	}
 	}
 
 
-	inline uint32_t HashAdler32::end()
+	inline void HashMurmur2A::add(const char* _data)
 	{
 	{
-		return m_a | (m_b<<16);
+		return add(StringView(_data) );
 	}
 	}
 
 
-	template<typename Ty>
-	inline void HashCrc32::add(Ty _value)
+	inline void HashMurmur2A::add(const StringView& _data)
 	{
 	{
-		add(&_value, sizeof(Ty) );
+		return add(_data.getPtr(), _data.getLength() );
 	}
 	}
 
 
-	inline uint32_t HashCrc32::end()
+	template<typename Ty>
+	inline void HashMurmur2A::add(const Ty& _data)
 	{
 	{
-		m_hash ^= UINT32_MAX;
-		return m_hash;
+		add(&_data, sizeof(Ty) );
 	}
 	}
 
 
 	template<typename HashT>
 	template<typename HashT>
@@ -181,15 +99,14 @@ namespace bx
 	{
 	{
 		HashT hh;
 		HashT hh;
 		hh.begin();
 		hh.begin();
-		hh.add(_data, (int)_size);
+		hh.add(_data, (int32_t)_size);
 		return hh.end();
 		return hh.end();
 	}
 	}
 
 
-	template<typename HashT, typename Ty>
-	inline uint32_t hash(const Ty& _data)
+	template<typename HashT>
+	inline uint32_t hash(const char* _data)
 	{
 	{
-		BX_STATIC_ASSERT(isTriviallyCopyable<Ty>() );
-		return hash<HashT>(&_data, sizeof(Ty) );
+		return hash<HashT>(StringView(_data) );
 	}
 	}
 
 
 	template<typename HashT>
 	template<typename HashT>
@@ -198,10 +115,11 @@ namespace bx
 		return hash<HashT>(_data.getPtr(), _data.getLength() );
 		return hash<HashT>(_data.getPtr(), _data.getLength() );
 	}
 	}
 
 
-	template<typename HashT>
-	inline uint32_t hash(const char* _data)
+	template<typename HashT, typename Ty>
+	inline uint32_t hash(const Ty& _data)
 	{
 	{
-		return hash<HashT>(StringView(_data) );
+		BX_STATIC_ASSERT(isTriviallyCopyable<Ty>() );
+		return hash<HashT>(&_data, sizeof(Ty) );
 	}
 	}
 
 
 } // namespace bx
 } // namespace bx

+ 133 - 1
src/hash.cpp

@@ -133,7 +133,7 @@ void HashCrc32::begin(Enum _type)
 	m_table = s_crcTable[_type];
 	m_table = s_crcTable[_type];
 }
 }
 
 
-void HashCrc32::add(const void* _data, int _len)
+void HashCrc32::add(const void* _data, int32_t _len)
 {
 {
 	const uint8_t* data = (const uint8_t*)_data;
 	const uint8_t* data = (const uint8_t*)_data;
 
 
@@ -147,4 +147,136 @@ void HashCrc32::add(const void* _data, int _len)
 	m_hash = hash;
 	m_hash = hash;
 }
 }
 
 
+struct HashMurmur2APod // Plain struct with the same four uint32_t fields that HashMurmur2A declares privately; the file-local helpers below operate on this view.
+{
+	uint32_t m_hash; // running hash value
+	uint32_t m_tail; // bytes of a not-yet-complete 4-byte word, packed low byte first (see mixTail)
+	uint32_t m_count; // number of bytes currently held in m_tail (0..3 between calls)
+	uint32_t m_size; // total number of bytes added so far
+};
+BX_STATIC_ASSERT(sizeof(HashMurmur2A) == sizeof(HashMurmur2APod) ); // guards the cast in HashMurmur2A::add; checks size only, not member order
+
+BX_FORCE_INLINE void mmix(uint32_t& _h, uint32_t& _k) // One Murmur mixing step: scrambles _k and folds it into _h. Both arguments are modified in place.
+{
+	constexpr uint32_t kMurmurMul = 0x5bd1e995; // multiplier applied to _k (twice) and to _h
+	constexpr uint32_t kMurmurRightShift = 24; // shift distance for the xor-fold of _k
+
+	_k *= kMurmurMul;
+	_k ^= _k >> kMurmurRightShift;
+	_k *= kMurmurMul;
+	_h *= kMurmurMul;
+	_h ^= _k; // fold the scrambled word into the running hash
+}
+
+static void mixTail(HashMurmur2APod& _self, const uint8_t*& _data, int32_t& _len) // Consumes bytes one at a time into m_tail; _data and _len are advanced/decremented for the caller.
+{
+	while (_len // stop when the input is exhausted
+	&&  ( (_len<4) || _self.m_count) // keep going only if fewer than 4 bytes remain or a partial word is pending
+		)
+	{
+		_self.m_tail |= (*_data++) << (_self.m_count * 8); // place the byte at position m_count (first byte in the low 8 bits)
+
+		_self.m_count++;
+		_len--;
+
+		if (_self.m_count == 4) // a full 4-byte word has been assembled
+		{
+			mmix(_self.m_hash, _self.m_tail); // mix it into the hash (mmix also alters m_tail, which is reset next)
+			_self.m_tail  = 0;
+			_self.m_count = 0;
+		}
+	}
+}
+
+static void addAligned(HashMurmur2APod& _self, const void* _data, int32_t _len) // Adds _len bytes to the hash, reading whole words directly through a uint32_t pointer.
+{
+	const uint8_t* data = (const uint8_t*)_data;
+	_self.m_size += _len; // track the total byte count; it is mixed in by HashMurmur2A::end
+
+	mixTail(_self, data, _len); // first complete any partial word left over from a previous add
+
+	while(_len >= 4) // bulk path: one 4-byte word per iteration
+	{
+		uint32_t kk = *(uint32_t*)data; // NOTE(review): drops const and type-puns; also, if mixTail above consumed 1-3 bytes, data is no longer 4-byte aligned even though _data was
+
+		mmix(_self.m_hash, kk);
+
+		data += 4;
+		_len -= 4;
+	}
+
+	mixTail(_self, data, _len); // stash the remaining 0-3 bytes in m_tail
+}
+
+BX_FORCE_INLINE void readUnaligned(const void* _data, uint32_t& _out) // Assembles a uint32_t from 4 individual byte reads at _data and stores it in _out.
+{
+	const uint8_t* data = (const uint8_t*)_data;
+	if (BX_ENABLED(BX_CPU_ENDIAN_BIG) ) // byte order of the assembled word depends on BX_CPU_ENDIAN_BIG
+	{
+		_out = 0
+			| data[0]<<24 // first byte becomes the most significant byte
+			| data[1]<<16
+			| data[2]<<8
+			| data[3]
+			;
+	}
+	else
+	{
+		_out = 0
+			| data[0] // first byte becomes the least significant byte
+			| data[1]<<8
+			| data[2]<<16
+			| data[3]<<24
+			;
+	}
+}
+
+static void addUnaligned(HashMurmur2APod& _self, const void* _data, int32_t _len) // Same steps as addAligned, but each word is assembled byte-by-byte via readUnaligned.
+{
+	const uint8_t* data = (const uint8_t*)_data;
+	_self.m_size += _len; // track the total byte count; it is mixed in by HashMurmur2A::end
+
+	mixTail(_self, data, _len); // first complete any partial word left over from a previous add
+
+	while(_len >= 4) // bulk path: one 4-byte word per iteration
+	{
+		uint32_t kk;
+		readUnaligned(data, kk); // byte-wise load, no alignment requirement on data
+
+		mmix(_self.m_hash, kk);
+
+		data += 4;
+		_len -= 4;
+	}
+
+	mixTail(_self, data, _len); // stash the remaining 0-3 bytes in m_tail
+}
+
+void HashMurmur2A::add(const void* _data, int32_t _len) // Feeds _len bytes starting at _data into the hash; may be called repeatedly between begin() and end().
+{
+	HashMurmur2APod& self = *(HashMurmur2APod*)this; // view this object through the POD mirror so the file-local helpers can reach its state
+
+	if (BX_UNLIKELY(!isAligned(_data, 4) ) ) // input pointer is not 4-byte aligned: use the byte-wise reader
+	{
+		addUnaligned(self, _data, _len);
+		return;
+	}
+
+	addAligned(self, _data, _len); // aligned input: words are loaded directly
+}
+
+uint32_t HashMurmur2A::end() // Finalizes the hash and returns it. Mutates m_hash, m_tail and m_size, so the object must be re-begun before reuse.
+{
+	constexpr uint32_t kMurmurMul = 0x5bd1e995; // same multiplier value as used inside mmix
+
+	mmix(m_hash, m_tail); // fold in any pending partial word (mmix also modifies m_tail)
+	mmix(m_hash, m_size); // fold in the total byte count (mmix also modifies m_size)
+
+	m_hash ^= m_hash >> 13; // final shift-xor / multiply / shift-xor scramble of the result
+	m_hash *= kMurmurMul;
+	m_hash ^= m_hash >> 15;
+
+	return m_hash;
+}
+
 } // namespace bx
 } // namespace bx

+ 10 - 1
tests/hash_test.cpp

@@ -93,7 +93,7 @@ TEST_CASE("HashAdler32", "")
 
 
 #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
 #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
 
 
-uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
+uint32_t MurmurHash2A(const void * key, int len, uint32_t seed = 0)
 {
 {
 	const uint32_t m = 0x5bd1e995;
 	const uint32_t m = 0x5bd1e995;
 	const int r = 24;
 	const int r = 24;
@@ -148,3 +148,12 @@ TEST_CASE("HashMurmur2A", "")
 		REQUIRE(test.murmur2a == MurmurHash2A(test.input, bx::strLen(test.input), seed) );
 		REQUIRE(test.murmur2a == MurmurHash2A(test.input, bx::strLen(test.input), seed) );
 	}
 	}
 }
 }
+
+TEST_CASE("HashMurmur2A-Separate-Add", "") // Checks that hashing a string in two add() calls matches the reference hash of the concatenated string.
+{
+	bx::HashMurmur2A hash;
+	hash.begin(); // no seed argument given
+	hash.add("0123456789"); // 10 characters: not a multiple of 4, so a partial word is left pending
+	hash.add("abvgd012345"); // 11 characters: must first complete the pending word from the previous add
+	REQUIRE(MurmurHash2A("0123456789abvgd012345", 21) == hash.end() ); // reference implementation over all 21 characters in one call
+}