瀏覽代碼

Add initial HashMap implementation

Daniele Bartolini 9 年之前
父節點
當前提交
e08bcfca6f
共有 3 個文件被更改,包括 307 次插入360 次删除
  1. 0 360
      src/core/containers/hash.h
  2. 279 0
      src/core/containers/hash_map.h
  3. 28 0
      src/core/unit_tests.cpp

+ 0 - 360
src/core/containers/hash.h

@@ -1,360 +0,0 @@
-/*
- * Copyright (c) 2012-2016 Daniele Bartolini and individual contributors.
- * License: https://github.com/taylor001/crown/blob/master/LICENSE
- */
-
-/*
- * Copyright (C) 2012 Bitsquid AB
- * License: https://bitbucket.org/bitsquid/foundation/src/default/LICENCSE
- */
-
-#pragma once
-
-#include "array.h"
-#include "container_types.h"
-
-namespace crown
-{
-	/// Functions to manipulate Hash.
-	///
-	/// The hash function stores its data in a "list-in-an-array" where
-	/// indices are used instead of pointers.
-	///
-	/// When items are removed, the array-list is repacked to always keep
-	/// it tightly ordered.
-	///
-	/// @ingroup Containers
-	namespace hash
-	{
-		/// Returns true if the specified key exists in the hash.
-		template<typename T> bool has(const Hash<T>& h, u64 key);
-
-		/// Returns the value stored for the specified key, or deffault if the key
-		/// does not exist in the hash.
-		template<typename T> const T& get(const Hash<T>& h, u64 key, const T& deffault);
-
-		/// Sets the value for the key.
-		template<typename T> void set(Hash<T>& h, u64 key, const T& value);
-
-		/// Removes the key from the hash if it exists.
-		template<typename T> void remove(Hash<T>& h, u64 key);
-
-		/// Resizes the hash lookup table to the specified size.
-		/// (The table will grow automatically when 70 % full.)
-		template<typename T> void reserve(Hash<T>& h, u32 size);
-
-		/// Remove all elements from the hash.
-		template<typename T> void clear(Hash<T>& h);
-
-		/// Returns a pointer to the first entry in the hash table, can be used to
-		/// efficiently iterate over the elements (in random order).
-		template<typename T> const typename Hash<T>::Entry* begin(const Hash<T>& h);
-		template<typename T> const typename Hash<T>::Entry* end(const Hash<T>& h);
-	}
-
-	/// Functions to manipulate Hash as a multi-hash.
-	///
-	/// @ingroup Containers
-	namespace multi_hash
-	{
-		/// Finds the first entry with the specified key.
-		template<typename T> const typename Hash<T>::Entry* find_first(const Hash<T>& h, u64 key);
-
-		/// Finds the next entry with the same key as e.
-		template<typename T> const typename Hash<T>::Entry* find_next(const Hash<T>& h, const typename Hash<T>::Entry* e);
-
-		/// Returns the number of entries with the key.
-		template<typename T> u32 count(const Hash<T>& h, u64 key);
-
-		/// Returns all the entries with the specified key.
-		/// Use a TempAllocator for the array to avoid allocating memory.
-		template<typename T> void get(const Hash<T>& h, u64 key, Array<T> &items);
-
-		/// Inserts the value as an aditional value for the key.
-		template<typename T> void insert(Hash<T>& h, u64 key, const T& value);
-
-		/// Removes the specified entry.
-		template<typename T> void remove(Hash<T>& h, const typename Hash<T>::Entry* e);
-
-		/// Removes all entries with the specified key.
-		template<typename T> void remove_all(Hash<T>& h, u64 key);
-	}
-
-	namespace hash_internal
-	{
-		const u32 END_OF_LIST = 0xffffffffu;
-
-		struct FindResult
-		{
-			u32 hash_i;
-			u32 data_prev;
-			u32 data_i;
-		};
-
-		template<typename T> u32 add_entry(Hash<T>& h, u64 key)
-		{
-			typename Hash<T>::Entry e;
-			e.key = key;
-			e.next = END_OF_LIST;
-			u32 ei = array::size(h._data);
-			array::push_back(h._data, e);
-			return ei;
-		}
-
-		template<typename T> FindResult find(const Hash<T>& h, u64 key)
-		{
-			FindResult fr;
-			fr.hash_i = END_OF_LIST;
-			fr.data_prev = END_OF_LIST;
-			fr.data_i = END_OF_LIST;
-
-			if (array::size(h._hash) == 0)
-				return fr;
-
-			fr.hash_i = key % array::size(h._hash);
-			fr.data_i = h._hash[fr.hash_i];
-			while (fr.data_i != END_OF_LIST) {
-				if (h._data[fr.data_i].key == key)
-					return fr;
-				fr.data_prev = fr.data_i;
-				fr.data_i = h._data[fr.data_i].next;
-			}
-			return fr;
-		}
-
-		template<typename T> FindResult find(const Hash<T>& h, const typename Hash<T>::Entry* e)
-		{
-			FindResult fr;
-			fr.hash_i = END_OF_LIST;
-			fr.data_prev = END_OF_LIST;
-			fr.data_i = END_OF_LIST;
-
-			if (array::size(h._hash) == 0)
-				return fr;
-
-			fr.hash_i = e->key % array::size(h._hash);
-			fr.data_i = h._hash[fr.hash_i];
-			while (fr.data_i != END_OF_LIST) {
-				if (&h._data[fr.data_i] == e)
-					return fr;
-				fr.data_prev = fr.data_i;
-				fr.data_i = h._data[fr.data_i].next;
-			}
-			return fr;
-		}
-
-		template<typename T> void erase(Hash<T>& h, const FindResult &fr)
-		{
-			/// Fix from: https://bitbucket.org/bitsquid/foundation/issues/5/hash-erase-an-array-size-update-is-missing
-			if (fr.data_prev == END_OF_LIST)
-				h._hash[fr.hash_i] = h._data[fr.data_i].next;
-			else
-				h._data[fr.data_prev].next = h._data[fr.data_i].next;
-
-			array::pop_back(h._data);
-
-			if (fr.data_i == array::size(h._data)) return;
-
-			h._data[fr.data_i] = h._data[array::size(h._data)];
-
-			FindResult last = find(h, &h._data[array::size(h._data)]);
-
-			if (last.data_prev != END_OF_LIST)
-				h._data[last.data_prev].next = fr.data_i;
-			else
-				h._hash[last.hash_i] = fr.data_i;
-		}
-
-		template<typename T> u32 find_or_fail(const Hash<T>& h, u64 key)
-		{
-			return find(h, key).data_i;
-		}
-
-		template<typename T> u32 find_or_make(Hash<T>& h, u64 key)
-		{
-			const FindResult fr = find(h, key);
-			if (fr.data_i != END_OF_LIST)
-				return fr.data_i;
-
-			u32 i = add_entry(h, key);
-			if (fr.data_prev == END_OF_LIST)
-				h._hash[fr.hash_i] = i;
-			else
-				h._data[fr.data_prev].next = i;
-			return i;
-		}
-
-		template<typename T> u32 make(Hash<T>& h, u64 key)
-		{
-			const FindResult fr = find(h, key);
-			const u32 i = add_entry(h, key);
-
-			if (fr.data_prev == END_OF_LIST)
-				h._hash[fr.hash_i] = i;
-			else
-				h._data[fr.data_prev].next = i;
-
-			h._data[i].next = fr.data_i;
-			return i;
-		}
-
-		template<typename T> void find_and_erase(Hash<T>& h, u64 key)
-		{
-			const FindResult fr = find(h, key);
-			if (fr.data_i != END_OF_LIST)
-				erase(h, fr);
-		}
-
-		template<typename T> void rehash(Hash<T>& h, u32 new_size)
-		{
-			Hash<T> nh(*h._hash._allocator);
-			array::resize(nh._hash, new_size);
-			array::reserve(nh._data, array::size(h._data));
-			for (u32 i=0; i<new_size; ++i)
-				nh._hash[i] = END_OF_LIST;
-			for (u32 i=0; i<array::size(h._data); ++i) {
-				const typename Hash<T>::Entry &e = h._data[i];
-				multi_hash::insert(nh, e.key, e.value);
-			}
-
-			Hash<T> empty(*h._hash._allocator);
-			h.~Hash<T>();
-			memcpy(&h, &nh, sizeof(Hash<T>));
-			memcpy(&nh, &empty, sizeof(Hash<T>));
-		}
-
-		template<typename T> bool full(const Hash<T>& h)
-		{
-			const f32 max_load_factor = 0.7f;
-			return array::size(h._data) >= array::size(h._hash) * max_load_factor;
-		}
-
-		template<typename T> void grow(Hash<T>& h)
-		{
-			const u32 new_size = array::size(h._data) * 2 + 10;
-			rehash(h, new_size);
-		}
-	}
-
-	namespace hash
-	{
-		template<typename T> bool has(const Hash<T>& h, u64 key)
-		{
-			return hash_internal::find_or_fail(h, key) != hash_internal::END_OF_LIST;
-		}
-
-		template<typename T> const T& get(const Hash<T>& h, u64 key, const T& deffault)
-		{
-			const u32 i = hash_internal::find_or_fail(h, key);
-			return i == hash_internal::END_OF_LIST ? deffault : h._data[i].value;
-		}
-
-		template<typename T> void set(Hash<T>& h, u64 key, const T& value)
-		{
-			if (array::size(h._hash) == 0)
-				hash_internal::grow(h);
-
-			const u32 i = hash_internal::find_or_make(h, key);
-			h._data[i].value = value;
-			if (hash_internal::full(h))
-				hash_internal::grow(h);
-		}
-
-		template<typename T> void remove(Hash<T>& h, u64 key)
-		{
-			hash_internal::find_and_erase(h, key);
-		}
-
-		template<typename T> void reserve(Hash<T>& h, u32 size)
-		{
-			hash_internal::rehash(h, size);
-		}
-
-		template<typename T> void clear(Hash<T>& h)
-		{
-			array::clear(h._data);
-			array::clear(h._hash);
-		}
-
-		template<typename T> const typename Hash<T>::Entry* begin(const Hash<T>& h)
-		{
-			return array::begin(h._data);
-		}
-
-		template<typename T> const typename Hash<T>::Entry* end(const Hash<T>& h)
-		{
-			return array::end(h._data);
-		}
-	}
-
-	namespace multi_hash
-	{
-		template<typename T> const typename Hash<T>::Entry* find_first(const Hash<T>& h, u64 key)
-		{
-			const u32 i = hash_internal::find_or_fail(h, key);
-			return i == hash_internal::END_OF_LIST ? 0 : &h._data[i];
-		}
-
-		template<typename T> const typename Hash<T>::Entry* find_next(const Hash<T>& h, const typename Hash<T>::Entry* e)
-		{
-			u32 i = e->next;
-			while (i != hash_internal::END_OF_LIST) {
-				if (h._data[i].key == e->key)
-					return &h._data[i];
-				i = h._data[i].next;
-			}
-			return 0;
-		}
-
-		template<typename T> u32 count(const Hash<T>& h, u64 key)
-		{
-			u32 i = 0;
-			const typename Hash<T>::Entry* e = find_first(h, key);
-			while (e) {
-				++i;
-				e = find_next(h, e);
-			}
-			return i;
-		}
-
-		template<typename T> void get(const Hash<T>& h, u64 key, Array<T> &items)
-		{
-			const typename Hash<T>::Entry* e = find_first(h, key);
-			while (e) {
-				array::push_back(items, e->value);
-				e = find_next(h, e);
-			}
-		}
-
-		template<typename T> void insert(Hash<T>& h, u64 key, const T& value)
-		{
-			if (array::size(h._hash) == 0)
-				hash_internal::grow(h);
-
-			const u32 i = hash_internal::make(h, key);
-			h._data[i].value = value;
-			if (hash_internal::full(h))
-				hash_internal::grow(h);
-		}
-
-		template<typename T> void remove(Hash<T>& h, const typename Hash<T>::Entry* e)
-		{
-			const hash_internal::FindResult fr = hash_internal::find(h, e);
-			if (fr.data_i != hash_internal::END_OF_LIST)
-				hash_internal::erase(h, fr);
-		}
-
-		template<typename T> void remove_all(Hash<T>& h, u64 key)
-		{
-			while (hash::has(h, key))
-				hash::remove(h, key);
-		}
-	}
-
-	template <typename T> Hash<T>::Hash(Allocator &a)
-		: _hash(a)
-		, _data(a)
-	{
-	}
-
-} // namespace crown

+ 279 - 0
src/core/containers/hash_map.h

@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2012-2016 Daniele Bartolini and individual contributors.
+ * License: https://github.com/taylor001/crown/blob/master/LICENSE
+ */
+
+// http://sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/
+
+#pragma once
+
+#include "container_types.h"
+#include <algorithm> // std::swap
+#include <string.h>  // memcpy
+
+namespace crown
+{
+/// Functions to manipulate HashMap.
+///
+/// @ingroup Containers
+namespace hash_map
+{
+	/// Returns the number of items in the map @a m.
+	template <typename TKey, typename TValue, typename Hash> u32 size(const HashMap<TKey, TValue, Hash>& m);
+
+	/// Returns whether the given @a key exists in the map @a m.
+	template <typename TKey, typename TValue, typename Hash> bool has(const HashMap<TKey, TValue, Hash>& m, const TKey& key);
+
+	/// Returns the value for the given @a key or @a deffault if
+	/// the key does not exist in the map.
+	///
+	/// @note
+	/// When the key is missing the returned reference is @a deffault itself,
+	/// so @a deffault must outlive any use of the result.
+	template <typename TKey, typename TValue, typename Hash> const TValue& get(const HashMap<TKey, TValue, Hash>& m, const TKey& key, const TValue& deffault);
+
+	/// Sets the @a value for the @a key in the map, overwriting the
+	/// previous value if the key is already present.
+	template <typename TKey, typename TValue, typename Hash> void set(HashMap<TKey, TValue, Hash>& m, const TKey& key, const TValue& value);
+
+	/// Removes the @a key from the map if it exists.
+	template <typename TKey, typename TValue, typename Hash> void remove(HashMap<TKey, TValue, Hash>& m, const TKey& key);
+
+	/// Removes all the items in the map.
+	///
+	/// @note
+	/// Calls destructor on the items.
+	template <typename TKey, typename TValue, typename Hash> void clear(HashMap<TKey, TValue, Hash>& m);
+} // namespace hash_map
+
+namespace hash_map_internal
+{
+	// Slot index returned by find() when the key is not in the map.
+	const u32 END_OF_LIST = 0xffffffffu;
+	// Bit OR-ed into a slot's stored hash by hash_map::remove() to mark it as a tombstone.
+	const u32 DELETED = 0x80000000u;
+	// Stored hash of a slot that has never held an element (hash_key() never returns it).
+	const u32 FREE = 0x00000000u;
+
+	/// Hashes @a key with a default-constructed @a Hash functor and folds the
+	/// result into the range of values that may be stored for a live element:
+	/// the MSB is always clear and the result is never 0.
+	template <typename TKey, class Hash>
+	inline u32 hash_key(const TKey& key)
+	{
+		const Hash hasher;
+		const u32 raw = hasher(key);
+
+		// The MSB is reserved as the "deleted" flag, strip it.
+		const u32 masked = raw & 0x7fffffffu;
+
+		// 0 is reserved for never-used slots, map it to 1.
+		return masked == 0u ? 1u : masked;
+	}
+
+	/// Returns whether the stored @a hash carries the tombstone flag (MSB set).
+	inline bool is_deleted(u32 hash)
+	{
+		const u32 tombstone_bit = hash & 0x80000000u;
+		return tombstone_bit != 0u;
+	}
+
+	/// Returns how many slots away @a slot_index is from the slot that
+	/// @a hash maps to in @a m, wrapping around the end of the table.
+	template <typename TKey, typename TValue, typename Hash>
+	inline u32 probe_distance(const HashMap<TKey, TValue, Hash>& m, u32 hash, u32 slot_index)
+	{
+		const u32 home_slot = hash & m._mask;
+		// Adding the capacity keeps the difference non-negative before masking.
+		const u32 offset = slot_index + m._capacity - home_slot;
+		return offset & m._mask;
+	}
+
+	/// Returns the slot index holding @a key in @a m, or END_OF_LIST
+	/// if the key is not present.
+	template <typename TKey, typename TValue, typename Hash>
+	u32 find(const HashMap<TKey, TValue, Hash>& m, const TKey& key)
+	{
+		// An empty map is never probed.
+		if (m._size == 0)
+			return END_OF_LIST;
+
+		const u32 wanted = hash_key<TKey, Hash>(key);
+		u32 slot = wanted & m._mask;
+
+		for (u32 probes = 0; ; ++probes, slot = (slot + 1) & m._mask)
+		{
+			const u32 stored = m._hashes[slot];
+
+			// A never-used slot ends the probe sequence.
+			if (stored == FREE)
+				return END_OF_LIST;
+
+			// We have probed further than the resident element did, so the
+			// key cannot be stored beyond this point.
+			if (probes > probe_distance(m, stored, slot))
+				return END_OF_LIST;
+
+			if (stored == wanted && m._data[slot].pair.first == key)
+				return slot;
+		}
+	}
+
+	/// Stores (@a hash, @a key, @a value) in the table of @a m, probing
+	/// linearly from the slot @a hash maps to.
+	///
+	/// @note
+	/// @a key and @a value are taken by non-const reference because they are
+	/// swapped with displaced elements while probing: on return they may no
+	/// longer hold the values the caller passed in.
+	///
+	/// @note
+	/// Does not modify m._size and does not look for an existing entry
+	/// with the same key.
+	template <typename TKey, typename TValue, typename Hash>
+	void insert(HashMap<TKey, TValue, Hash>& m, u32 hash, TKey& key, TValue& value)
+	{
+		u32 hash_i = hash & m._mask;
+		u32 dist = 0;
+		for(;;)
+		{
+			// Never-used slot: take it.
+			if (m._hashes[hash_i] == FREE)
+				goto INSERT_AND_RETURN;
+
+			// If the existing elem has probed less than us, then swap places with existing
+			// elem, and keep going to find another slot for that elem.
+			u32 existing_elem_probe_dist = probe_distance(m, m._hashes[hash_i], hash_i);
+			if (existing_elem_probe_dist < dist)
+			{
+				// A tombstone holds no live element, so it can be overwritten in place.
+				if (is_deleted(m._hashes[hash_i]))
+					goto INSERT_AND_RETURN;
+
+				// From here on the displaced element is the one being inserted.
+				std::swap(hash, m._hashes[hash_i]);
+				std::swap(key, m._data[hash_i].pair.first);
+				std::swap(value, m._data[hash_i].pair.second);
+				dist = existing_elem_probe_dist;
+			}
+
+			hash_i = (hash_i + 1) & m._mask;
+			++dist;
+		}
+
+	INSERT_AND_RETURN:
+		// NOTE(review): the slot is written with assignment, not construction;
+		// confirm this is valid for the Entry types stored in freshly allocated tables.
+		m._data[hash_i].pair.first = key;
+		m._data[hash_i].pair.second = value;
+		m._hashes[hash_i] = hash;
+	}
+
+	/// Rebuilds @a m into a freshly allocated table of @a new_capacity slots,
+	/// re-inserting every live element and dropping tombstones.
+	///
+	/// @note
+	/// The mask is computed as new_capacity - 1 and used with bitwise AND to
+	/// wrap slot indices, so @a new_capacity is expected to be a power of two.
+	template <typename TKey, typename TValue, typename Hash>
+	void rehash(HashMap<TKey, TValue, Hash>& m, u32 new_capacity)
+	{
+		typedef typename HashMap<TKey, TValue, Hash>::Entry Entry;
+
+		// Build the replacement table in a temporary map using the same allocator.
+		HashMap<TKey, TValue, Hash> nm(*m._allocator);
+		nm._hashes = (u32*)nm._allocator->allocate(new_capacity*sizeof(u32), alignof(u32));
+		nm._data = (Entry*)nm._allocator->allocate(new_capacity*sizeof(Entry), alignof(Entry));
+
+		// Flag all elements as free
+		for (u32 i = 0; i < new_capacity; ++i)
+			nm._hashes[i] = FREE;
+
+		nm._capacity = new_capacity;
+		nm._size = m._size;
+		nm._mask = new_capacity - 1;
+
+		// Re-insert live elements with their stored hash; free slots and
+		// tombstones are skipped.
+		for (u32 i = 0; i < m._capacity; ++i)
+		{
+			typename HashMap<TKey, TValue, Hash>::Entry& e = m._data[i];
+			const u32 hash = m._hashes[i];
+
+			if (hash != FREE && !is_deleted(hash))
+				hash_map_internal::insert(nm, hash, e.pair.first, e.pair.second);
+		}
+
+		// Release the old storage, then move nm's state into m bytewise.
+		// nm is overwritten with an empty map so that its destructor, run at
+		// scope exit, does not free the storage m now owns.
+		HashMap<TKey, TValue, Hash> empty(*m._allocator);
+		m.~HashMap<TKey, TValue, Hash>();
+		memcpy(&m, &nm, sizeof(HashMap<TKey, TValue, Hash>));
+		memcpy(&nm, &empty, sizeof(HashMap<TKey, TValue, Hash>));
+	}
+
+	/// Doubles the capacity of @a m, or gives it 16 slots if it has none yet.
+	template <typename TKey, typename TValue, typename Hash>
+	void grow(HashMap<TKey, TValue, Hash>& m)
+	{
+		u32 target_capacity = 16;
+		if (m._capacity != 0)
+			target_capacity = m._capacity * 2;
+
+		rehash(m, target_capacity);
+	}
+
+	/// Returns whether the number of items in @a m has reached 90% of its capacity.
+	template <typename TKey, typename TValue, typename Hash>
+	bool full(const HashMap<TKey, TValue, Hash>& m)
+	{
+		const float threshold = m._capacity * 0.9f;
+		return m._size >= threshold;
+	}
+} // namespace hash_map_internal
+
+namespace hash_map
+{
+	/// Reports the item count tracked in m._size.
+	template <typename TKey, typename TValue, typename Hash>
+	u32 size(const HashMap<TKey, TValue, Hash>& m)
+	{
+		const u32 num_items = m._size;
+		return num_items;
+	}
+
+	/// True when a lookup of @a key in @a m yields a valid slot.
+	template <typename TKey, typename TValue, typename Hash>
+	bool has(const HashMap<TKey, TValue, Hash>& m, const TKey& key)
+	{
+		const u32 slot = hash_map_internal::find(m, key);
+		return slot != hash_map_internal::END_OF_LIST;
+	}
+
+	/// Looks up @a key in @a m and returns a reference to its stored value;
+	/// returns @a deffault itself when the key is missing.
+	template <typename TKey, typename TValue, typename Hash>
+	const TValue& get(const HashMap<TKey, TValue, Hash>& m, const TKey& key, const TValue& deffault)
+	{
+		const u32 slot = hash_map_internal::find(m, key);
+		if (slot == hash_map_internal::END_OF_LIST)
+			return deffault;
+
+		return m._data[slot].pair.second;
+	}
+
+	/// Associates @a value with @a key in @a m, overwriting the previous
+	/// value if the key is already present. Grows the table when it
+	/// becomes full.
+	template <typename TKey, typename TValue, typename Hash>
+	void set(HashMap<TKey, TValue, Hash>& m, const TKey& key, const TValue& value)
+	{
+		// Allocate storage on first use. The test is on capacity, not size:
+		// a map that has been emptied still owns a usable table and must
+		// not be doubled on every insertion.
+		if (m._capacity == 0)
+			hash_map_internal::grow(m);
+
+		// Find or make
+		const u32 i = hash_map_internal::find(m, key);
+		if (i == hash_map_internal::END_OF_LIST)
+		{
+			// insert() swaps its key/value arguments with displaced elements,
+			// so hand it private copies instead of casting away the constness
+			// of the caller's objects.
+			TKey key_copy = key;
+			TValue value_copy = value;
+			hash_map_internal::insert(m, hash_map_internal::hash_key<TKey, Hash>(key), key_copy, value_copy);
+			++m._size;
+		}
+		else
+		{
+			m._data[i].pair.second = value;
+		}
+		if (hash_map_internal::full(m))
+			hash_map_internal::grow(m);
+	}
+
+	/// Erases @a key from @a m; does nothing when the key is not present.
+	/// The slot is kept as a tombstone rather than being freed.
+	template <typename TKey, typename TValue, typename Hash>
+	void remove(HashMap<TKey, TValue, Hash>& m, const TKey& key)
+	{
+		const u32 slot = hash_map_internal::find(m, key);
+		if (slot != hash_map_internal::END_OF_LIST)
+		{
+			m._data[slot].~Entry();
+			// Setting the flag bit marks the slot as deleted.
+			m._hashes[slot] |= hash_map_internal::DELETED;
+			--m._size;
+		}
+	}
+
+	/// Removes every item from @a m, calling the destructor of each live
+	/// entry and flagging all slots as free. The allocated capacity is kept.
+	template <typename TKey, typename TValue, typename Hash>
+	void clear(HashMap<TKey, TValue, Hash>& m)
+	{
+		for (u32 i = 0; i < m._capacity; ++i)
+		{
+			const u32 hash = m._hashes[i];
+
+			// Live entries can sit in any slot, so walk the whole table.
+			// Tombstones were already destroyed by remove().
+			if (hash != hash_map_internal::FREE && !hash_map_internal::is_deleted(hash))
+				m._data[i].~Entry();
+
+			// Flag the element as free
+			m._hashes[i] = hash_map_internal::FREE;
+		}
+
+		// Reset the count only after the entries have been destroyed.
+		m._size = 0;
+	}
+} // namespace hash_map
+
+/// Creates an empty map that will use @a a for its allocations.
+/// No storage is allocated here: capacity, size and mask start at 0
+/// and both table pointers are NULL.
+template <typename TKey, typename TValue, typename Hash>
+HashMap<TKey, TValue, Hash>::HashMap(Allocator& a)
+	: _allocator(&a)
+	, _capacity(0)
+	, _size(0)
+	, _mask(0)
+	, _hashes(NULL)
+	, _data(NULL)
+{
+}
+
+/// Destroys every live entry and releases the hash and data tables.
+template <typename TKey, typename TValue, typename Hash>
+HashMap<TKey, TValue, Hash>::~HashMap()
+{
+	// Live entries are scattered across the table, so every slot has to be
+	// inspected; the stored hashes tell which ones hold an element.
+	// Tombstones were already destroyed by hash_map::remove().
+	for (u32 i = 0; i < _capacity; ++i)
+	{
+		const u32 hash = _hashes[i];
+		if (hash != hash_map_internal::FREE && !hash_map_internal::is_deleted(hash))
+			_data[i].~Entry();
+	}
+
+	// The hashes are needed by the loop above, so free them last.
+	_allocator->deallocate(_hashes);
+	_allocator->deallocate(_data);
+}
+
+/// Returns the value stored for @a key, or a reference to a
+/// value-initialized TValue if the key does not exist in the map.
+template <typename TKey, typename TValue, typename Hash>
+const TValue& HashMap<TKey, TValue, Hash>::operator[](const TKey& key) const
+{
+	// hash_map::get() hands back its default argument by reference, so the
+	// fallback must outlive this call: passing a temporary would leave the
+	// caller with a dangling reference.
+	static const TValue deffault = TValue();
+	return hash_map::get(*this, key, deffault);
+}
+
+} // namespace crown

+ 28 - 0
src/core/unit_tests.cpp

@@ -12,6 +12,7 @@
 #include "color4.h"
 #include "command_line.h"
 #include "dynamic_string.h"
+#include "hash_map.h"
 #include "json.h"
 #include "macros.h"
 #include "math_utils.h"
@@ -78,6 +79,32 @@ static void test_vector()
 	memory_globals::shutdown();
 }
 
+// Exercises HashMap<s32, s32>: lookups on an empty map, bulk insertion,
+// removal of a single key and clear().
+static void test_hash_map()
+{
+	memory_globals::init();
+	Allocator& a = default_allocator();
+	{
+		HashMap<s32, s32> m(a);
+
+		// Lookups on an empty map fall back to the default and report absence.
+		ENSURE(hash_map::get(m, 0, 42) == 42);
+		ENSURE(!hash_map::has(m, 10));
+
+		// 100 insertions starting from an empty map, then read every key back.
+		for (s32 i = 0; i < 100; ++i)
+			hash_map::set(m, i, i*i);
+		for (s32 i = 0; i < 100; ++i)
+			ENSURE(hash_map::get(m, i, 0) == i*i);
+
+		hash_map::remove(m, 20);
+		ENSURE(!hash_map::has(m, 20));
+
+		hash_map::clear(m);
+
+		// After clear() none of the previously inserted keys may be found.
+		for (s32 i = 0; i < 100; ++i)
+			ENSURE(!hash_map::has(m, i));
+	}
+	memory_globals::shutdown();
+}
+
 static void test_vector2()
 {
 	{
@@ -1113,6 +1140,7 @@ static void run_unit_tests()
 	test_memory();
 	test_array();
 	test_vector();
+	test_hash_map();
 	test_vector2();
 	test_vector3();
 	test_vector4();