Sfoglia il codice sorgente

Removed std::unordered_map/set (#1337)

* Replaced with a custom hash map/set implementation that performs far fewer allocations and thus performs better
* Added Hash class to get 64 bit hashes also on 32 bit platforms (the new hash map depends on 64 bit hashes)
* Added BVec16, a 16 byte SIMD vector
Jorrit Rouwe 9 mesi fa
parent
commit
f1420822d3

+ 14 - 10
Jolt/Core/Array.h

@@ -562,6 +562,19 @@ public:
 		return false;
 	}
 
+	/// Get a 64 bit hash for this array: the element count is hashed first, then every element is combined into the result in order
+	uint64					GetHash() const
+	{
+		// Hash length first (the size is cast to uint32 before it is hashed)
+		uint64 ret = Hash<uint32> { } (uint32(size()));
+
+		// Then hash elements, each one is folded into the running hash through HashCombine
+		for (const T *element = mElements, *element_end = mElements + mSize; element < element_end; ++element)
+			HashCombine(ret, *element);
+
+		return ret;
+	}
+
 private:
 	size_type				mSize = 0;
 	size_type				mCapacity = 0;
@@ -581,16 +594,7 @@ namespace std
 	{
 		size_t operator () (const JPH::Array<T, Allocator> &inRHS) const
 		{
-			std::size_t ret = 0;
-
-			// Hash length first
-			JPH::HashCombine(ret, inRHS.size());
-
-			// Then hash elements
-			for (const T &t : inRHS)
-				JPH::HashCombine(ret, t);
-
-			return ret;
+			return std::size_t(inRHS.GetHash());
 		}
 	};
 }

+ 127 - 12
Jolt/Core/HashCombine.h

@@ -23,6 +23,19 @@ inline uint64 HashBytes(const void *inData, uint inSize, uint64 inSeed = 0xcbf29
 	return hash;
 }
 
+/// Calculate the FNV-1a hash of the null terminated string inString, starting from inSeed (the terminator itself is not hashed).
+/// @see https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+constexpr uint64 HashString(const char *inString, uint64 inSeed = 0xcbf29ce484222325UL)
+{
+	uint64 hash = inSeed;
+	for (const char *c = inString; *c != 0; ++c)
+	{
+		hash ^= uint64(*c); // NOTE(review): a plain char is converted here, on platforms where char is signed a negative value is sign extended before the xor - confirm this is intended
+		hash = hash * 0x100000001b3UL; // Multiply by the 64 bit FNV prime
+	}
+	return hash;
+}
+
 /// A 64 bit hash function by Thomas Wang, Jan 1997
 /// See: http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
 /// @param inValue Value to hash
@@ -40,13 +53,101 @@ inline uint64 Hash64(uint64 inValue)
 	return hash;
 }
 
+/// Fallback hash function: the primary template, used for any T that has no specialization, forwards to T::GetHash()
+template <class T>
+struct Hash
+{
+	uint64		operator () (const T &inValue) const
+	{
+		return inValue.GetHash(); // Called on a const reference, so T::GetHash must be callable on a const object
+	}
+};
+
+/// A hash function for floats: hashes the bytes of the value after mapping -0.0f to 0.0f
+template <>
+struct Hash<float>
+{
+	uint64		operator () (float inValue) const
+	{
+		float value = inValue == 0.0f? 0.0f : inValue; // Convert -0.0f to 0.0f (both compare equal to 0.0f, so both must produce the same hash)
+		return HashBytes(&value, sizeof(value));
+	}
+};
+
+/// A hash function for doubles: hashes the bytes of the value after mapping -0.0 to 0.0
+template <>
+struct Hash<double>
+{
+	uint64		operator () (double inValue) const
+	{
+		double value = inValue == 0.0? 0.0 : inValue; // Convert -0.0 to 0.0 (both compare equal to 0.0, so both must produce the same hash)
+		return HashBytes(&value, sizeof(value));
+	}
+};
+
+/// A hash function for character pointers
+template <>
+struct Hash<const char *>
+{
+	uint64		operator () (const char *inValue) const
+	{
+		return HashString(inValue);
+	}
+};
+
+/// A hash function for std::string_view
+template <>
+struct Hash<std::string_view>
+{
+	uint64		operator () (const std::string_view &inValue) const
+	{
+		return HashBytes(inValue.data(), uint(inValue.size()));
+	}
+};
+
+/// A hash function for String
+template <>
+struct Hash<String>
+{
+	uint64		operator () (const String &inValue) const
+	{
+		return HashBytes(inValue.data(), uint(inValue.size()));
+	}
+};
+
+/// A fallback hash function for generic pointers: hashes the pointer value itself, not the object it points to
+template <class T>
+struct Hash<T *>
+{
+	uint64		operator () (T *inValue) const
+	{
+		return HashBytes(&inValue, sizeof(inValue)); // The bytes of the local pointer variable are hashed
+	}
+};
+
+/// Helper macro to define a hash function for trivial types
+#define JPH_DEFINE_TRIVIAL_HASH(type)						\
+template <>													\
+struct Hash<type>											\
+{															\
+	uint64		operator () (const type &inValue) const		\
+	{														\
+		return HashBytes(&inValue, sizeof(inValue));		\
+	}														\
+};
+
+/// Commonly used types
+JPH_DEFINE_TRIVIAL_HASH(char)
+JPH_DEFINE_TRIVIAL_HASH(int)
+JPH_DEFINE_TRIVIAL_HASH(uint32)
+JPH_DEFINE_TRIVIAL_HASH(uint64)
+
 /// @brief Helper function that hashes a single value into ioSeed
 /// Taken from: https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
 template <typename T>
-inline void HashCombineHelper(size_t &ioSeed, const T &inValue)
+inline void HashCombine(uint64 &ioSeed, const T &inValue)
 {
-	std::hash<T> hasher;
-	ioSeed ^= hasher(inValue) + 0x9e3779b9 + (ioSeed << 6) + (ioSeed >> 2);
+	ioSeed ^= Hash<T> { } (inValue) + 0x9e3779b9 + (ioSeed << 6) + (ioSeed >> 2);
 }
 
 /// Hash combiner to use a custom struct in an unordered map or set
@@ -61,11 +162,16 @@ inline void HashCombineHelper(size_t &ioSeed, const T &inValue)
 ///		};
 ///
 ///		JPH_MAKE_HASHABLE(SomeHashKey, t.key1, t.key2, t.key3)
-template <typename... Values>
-inline void HashCombine(std::size_t &ioSeed, Values... inValues)
+template <typename FirstValue, typename... Values>
+inline uint64 HashCombineArgs(const FirstValue &inFirstValue, Values... inValues)
 {
-	// Hash all values together using a fold expression
-	(HashCombineHelper(ioSeed, inValues), ...);
+	// Prime the seed by hashing the first value
+	uint64 seed = Hash<FirstValue> { } (inFirstValue);
+
+	// Hash all remaining values together using a fold expression
+	(HashCombine(seed, inValues), ...);
+
+	return seed;
 }
 
 JPH_NAMESPACE_END
@@ -76,21 +182,30 @@ JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic")
 #define JPH_MAKE_HASH_STRUCT(type, name, ...)				\
 	struct [[nodiscard]] name								\
 	{														\
-		std::size_t operator()(const type &t) const			\
+		::JPH::uint64 operator()(const type &t) const		\
 		{													\
-			std::size_t ret = 0;							\
-			::JPH::HashCombine(ret, __VA_ARGS__);			\
-			return ret;										\
+			return ::JPH::HashCombineArgs(__VA_ARGS__);		\
 		}													\
 	};
 
 #define JPH_MAKE_HASHABLE(type, ...)						\
 	JPH_SUPPRESS_WARNING_PUSH								\
 	JPH_SUPPRESS_WARNINGS									\
+	namespace JPH											\
+	{														\
+		template<>											\
+		JPH_MAKE_HASH_STRUCT(type, Hash<type>, __VA_ARGS__) \
+	}														\
 	namespace std											\
 	{														\
 		template<>											\
-		JPH_MAKE_HASH_STRUCT(type, hash<type>, __VA_ARGS__)	\
+		struct [[nodiscard]] hash<type>						\
+		{													\
+			std::size_t operator()(const type &t) const		\
+			{												\
+				return std::size_t(::JPH::Hash<type>{ }(t));\
+			}												\
+		};													\
 	}														\
 	JPH_SUPPRESS_WARNING_POP
 

+ 662 - 0
Jolt/Core/HashTable.h

@@ -0,0 +1,662 @@
+// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <Jolt/Math/BVec16.h>
+
+JPH_NAMESPACE_BEGIN
+
+/// Helper class for implementing an UnorderedSet or UnorderedMap
+/// Based on CppCon 2017: Matt Kulukundis "Designing a Fast, Efficient, Cache-friendly Hash Table, Step by Step"
+/// See: https://www.youtube.com/watch?v=ncHmEUmJZf4
+template <class Key, class KeyValue, class HashTableDetail, class Hash, class KeyEqual>
+class HashTable
+{
+public:
+	/// Properties
+	using value_type = KeyValue;
+	using size_type = uint32;
+	using difference_type = ptrdiff_t;
+
+private:
+	/// Base class for iterators
+	template <class Table, class Iterator>
+	class IteratorBase
+	{
+	public:
+        /// Properties
+		using difference_type = typename Table::difference_type;
+        using value_type = typename Table::value_type;
+        using iterator_category = std::forward_iterator_tag;
+
+		/// Copy constructor
+							IteratorBase(const IteratorBase &inRHS) = default;
+
+		/// Assignment operator
+		IteratorBase &		operator = (const IteratorBase &inRHS) = default;
+
+		/// Iterator at start of table
+		explicit			IteratorBase(Table *inTable) :
+			mTable(inTable),
+			mIndex(0)
+		{
+			while (mIndex < mTable->mMaxSize && (mTable->mControl[mIndex] & cBucketUsed) == 0)
+				++mIndex;
+		}
+
+		/// Iterator at specific index
+							IteratorBase(Table *inTable, size_type inIndex) :
+			mTable(inTable),
+			mIndex(inIndex)
+		{
+		}
+
+		/// Prefix increment
+		Iterator &			operator ++ ()
+		{
+			JPH_ASSERT(IsValid());
+
+			do
+			{
+				++mIndex;
+			}
+			while (mIndex < mTable->mMaxSize && (mTable->mControl[mIndex] & cBucketUsed) == 0);
+
+			return static_cast<Iterator &>(*this);
+		}
+
+		/// Postfix increment
+		Iterator			operator ++ (int)
+		{
+			Iterator result(mTable, mIndex);
+			++(*this);
+			return result;
+		}
+
+		/// Access to key value pair
+		const KeyValue &	operator * () const
+		{
+			JPH_ASSERT(IsValid());
+			return mTable->mData[mIndex];
+		}
+
+		/// Access to key value pair
+		const KeyValue *	operator -> () const
+		{
+			JPH_ASSERT(IsValid());
+			return mTable->mData + mIndex;
+		}
+
+		/// Equality operator
+		bool				operator == (const Iterator &inRHS) const
+		{
+			return mIndex == inRHS.mIndex && mTable == inRHS.mTable;
+		}
+
+		/// Inequality operator
+		bool				operator != (const Iterator &inRHS) const
+		{
+			return !(*this == inRHS);
+		}
+
+		/// Check that the iterator is valid
+		bool				IsValid() const
+		{
+			return mIndex < mTable->mMaxSize
+				&& (mTable->mControl[mIndex] & cBucketUsed) != 0;
+		}
+
+		Table *				mTable;
+		size_type			mIndex;
+	};
+
+	/// Allocate space for the hash table. Expects that the table currently owns no storage (mData must be nullptr).
+	/// A single allocation holds inMaxSize buckets followed by inMaxSize + 15 control bytes; the control bytes are NOT initialized here.
+	/// @param inMaxSize Number of buckets to allocate (the probing code masks indices with inMaxSize - 1)
+	void					AllocateTable(size_type inMaxSize)
+	{
+		JPH_ASSERT(mData == nullptr);
+
+		mMaxSize = inMaxSize;
+		mMaxLoad = uint32((cMaxLoadFactorNumerator * inMaxSize) / cMaxLoadFactorDenominator);
+		// Calculate the byte count in size_t: storing it in the 32 bit size_type would silently truncate it for large tables and result in a buffer that is too small
+		size_t required_size = size_t(mMaxSize) * (sizeof(KeyValue) + 1) + 15; // Add 15 bytes to mirror the first 15 bytes of the control values
+		if constexpr (cNeedsAlignedAllocate)
+			mData = reinterpret_cast<KeyValue *>(AlignedAllocate(required_size, alignof(KeyValue)));
+		else
+			mData = reinterpret_cast<KeyValue *>(Allocate(required_size));
+		mControl = reinterpret_cast<uint8 *>(mData + mMaxSize); // Control bytes are stored directly behind the buckets
+	}
+
+	/// Copy the contents of another hash table into this one. This table must not own any storage yet (AllocateTable asserts that mData is nullptr).
+	void					CopyTable(const HashTable &inRHS)
+	{
+		if (inRHS.empty()) // Nothing to copy, this table is left untouched
+			return;
+
+		AllocateTable(inRHS.mMaxSize);
+
+		// Copy control bytes (including the 15 mirrored bytes at the end, deleted markers are copied as well)
+		memcpy(mControl, inRHS.mControl, mMaxSize + 15);
+
+		// Copy elements: copy construct every used bucket at the same index as in the source table
+		uint index = 0;
+		for (const uint8 *control = mControl, *control_end = mControl + mMaxSize; control != control_end; ++control, ++index)
+			if (*control & cBucketUsed)
+				::new (mData + index) KeyValue(inRHS.mData[index]);
+		mSize = inRHS.mSize;
+	}
+
+	/// Grow the table to the next power of 2 (doubles the bucket count, with a minimum of 16 buckets) and re-insert all existing elements
+	void					GrowTable()
+	{
+		// Calculate new size
+		size_type new_max_size = max<size_type>(mMaxSize << 1, 16);
+		if (new_max_size < mMaxSize) // The shift wrapped around, the table cannot get any bigger
+		{
+			JPH_ASSERT(false, "Overflow in hash table size, can't grow!");
+			return;
+		}
+
+		// Detach the old buffers into local variables and reset the members so that AllocateTable sees a table without storage
+		size_type old_max_size = mMaxSize;
+		KeyValue *old_data = mData;
+		const uint8 *old_control = mControl;
+		mData = nullptr;
+		mControl = nullptr;
+		mSize = 0;
+		mMaxSize = 0;
+		mMaxLoad = 0;
+
+		// Allocate new table
+		AllocateTable(new_max_size);
+
+		// Reset all control bytes (including the 15 mirrored bytes at the end) to empty
+		memset(mControl, cBucketEmpty, mMaxSize + 15);
+
+		if (old_data != nullptr)
+		{
+			// Move all elements from the old table: find a bucket for the key, move construct the element there and destroy the original
+			for (size_type i = 0; i < old_max_size; ++i)
+				if (old_control[i] & cBucketUsed)
+				{
+					size_type index;
+					KeyValue *element = old_data + i;
+					JPH_IF_ENABLE_ASSERTS(bool inserted =) InsertKey</* AllowDeleted= */ false>(HashTableDetail::sGetKey(*element), index);
+					JPH_ASSERT(inserted);
+					::new (mData + index) KeyValue(std::move(*element));
+					element->~KeyValue();
+				}
+
+			// Free the old buffer (the control bytes are part of the same allocation as the data, see AllocateTable)
+			if constexpr (cNeedsAlignedAllocate)
+				AlignedFree(old_data);
+			else
+				Free(old_data);
+		}
+	}
+
+protected:
+	/// Get an element by index
+	KeyValue &				GetElement(size_type inIndex) const
+	{
+		return mData[inIndex];
+	}
+
+	/// Insert a key into the map, returns true if the element was inserted, false if it already existed.
+	/// outIndex is the index at which the element should be constructed / where it is located.
+	template <bool AllowDeleted = true>
+	bool					InsertKey(const Key &inKey, size_type &outIndex)
+	{
+		// Ensure we have enough space
+		if (mSize + 1 >= mMaxLoad)
+			GrowTable();
+
+		// Calculate hash
+		uint64 hash_value = Hash { } (inKey);
+
+		// Split hash into control byte and index
+		uint8 control = cBucketUsed | uint8(hash_value);
+		size_type bucket_mask = mMaxSize - 1;
+		size_type index = size_type(hash_value >> 7) & bucket_mask;
+
+		BVec16 control16 = BVec16::sReplicate(control);
+		BVec16 bucket_empty = BVec16::sZero();
+		BVec16 bucket_deleted = BVec16::sReplicate(cBucketDeleted);
+
+		// Keeps track of the index of the first deleted bucket we found
+		constexpr size_type cNoDeleted = ~size_type(0);
+		size_type first_deleted_index = cNoDeleted;
+
+		// Linear probing
+		KeyEqual equal;
+		for (;;)
+		{
+			// Read 16 control values (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes)
+			BVec16 control_bytes = BVec16::sLoadByte16(mControl + index);
+
+			// Check for the control value we're looking for
+			uint32 control_equal = uint32(BVec16::sEquals(control_bytes, control16).GetTrues());
+
+			// Check for empty buckets
+			uint32 control_empty = uint32(BVec16::sEquals(control_bytes, bucket_empty).GetTrues());
+
+			// Check if we're still scanning for deleted buckets
+			if constexpr (AllowDeleted)
+				if (first_deleted_index == cNoDeleted)
+				{
+					// Check if any buckets have been deleted, if so store the first one
+					uint32 control_deleted = uint32(BVec16::sEquals(control_bytes, bucket_deleted).GetTrues());
+					if (control_deleted != 0)
+						first_deleted_index = index + CountTrailingZeros(control_deleted);
+				}
+
+			// Index within the 16 buckets
+			size_type local_index = index;
+
+			// Loop while there's still buckets to process
+			while ((control_equal | control_empty) != 0)
+			{
+				// Get the index of the first bucket that is either equal or empty
+				uint first_equal = CountTrailingZeros(control_equal);
+				uint first_empty = CountTrailingZeros(control_empty);
+
+				// Check if we first found a bucket with equal control value before an empty bucket
+				if (first_equal < first_empty)
+				{
+					// Skip to the bucket
+					local_index += first_equal;
+
+					// Make sure that our index is not beyond the end of the table
+					local_index &= bucket_mask;
+
+					// We found a bucket with same control value
+					if (equal(HashTableDetail::sGetKey(mData[local_index]), inKey))
+					{
+						// Element already exists
+						outIndex = local_index;
+						return false;
+					}
+
+					// Skip past this bucket
+					local_index++;
+					uint shift = first_equal + 1;
+					control_equal >>= shift;
+					control_empty >>= shift;
+				}
+				else
+				{
+					// An empty bucket was found, we can insert a new item
+					JPH_ASSERT(control_empty != 0);
+
+					// Get the location of the first empty or deleted bucket
+					local_index += first_empty;
+					if constexpr (AllowDeleted)
+						if (first_deleted_index < local_index)
+							local_index = first_deleted_index;
+
+					// Make sure that our index is not beyond the end of the table
+					local_index &= bucket_mask;
+
+					// Update control byte
+					mControl[local_index] = control;
+					if (local_index < 15)
+						mControl[mMaxSize + local_index] = control; // Mirror the first 15 bytes at the end of the control values
+					++mSize;
+
+					// Return index to newly allocated bucket
+					outIndex = local_index;
+					return true;
+				}
+			}
+
+			// Move to next batch of 16 buckets
+			index = (index + 16) & bucket_mask;
+		}
+	}
+
+public:
+	/// Non-const iterator
+	class iterator : public IteratorBase<HashTable, iterator>
+	{
+		using Base = IteratorBase<HashTable, iterator>;
+
+	public:
+        /// Properties
+        using reference = typename Base::value_type &;
+        using pointer = typename Base::value_type *;
+
+		/// Constructors
+		explicit			iterator(HashTable *inTable) : Base(inTable) { }
+							iterator(HashTable *inTable, size_type inIndex) : Base(inTable, inIndex) { }
+							iterator(const iterator &inIterator) : Base(inIterator) { }
+
+		/// Assignment
+		iterator &			operator = (const iterator &inRHS) { Base::operator = (inRHS); return *this; }
+
+		using Base::operator *;
+
+		/// Non-const access to key value pair
+		KeyValue &			operator * ()
+		{
+			JPH_ASSERT(this->IsValid());
+			return this->mTable->mData[this->mIndex];
+		}
+
+		using Base::operator ->;
+
+		/// Non-const access to key value pair
+		KeyValue *			operator -> ()
+		{
+			JPH_ASSERT(this->IsValid());
+			return this->mTable->mData + this->mIndex;
+		}
+	};
+
+	/// Const iterator
+	class const_iterator : public IteratorBase<const HashTable, const_iterator>
+	{
+		using Base = IteratorBase<const HashTable, const_iterator>;
+
+	public:
+        /// Properties
+        using reference = const typename Base::value_type &;
+        using pointer = const typename Base::value_type *;
+
+		/// Constructors
+		explicit			const_iterator(const HashTable *inTable) : Base(inTable) { }
+							const_iterator(const HashTable *inTable, size_type inIndex) : Base(inTable, inIndex) { }
+							const_iterator(const const_iterator &inRHS) : Base(inRHS) { }
+							const_iterator(const iterator &inIterator) : Base(inIterator.mTable, inIterator.mIndex) { }
+
+		/// Assignment
+		const_iterator &	operator = (const iterator &inRHS) { this->mTable = inRHS.mTable; this->mIndex = inRHS.mIndex; return *this; }
+		const_iterator &	operator = (const const_iterator &inRHS) { Base::operator = (inRHS); return *this; }
+	};
+
+	/// Default constructor
+							HashTable() = default;
+
+	/// Copy constructor
+							HashTable(const HashTable &inRHS)
+	{
+		CopyTable(inRHS);
+	}
+
+	/// Move constructor
+							HashTable(HashTable &&ioRHS) noexcept :
+		mData(ioRHS.mData),
+		mControl(ioRHS.mControl),
+		mSize(ioRHS.mSize),
+		mMaxSize(ioRHS.mMaxSize),
+		mMaxLoad(ioRHS.mMaxLoad)
+	{
+		ioRHS.mData = nullptr;
+		ioRHS.mControl = nullptr;
+		ioRHS.mSize = 0;
+		ioRHS.mMaxSize = 0;
+		ioRHS.mMaxLoad = 0;
+	}
+
+	/// Assignment operator
+	HashTable &				operator = (const HashTable &inRHS)
+	{
+		if (this != &inRHS)
+		{
+			clear();
+
+			CopyTable(inRHS);
+		}
+
+		return *this;
+	}
+
+	/// Destructor
+							~HashTable()
+	{
+		clear();
+	}
+
+	/// Reserve memory for a certain number of elements.
+	/// Does nothing when the table already has enough buckets. Elements that are already in the table are kept: they are rehashed into the new storage.
+	/// @param inMaxSize Number of elements that the table should be able to hold without growing
+	void					reserve(size_type inMaxSize)
+	{
+		// Calculate max size based on load factor
+		size_type max_size = GetNextPowerOf2(max<uint32>((cMaxLoadFactorDenominator * inMaxSize) / cMaxLoadFactorNumerator, 16));
+		if (max_size <= mMaxSize)
+			return;
+
+		// Take over the current storage (if any), the move constructor leaves this table empty and without storage.
+		// Without this step AllocateTable would overwrite mData, leaking the old buffer and losing all elements.
+		HashTable old_table(std::move(*this));
+
+		// Allocate buffers
+		AllocateTable(max_size);
+
+		// Reset all control bytes
+		memset(mControl, cBucketEmpty, mMaxSize + 15);
+
+		// Rehash the elements of the old table into the new storage (no iterations when there was no old storage).
+		// The destructor of old_table destroys the moved-from elements and frees the old buffer.
+		for (size_type i = 0; i < old_table.mMaxSize; ++i)
+			if (old_table.mControl[i] & cBucketUsed)
+			{
+				size_type index;
+				KeyValue *element = old_table.mData + i;
+				JPH_IF_ENABLE_ASSERTS(bool inserted =) InsertKey</* AllowDeleted= */ false>(HashTableDetail::sGetKey(*element), index);
+				JPH_ASSERT(inserted);
+				::new (mData + index) KeyValue(std::move(*element));
+			}
+	}
+
+	/// Destroy the entire hash table: destructs all elements, frees the storage and resets the table to its unallocated state
+	void					clear()
+	{
+		// Delete all elements (skipped at compile time when KeyValue is trivially destructible)
+		if constexpr (!is_trivially_destructible<KeyValue>())
+			if (!empty())
+				for (size_type i = 0; i < mMaxSize; ++i)
+					if (mControl[i] & cBucketUsed)
+						mData[i].~KeyValue();
+
+		if (mData != nullptr)
+		{
+			// Free memory (the control bytes are part of the same allocation as the data, see AllocateTable)
+			if constexpr (cNeedsAlignedAllocate)
+				AlignedFree(mData);
+			else
+				Free(mData);
+
+			// Reset members
+			mData = nullptr;
+			mControl = nullptr;
+			mSize = 0;
+			mMaxSize = 0;
+			mMaxLoad = 0;
+		}
+	}
+
+	/// Iterator to first element
+	iterator				begin()
+	{
+		return iterator(this);
+	}
+
+	/// Iterator to one beyond last element
+	iterator				end()
+	{
+		return iterator(this, mMaxSize);
+	}
+
+	/// Iterator to first element
+	const_iterator			begin() const
+	{
+		return const_iterator(this);
+	}
+
+	/// Iterator to one beyond last element
+	const_iterator			end() const
+	{
+		return const_iterator(this, mMaxSize);
+	}
+
+	/// Iterator to first element
+	const_iterator			cbegin() const
+	{
+		return const_iterator(this);
+	}
+
+	/// Iterator to one beyond last element
+	const_iterator			cend() const
+	{
+		return const_iterator(this, mMaxSize);
+	}
+
+	/// Check if there are no elements in the table
+	bool					empty() const
+	{
+		return mSize == 0;
+	}
+
+	/// Number of elements in the table
+	size_type				size() const
+	{
+		return mSize;
+	}
+
+	/// Insert a new element, returns an iterator to the element with this key and true if it was inserted (false if the key already existed, the existing element is then left unchanged)
+	std::pair<iterator, bool> insert(const value_type &inValue)
+	{
+		size_type index;
+		bool inserted = InsertKey(HashTableDetail::sGetKey(inValue), index); // Claims a bucket for the key or finds the existing one
+		if (inserted)
+			::new (mData + index) KeyValue(inValue); // Copy construct the element in the claimed bucket
+		return std::make_pair(iterator(this, index), inserted);
+	}
+
+	/// Find an element, returns iterator to element or end() if not found
+	const_iterator			find(const Key &inKey) const
+	{
+		// Check if we have any data
+		if (empty())
+			return cend();
+
+		// Calculate hash
+		uint64 hash_value = Hash { } (inKey);
+
+		// Split hash into control byte and index
+		uint8 control = cBucketUsed | uint8(hash_value);
+		size_type bucket_mask = mMaxSize - 1;
+		size_type index = size_type(hash_value >> 7) & bucket_mask;
+
+		BVec16 control16 = BVec16::sReplicate(control);
+		BVec16 bucket_empty = BVec16::sZero();
+
+		// Linear probing
+		KeyEqual equal;
+		for (;;)
+		{
+			// Read 16 control values (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes)
+			BVec16 control_bytes = BVec16::sLoadByte16(mControl + index);
+
+			// Check for the control value we're looking for
+			uint32 control_equal = uint32(BVec16::sEquals(control_bytes, control16).GetTrues());
+
+			// Check for empty buckets
+			uint32 control_empty = uint32(BVec16::sEquals(control_bytes, bucket_empty).GetTrues());
+
+			// Index within the 16 buckets
+			size_type local_index = index;
+
+			// Loop while there's still buckets to process
+			while ((control_equal | control_empty) != 0)
+			{
+				// Get the index of the first bucket that is either equal or empty
+				uint first_equal = CountTrailingZeros(control_equal);
+				uint first_empty = CountTrailingZeros(control_empty);
+
+				// Check if we first found a bucket with equal control value before an empty bucket
+				if (first_equal < first_empty)
+				{
+					// Skip to the bucket
+					local_index += first_equal;
+
+					// Make sure that our index is not beyond the end of the table
+					local_index &= bucket_mask;
+
+					// We found a bucket with same control value
+					if (equal(HashTableDetail::sGetKey(mData[local_index]), inKey))
+					{
+						// Element found
+						return const_iterator(this, local_index);
+					}
+
+					// Skip past this bucket
+					local_index++;
+					uint shift = first_equal + 1;
+					control_equal >>= shift;
+					control_empty >>= shift;
+				}
+				else
+				{
+					// An empty bucket was found, we didn't find the element
+					JPH_ASSERT(control_empty != 0);
+					return cend();
+				}
+			}
+
+			// Move to next batch of 16 buckets
+			index = (index + 16) & bucket_mask;
+		}
+	}
+
+	/// @brief Erase an element by iterator, the iterator must point to a used bucket (asserted). The bucket is marked as deleted, no memory is freed.
+	void					erase(const const_iterator &inIterator)
+	{
+		JPH_ASSERT(inIterator.IsValid());
+
+		// Mark the bucket as deleted (and keep the mirrored copy of the first 15 control bytes at the end in sync)
+		mControl[inIterator.mIndex] = cBucketDeleted;
+		if (inIterator.mIndex < 15)
+			mControl[inIterator.mIndex + mMaxSize] = cBucketDeleted;
+
+		// Destruct the element
+		mData[inIterator.mIndex].~KeyValue();
+
+		// Decrease size
+		--mSize;
+	}
+
+	/// @brief Erase an element by key, returns the number of elements that were erased (1 if the key was found, 0 otherwise)
+	size_type				erase(const Key &inKey)
+	{
+		const_iterator it = find(inKey);
+		if (it == cend()) // Key is not in the table
+			return 0;
+
+		erase(it);
+		return 1;
+	}
+
+	/// Swap the contents of two hash tables
+	void					swap(HashTable &ioRHS) noexcept
+	{
+		std::swap(mData, ioRHS.mData);
+		std::swap(mControl, ioRHS.mControl);
+		std::swap(mSize, ioRHS.mSize);
+		std::swap(mMaxSize, ioRHS.mMaxSize);
+		std::swap(mMaxLoad, ioRHS.mMaxLoad);
+	}
+
+private:
+	/// If this allocator needs to fall back to aligned allocations because the type requires it
+	static constexpr bool	cNeedsAlignedAllocate = alignof(KeyValue) > (JPH_CPU_ADDRESS_BITS == 32? 8 : 16);
+
+	/// Max load factor is cMaxLoadFactorNumerator / cMaxLoadFactorDenominator
+	static constexpr uint64	cMaxLoadFactorNumerator = 7;
+	static constexpr uint64	cMaxLoadFactorDenominator = 8;
+
+	/// Values that the control bytes can have
+	static constexpr uint8	cBucketEmpty = 0;
+	static constexpr uint8	cBucketDeleted = 0x7f;
+	static constexpr uint8	cBucketUsed = 0x80;	// Lowest 7 bits are lowest 7 bits of the hash value
+
+	/// The buckets, an array of size mMaxSize
+	KeyValue *				mData = nullptr;
+
+	/// Control bytes, an array of size mMaxSize + 15
+	uint8 *					mControl = nullptr;
+
+	/// Number of elements in the table
+	size_type				mSize = 0;
+
+	/// Max number of elements that can be stored in the table
+	size_type				mMaxSize = 0;
+
+	/// Max number of elements in the table before it should grow
+	size_type				mMaxLoad = 0;
+};
+
+JPH_NAMESPACE_END

+ 1 - 1
Jolt/Core/MutexArray.h

@@ -45,7 +45,7 @@ public:
 	/// Convert an object index to a mutex index
 	inline uint32			GetMutexIndex(uint32 inObjectIndex) const
 	{
-		std::hash<uint32> hasher;
+		Hash<uint32> hasher;
 		return hasher(inObjectIndex) & (mNumMutexes - 1);
 	}
 

+ 14 - 2
Jolt/Core/Reference.h

@@ -134,6 +134,12 @@ public:
 	/// Get pointer
 	inline T *				GetPtr() const									{ return mPtr; }
 
+	/// Get hash for this object (hashes the stored pointer mPtr through Hash<T *>)
+	uint64					GetHash() const
+	{
+		return Hash<T *> { } (mPtr);
+	}
+
 	/// INTERNAL HELPER FUNCTION USED BY SERIALIZATION
 	void **					InternalGetPointer()							{ return reinterpret_cast<void **>(&mPtr); }
 
@@ -190,6 +196,12 @@ public:
 	/// Get pointer
 	inline const T *		GetPtr() const									{ return mPtr; }
 
+	/// Get hash for this object (hashes the stored pointer mPtr through Hash<const T *>)
+	uint64					GetHash() const
+	{
+		return Hash<const T *> { } (mPtr);
+	}
+
 	/// INTERNAL HELPER FUNCTION USED BY SERIALIZATION
 	void **					InternalGetPointer()							{ return const_cast<void **>(reinterpret_cast<const void **>(&mPtr)); }
 
@@ -214,7 +226,7 @@ namespace std
 	{
 		size_t operator () (const JPH::Ref<T> &inRHS) const
 		{
-			return hash<T *> { }(inRHS.GetPtr());
+			return size_t(inRHS.GetHash());
 		}
 	};
 
@@ -224,7 +236,7 @@ namespace std
 	{
 		size_t operator () (const JPH::RefConst<T> &inRHS) const
 		{
-			return hash<const T *> { }(inRHS.GetPtr());
+			return size_t(inRHS.GetHash());
 		}
 	};
 }

+ 14 - 10
Jolt/Core/StaticArray.h

@@ -282,6 +282,19 @@ public:
 		return false;
 	}
 
+	/// Get a 64 bit hash for this array: the element count is hashed first, then every element is combined into the result in order
+	uint64					GetHash() const
+	{
+		// Hash length first (the size is cast to uint32 before it is hashed)
+		uint64 ret = Hash<uint32> { } (uint32(size()));
+
+		// Then hash elements (mElements is accessed as an array of T through a reinterpret_cast), only the first mSize elements are visited
+		for (const T *element = reinterpret_cast<const T *>(mElements), *element_end = reinterpret_cast<const T *>(mElements) + mSize; element < element_end; ++element)
+			HashCombine(ret, *element);
+
+		return ret;
+	}
+
 protected:
 	struct alignas(T) Storage
 	{
@@ -308,16 +321,7 @@ namespace std
 	{
 		size_t operator () (const JPH::StaticArray<T, N> &inRHS) const
 		{
-			std::size_t ret = 0;
-
-			// Hash length first
-			JPH::HashCombine(ret, inRHS.size());
-
-			// Then hash elements
-			for (const T &t : inRHS)
-				JPH::HashCombine(ret, t);
-
-			return ret;
+			return std::size_t(inRHS.GetHash());
 		}
 	};
 }

+ 0 - 13
Jolt/Core/StringTools.h

@@ -20,19 +20,6 @@ String ConvertToString(const T &inValue)
 	return oss.str();
 }
 
-/// Calculate the FNV-1a hash of inString.
-/// @see https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-constexpr uint64 HashString(const char *inString)
-{
-	uint64 hash = 14695981039346656037UL;
-	for (const char *c = inString; *c != 0; ++c)
-	{
-		hash ^= *c;
-		hash = hash * 1099511628211UL;
-	}
-	return hash;
-}
-
 /// Replace substring with other string
 JPH_EXPORT void StringReplace(String &ioString, const string_view &inSearch, const string_view &inReplace);
 

+ 70 - 5
Jolt/Core/UnorderedMap.h

@@ -1,15 +1,80 @@
 // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
 // SPDX-License-Identifier: MIT
 
 #pragma once
 
-JPH_SUPPRESS_WARNINGS_STD_BEGIN
-#include <unordered_map>
-JPH_SUPPRESS_WARNINGS_STD_END
+#include <Jolt/Core/HashTable.h>
 
 JPH_NAMESPACE_BEGIN
 
-template <class Key, class T, class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>> using UnorderedMap = std::unordered_map<Key, T, Hash, KeyEqual, STLAllocator<pair<const Key, T>>>;
+/// Internal helper class to provide context for UnorderedMap: tells the underlying HashTable how to get the key out of a stored key value pair
+template <class Key, class Value>
+class UnorderedMapDetail
+{
+public:
+	/// Get key from key value pair (the key is the first member of the pair)
+	static const Key &			sGetKey(const std::pair<Key, Value> &inKeyValue)
+	{
+		return inKeyValue.first;
+	}
+};
+
+/// Hash Map class, stores std::pair<Key, Value> elements in a HashTable
+/// @tparam Key Key type
+/// @tparam Value Value type
+/// @tparam Hash Hash function (note: should return a 64 bit hash)
+/// @tparam KeyEqual Equality comparison function
+template <class Key, class Value, class Hash = JPH::Hash<Key>, class KeyEqual = std::equal_to<Key>>
+class UnorderedMap : public HashTable<Key, std::pair<Key, Value>, UnorderedMapDetail<Key, Value>, Hash, KeyEqual>
+{
+	using Base = HashTable<Key, std::pair<Key, Value>, UnorderedMapDetail<Key, Value>, Hash, KeyEqual>;
+
+public:
+	using size_type = typename Base::size_type;
+	using iterator = typename Base::iterator;
+	using const_iterator = typename Base::const_iterator;
+	using value_type = typename Base::value_type;
+
+	Value &						operator [] (const Key &inKey) // Returns the value for inKey, a value constructed with Value() is inserted first when the key is not present yet
+	{
+		size_type index;
+		bool inserted = this->InsertKey(inKey, index);
+		value_type &key_value = this->GetElement(index);
+		if (inserted)
+			::new (&key_value) value_type(inKey, Value());
+		return key_value.second;
+	}
+
+	template<class... Args>
+	std::pair<iterator, bool>	try_emplace(const Key &inKey, Args &&...inArgs) // Constructs the value in place from inArgs, only when the key is not present yet
+	{
+		size_type index;
+		bool inserted = this->InsertKey(inKey, index);
+		if (inserted)
+			::new (&this->GetElement(index)) value_type(std::piecewise_construct, std::forward_as_tuple(inKey), std::forward_as_tuple(std::forward<Args>(inArgs)...));
+		return std::make_pair(iterator(this, index), inserted);
+	}
+
+	template<class... Args>
+	std::pair<iterator, bool>	try_emplace(Key &&inKey, Args &&...inArgs) // Same as above, but the key is moved into the map (only when an element is actually inserted)
+	{
+		size_type index;
+		bool inserted = this->InsertKey(inKey, index);
+		if (inserted)
+			::new (&this->GetElement(index)) value_type(std::piecewise_construct, std::forward_as_tuple(std::move(inKey)), std::forward_as_tuple(std::forward<Args>(inArgs)...));
+		return std::make_pair(iterator(this, index), inserted);
+	}
+
+	/// Const version of find (made visible here because the non-const overload below would otherwise hide it)
+	using Base::find;
+
+	/// Non-const version of find, uses the const version and converts the resulting index to a non-const iterator
+	iterator					find(const Key &inKey)
+	{
+		const_iterator it = Base::find(inKey);
+		return iterator(this, it.mIndex);
+	}
+};
 
 JPH_NAMESPACE_END

+ 22 - 5
Jolt/Core/UnorderedSet.h

@@ -1,15 +1,32 @@
 // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
 // SPDX-License-Identifier: MIT
 
 #pragma once
 
-JPH_SUPPRESS_WARNINGS_STD_BEGIN
-#include <unordered_set>
-JPH_SUPPRESS_WARNINGS_STD_END
+#include <Jolt/Core/HashTable.h>
 
 JPH_NAMESPACE_BEGIN
 
-template <class Key, class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>> using UnorderedSet = std::unordered_set<Key, Hash, KeyEqual, STLAllocator<Key>>;
+/// Internal helper class to provide context for UnorderedSet: tells the underlying HashTable how to get the key out of a stored element
+template <class Key>
+class UnorderedSetDetail
+{
+public:
+	/// The key is the key, just return it (in a set the stored element is the key itself)
+	static const Key &		sGetKey(const Key &inKey)
+	{
+		return inKey;
+	}
+};
+
+/// Hash Set class, a HashTable in which the stored element is the key itself (adds no members of its own)
+/// @tparam Key Key type
+/// @tparam Hash Hash function (note: should return a 64 bit hash)
+/// @tparam KeyEqual Equality comparison function
+template <class Key, class Hash = JPH::Hash<Key>, class KeyEqual = std::equal_to<Key>>
+class UnorderedSet : public HashTable<Key, Key, UnorderedSetDetail<Key>, Hash, KeyEqual>
+{
+};
 
 JPH_NAMESPACE_END

+ 1 - 0
Jolt/Geometry/ConvexHullBuilder.cpp

@@ -247,6 +247,7 @@ float ConvexHullBuilder::DetermineCoplanarDistance() const
 int ConvexHullBuilder::GetNumVerticesUsed() const
 {
 	UnorderedSet<int> used_verts;
+	used_verts.reserve(UnorderedSet<int>::size_type(mPositions.size()));
 	for (Face *f : mFaces)
 	{
 		Edge *e = f->mFirstEdge;

+ 1 - 1
Jolt/Geometry/IndexedTriangle.h

@@ -111,6 +111,6 @@ using IndexedTriangleList = Array<IndexedTriangle>;
 
 JPH_NAMESPACE_END
 
-// Create a std::hash for IndexedTriangleNoMaterial and IndexedTriangle
+// Create a std::hash/JPH::Hash for IndexedTriangleNoMaterial and IndexedTriangle
 JPH_MAKE_HASHABLE(JPH::IndexedTriangleNoMaterial, t.mIdx[0], t.mIdx[1], t.mIdx[2])
 JPH_MAKE_HASHABLE(JPH::IndexedTriangle, t.mIdx[0], t.mIdx[1], t.mIdx[2], t.mMaterialIndex, t.mUserData)

+ 3 - 0
Jolt/Jolt.cmake

@@ -29,6 +29,7 @@ set(JOLT_PHYSICS_SRC_FILES
 	${JOLT_PHYSICS_ROOT}/Core/FPException.h
 	${JOLT_PHYSICS_ROOT}/Core/FPFlushDenormals.h
 	${JOLT_PHYSICS_ROOT}/Core/HashCombine.h
+	${JOLT_PHYSICS_ROOT}/Core/HashTable.h
 	${JOLT_PHYSICS_ROOT}/Core/InsertionSort.h
 	${JOLT_PHYSICS_ROOT}/Core/IssueReporting.cpp
 	${JOLT_PHYSICS_ROOT}/Core/IssueReporting.h
@@ -105,6 +106,8 @@ set(JOLT_PHYSICS_SRC_FILES
 	${JOLT_PHYSICS_ROOT}/Geometry/Triangle.h
 	${JOLT_PHYSICS_ROOT}/Jolt.cmake
 	${JOLT_PHYSICS_ROOT}/Jolt.h
+	${JOLT_PHYSICS_ROOT}/Math/BVec16.h
+	${JOLT_PHYSICS_ROOT}/Math/BVec16.inl
 	${JOLT_PHYSICS_ROOT}/Math/DMat44.h
 	${JOLT_PHYSICS_ROOT}/Math/DMat44.inl
 	${JOLT_PHYSICS_ROOT}/Math/Double3.h

+ 99 - 0
Jolt/Math/BVec16.h

@@ -0,0 +1,99 @@
+// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+JPH_NAMESPACE_BEGIN
+
+/// A vector consisting of 16 bytes, backed by a 128 bit SIMD type when SSE or NEON is enabled
+class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) BVec16
+{
+public:
+	JPH_OVERRIDE_NEW_DELETE
+
+	// Underlying vector type (SSE type, NEON type or a plain struct of 2 uint64's as fallback)
+#if defined(JPH_USE_SSE)
+	using Type = __m128i;
+#elif defined(JPH_USE_NEON)
+	using Type = uint8x16_t;
+#else
+	using Type = struct { uint64 mData[2]; };
+#endif
+
+	/// Constructors
+								BVec16() = default; ///< Intentionally not initialized for performance reasons
+								BVec16(const BVec16 &inRHS) = default;
+	BVec16 &					operator = (const BVec16 &inRHS) = default;
+	JPH_INLINE					BVec16(Type inRHS) : mValue(inRHS)					{ }
+
+	/// Create a vector from 16 bytes, inB0 becomes component 0 and inB15 becomes component 15
+	JPH_INLINE					BVec16(uint8 inB0, uint8 inB1, uint8 inB2, uint8 inB3, uint8 inB4, uint8 inB5, uint8 inB6, uint8 inB7, uint8 inB8, uint8 inB9, uint8 inB10, uint8 inB11, uint8 inB12, uint8 inB13, uint8 inB14, uint8 inB15);
+
+	/// Create a vector from two uint64's, inV0 is stored in mU64[0] and inV1 in mU64[1]
+	JPH_INLINE					BVec16(uint64 inV0, uint64 inV1);
+
+	/// Comparison, two vectors are equal when all 16 components are equal
+	JPH_INLINE bool				operator == (BVec16Arg inV2) const;
+	JPH_INLINE bool				operator != (BVec16Arg inV2) const					{ return !(*this == inV2); }
+
+	/// Vector with all zeros
+	static JPH_INLINE BVec16	sZero();
+
+	/// Replicate byte inV across all 16 components
+	static JPH_INLINE BVec16	sReplicate(uint8 inV);
+
+	/// Load 16 bytes from memory, inV[0] becomes component 0
+	static JPH_INLINE BVec16	sLoadByte16(const uint8 *inV);
+
+	/// Equals (component wise), a component of the result is considered true when its highest bit is set
+	static JPH_INLINE BVec16	sEquals(BVec16Arg inV1, BVec16Arg inV2);
+
+	/// Logical or (component wise), computed as a bitwise or of all bits
+	static JPH_INLINE BVec16	sOr(BVec16Arg inV1, BVec16Arg inV2);
+
+	/// Logical xor (component wise), computed as a bitwise xor of all bits
+	static JPH_INLINE BVec16	sXor(BVec16Arg inV1, BVec16Arg inV2);
+
+	/// Logical and (component wise), computed as a bitwise and of all bits
+	static JPH_INLINE BVec16	sAnd(BVec16Arg inV1, BVec16Arg inV2);
+
+	/// Logical not (component wise), inverts all bits
+	static JPH_INLINE BVec16	sNot(BVec16Arg inV1);
+
+	/// Get component by index, inCoordinate must be smaller than 16 (checked by an assert)
+	JPH_INLINE uint8			operator [] (uint inCoordinate) const				{ JPH_ASSERT(inCoordinate < 16); return mU8[inCoordinate]; }
+	JPH_INLINE uint8 &			operator [] (uint inCoordinate)						{ JPH_ASSERT(inCoordinate < 16); return mU8[inCoordinate]; }
+
+	/// Test if any of the components are true (true is when highest bit of component is set)
+	JPH_INLINE bool				TestAnyTrue() const;
+
+	/// Test if all components are true (true is when highest bit of component is set)
+	JPH_INLINE bool				TestAllTrue() const;
+
+	/// Get a bit mask with bit 0 set if mU8[0] is true, bit 1 set if mU8[1] is true, etc. (true is when highest bit of component is set)
+	JPH_INLINE int				GetTrues() const;
+
+	/// To String, writes the 16 components as decimal numbers separated by ", "
+	friend ostream &			operator << (ostream &inStream, BVec16Arg inV)
+	{
+		inStream << uint(inV.mU8[0]) << ", " << uint(inV.mU8[1]) << ", " << uint(inV.mU8[2]) << ", " << uint(inV.mU8[3]) << ", "
+				 << uint(inV.mU8[4]) << ", " << uint(inV.mU8[5]) << ", " << uint(inV.mU8[6]) << ", " << uint(inV.mU8[7]) << ", "
+				 << uint(inV.mU8[8]) << ", " << uint(inV.mU8[9]) << ", " << uint(inV.mU8[10]) << ", " << uint(inV.mU8[11]) << ", "
+				 << uint(inV.mU8[12]) << ", " << uint(inV.mU8[13]) << ", " << uint(inV.mU8[14]) << ", " << uint(inV.mU8[15]);
+		return inStream;
+	}
+
+	union
+	{
+		Type					mValue; ///< The vector as the underlying (SIMD) type
+		uint8					mU8[16]; ///< The same 16 bytes as individual components
+		uint64					mU64[2]; ///< The same 16 bytes as two 64 bit halves
+	};
+};
+
+static_assert(is_trivial<BVec16>(), "Is supposed to be a trivial type!");
+
+JPH_NAMESPACE_END
+
+#include "BVec16.inl"

+ 177 - 0
Jolt/Math/BVec16.inl

@@ -0,0 +1,177 @@
+// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+JPH_NAMESPACE_BEGIN
+
+BVec16::BVec16(uint8 inB0, uint8 inB1, uint8 inB2, uint8 inB3, uint8 inB4, uint8 inB5, uint8 inB6, uint8 inB7, uint8 inB8, uint8 inB9, uint8 inB10, uint8 inB11, uint8 inB12, uint8 inB13, uint8 inB14, uint8 inB15)
+{
+#if defined(JPH_USE_SSE) // _mm_set_epi8 takes the highest byte first, hence the reversed argument order
+	mValue = _mm_set_epi8(char(inB15), char(inB14), char(inB13), char(inB12), char(inB11), char(inB10), char(inB9), char(inB8), char(inB7), char(inB6), char(inB5), char(inB4), char(inB3), char(inB2), char(inB1), char(inB0));
+#elif defined(JPH_USE_NEON) // Pack each group of 8 bytes into a uint64 (first byte in the lowest bits), then combine the two halves
+	uint8x8_t v1 = vcreate_u8(uint64(inB0) | (uint64(inB1) << 8) | (uint64(inB2) << 16) | (uint64(inB3) << 24) | (uint64(inB4) << 32) | (uint64(inB5) << 40) | (uint64(inB6) << 48) | (uint64(inB7) << 56));
+	uint8x8_t v2 = vcreate_u8(uint64(inB8) | (uint64(inB9) << 8) | (uint64(inB10) << 16) | (uint64(inB11) << 24) | (uint64(inB12) << 32) | (uint64(inB13) << 40) | (uint64(inB14) << 48) | (uint64(inB15) << 56));
+	mValue = vcombine_u8(v1, v2);
+#else // No SIMD available: write the components one by one
+	mU8[0] = inB0;
+	mU8[1] = inB1;
+	mU8[2] = inB2;
+	mU8[3] = inB3;
+	mU8[4] = inB4;
+	mU8[5] = inB5;
+	mU8[6] = inB6;
+	mU8[7] = inB7;
+	mU8[8] = inB8;
+	mU8[9] = inB9;
+	mU8[10] = inB10;
+	mU8[11] = inB11;
+	mU8[12] = inB12;
+	mU8[13] = inB13;
+	mU8[14] = inB14;
+	mU8[15] = inB15;
+#endif
+}
+
+BVec16::BVec16(uint64 inV0, uint64 inV1)
+{
+	mU64[0] = inV0; // Written through the uint64 view of the union, this half overlaps mU8[0] to mU8[7]
+	mU64[1] = inV1; // This half overlaps mU8[8] to mU8[15]
+}
+
+bool BVec16::operator == (BVec16Arg inV2) const
+{
+	return sEquals(*this, inV2).TestAllTrue(); // Equal only when the component wise comparison is true for all 16 components
+}
+
+BVec16 BVec16::sZero()
+{
+#if defined(JPH_USE_SSE)
+	return _mm_setzero_si128();
+#elif defined(JPH_USE_NEON)
+	return vdupq_n_u8(0);
+#else
+	return BVec16(0, 0); // Both 64 bit halves zero means all 16 components are zero
+#endif
+}
+
+BVec16 BVec16::sReplicate(uint8 inV)
+{
+#if defined(JPH_USE_SSE)
+	return _mm_set1_epi8(char(inV));
+#elif defined(JPH_USE_NEON)
+	return vdupq_n_u8(inV);
+#else
+	uint64 v(inV);
+	v |= v << 8; // Byte is now present in the lowest 2 bytes
+	v |= v << 16; // Byte is now present in the lowest 4 bytes
+	v |= v << 32; // Byte is now present in all 8 bytes
+	return BVec16(v, v); // Use the same pattern for both halves
+#endif
+}
+
+BVec16 BVec16::sLoadByte16(const uint8 *inV)
+{
+#if defined(JPH_USE_SSE) // Uses the unaligned load instruction, so inV does not need to be 16 byte aligned
+	return _mm_loadu_si128(reinterpret_cast<const __m128i *>(inV));
+#elif defined(JPH_USE_NEON)
+	return vld1q_u8(inV);
+#else // No SIMD available: read the 16 bytes one by one
+	return BVec16(inV[0], inV[1], inV[2], inV[3], inV[4], inV[5], inV[6], inV[7], inV[8], inV[9], inV[10], inV[11], inV[12], inV[13], inV[14], inV[15]);
+#endif
+}
+
+BVec16 BVec16::sEquals(BVec16Arg inV1, BVec16Arg inV2)
+{
+#if defined(JPH_USE_SSE)
+	return _mm_cmpeq_epi8(inV1.mValue, inV2.mValue);
+#elif defined(JPH_USE_NEON)
+	return vceqq_u8(inV1.mValue, inV2.mValue);
+#else // Fallback sets only the highest bit (0x80) of an equal component where the SIMD compares set all bits, the highest bit is what defines true
+	auto equals = [](uint64 inV1, uint64 inV2) {
+		uint64 r = inV1 ^ ~inV2; // Bits that are equal are 1
+		r &= r << 1; // Combine bit 0 through 1 (every bit is and-ed with the bit below it)
+		r &= r << 2; // Combine bit 0 through 3
+		r &= r << 4; // Combine bit 0 through 7, the highest bit of a byte is now 1 only if all 8 bits of that byte were equal
+		r &= 0x8080808080808080UL; // Keep only the highest bit of each byte (the lower bits were mixed with bits of the neighbouring byte)
+		return r;
+	};
+	return BVec16(equals(inV1.mU64[0], inV2.mU64[0]), equals(inV1.mU64[1], inV2.mU64[1]));
+#endif
+}
+
+BVec16 BVec16::sOr(BVec16Arg inV1, BVec16Arg inV2)
+{
+#if defined(JPH_USE_SSE)
+	return _mm_or_si128(inV1.mValue, inV2.mValue);
+#elif defined(JPH_USE_NEON)
+	return vorrq_u8(inV1.mValue, inV2.mValue);
+#else
+	return BVec16(inV1.mU64[0] | inV2.mU64[0], inV1.mU64[1] | inV2.mU64[1]); // Bitwise or of each 64 bit half
+#endif
+}
+
+BVec16 BVec16::sXor(BVec16Arg inV1, BVec16Arg inV2)
+{
+#if defined(JPH_USE_SSE)
+	return _mm_xor_si128(inV1.mValue, inV2.mValue);
+#elif defined(JPH_USE_NEON)
+	return veorq_u8(inV1.mValue, inV2.mValue);
+#else
+	return BVec16(inV1.mU64[0] ^ inV2.mU64[0], inV1.mU64[1] ^ inV2.mU64[1]); // Bitwise xor of each 64 bit half
+#endif
+}
+
+BVec16 BVec16::sAnd(BVec16Arg inV1, BVec16Arg inV2)
+{
+#if defined(JPH_USE_SSE)
+	return _mm_and_si128(inV1.mValue, inV2.mValue);
+#elif defined(JPH_USE_NEON)
+	return vandq_u8(inV1.mValue, inV2.mValue);
+#else
+	return BVec16(inV1.mU64[0] & inV2.mU64[0], inV1.mU64[1] & inV2.mU64[1]); // Bitwise and of each 64 bit half
+#endif
+}
+
+
+BVec16 BVec16::sNot(BVec16Arg inV1)
+{
+#if defined(JPH_USE_SSE)
+	return sXor(inV1, sReplicate(0xff)); // Xor with all ones flips every bit
+#elif defined(JPH_USE_NEON)
+	return vmvnq_u8(inV1.mValue);
+#else
+	return BVec16(~inV1.mU64[0], ~inV1.mU64[1]); // Invert all bits of each 64 bit half
+#endif
+}
+
+int BVec16::GetTrues() const
+{
+#if defined(JPH_USE_SSE)
+	return _mm_movemask_epi8(mValue); // Collects the highest bit of each of the 16 bytes into bits 0 to 15
+#else // This path is also taken when NEON is used, only JPH_USE_SSE is tested above
+	int result = 0;
+	for (int i = 0; i < 16; ++i)
+		result |= int(mU8[i] >> 7) << i; // Move the highest bit of component i to bit i of the result
+	return result;
+#endif
+}
+
+bool BVec16::TestAnyTrue() const
+{
+#if defined(JPH_USE_SSE)
+	return _mm_movemask_epi8(mValue) != 0; // At least one of the 16 highest bits is set
+#else
+	return ((mU64[0] | mU64[1]) & 0x8080808080808080UL) != 0; // Or both halves together, then check if any of the highest bits is set
+#endif
+}
+
+bool BVec16::TestAllTrue() const
+{
+#if defined(JPH_USE_SSE)
+	return _mm_movemask_epi8(mValue) == 0b1111111111111111; // All 16 mask bits must be set
+#else
+	return ((mU64[0] & mU64[1]) & 0x8080808080808080UL) == 0x8080808080808080UL; // And both halves together, a highest bit survives only when it is set in both halves
+#endif
+}
+
+JPH_NAMESPACE_END

+ 1 - 1
Jolt/Math/DVec3.inl

@@ -6,7 +6,7 @@
 
 #include <Jolt/Core/HashCombine.h>
 
-// Create a std::hash for DVec3
+// Create a std::hash/JPH::Hash for DVec3
 JPH_MAKE_HASHABLE(JPH::DVec3, t.GetX(), t.GetY(), t.GetZ())
 
 JPH_NAMESPACE_BEGIN

+ 1 - 1
Jolt/Math/Double3.h

@@ -44,5 +44,5 @@ static_assert(is_trivial<Double3>(), "Is supposed to be a trivial type!");
 
 JPH_NAMESPACE_END
 
-// Create a std::hash for Double3
+// Create a std::hash/JPH::Hash for Double3
 JPH_MAKE_HASHABLE(JPH::Double3, t.x, t.y, t.z)

+ 1 - 1
Jolt/Math/Float3.h

@@ -46,5 +46,5 @@ static_assert(is_trivial<Float3>(), "Is supposed to be a trivial type!");
 
 JPH_NAMESPACE_END
 
-// Create a std::hash for Float3
+// Create a std::hash/JPH::Hash for Float3
 JPH_MAKE_HASHABLE(JPH::Float3, t.x, t.y, t.z)

+ 2 - 0
Jolt/Math/MathTypes.h

@@ -10,6 +10,7 @@ class Vec3;
 class DVec3;
 class Vec4;
 class UVec4;
+class BVec16;
 class Quat;
 class Mat44;
 class DMat44;
@@ -23,6 +24,7 @@ using Vec3Arg = const Vec3;
 #endif
 using Vec4Arg = const Vec4;
 using UVec4Arg = const UVec4;
+using BVec16Arg = const BVec16;
 using QuatArg = const Quat;
 using Mat44Arg = const Mat44 &;
 using DMat44Arg = const DMat44 &;

+ 1 - 1
Jolt/Math/Vec3.inl

@@ -10,7 +10,7 @@ JPH_SUPPRESS_WARNINGS_STD_BEGIN
 #include <random>
 JPH_SUPPRESS_WARNINGS_STD_END
 
-// Create a std::hash for Vec3
+// Create a std::hash/JPH::Hash for Vec3
 JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
 
 JPH_NAMESPACE_BEGIN

+ 1 - 1
Jolt/Physics/Body/BodyID.h

@@ -96,5 +96,5 @@ private:
 
 JPH_NAMESPACE_END
 
-// Create a std::hash for BodyID
+// Create a std::hash/JPH::Hash for BodyID
 JPH_MAKE_HASHABLE(JPH::BodyID, t.GetIndexAndSequenceNumber())

+ 1 - 1
Jolt/Physics/Collision/Shape/ConvexHullShape.cpp

@@ -19,7 +19,6 @@
 #include <Jolt/Core/StreamIn.h>
 #include <Jolt/Core/StreamOut.h>
 #include <Jolt/Core/UnorderedMap.h>
-#include <Jolt/Core/UnorderedSet.h>
 
 JPH_NAMESPACE_BEGIN
 
@@ -126,6 +125,7 @@ ConvexHullShape::ConvexHullShape(const ConvexHullShapeSettings &inSettings, Shap
 	// Convert polygons from the builder to our internal representation
 	using VtxMap = UnorderedMap<int, uint8>;
 	VtxMap vertex_map;
+	vertex_map.reserve(VtxMap::size_type(inSettings.mPoints.size()));
 	for (BuilderFace *builder_face : builder_faces)
 	{
 		// Determine where the vertices go

+ 11 - 6
Jolt/Physics/Collision/Shape/MeshShape.cpp

@@ -93,7 +93,7 @@ void MeshShapeSettings::Sanitize()
 {
 	// Remove degenerate and duplicate triangles
 	UnorderedSet<IndexedTriangle> triangles;
-	triangles.reserve(mIndexedTriangles.size());
+	triangles.reserve(UnorderedSet<IndexedTriangle>::size_type(mIndexedTriangles.size()));
 	TriangleCodec::ValidationContext validation_ctx(mIndexedTriangles, mTriangleVertices);
 	for (int t = (int)mIndexedTriangles.size() - 1; t >= 0; --t)
 	{
@@ -245,12 +245,16 @@ void MeshShape::sFindActiveEdges(const MeshShapeSettings &inSettings, IndexedTri
 			return mIdx1 == inRHS.mIdx1 && mIdx2 == inRHS.mIdx2;
 		}
 
+		uint64	GetHash() const
+		{
+			static_assert(sizeof(*this) == 2 * sizeof(int), "No padding expected"); // The hash below covers every byte of the object, so there must be no bytes besides the two ints
+			return HashBytes(this, sizeof(*this)); // Hash the raw bytes of the object (mIdx1 and mIdx2)
+		}
+
 		int		mIdx1;
 		int		mIdx2;
 	};
 
-	JPH_MAKE_HASH_STRUCT(Edge, EdgeHash, t.mIdx1, t.mIdx2)
-
 	// A struct to hold the triangles that are connected to an edge
 	struct TriangleIndices
 	{
@@ -259,16 +263,17 @@ void MeshShape::sFindActiveEdges(const MeshShapeSettings &inSettings, IndexedTri
 	};
 
 	// Build a list of edge to triangles
-	using EdgeToTriangle = UnorderedMap<Edge, TriangleIndices, EdgeHash>;
+	using EdgeToTriangle = UnorderedMap<Edge, TriangleIndices>;
 	EdgeToTriangle edge_to_triangle;
-	edge_to_triangle.reserve(ioIndices.size() * 3);
+	edge_to_triangle.reserve(EdgeToTriangle::size_type(ioIndices.size() * 3));
 	for (uint triangle_idx = 0; triangle_idx < ioIndices.size(); ++triangle_idx)
 	{
 		IndexedTriangle &triangle = ioIndices[triangle_idx];
 		for (uint edge_idx = 0; edge_idx < 3; ++edge_idx)
 		{
 			Edge edge(triangle.mIdx[edge_idx], triangle.mIdx[(edge_idx + 1) % 3]);
-			TriangleIndices &indices = edge_to_triangle[edge];
+			EdgeToTriangle::iterator edge_to_triangle_it = edge_to_triangle.try_emplace(edge, TriangleIndices()).first;
+			TriangleIndices &indices = edge_to_triangle_it->second;
 			if (indices.mNumTriangles < 2)
 			{
 				// Store index of triangle that connects to this edge

+ 19 - 11
Jolt/Physics/SoftBody/SoftBodySharedSettings.cpp

@@ -423,14 +423,16 @@ void SoftBodySharedSettings::CalculateSkinnedConstraintNormals()
 		return;
 
 	// First collect all vertices that are skinned
-	UnorderedSet<uint32> skinned_vertices;
-	skinned_vertices.reserve(mSkinnedConstraints.size());
+	using VertexIndexSet = UnorderedSet<uint32>;
+	VertexIndexSet skinned_vertices;
+	skinned_vertices.reserve(VertexIndexSet::size_type(mSkinnedConstraints.size()));
 	for (const Skinned &s : mSkinnedConstraints)
 		skinned_vertices.insert(s.mVertex);
 
 	// Now collect all faces that connect only to skinned vertices
-	UnorderedMap<uint32, UnorderedSet<uint32>> connected_faces;
-	connected_faces.reserve(mVertices.size());
+	using ConnectedFacesMap = UnorderedMap<uint32, VertexIndexSet>;
+	ConnectedFacesMap connected_faces;
+	connected_faces.reserve(ConnectedFacesMap::size_type(mVertices.size()));
 	for (const Face &f : mFaces)
 	{
 		// Must connect to only skinned vertices
@@ -451,12 +453,18 @@ void SoftBodySharedSettings::CalculateSkinnedConstraintNormals()
 	{
 		uint32 start = uint32(mSkinnedConstraintNormals.size());
 		JPH_ASSERT((start >> 24) == 0);
-		const UnorderedSet<uint32> &faces = connected_faces[s.mVertex];
-		uint32 num = uint32(faces.size());
-		JPH_ASSERT(num < 256);
-		mSkinnedConstraintNormals.insert(mSkinnedConstraintNormals.end(), faces.begin(), faces.end());
-		QuickSort(mSkinnedConstraintNormals.begin() + start, mSkinnedConstraintNormals.begin() + start + num);
-		s.mNormalInfo = start + (num << 24);
+		ConnectedFacesMap::const_iterator connected_faces_it = connected_faces.find(s.mVertex);
+		if (connected_faces_it != connected_faces.cend())
+		{
+			const VertexIndexSet &faces = connected_faces_it->second;
+			uint32 num = uint32(faces.size());
+			JPH_ASSERT(num < 256);
+			mSkinnedConstraintNormals.insert(mSkinnedConstraintNormals.end(), faces.begin(), faces.end());
+			QuickSort(mSkinnedConstraintNormals.begin() + start, mSkinnedConstraintNormals.begin() + start + num);
+			s.mNormalInfo = start + (num << 24);
+		}
+		else
+			s.mNormalInfo = 0;
 	}
 	mSkinnedConstraintNormals.shrink_to_fit();
 }
@@ -954,7 +962,7 @@ void SoftBodySharedSettings::SaveWithMaterials(StreamOut &inStream, SharedSettin
 	if (settings_iter == ioSettingsMap.end())
 	{
 		// Write settings ID
-		uint32 settings_id = (uint32)ioSettingsMap.size();
+		uint32 settings_id = ioSettingsMap.size();
 		ioSettingsMap[this] = settings_id;
 		inStream.Write(settings_id);
 

+ 31 - 0
UnitTests/Core/HashCombineTest.cpp

@@ -0,0 +1,31 @@
+// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#include "UnitTestFramework.h"
+#include <Jolt/Core/HashCombine.h>
+
+TEST_SUITE("HashCombineTest") // All cases hash the text "This is a test" and expect the same 64 bit value
+{
+	TEST_CASE("TestHashBytes")
+	{
+		CHECK(HashBytes("This is a test", 14) == 2733878766136413408UL); // 14 is the length of the text without the terminating zero
+	}
+
+	TEST_CASE("TestHashString")
+	{
+		CHECK(HashString("This is a test") == 2733878766136413408UL); // Must match the result of HashBytes over the same characters
+	}
+
+	TEST_CASE("TestHashStruct") // Hash<> for string like types is expected to hash the characters, giving the same value as HashString
+	{
+		const char *char_test = "This is a test";
+		CHECK(Hash<const char *> { } (char_test) == 2733878766136413408UL);
+
+		std::string_view str_view_test = "This is a test";
+		CHECK(Hash<std::string_view> { } (str_view_test) == 2733878766136413408UL);
+
+		String str_test = "This is a test";
+		CHECK(Hash<String> { } (str_test) == 2733878766136413408UL);
+	}
+}

+ 0 - 5
UnitTests/Core/StringToolsTest.cpp

@@ -19,11 +19,6 @@ TEST_SUITE("StringToolsTest")
 		CHECK(ConvertToString(0x7fffffffffffffffUL) == "9223372036854775807");
 	}
 
-	TEST_CASE("TestStringHash")
-	{
-		CHECK(HashString("This is a test") == 2733878766136413408UL);
-	}
-
 	TEST_CASE("StringReplace")
 	{
 		JPH::String value = "Hello this si si a test";

+ 129 - 0
UnitTests/Core/UnorderedMapTest.cpp

@@ -0,0 +1,129 @@
+// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#include "UnitTestFramework.h"
+
+#include <Jolt/Core/UnorderedMap.h>
+
+TEST_SUITE("UnorderedMapTest")
+{
+	TEST_CASE("TestUnorderedMap")
+	{
+		UnorderedMap<int, int> map;
+		map.reserve(10);
+
+		// Insert some entries, inserting an existing key must fail and leave the stored value untouched
+		CHECK(map.insert({ 1, 2 }).first->first == 1);
+		CHECK(map.insert({ 3, 4 }).second);
+		CHECK(!map.insert({ 3, 5 }).second);
+		CHECK(map.size() == 2);
+		CHECK(map.find(1)->second == 2);
+		CHECK(map.find(3)->second == 4);
+		CHECK(map.find(5) == map.end());
+
+		// Use operator [], it inserts a missing key and gives write access to the value of an existing one
+		map[5] = 6;
+		CHECK(map.size() == 3);
+		CHECK(map.find(5)->second == 6);
+		map[5] = 7;
+		CHECK(map.size() == 3);
+		CHECK(map.find(5)->second == 7);
+
+		// Validate all elements are visited when iterating, first with a const_iterator and then with an iterator
+		int count = 0;
+		bool visited[10] = { false }; // Indexed by key, all keys used in this test are smaller than 10
+		for (UnorderedMap<int, int>::const_iterator i = map.begin(); i != map.end(); ++i)
+		{
+			visited[i->first] = true;
+			++count;
+		}
+		CHECK(count == 3);
+		CHECK(visited[1]);
+		CHECK(visited[3]);
+		CHECK(visited[5]);
+		for (UnorderedMap<int, int>::iterator i = map.begin(); i != map.end(); ++i)
+		{
+			visited[i->first] = false;
+			--count;
+		}
+		CHECK(count == 0);
+		CHECK(!visited[1]);
+		CHECK(!visited[3]);
+		CHECK(!visited[5]);
+
+		// Copy the map using copy assignment
+		UnorderedMap<int, int> map2;
+		map2 = map;
+		CHECK(map2.find(1)->second == 2);
+		CHECK(map2.find(3)->second == 4);
+		CHECK(map2.find(5)->second == 7);
+		CHECK(map2.find(7) == map2.end());
+
+		// Try emplace with a key that is not in the map yet
+		map.try_emplace(7, 8);
+		CHECK(map.size() == 4);
+		CHECK(map.find(7)->second == 8);
+
+		// Swap, map3 must receive all elements and map must end up empty
+		UnorderedMap<int, int> map3;
+		map3.swap(map);
+		CHECK(map3.find(1)->second == 2);
+		CHECK(map3.find(3)->second == 4);
+		CHECK(map3.find(5)->second == 7);
+		CHECK(map3.find(7)->second == 8);
+		CHECK(map3.find(9) == map3.end());
+		CHECK(map.empty());
+
+		// Move construct, the moved from map must end up empty
+		UnorderedMap<int, int> map4(std::move(map3));
+		CHECK(map4.find(1)->second == 2);
+		CHECK(map4.find(3)->second == 4);
+		CHECK(map4.find(5)->second == 7);
+		CHECK(map4.find(7)->second == 8);
+		CHECK(map4.find(9) == map4.end());
+		CHECK(map3.empty());
+	}
+
+	TEST_CASE("TestUnorderedMapGrow") // No reserve is done here, so the map has to grow while inserting
+	{
+		UnorderedMap<int, int> map;
+		for (int i = 0; i < 10000; ++i) // Insert 10000 unique keys, the value is the bitwise inverse of the key
+			CHECK(map.try_emplace(i, ~i).second);
+
+		CHECK(map.size() == 10000);
+
+		for (int i = 0; i < 10000; ++i)
+			CHECK(map.find(i)->second == ~i);
+
+		CHECK(map.find(10001) == map.end());
+
+		for (int i = 0; i < 5000; ++i) // Erase the first half, erase must report 1 removed element per key
+			CHECK(map.erase(i) == 1);
+
+		CHECK(map.size() == 5000);
+
+		for (int i = 0; i < 5000; ++i)
+			CHECK(map.find(i) == map.end());
+
+		for (int i = 5000; i < 10000; ++i) // The second half must be unaffected by the erase
+			CHECK(map.find(i)->second == ~i);
+
+		CHECK(map.find(10001) == map.end());
+
+		for (int i = 0; i < 5000; ++i) // Insert the erased keys again, now with a different value
+			CHECK(map.try_emplace(i, i + 1).second);
+
+		CHECK(!map.try_emplace(0, 0).second); // Key 0 exists again, so this must not insert
+
+		CHECK(map.size() == 10000);
+
+		for (int i = 0; i < 5000; ++i) // This also verifies that the failed try_emplace did not change the value of key 0
+			CHECK(map.find(i)->second == i + 1);
+
+		for (int i = 5000; i < 10000; ++i)
+			CHECK(map.find(i)->second == ~i);
+
+		CHECK(map.find(10001) == map.end());
+	}
+}

+ 155 - 0
UnitTests/Core/UnorderedSetTest.cpp

@@ -0,0 +1,155 @@
+// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#include "UnitTestFramework.h"
+
+#include <Jolt/Core/UnorderedSet.h>
+
+TEST_SUITE("UnorderedSetTest")
+{
+	TEST_CASE("TestUnorderedSet")
+	{
+		UnorderedSet<int> set;
+		set.reserve(10);
+
+		// Insert some entries, inserting an existing key must fail
+		CHECK(*set.insert(1).first == 1);
+		CHECK(set.insert(3).second);
+		CHECK(!set.insert(3).second);
+		CHECK(set.size() == 2);
+		CHECK(*set.find(1) == 1);
+		CHECK(*set.find(3) == 3);
+		CHECK(set.find(5) == set.cend());
+
+		// Validate all elements are visited when iterating, first with a const_iterator and then with an iterator
+		int count = 0;
+		bool visited[10] = { false }; // Indexed by key, all keys used in this test are smaller than 10
+		for (UnorderedSet<int>::const_iterator i = set.begin(); i != set.end(); ++i)
+		{
+			visited[*i] = true;
+			++count;
+		}
+		CHECK(count == 2);
+		CHECK(visited[1]);
+		CHECK(visited[3]);
+		for (UnorderedSet<int>::iterator i = set.begin(); i != set.end(); ++i)
+		{
+			visited[*i] = false;
+			--count;
+		}
+		CHECK(count == 0);
+		CHECK(!visited[1]);
+		CHECK(!visited[3]);
+
+		// Copy the set using copy assignment
+		UnorderedSet<int> set2;
+		set2 = set;
+		CHECK(*set2.find(1) == 1);
+		CHECK(*set2.find(3) == 3);
+		CHECK(set2.find(5) == set2.cend());
+
+		// Swap, set3 must receive all elements and set must end up empty
+		UnorderedSet<int> set3;
+		set3.swap(set);
+		CHECK(*set3.find(1) == 1);
+		CHECK(*set3.find(3) == 3);
+		CHECK(set3.find(5) == set3.end());
+		CHECK(set.empty());
+
+		// Move construct, the moved from set must end up empty
+		UnorderedSet<int> set4(std::move(set3));
+		CHECK(*set4.find(1) == 1);
+		CHECK(*set4.find(3) == 3);
+		CHECK(set4.find(5) == set4.end());
+		CHECK(set3.empty());
+	}
+
+	TEST_CASE("TestUnorderedSetGrow") // No reserve is done here, so the set has to grow while inserting
+	{
+		UnorderedSet<int> set;
+		for (int i = 0; i < 10000; ++i) // Insert 10000 unique keys
+			CHECK(set.insert(i).second);
+
+		CHECK(set.size() == 10000);
+
+		for (int i = 0; i < 10000; ++i)
+			CHECK(*set.find(i) == i);
+
+		CHECK(set.find(10001) == set.cend());
+
+		for (int i = 0; i < 5000; ++i) // Erase the first half, erase must report 1 removed element per key
+			CHECK(set.erase(i) == 1);
+
+		CHECK(set.size() == 5000);
+
+		for (int i = 0; i < 5000; ++i)
+			CHECK(set.find(i) == set.end());
+
+		for (int i = 5000; i < 10000; ++i) // The second half must be unaffected by the erase
+			CHECK(*set.find(i) == i);
+
+		CHECK(set.find(10001) == set.cend());
+
+		for (int i = 0; i < 5000; ++i) // Insert the erased keys again
+			CHECK(set.insert(i).second);
+
+		CHECK(!set.insert(0).second); // Key 0 exists again, so this must not insert
+
+		CHECK(set.size() == 10000);
+
+		for (int i = 0; i < 10000; ++i)
+			CHECK(*set.find(i) == i);
+
+		CHECK(set.find(10001) == set.cend());
+	}
+
+	TEST_CASE("TestUnorderedSetHashCollision") // Same sequence as the grow test but with 10 keys that all have the same hash
+	{
+		// A hash function that's guaranteed to collide (every value hashes to 0)
+		class MyBadHash
+		{
+		public:
+			size_t operator () (int inValue) const
+			{
+				return 0;
+			}
+		};
+
+		UnorderedSet<int, MyBadHash> set;
+		for (int i = 0; i < 10; ++i)
+			CHECK(set.insert(i).second);
+
+		CHECK(set.size() == 10);
+
+		for (int i = 0; i < 10; ++i) // With identical hashes only the equality comparison can tell the keys apart
+			CHECK(*set.find(i) == i);
+
+		CHECK(set.find(11) == set.cend());
+
+		for (int i = 0; i < 5; ++i) // Erase half of the colliding keys
+			CHECK(set.erase(i) == 1);
+
+		CHECK(set.size() == 5);
+
+		for (int i = 0; i < 5; ++i)
+			CHECK(set.find(i) == set.end());
+
+		for (int i = 5; i < 10; ++i) // The remaining colliding keys must still be found after the erase
+			CHECK(*set.find(i) == i);
+
+		CHECK(set.find(11) == set.cend());
+
+		for (int i = 0; i < 5; ++i) // Insert the erased keys again
+			CHECK(set.insert(i).second);
+
+		CHECK(!set.insert(0).second); // Key 0 exists again, so this must not insert
+
+		CHECK(set.size() == 10);
+
+		for (int i = 0; i < 10; ++i)
+			CHECK(*set.find(i) == i);
+
+		CHECK(set.find(11) == set.cend());
+	}
+}

+ 90 - 0
UnitTests/Math/BVec16Tests.cpp

@@ -0,0 +1,90 @@
+// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
+// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#include "UnitTestFramework.h"
+
+#include <Jolt/Math/BVec16.h>
+#include <Jolt/Core/StringTools.h>
+
+TEST_SUITE("BVec16Tests")
+{
+	TEST_CASE("TestBVec16Construct")
+	{
+		BVec16 v(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+
+		CHECK(v[0] == 1); // The first constructor argument must end up in component 0
+		CHECK(v[1] == 2);
+		CHECK(v[2] == 3);
+		CHECK(v[3] == 4);
+		CHECK(v[4] == 5);
+		CHECK(v[5] == 6);
+		CHECK(v[6] == 7);
+		CHECK(v[7] == 8);
+		CHECK(v[8] == 9);
+		CHECK(v[9] == 10);
+		CHECK(v[10] == 11);
+		CHECK(v[11] == 12);
+		CHECK(v[12] == 13);
+		CHECK(v[13] == 14);
+		CHECK(v[14] == 15);
+		CHECK(v[15] == 16);
+
+		// Test == and != operators (the second vector has components 8 and 9 swapped)
+		CHECK(v == BVec16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
+		CHECK(v != BVec16(1, 2, 3, 4, 5, 6, 7, 8, 10, 9, 11, 12, 13, 14, 15, 16));
+
+		// Check element modification through the non-const operator []
+		v[15] = 17;
+		CHECK(v[15] == 17);
+		CHECK(v == BVec16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17));
+	}
+
+	TEST_CASE("TestBVec16LoadByte16")
+	{
+		uint8 u16[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+		CHECK(BVec16::sLoadByte16(u16) == BVec16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); // Memory order must match component order
+	}
+
+	TEST_CASE("TestBVec16Zero")
+	{
+		BVec16 v = BVec16::sZero();
+
+		for (int i = 0; i < 16; ++i)
+			CHECK(v[i] == 0);
+	}
+
+	TEST_CASE("TestBVec16Replicate")
+	{
+		CHECK(BVec16::sReplicate(2) == BVec16(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
+	}
+
+	TEST_CASE("TestBVec16Comparisons")
+	{
+		BVec16 eq = BVec16::sEquals(BVec16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), BVec16(6, 7, 3, 4, 5, 6, 7, 5, 9, 10, 11, 12, 13, 14, 15, 13));
+		CHECK(eq.GetTrues() == 0b0111111101111100); // Components 0, 1, 7 and 15 differ, so those bits must be 0 in the mask
+		CHECK(eq.TestAnyTrue());
+		CHECK(!eq.TestAllTrue());
+	}
+
+	TEST_CASE("TestBVec16BitOps")
+	{
+		// Test all bit permutations: v1 / v2 combine the bit pairs 1/1, 1/0 and 0/1 (and 0/0 in the remaining bits), shifted to a different position per component
+		BVec16 v1(0b011, 0b0110, 0b01100, 0b011000, 0b0110000, 0b01100000, 0b011, 0b0110, 0b01100, 0b011000, 0b0110000, 0b01100000, 0b011, 0b0110, 0b01100, 0b011000);
+		BVec16 v2(0b101, 0b1010, 0b10100, 0b101000, 0b1010000, 0b10100000, 0b101, 0b1010, 0b10100, 0b101000, 0b1010000, 0b10100000, 0b101, 0b1010, 0b10100, 0b101000);
+
+		CHECK(BVec16::sOr(v1, v2) == BVec16(0b111, 0b1110, 0b11100, 0b111000, 0b1110000, 0b11100000, 0b111, 0b1110, 0b11100, 0b111000, 0b1110000, 0b11100000, 0b111, 0b1110, 0b11100, 0b111000));
+		CHECK(BVec16::sXor(v1, v2) == BVec16(0b110, 0b1100, 0b11000, 0b110000, 0b1100000, 0b11000000, 0b110, 0b1100, 0b11000, 0b110000, 0b1100000, 0b11000000, 0b110, 0b1100, 0b11000, 0b110000));
+		CHECK(BVec16::sAnd(v1, v2) == BVec16(0b001, 0b0010, 0b00100, 0b001000, 0b0010000, 0b00100000, 0b001, 0b0010, 0b00100, 0b001000, 0b0010000, 0b00100000, 0b001, 0b0010, 0b00100, 0b001000));
+
+		CHECK(BVec16::sNot(v1) == BVec16(0b11111100, 0b11111001, 0b11110011, 0b11100111, 0b11001111, 0b10011111, 0b11111100, 0b11111001, 0b11110011, 0b11100111, 0b11001111, 0b10011111, 0b11111100, 0b11111001, 0b11110011, 0b11100111));
+		CHECK(BVec16::sNot(v2) == BVec16(0b11111010, 0b11110101, 0b11101011, 0b11010111, 0b10101111, 0b01011111, 0b11111010, 0b11110101, 0b11101011, 0b11010111, 0b10101111, 0b01011111, 0b11111010, 0b11110101, 0b11101011, 0b11010111));
+	}
+
+	TEST_CASE("TestBVec16ToString")
+	{
+		BVec16 v(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+
+		CHECK(ConvertToString(v) == "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16"); // Components are printed as numbers, not as characters
+	}
+}

+ 4 - 0
UnitTests/UnitTests.cmake

@@ -5,6 +5,7 @@ set(UNIT_TESTS_ROOT ${PHYSICS_REPO_ROOT}/UnitTests)
 set(UNIT_TESTS_SRC_FILES
 	${UNIT_TESTS_ROOT}/Core/ArrayTest.cpp
 	${UNIT_TESTS_ROOT}/Core/FPFlushDenormalsTest.cpp
+	${UNIT_TESTS_ROOT}/Core/HashCombineTest.cpp
 	${UNIT_TESTS_ROOT}/Core/InsertionSortTest.cpp
 	${UNIT_TESTS_ROOT}/Core/JobSystemTest.cpp
 	${UNIT_TESTS_ROOT}/Core/LinearCurveTest.cpp
@@ -12,6 +13,8 @@ set(UNIT_TESTS_SRC_FILES
 	${UNIT_TESTS_ROOT}/Core/ScopeExitTest.cpp
 	${UNIT_TESTS_ROOT}/Core/StringToolsTest.cpp
 	${UNIT_TESTS_ROOT}/Core/QuickSortTest.cpp
+	${UNIT_TESTS_ROOT}/Core/UnorderedSetTest.cpp
+	${UNIT_TESTS_ROOT}/Core/UnorderedMapTest.cpp
 	${UNIT_TESTS_ROOT}/doctest.h
 	${UNIT_TESTS_ROOT}/Geometry/ClosestPointTests.cpp
 	${UNIT_TESTS_ROOT}/Geometry/ConvexHullBuilderTest.cpp
@@ -23,6 +26,7 @@ set(UNIT_TESTS_SRC_FILES
 	${UNIT_TESTS_ROOT}/Layers.h
 	${UNIT_TESTS_ROOT}/LoggingBodyActivationListener.h
 	${UNIT_TESTS_ROOT}/LoggingContactListener.h
+	${UNIT_TESTS_ROOT}/Math/BVec16Tests.cpp
 	${UNIT_TESTS_ROOT}/Math/DMat44Tests.cpp
 	${UNIT_TESTS_ROOT}/Math/DVec3Tests.cpp
 	${UNIT_TESTS_ROOT}/Math/EigenValueSymmetricTests.cpp