Selaa lähdekoodia

The PerformanceTest is now deterministic between Windows and Linux (tested on clang 13) (#184)

* Fixed non-deterministic simulation in case of hash collision
* Don't add fma by default when avx2 is on
* Added FNV-1a hash algorithm and use it for SubShapeIDPair because std::hash is not implemented the same on all platforms, this results in divergence between platforms
* Explicitly made contact constraint manager hashes 64 bit (so it behaves the same in a 32 bit app)
* Using our own hashing function for showing the hash at the end of the performance test
* Fixed natvis file for UVec4
* Added vectorized sine and cosine function Vec4::SinCos
* Sin and Cos are now making use of Vec4::SinCos to make them platform independent
Jorrit Rouwe 3 vuotta sitten
vanhempi
commit
aab9527141
52 muutettua tiedostoa jossa 438 lisäystä ja 145 poistoa
  1. 23 8
      Build/CMakeLists.txt
  2. 1 0
      Build/README.md
  3. 1 1
      Docs/Architecture.md
  4. 12 10
      Jolt/Core/Core.h
  5. 20 4
      Jolt/Core/HashCombine.h
  6. 2 2
      Jolt/Core/LockFreeHashMap.h
  7. 2 2
      Jolt/Core/LockFreeHashMap.inl
  8. 2 2
      Jolt/Core/StaticArray.h
  9. 1 0
      Jolt/Jolt.cmake
  10. 1 1
      Jolt/Jolt.natvis
  11. 9 6
      Jolt/Math/Mat44.inl
  12. 1 1
      Jolt/Math/Quat.h
  13. 23 21
      Jolt/Math/Quat.inl
  14. 56 0
      Jolt/Math/Trigonometry.h
  15. 3 2
      Jolt/Math/Vec3.inl
  16. 3 0
      Jolt/Math/Vec4.h
  17. 76 0
      Jolt/Math/Vec4.inl
  18. 2 2
      Jolt/Physics/Body/BodyPair.h
  19. 1 1
      Jolt/Physics/Character/CharacterBase.h
  20. 1 1
      Jolt/Physics/Collision/Shape/ConvexHullShape.cpp
  21. 2 2
      Jolt/Physics/Collision/Shape/GetTrianglesContext.h
  22. 18 1
      Jolt/Physics/Collision/Shape/SubShapeIDPair.h
  23. 1 1
      Jolt/Physics/Collision/Shape/TaperedCapsuleShape.cpp
  24. 2 2
      Jolt/Physics/Constraints/ConeConstraint.cpp
  25. 1 1
      Jolt/Physics/Constraints/ConeConstraint.h
  26. 10 8
      Jolt/Physics/Constraints/ConstraintPart/SwingTwistConstraintPart.h
  27. 28 15
      Jolt/Physics/Constraints/ContactConstraintManager.cpp
  28. 6 6
      Jolt/Physics/Constraints/ContactConstraintManager.h
  29. 3 0
      Jolt/Physics/StateRecorderImpl.h
  30. 2 2
      Jolt/Physics/Vehicle/VehicleCollisionTester.h
  31. 1 1
      Jolt/Physics/Vehicle/VehicleConstraint.h
  32. 1 1
      Jolt/Physics/Vehicle/WheeledVehicleController.cpp
  33. 10 10
      Jolt/Renderer/DebugRenderer.cpp
  34. 1 1
      PerformanceTest/ConvexVsMeshScene.h
  35. 55 4
      PerformanceTest/PerformanceTest.cpp
  36. 4 4
      Samples/Tests/Character/CharacterBaseTest.cpp
  37. 2 2
      Samples/Tests/Constraints/PathConstraintTest.cpp
  38. 2 2
      Samples/Tests/ConvexCollision/ConvexHullTest.cpp
  39. 1 1
      Samples/Tests/ConvexCollision/EPATest.cpp
  40. 1 1
      Samples/Tests/ConvexCollision/InteractivePairsTest.cpp
  41. 1 1
      Samples/Tests/General/ChangeMotionTypeTest.cpp
  42. 1 1
      Samples/Tests/General/SensorTest.cpp
  43. 2 2
      Samples/Tests/Shapes/ConvexHullShapeTest.cpp
  44. 1 1
      Samples/Tests/Shapes/RotatedTranslatedShapeTest.cpp
  45. 3 3
      Samples/Tests/Vehicle/TankTest.cpp
  46. 1 1
      Samples/Utils/ShapeCreator.cpp
  47. 3 3
      TestFramework/Application/Application.cpp
  48. 1 1
      TestFramework/Image/ZoomImage.cpp
  49. 1 1
      TestFramework/Renderer/Renderer.cpp
  50. 1 1
      UnitTests/Geometry/ConvexHullBuilderTest.cpp
  51. 1 1
      UnitTests/Geometry/EPATests.cpp
  52. 31 0
      UnitTests/Math/Vec4Tests.cpp

+ 23 - 8
Build/CMakeLists.txt

@@ -9,6 +9,9 @@ option(TARGET_PERFORMANCE_TEST "Build Performance Test" ON)
 option(TARGET_SAMPLES "Build Samples" ON)
 option(TARGET_VIEWER "Build JoltViewer" ON)
 
+# When turning this option on, the library will be compiled in such a way as to attempt to keep the simulation deterministic across platforms
+option(CROSS_PLATFORM_DETERMINISTIC "Cross platform deterministic" OFF)
+
 # Select X86 processor features to use (if everything is off it will be SSE2 compatible)
 option(USE_SSE4_1 "Enable SSE4.1" ON)
 option(USE_SSE4_2 "Enable SSE4.2" ON)
@@ -43,7 +46,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUAL
 
 	# Set general compiler flags
 	set(CMAKE_CXX_FLAGS "/std:c++17 /Zc:__cplusplus /GR- /Gm- /Wall /WX /EHsc /MP /nologo /diagnostics:classic /FC /fp:except- /Zc:inline /Zi /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE")
-	
+
 	# Set compiler flags for various configurations
 	set(CMAKE_CXX_FLAGS_DEBUG "/GS /Od /Ob0 /RTC1")
 	set(CMAKE_CXX_FLAGS_RELEASE "/GS- /GL /Gy /O2 /Oi /Ot")
@@ -55,7 +58,11 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUAL
 	# Set linker flags
 	set(CMAKE_EXE_LINKER_FLAGS "/SUBSYSTEM:WINDOWS /ignore:4221 /DEBUG:FASTLINK")
 	if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
-		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast") # Clang doesn't use fast math because it cannot be turned off inside a single compilation unit
+		if (CROSS_PLATFORM_DETERMINISTIC)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise")
+		else()
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast") # Clang doesn't use fast math because it cannot be turned off inside a single compilation unit
+		endif()
 		if (USE_AVX2)
 			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
 		elseif (USE_AVX)
@@ -76,7 +83,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUAL
 		if (USE_F16C)
 			add_compile_definitions(JPH_USE_F16C)
 		endif()
-		if (USE_FMADD)
+		if (USE_FMADD AND NOT CROSS_PLATFORM_DETERMINISTIC)
 			add_compile_definitions(JPH_USE_FMADD)
 		endif()
 		set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DJPH_FLOATING_POINT_EXCEPTIONS_ENABLED") # Clang turns Float2 into a vector sometimes causing floating point exceptions
@@ -86,7 +93,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUAL
 	elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /showFilenames")
 		if (USE_AVX2)
-			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mpopcnt -mlzcnt -mf16c -mfma")
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mpopcnt -mlzcnt -mf16c")
 		elseif (USE_AVX)
 			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mpopcnt")
 		elseif (USE_SSE4_2)
@@ -105,7 +112,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUAL
 		if (USE_F16C)
 			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
 		endif()
-		if (USE_FMADD)
+		if (USE_FMADD AND NOT CROSS_PLATFORM_DETERMINISTIC)
 			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
 		endif()
 		set(CMAKE_EXE_LINKER_FLAGS_RELEASEASAN "/SUBSYSTEM:CONSOLE /LIBPATH:${CLANG_LIB_PATH} clang_rt.asan-x86_64.lib -wholearchive:clang_rt.asan-x86_64.lib clang_rt.asan_cxx-x86_64.lib -wholearchive:clang_rt.asan_cxx-x86_64.lib")
@@ -113,20 +120,23 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUAL
 		set(CMAKE_EXE_LINKER_FLAGS_RELEASECOVERAGE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LIBPATH:${CLANG_LIB_PATH}")
 	endif()
 elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "iOS")
-	# Set general compiler flags (do not use -ffast-math since it cannot be turned off in a single compilation unit)
+	# Set general compiler flags
 	set(CMAKE_CXX_FLAGS "-g -std=c++17 -I. -Wall -Werror")
 
 	if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
 		# Somehow -Wcomment doesn't want to be turned off from code and we need this because Doxygen MathJax uses it
 		# Also turn off automatic fused multiply add contractions, there doesn't seem to be a way to do this selectively through the macro JPH_PRECISE_MATH_OFF
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-comment -ffp-contract=off")
+	else()
+		# Do not use -ffast-math or -ffp-contract=on since they cannot be turned off in a single compilation unit under clang, see Core.h
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffp-model=precise -ffp-contract=off")
 	endif()
 
 	# Platform specific compiler flags
 	if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
 		# X64
 		if (USE_AVX2)
-			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mpopcnt -mlzcnt -mf16c -mfma")
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mpopcnt -mlzcnt -mf16c")
 		elseif (USE_AVX)
 			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mpopcnt")
 		elseif (USE_SSE4_2)
@@ -145,7 +155,7 @@ elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" OR "${CMAKE_SYSTEM_NAME}" STREQU
 		if (USE_F16C)
 			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
 		endif()
-		if (USE_FMADD)
+		if (USE_FMADD AND NOT CROSS_PLATFORM_DETERMINISTIC)
 			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
 		endif()
 	elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
@@ -164,6 +174,11 @@ elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" OR "${CMAKE_SYSTEM_NAME}" STREQU
 	set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
 endif()
 
+# Setting to attempt cross platform determinism
+if (CROSS_PLATFORM_DETERMINISTIC)
+	add_compile_definitions(JPH_CROSS_PLATFORM_DETERMINISTIC)
+endif()
+
 # Set linker flags
 set(CMAKE_EXE_LINKER_FLAGS_DISTRIBUTION "${CMAKE_EXE_LINKER_FLAGS_RELEASE}")
 

+ 1 - 0
Build/README.md

@@ -26,6 +26,7 @@ There are a number of user configurable defines that turn on/off certain feature
 - JPH_DISABLE_TEMP_ALLOCATOR - Disables the temporary memory allocator, used mainly to allow ASAN to do its job.
 - JPH_DISABLE_CUSTOM_ALLOCATOR - Disables the ability to override the memory allocator.
 - JPH_FLOATING_POINT_EXCEPTIONS_ENABLED - Turns on division by zero and invalid floating point exception support in order to detect bugs (Windows only).
+- JPH_CROSS_PLATFORM_DETERMINISTIC - Turns on behavior to attempt cross platform determinism. If this is set, JPH_USE_FMADD is ignored.
 - JPH_USE_SSE4_1 - Enable SSE4.1 CPU instructions (x86/x64 only)
 - JPH_USE_SSE4_2 - Enable SSE4.2 CPU instructions (x86/x64 only)
 - JPH_USE_F16C - Enable half float CPU instructions (x86/x64 only)

+ 1 - 1
Docs/Architecture.md

@@ -311,7 +311,7 @@ The physics simulation is deterministic provided that:
 * The APIs that modify the simulation are called in exactly the same order. For example, bodies and constraints need to be added/removed/modified in exactly the same order so that the state at the beginning of a simulation step is exactly the same for both simulations.
 * The same binary code is used to run the simulation. For example, when you run the simulation on Windows it doesn't matter if you have an AMD or Intel processor. 
 
-Different library versions, compilers, compile options, OSes (e.g. Linux vs Windows) or architectures (e.g. ARM vs x86) will not result in the same simulation. You may be able to get cross platform compatibility by turning on 'strict math' (-ffp-model=strict in clang) and by disabling 'fused multiply add' (see JPH_USE_FMADD), but this hasn't been tested.
+If you want cross platform determinism (e.g. Linux vs Windows) then please turn on the CROSS_PLATFORM_DETERMINISTIC option in CMake. This will compile the library without fused multiply add instructions and with precise math (so it will come at a performance cost). It has been tested with the PerformanceTest (both with the Ragdoll and ConvexVsMesh tests) to result in the same simulation regardless of whether the library was compiled with MSVC2022 or clang, in Debug or Release mode, and on Windows or Linux. Note that the library is not yet cross platform deterministic between ARM and x86. Also note that it is quite difficult to verify cross platform determinism, so this feature is less tested than other features.
 
 When running the Samples Application you can press ESC, Physics Settings and check the 'Check Determinism' checkbox. Before every simulation step we will record the state using the [StateRecorder](@ref StateRecorder) interface, rewind the simulation and do the step again to validate that the simulation runs deterministically. Some of the tests (e.g. the MultiThreaded) test will explicitly disable the check because they randomly add/remove bodies from different threads. This violates the first rule so will not result in a deterministic simulation.
 

+ 12 - 10
Jolt/Core/Core.h

@@ -74,16 +74,18 @@
 	#if defined(__AVX2__) && !defined(JPH_USE_AVX2)
 		#define JPH_USE_AVX2
 	#endif
-	#if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC)
-		#if defined(__FMA__) && !defined(JPH_USE_FMADD)
-			#define JPH_USE_FMADD
+	#ifndef JPH_CROSS_PLATFORM_DETERMINISTIC // FMA is not compatible with cross platform determinism
+		#if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC)
+			#if defined(__FMA__) && !defined(JPH_USE_FMADD)
+				#define JPH_USE_FMADD
+			#endif
+		#elif defined(JPH_COMPILER_MSVC)
+			#if defined(__AVX2__) && !defined(JPH_USE_FMADD) // AVX2 also enables fused multiply add
+				#define JPH_USE_FMADD
+			#endif
+		#else
+			#error Undefined compiler
 		#endif
-	#elif defined(JPH_COMPILER_MSVC)
-		#if defined(__AVX2__) && !defined(JPH_USE_FMADD) // AVX2 also enables fused multiply add
-			#define JPH_USE_FMADD
-		#endif
-	#else
-		#error Undefined compiler
 	#endif
 #elif defined(__aarch64__) || defined(_M_ARM64)
 	// ARM64 CPU architecture
@@ -286,7 +288,7 @@ static_assert(sizeof(void *) == (JPH_CPU_ADDRESS_BITS == 64? 8 : 4), "Invalid si
 #define JPH_UNUSED(x)			(void)x
 
 // Macro to enable floating point precise mode and to disable fused multiply add instructions
-#if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC)
+#if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC) || defined(JPH_CROSS_PLATFORM_DETERMINISTIC)
 	// In clang it appears you cannot turn off -ffast-math and -ffp-contract=fast for a code block
 	// There is #pragma clang fp contract (off) but that doesn't seem to work under clang 9 & 10 when -ffast-math is specified on the commandline (you override it to turn it on, but not off)
 	// There is #pragma float_control(precise, on) but that doesn't work under clang 9.

+ 20 - 4
Jolt/Core/HashCombine.h

@@ -5,10 +5,26 @@
 
 JPH_NAMESPACE_BEGIN
 
+/// Implements the FNV-1a hash algorithm
+/// @see https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+/// @param inData Data block of bytes
+/// @param inSize Number of bytes
+/// @return Hash
+inline uint64 HashBytes(const void *inData, uint inSize, uint64 inSeed = 0xcbf29ce484222325UL)
+{
+	uint64 hash = inSeed;
+	for (const uint8 *data = reinterpret_cast<const uint8 *>(inData); data < reinterpret_cast<const uint8 *>(inData) + inSize; ++data)
+	{
+		hash = hash ^ uint64(*data);
+		hash = hash * 0x100000001b3UL;
+	}
+	return hash;
+}
+
 /// @brief Helper function that hashes a single value into ioSeed
 /// Taken from: https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
 template <typename T>
-inline void hash_combine_helper(std::size_t &ioSeed, const T &inValue)
+inline void HashCombineHelper(size_t &ioSeed, const T &inValue)
 {
 	std::hash<T> hasher;
     ioSeed ^= hasher(inValue) + 0x9e3779b9 + (ioSeed << 6) + (ioSeed >> 2);
@@ -27,10 +43,10 @@ inline void hash_combine_helper(std::size_t &ioSeed, const T &inValue)
 /// 
 ///		JPH_MAKE_HASHABLE(SomeHashKey, t.key1, t.key2, t.key3)
 template <typename... Values>
-inline void hash_combine(std::size_t &ioSeed, Values... inValues) 
+inline void HashCombine(std::size_t &ioSeed, Values... inValues) 
 {
 	// Hash all values together using a fold expression
-	(hash_combine_helper(ioSeed, inValues), ...);
+	(HashCombineHelper(ioSeed, inValues), ...);
 }
 
 JPH_NAMESPACE_END
@@ -44,7 +60,7 @@ JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic")
         std::size_t operator()(const type &t) const			\
 		{													\
             std::size_t ret = 0;							\
-            ::JPH::hash_combine(ret, __VA_ARGS__);			\
+            ::JPH::HashCombine(ret, __VA_ARGS__);			\
             return ret;										\
         }													\
     };

+ 2 - 2
Jolt/Core/LockFreeHashMap.h

@@ -115,10 +115,10 @@ public:
 	/// Insert a new element, returns null if map full.
 	/// Multiple threads can be inserting in the map at the same time.
 	template <class... Params>
-	inline KeyValue *		Create(LFHMAllocatorContext &ioContext, const Key &inKey, size_t inKeyHash, int inExtraBytes, Params &&... inConstructorParams);
+	inline KeyValue *		Create(LFHMAllocatorContext &ioContext, const Key &inKey, uint64 inKeyHash, int inExtraBytes, Params &&... inConstructorParams);
 	
 	/// Find an element, returns null if not found
-	inline const KeyValue *	Find(const Key &inKey, size_t inKeyHash) const;
+	inline const KeyValue *	Find(const Key &inKey, uint64 inKeyHash) const;
 
 	/// Value of an invalid handle
 	const static uint32		cInvalidHandle = uint32(-1);

+ 2 - 2
Jolt/Core/LockFreeHashMap.inl

@@ -158,7 +158,7 @@ void LockFreeHashMap<Key, Value>::SetNumBuckets(uint32 inNumBuckets)
 
 template <class Key, class Value>
 template <class... Params>
-inline typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Value>::Create(LFHMAllocatorContext &ioContext, const Key &inKey, size_t inKeyHash, int inExtraBytes, Params &&... inConstructorParams)
+inline typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Value>::Create(LFHMAllocatorContext &ioContext, const Key &inKey, uint64 inKeyHash, int inExtraBytes, Params &&... inConstructorParams)
 {
 	// This is not a multi map, test the key hasn't been inserted yet
 	JPH_ASSERT(Find(inKey, inKeyHash) == nullptr);
@@ -201,7 +201,7 @@ inline typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Valu
 }
 
 template <class Key, class Value>
-inline const typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Value>::Find(const Key &inKey, size_t inKeyHash) const
+inline const typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Value>::Find(const Key &inKey, uint64 inKeyHash) const
 {
 	// Get the offset to the keyvalue object from the bucket with corresponding hash
 	uint32 offset = mBuckets[inKeyHash & (mNumBuckets - 1)].load(memory_order_acquire);

+ 2 - 2
Jolt/Core/StaticArray.h

@@ -293,11 +293,11 @@ namespace std
 			std::size_t ret = 0;
 
 			// Hash length first
-            JPH::hash_combine(ret, inRHS.size());
+            JPH::HashCombine(ret, inRHS.size());
 
 			// Then hash elements
 			for (const T &t : inRHS)
-	            JPH::hash_combine(ret, t);
+	            JPH::HashCombine(ret, t);
 
             return ret;
 		}

+ 1 - 0
Jolt/Jolt.cmake

@@ -105,6 +105,7 @@ set(JOLT_PHYSICS_SRC_FILES
 	${JOLT_PHYSICS_ROOT}/Math/Quat.h
 	${JOLT_PHYSICS_ROOT}/Math/Quat.inl
 	${JOLT_PHYSICS_ROOT}/Math/Swizzle.h
+	${JOLT_PHYSICS_ROOT}/Math/Trigonometry.h
 	${JOLT_PHYSICS_ROOT}/Math/UVec4.cpp
 	${JOLT_PHYSICS_ROOT}/Math/UVec4.h
 	${JOLT_PHYSICS_ROOT}/Math/UVec4.inl

+ 1 - 1
Jolt/Jolt.natvis

@@ -19,7 +19,7 @@
     <DisplayString>{mF32[0]}, {mF32[1]}, {mF32[2]}, {mF32[3]}, L^2={mF32[0]*mF32[0]+mF32[1]*mF32[1]+mF32[2]*mF32[2]+mF32[3]*mF32[3]}</DisplayString>
   </Type>
   <Type Name="JPH::UVec4">
-    <DisplayString>{mF32[0]}, {mF32[1]}, {mF32[2]}, {mF32[3]}</DisplayString>
+    <DisplayString>{mU32[0]}, {mU32[1]}, {mU32[2]}, {mU32[3]}</DisplayString>
   </Type>
   <Type Name="JPH::Quat">
     <DisplayString>{mValue}</DisplayString>

+ 9 - 6
Jolt/Math/Mat44.inl

@@ -52,22 +52,25 @@ Mat44 Mat44::sLoadFloat4x4Aligned(const Float4 *inV)
 
 Mat44 Mat44::sRotationX(float inX)
 {
-	// TODO: Could be optimized
-	float c = cos(inX), s = sin(inX);
+	Vec4 sv, cv;
+	Vec4::sReplicate(inX).SinCos(sv, cv);
+	float s = sv.GetX(), c = cv.GetX();
 	return Mat44(Vec4(1, 0, 0, 0), Vec4(0, c, s, 0), Vec4(0, -s, c, 0), Vec4(0, 0, 0, 1));
 }
 
 Mat44 Mat44::sRotationY(float inY)
 {
-	// TODO: Could be optimized
-	float c = cos(inY), s = sin(inY);
+	Vec4 sv, cv;
+	Vec4::sReplicate(inY).SinCos(sv, cv);
+	float s = sv.GetX(), c = cv.GetX();
 	return Mat44(Vec4(c, 0, -s, 0), Vec4(0, 1, 0, 0), Vec4(s, 0, c, 0), Vec4(0, 0, 0, 1));
 }
 
 Mat44 Mat44::sRotationZ(float inZ)
 {
-	// TODO: Could be optimized
-	float c = cos(inZ), s = sin(inZ);
+	Vec4 sv, cv;
+	Vec4::sReplicate(inZ).SinCos(sv, cv);
+	float s = sv.GetX(), c = cv.GetX();
 	return Mat44(Vec4(c, s, 0, 0), Vec4(-s, c, 0, 0), Vec4(0, 0, 1, 0), Vec4(0, 0, 0, 1));
 }
 

+ 1 - 1
Jolt/Math/Quat.h

@@ -176,7 +176,7 @@ public:
 	JPH_INLINE Quat				GetPerpendicular() const										{ return Quat(Vec4(1, -1, 1, -1) * mValue.Swizzle<SWIZZLE_Y, SWIZZLE_X, SWIZZLE_W, SWIZZLE_Z>()); }
 
 	/// Get rotation angle around inAxis (uses Swing Twist Decomposition to get the twist quaternion and uses q(axis, angle) = [cos(angle / 2), axis * sin(angle / 2)])
-	JPH_INLINE float			GetRotationAngle(Vec3Arg inAxis) const							{ return GetW() == 0.0f? JPH_PI : 2.0f * atan(GetXYZ().Dot(inAxis) / GetW()); }
+	JPH_INLINE float			GetRotationAngle(Vec3Arg inAxis) const							{ return GetW() == 0.0f? JPH_PI : 2.0f * ATan(GetXYZ().Dot(inAxis) / GetW()); }
 
 	/// Swing Twist Decomposition: any quaternion can be split up as:
 	///

+ 23 - 21
Jolt/Math/Quat.inl

@@ -72,9 +72,11 @@ Quat Quat::operator * (QuatArg inRHS) const
 
 Quat Quat::sRotation(Vec3Arg inAxis, float inAngle)
 {
+    // returns [inAxis * sin(0.5f * inAngle), cos(0.5f * inAngle)]
 	JPH_ASSERT(inAxis.IsNormalized());
-	float half_angle = 0.5f * inAngle;
-    return Quat(Vec4(inAxis * sin(half_angle), cos(half_angle)));
+	Vec4 s, c;
+	Vec4::sReplicate(0.5f * inAngle).SinCos(s, c);
+    return Quat(Vec4::sSelect(Vec4(inAxis) * s, c, UVec4(0, 0, 0, 0xffffffffU)));
 }
 
 void Quat::GetAxisAngle(Vec3 &outAxis, float &outAngle) const
@@ -89,7 +91,7 @@ void Quat::GetAxisAngle(Vec3 &outAxis, float &outAngle) const
 	}
 	else
 	{
-		outAngle = 2.0f * acos(abs_w);
+		outAngle = 2.0f * ACos(abs_w);
 		outAxis = w_pos.GetXYZ().NormalizedOr(Vec3::sZero());
 	}
 }
@@ -143,23 +145,23 @@ Quat Quat::sRandom(Random &inRandom)
 	float x0 = zero_to_one(inRandom);
 	float r1 = sqrt(1.0f - x0), r2 = sqrt(x0);
 	uniform_real_distribution<float> zero_to_two_pi(0.0f, 2.0f * JPH_PI);
-	float t1 = zero_to_two_pi(inRandom), t2 = zero_to_two_pi(inRandom);	
-	return Quat(sin(t1) * r1, cos(t1) * r1, sin(t2) * r2, cos(t2) * r2);
+	Vec4 s, c;
+	Vec4(zero_to_two_pi(inRandom), zero_to_two_pi(inRandom), 0, 0).SinCos(s, c);
+	return Quat(s.GetX() * r1, c.GetX() * r1, s.GetY() * r2, c.GetY() * r2);
 }
 
 Quat Quat::sEulerAngles(Vec3Arg inAngles)
 {
-	Vec3 half = 0.5f * inAngles;
-	float x = half.GetX();
-	float y = half.GetY();
-	float z = half.GetZ();
-	
-	float cx = cos(x);
-	float sx = sin(x);
-	float cy = cos(y);
-	float sy = sin(y);
-	float cz = cos(z);
-	float sz = sin(z);
+	Vec4 half(0.5f * inAngles);
+	Vec4 s, c;
+	half.SinCos(s, c);
+
+	float cx = c.GetX();
+	float sx = s.GetX();
+	float cy = c.GetY();
+	float sy = s.GetY();
+	float cz = c.GetZ();
+	float sz = s.GetZ();
 
 	return Quat(
 		cz * sx * cy - sz * cx * sy,
@@ -185,7 +187,7 @@ Vec3 Quat::GetEulerAngles() const
 	float t3 = 2.0f * (GetW() * GetZ() + GetX() * GetY());
 	float t4 = 1.0f - 2.0f * (y_sq + GetZ() * GetZ());  
 
-	return Vec3(atan2(t0, t1), asin(t2), atan2(t3, t4));
+	return Vec3(ATan2(t0, t1), ASin(t2), ATan2(t3, t4));
 }
 
 Quat Quat::GetTwist(Vec3Arg inAxis) const
@@ -242,10 +244,10 @@ Quat Quat::SLERP(QuatArg inDestination, float inFraction) const
 	if (1.0f - cos_omega > delta) 
 	{
 		// Standard case (slerp)
-		float omega = acos(cos_omega);
-		float sin_omega = sin(omega);
-		scale0 = sin((1.0f - inFraction) * omega) / sin_omega;
-		scale1 = sign_scale1 * sin(inFraction * omega) / sin_omega;
+		float omega = ACos(cos_omega);
+		float sin_omega = Sin(omega);
+		scale0 = Sin((1.0f - inFraction) * omega) / sin_omega;
+		scale1 = sign_scale1 * Sin(inFraction * omega) / sin_omega;
 	} 
 	else 
 	{        

+ 56 - 0
Jolt/Math/Trigonometry.h

@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+JPH_NAMESPACE_BEGIN
+
+// Note that this file exists because std::sin etc. are not platform independent and will lead to non-deterministic simulation
+
+/// Sine of x
+JPH_INLINE float Sin(float inX)
+{
+	Vec4 s, c;
+	Vec4::sReplicate(inX).SinCos(s, c);
+	return s.GetX();
+}
+
+/// Cosine of x
+JPH_INLINE float Cos(float inX)
+{
+	Vec4 s, c;
+	Vec4::sReplicate(inX).SinCos(s, c);
+	return c.GetX();
+}
+
+/// Tangent of x
+JPH_INLINE float Tan(float inX)
+{
+	return tan(inX);
+}
+
+/// Arc sine of x
+JPH_INLINE float ASin(float inX)
+{
+	return asin(inX);
+}
+
+/// Arc cosine of x
+JPH_INLINE float ACos(float inX)
+{
+	return acos(inX);
+}
+
+/// Arc tangent of x
+JPH_INLINE float ATan(float inX)
+{
+	return atan(inX);
+}
+
+/// Arc tangent of y / x
+JPH_INLINE float ATan2(float inY, float inX)
+{
+	return atan2(inY, inX);
+}
+
+JPH_NAMESPACE_END

+ 3 - 2
Jolt/Math/Vec3.inl

@@ -278,8 +278,9 @@ Vec3 Vec3::sAnd(Vec3Arg inV1, Vec3Arg inV2)
 
 Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
 {
-	float sint = sin(inTheta);
-	return Vec3(sint * cos(inPhi), sint * sin(inPhi), cos(inTheta));
+	Vec4 s, c;
+	Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
+	return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
 }
 
 template <class Random>

+ 3 - 0
Jolt/Math/Vec4.h

@@ -234,6 +234,9 @@ public:
 	/// Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
 	JPH_INLINE Vec4				GetSign() const;
 
+	/// Calculate the sine and cosine for each element of this vector
+	inline void					SinCos(Vec4 &outSin, Vec4 &outCos) const;
+
 	/// To String
 	friend ostream &			operator << (ostream &inStream, Vec4Arg inV)
 	{

+ 76 - 0
Jolt/Math/Vec4.inl

@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
 // SPDX-License-Identifier: MIT
 
+#include <Jolt/Math/Trigonometry.h>
 #include <Jolt/Math/Vec3.h>
 #include <Jolt/Math/UVec4.h>
 
@@ -710,4 +711,79 @@ float Vec4::ReduceMax() const
 	return v.GetX();
 }
 
+void Vec4::SinCos(Vec4 &outSin, Vec4 &outCos) const
+{
+	// Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function and vectorizes it
+	// Original implementation by Stephen L. Moshier (See: http://www.netlib.org/cephes/)
+
+	// Make argument positive and remember sign (highest bit set is negative)
+	UVec4 sin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
+	Vec4 x = Vec4::sXor(*this, sin_sign.ReinterpretAsFloat());
+
+	// Integer part of x / (PI / 4)
+	UVec4 int_val = (1.27323954473516f * x).ToInt();
+	Vec4 y = int_val.ToFloat();
+
+	// Integer and fractional part modulo one octant, map zeros to origin
+	// if (int_val & 1) int_val++, y += 1;
+	UVec4 and_1 = int_val.LogicalShiftLeft<31>().ArithmeticShiftRight<31>();
+	int_val += UVec4::sAnd(and_1, UVec4::sReplicate(1));
+	y += Vec4::sAnd(and_1.ReinterpretAsFloat(), Vec4::sReplicate(1.0f));
+
+	// Extended precision modular arithmetic
+	x = ((x - y * 0.78515625f) - y * 2.4187564849853515625e-4f) - y * 3.77489497744594108e-8f;
+
+	// Calculate both results
+	Vec4 z = x * x;
+	Vec4 y1 = ((2.443315711809948e-5f * z - Vec4::sReplicate(1.388731625493765e-3f)) * z + Vec4::sReplicate(4.166664568298827e-2f)) * z * z - 0.5f * z + Vec4::sReplicate(1.0f);
+	Vec4 y2 = ((-1.9515295891e-4f * z + Vec4::sReplicate(8.3321608736e-3f)) * z - Vec4::sReplicate(1.6666654611e-1f)) * z * x + x;
+
+	// From here we deviate from the original cephes code, we would have to write:
+	//
+	// j &= 7;
+	// 
+	// if (j > 3)
+	// {
+	//		j -= 4;
+	//		sin_sign = -sin_sign;
+	//		cos_sign = -cos_sign;
+	// }
+	// 
+	// if (j > 1)
+	//		cos_sign = -cos_sign;
+	//
+	// ...
+	//
+	// if (j == 1 || j == 2) // condition
+	//		...
+	// 
+	// j		sin_sign	cos_sign	condition
+	// 000b     1			1			0
+	// 001b     1			1			1
+	// 010b     1			-1			1
+	// 011b     1			-1			0
+	// 100b     -1			-1			0
+	// 101b     -1			-1			1
+	// 110b     -1			1			1
+	// 111b		-1			1			0
+	//
+	// So: sin_sign = bit3, cos_sign = bit2 ^ bit3, condition = bit1 ^ bit2
+	UVec4 bit1 = int_val.LogicalShiftLeft<31>();
+	UVec4 bit2 = UVec4::sAnd(int_val.LogicalShiftLeft<30>(), UVec4::sReplicate(0x80000000U));
+	UVec4 bit3 = UVec4::sAnd(int_val.LogicalShiftLeft<29>(), UVec4::sReplicate(0x80000000U));
+
+	// Select which one of the results is sin and which one is cos
+	UVec4 xor_1_2 = UVec4::sXor(bit1, bit2);
+	Vec4 s = Vec4::sSelect(y2, y1, xor_1_2);
+	Vec4 c = Vec4::sSelect(y1, y2, xor_1_2);
+
+	// Update the signs
+	sin_sign = UVec4::sXor(sin_sign, bit3);
+	UVec4 cos_sign = UVec4::sXor(bit2, bit3);
+
+	// Correct the signs
+	outSin = Vec4::sXor(s, sin_sign.ReinterpretAsFloat());
+	outCos = Vec4::sXor(c, cos_sign.ReinterpretAsFloat());
+}
+
 JPH_NAMESPACE_END

+ 2 - 2
Jolt/Physics/Body/BodyPair.h

@@ -23,10 +23,10 @@ struct BodyPair
 	/// Smaller than operator, used for consistently ordering body pairs
 	bool					operator < (const BodyPair &inRHS) const					{ static_assert(sizeof(*this) == sizeof(uint64), "Mismatch in class size"); return *reinterpret_cast<const uint64 *>(this) < *reinterpret_cast<const uint64 *>(&inRHS); }
 
+	uint64					GetHash() const												{ return HashBytes(this, sizeof(BodyPair)); }
+
 	BodyID					mBodyA;
 	BodyID					mBodyB;
 };
 
-JPH_MAKE_HASH_STRUCT(BodyPair, BodyPairHash, t.mBodyA.GetIndex(), t.mBodyB.GetIndex())
-
 JPH_NAMESPACE_END

+ 1 - 1
Jolt/Physics/Character/CharacterBase.h

@@ -45,7 +45,7 @@ public:
 	virtual								~CharacterBase() = default;
 
 	/// Set the maximum angle of slope that character can still walk on (radians)
-	void								SetMaxSlopeAngle(float inMaxSlopeAngle)					{ mCosMaxSlopeAngle = cos(inMaxSlopeAngle); }
+	void								SetMaxSlopeAngle(float inMaxSlopeAngle)					{ mCosMaxSlopeAngle = Cos(inMaxSlopeAngle); }
 
 	/// Get the current shape that the character is using.
 	const Shape *						GetShape() const										{ return mShape; }

+ 1 - 1
Jolt/Physics/Collision/Shape/ConvexHullShape.cpp

@@ -212,7 +212,7 @@ ConvexHullShape::ConvexHullShape(const ConvexHullShapeSettings &inSettings, Shap
 			
 			// When using 2 normals, we get the two with the biggest angle between them with a minimal difference of 1 degree
 			// otherwise we fall back to just using 1 plane normal
-			float smallest_dot = cos(DegreesToRadians(1.0f));
+			float smallest_dot = Cos(DegreesToRadians(1.0f));
 			int best2[2] = { -1, -1 };
 
 			for (int face1 = 0; face1 < (int)faces.size(); ++face1)

+ 2 - 2
Jolt/Physics/Collision/Shape/GetTrianglesContext.h

@@ -94,8 +94,8 @@ public:
 			float angle1 = 2.0f * JPH_PI * (float(i) / num_verts);
 			float angle2 = 2.0f * JPH_PI * (float(i + 1) / num_verts);
 
-			Vec3 t1(sin(angle1), 1.0f, cos(angle1));
-			Vec3 t2(sin(angle2), 1.0f, cos(angle2));
+			Vec3 t1(Sin(angle1), 1.0f, Cos(angle1));
+			Vec3 t2(Sin(angle2), 1.0f, Cos(angle2));
 			Vec3 b1 = t1 + bottom_offset;
 			Vec3 b2 = t2 + bottom_offset;
 

+ 18 - 1
Jolt/Physics/Collision/Shape/SubShapeIDPair.h

@@ -46,6 +46,8 @@ public:
 	const BodyID &			GetBody2ID() const				{ return mBody2ID; }
 	const SubShapeID &		GetSubShapeID2() const			{ return mSubShapeID2; }
 
+	uint64					GetHash() const					{ return HashBytes(this, sizeof(SubShapeIDPair)); }
+
 private:
 	BodyID					mBody1ID;
 	SubShapeID				mSubShapeID1;
@@ -58,4 +60,19 @@ static_assert(alignof(SubShapeIDPair) == 4, "Assuming 4 byte aligned");
 
 JPH_NAMESPACE_END
 
-JPH_MAKE_HASHABLE(JPH::SubShapeIDPair, t.GetBody1ID().GetIndexAndSequenceNumber(), t.GetSubShapeID1().GetValue(), t.GetBody2ID().GetIndexAndSequenceNumber(), t.GetSubShapeID2().GetValue())
+JPH_SUPPRESS_WARNINGS_STD_BEGIN
+
+namespace std
+{
+	/// Declare std::hash for SubShapeIDPair, note that std::hash is platform dependent and we need this one to be consistent because we sort on it in the ContactConstraintManager
+	template <> 
+	struct hash<JPH::SubShapeIDPair>
+	{
+		inline size_t operator () (const JPH::SubShapeIDPair &inRHS) const
+		{
+			return static_cast<size_t>(inRHS.GetHash());
+		}
+	};
+}
+
+JPH_SUPPRESS_WARNINGS_STD_END

+ 1 - 1
Jolt/Physics/Collision/Shape/TaperedCapsuleShape.cpp

@@ -126,7 +126,7 @@ TaperedCapsuleShape::TaperedCapsuleShape(const TaperedCapsuleShapeSettings &inSe
 	// See: TaperedCapsuleShape.gliffy
 	mSinAlpha = (mBottomRadius - mTopRadius) / (mTopCenter - mBottomCenter);
 	JPH_ASSERT(mSinAlpha >= -1.0f && mSinAlpha <= 1.0f);
-	mTanAlpha = tan(asin(mSinAlpha));
+	mTanAlpha = Tan(ASin(mSinAlpha));
 
 	outResult.Set(this);
 }

+ 2 - 2
Jolt/Physics/Constraints/ConeConstraint.cpp

@@ -175,7 +175,7 @@ void ConeConstraint::DrawConstraintLimits(DebugRenderer *inRenderer) const
 	Vec3 twist_axis1 = transform1.Multiply3x3(mLocalSpaceTwistAxis1);
 	Vec3 normal_axis1 = transform1.Multiply3x3(mLocalSpaceTwistAxis1.GetNormalizedPerpendicular());
 
-	inRenderer->DrawOpenCone(position1, twist_axis1, normal_axis1, acos(mCosHalfConeAngle), mDrawConstraintSize * mCosHalfConeAngle, Color::sPurple, DebugRenderer::ECastShadow::Off);
+	inRenderer->DrawOpenCone(position1, twist_axis1, normal_axis1, ACos(mCosHalfConeAngle), mDrawConstraintSize * mCosHalfConeAngle, Color::sPurple, DebugRenderer::ECastShadow::Off);
 }
 #endif // JPH_DEBUG_RENDERER
 
@@ -206,7 +206,7 @@ Ref<ConstraintSettings> ConeConstraint::GetConstraintSettings() const
 	settings->mTwistAxis1 = mLocalSpaceTwistAxis1;
 	settings->mPoint2 = mLocalSpacePosition2;
 	settings->mTwistAxis2 = mLocalSpaceTwistAxis2;
-	settings->mHalfConeAngle = acos(mCosHalfConeAngle);
+	settings->mHalfConeAngle = ACos(mCosHalfConeAngle);
 	return settings;
 }
 

+ 1 - 1
Jolt/Physics/Constraints/ConeConstraint.h

@@ -92,7 +92,7 @@ public:
 	virtual Mat44				GetConstraintToBody2Matrix() const override;
 
 	/// Update maximum angle between body 1 and 2 (see ConeConstraintSettings)
-	void						SetHalfConeAngle(float inHalfConeAngle)		{ JPH_ASSERT(inHalfConeAngle >= 0.0f && inHalfConeAngle <= JPH_PI); mCosHalfConeAngle = cos(inHalfConeAngle); }
+	void						SetHalfConeAngle(float inHalfConeAngle)		{ JPH_ASSERT(inHalfConeAngle >= 0.0f && inHalfConeAngle <= JPH_PI); mCosHalfConeAngle = Cos(inHalfConeAngle); }
 	float						GetCosHalfConeAngle() const					{ return mCosHalfConeAngle; }
 
 	///@name Get Lagrange multiplier from last physics update (relates to how much force/torque was applied to satisfy the constraint)

+ 10 - 8
Jolt/Physics/Constraints/ConstraintPart/SwingTwistConstraintPart.h

@@ -36,6 +36,10 @@ public:
 		JPH_ASSERT(inSwingYHalfAngle >= 0.0f && inSwingYHalfAngle <= JPH_PI);
 		JPH_ASSERT(inSwingZHalfAngle >= 0.0f && inSwingZHalfAngle <= JPH_PI);
 
+		// Calculate the sine and cosine of the half angles
+		Vec4 s, c;
+		(0.5f * Vec4(inTwistMinAngle, inTwistMaxAngle, inSwingYHalfAngle, inSwingZHalfAngle)).SinCos(s, c);
+
 		// Store axis flags which are used at runtime to quickly decide which constraints to apply
 		mRotationFlags = 0;
 		if (inTwistMinAngle > -cLockedAngle && inTwistMaxAngle < cLockedAngle)
@@ -56,12 +60,10 @@ public:
 		}
 		else
 		{
-			float twist_half_min = 0.5f * inTwistMinAngle;
-			float twist_half_max = 0.5f * inTwistMaxAngle;
-			mSinTwistHalfMinAngle = sin(twist_half_min);
-			mSinTwistHalfMaxAngle = sin(twist_half_max);
-			mCosTwistHalfMinAngle = cos(twist_half_min);
-			mCosTwistHalfMaxAngle = cos(twist_half_max);
+			mSinTwistHalfMinAngle = s.GetX();
+			mSinTwistHalfMaxAngle = s.GetY();
+			mCosTwistHalfMinAngle = c.GetX();
+			mCosTwistHalfMaxAngle = c.GetY();
 		}
 
 		if (inSwingYHalfAngle < cLockedAngle)
@@ -76,7 +78,7 @@ public:
 		}
 		else
 		{
-			mSinSwingYQuarterAngle = sin(0.5f * inSwingYHalfAngle);
+			mSinSwingYQuarterAngle = s.GetZ();
 		}
 
 		if (inSwingZHalfAngle < cLockedAngle)
@@ -91,7 +93,7 @@ public:
 		}
 		else
 		{
-			mSinSwingZQuarterAngle = sin(0.5f * inSwingZHalfAngle);
+			mSinSwingZQuarterAngle = s.GetW();
 		}
 	}
 

+ 28 - 15
Jolt/Physics/Constraints/ContactConstraintManager.cpp

@@ -242,13 +242,13 @@ void ContactConstraintManager::ManifoldCache::Prepare(uint inExpectedNumBodyPair
 	mCachedBodyPairs.SetNumBuckets(min(max(cMinBuckets, GetNextPowerOf2(inExpectedNumBodyPairs)), mCachedBodyPairs.GetMaxBuckets()));
 }
 
-const ContactConstraintManager::MKeyValue *ContactConstraintManager::ManifoldCache::Find(const SubShapeIDPair &inKey, size_t inKeyHash) const
+const ContactConstraintManager::MKeyValue *ContactConstraintManager::ManifoldCache::Find(const SubShapeIDPair &inKey, uint64 inKeyHash) const
 {
 	JPH_ASSERT(mIsFinalized);
 	return mCachedManifolds.Find(inKey, inKeyHash);
 }
 
-ContactConstraintManager::MKeyValue *ContactConstraintManager::ManifoldCache::Create(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, size_t inKeyHash, int inNumContactPoints)
+ContactConstraintManager::MKeyValue *ContactConstraintManager::ManifoldCache::Create(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, uint64 inKeyHash, int inNumContactPoints)
 {
 	JPH_ASSERT(!mIsFinalized);
 	MKeyValue *kv = mCachedManifolds.Create(ioContactAllocator, inKey, inKeyHash, CachedManifold::sGetRequiredExtraSize(inNumContactPoints));
@@ -262,7 +262,7 @@ ContactConstraintManager::MKeyValue *ContactConstraintManager::ManifoldCache::Cr
 	return kv;
 }
 
-ContactConstraintManager::MKVAndCreated ContactConstraintManager::ManifoldCache::FindOrCreate(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, size_t inKeyHash, int inNumContactPoints)
+ContactConstraintManager::MKVAndCreated ContactConstraintManager::ManifoldCache::FindOrCreate(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, uint64 inKeyHash, int inNumContactPoints)
 {
 	MKeyValue *kv = const_cast<MKeyValue *>(mCachedManifolds.Find(inKey, inKeyHash));
 	if (kv != nullptr)
@@ -283,13 +283,13 @@ const ContactConstraintManager::MKeyValue *ContactConstraintManager::ManifoldCac
 	return mCachedManifolds.FromHandle(inHandle);
 }
 
-const ContactConstraintManager::BPKeyValue *ContactConstraintManager::ManifoldCache::Find(const BodyPair &inKey, size_t inKeyHash) const
+const ContactConstraintManager::BPKeyValue *ContactConstraintManager::ManifoldCache::Find(const BodyPair &inKey, uint64 inKeyHash) const
 {
 	JPH_ASSERT(mIsFinalized);	
 	return mCachedBodyPairs.Find(inKey, inKeyHash);
 }
 
-ContactConstraintManager::BPKeyValue *ContactConstraintManager::ManifoldCache::Create(ContactAllocator &ioContactAllocator, const BodyPair &inKey, size_t inKeyHash)
+ContactConstraintManager::BPKeyValue *ContactConstraintManager::ManifoldCache::Create(ContactAllocator &ioContactAllocator, const BodyPair &inKey, uint64 inKeyHash)
 {
 	JPH_ASSERT(!mIsFinalized);
 	BPKeyValue *kv = mCachedBodyPairs.Create(ioContactAllocator, inKey, inKeyHash, 0);
@@ -463,7 +463,7 @@ bool ContactConstraintManager::ManifoldCache::RestoreState(const ManifoldCache &
 		inStream.Read(body_pair_key);
 
 		// Create new entry for this body pair
-		size_t body_pair_hash = BodyPairHash {} (body_pair_key);
+		uint64 body_pair_hash = body_pair_key.GetHash();
 		BPKeyValue *bp_kv = Create(contact_allocator, body_pair_key, body_pair_hash);
 		if (bp_kv == nullptr)
 		{
@@ -497,7 +497,7 @@ bool ContactConstraintManager::ManifoldCache::RestoreState(const ManifoldCache &
 			if (inStream.IsValidating() && j < all_m.size())
 				sub_shape_key = all_m[j]->GetKey();
 			inStream.Read(sub_shape_key);
-			size_t sub_shape_key_hash = std::hash<SubShapeIDPair> {} (sub_shape_key);
+			uint64 sub_shape_key_hash = sub_shape_key.GetHash();
 			
 			// Read amount of contact points
 			uint16 num_contact_points;
@@ -548,7 +548,7 @@ bool ContactConstraintManager::ManifoldCache::RestoreState(const ManifoldCache &
 		if (inStream.IsValidating() && j < all_m.size())
 			sub_shape_key = all_m[j]->GetKey();
 		inStream.Read(sub_shape_key);
-		size_t sub_shape_key_hash = std::hash<SubShapeIDPair> {} (sub_shape_key);
+		uint64 sub_shape_key_hash = sub_shape_key.GetHash();
 			
 		// Create CCD manifold
 		MKeyValue *m_kv = Create(contact_allocator, sub_shape_key, sub_shape_key_hash, 0);
@@ -694,7 +694,7 @@ void ContactConstraintManager::GetContactsFromCache(ContactAllocator &ioContactA
 
 	// Find the cached body pair
 	BodyPair body_pair_key(body1->GetID(), body2->GetID());
-	size_t body_pair_hash = BodyPairHash {} (body_pair_key);
+	uint64 body_pair_hash = body_pair_key.GetHash();
 	const ManifoldCache &read_cache = mCache[mCacheWriteIdx ^ 1];
 	const BPKeyValue *kv = read_cache.Find(body_pair_key, body_pair_hash);
 	if (kv == nullptr)
@@ -761,7 +761,7 @@ void ContactConstraintManager::GetContactsFromCache(ContactAllocator &ioContactA
 		JPH_ASSERT(input_cm.mNumContactPoints > 0); // There should be contact points in this manifold!
 
 		// Create room for manifold in write buffer and copy data
-		size_t input_hash = std::hash<SubShapeIDPair> {} (input_key);
+		uint64 input_hash = input_key.GetHash();
 		MKeyValue *output_kv = write_cache.Create(ioContactAllocator, input_key, input_hash, input_cm.mNumContactPoints);
 		if (output_kv == nullptr)
 			break; // Out of cache space
@@ -881,7 +881,7 @@ ContactConstraintManager::BodyPairHandle ContactConstraintManager::AddBodyPair(C
 
 	// Add an entry
 	BodyPair body_pair_key(body1->GetID(), body2->GetID());
-	size_t body_pair_hash = BodyPairHash {} (body_pair_key);
+	uint64 body_pair_hash = body_pair_key.GetHash();
 	BPKeyValue *body_pair_kv = mCache[mCacheWriteIdx].Create(ioContactAllocator, body_pair_key, body_pair_hash);
 	if (body_pair_kv == nullptr)
 		return nullptr; // Out of cache space
@@ -909,7 +909,7 @@ bool ContactConstraintManager::TemplatedAddContactConstraint(ContactAllocator &i
 {
 	// Calculate hash
 	SubShapeIDPair key { inBody1.GetID(), inManifold.mSubShapeID1, inBody2.GetID(), inManifold.mSubShapeID2 };
-	size_t key_hash = std::hash<SubShapeIDPair> {} (key);
+	uint64 key_hash = key.GetHash();
 
 	// Determine number of contact points
 	int num_contact_points = (int)inManifold.mWorldSpaceContactPointsOn1.size();
@@ -1212,7 +1212,7 @@ void ContactConstraintManager::OnCCDContactAdded(ContactAllocator &ioContactAllo
 
 		// Calculate hash
 		SubShapeIDPair key { body1->GetID(), manifold->mSubShapeID1, body2->GetID(), manifold->mSubShapeID2 };
-		size_t key_hash = std::hash<SubShapeIDPair> {} (key);
+		uint64 key_hash = key.GetHash();
 
 		// Check if we already created this contact this physics update
 		ManifoldCache &write_cache = mCache[mCacheWriteIdx];
@@ -1263,8 +1263,21 @@ void ContactConstraintManager::SortContacts(uint32 *inConstraintIdxBegin, uint32
 	sort(inConstraintIdxBegin, inConstraintIdxEnd, [this](uint32 inLHS, uint32 inRHS) {
 		const ContactConstraint &lhs = mConstraints[inLHS];
 		const ContactConstraint &rhs = mConstraints[inRHS];
-		JPH_ASSERT(lhs.mSortKey != rhs.mSortKey, "Hash collision, ordering will be inconsistent");
-		return lhs.mSortKey < rhs.mSortKey;
+
+		// Most of the time the sort key will be different so we sort on that
+		if (lhs.mSortKey != rhs.mSortKey)
+			return lhs.mSortKey < rhs.mSortKey;
+
+		// If they're equal we use the IDs of body 1 to order
+		if (lhs.mBody1 != rhs.mBody1)
+			return lhs.mBody1->GetID() < rhs.mBody1->GetID();
+
+		// If they're still equal we use the IDs of body 2 to order
+		if (lhs.mBody2 != rhs.mBody2)
+			return lhs.mBody2->GetID() < rhs.mBody2->GetID();
+
+		JPH_ASSERT(false, "Hash collision, ordering will be inconsistent");
+		return false;
 	});
 }
 

+ 6 - 6
Jolt/Physics/Constraints/ContactConstraintManager.h

@@ -353,15 +353,15 @@ private:
 		ContactAllocator		GetContactAllocator()						{ return ContactAllocator(mAllocator, cAllocatorBlockSize); }
 
 		/// Find / create cached entry for SubShapeIDPair -> CachedManifold
-		const MKeyValue *		Find(const SubShapeIDPair &inKey, size_t inKeyHash) const;
-		MKeyValue *				Create(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, size_t inKeyHash, int inNumContactPoints);
-		MKVAndCreated			FindOrCreate(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, size_t inKeyHash, int inNumContactPoints);
+		const MKeyValue *		Find(const SubShapeIDPair &inKey, uint64 inKeyHash) const;
+		MKeyValue *				Create(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, uint64 inKeyHash, int inNumContactPoints);
+		MKVAndCreated			FindOrCreate(ContactAllocator &ioContactAllocator, const SubShapeIDPair &inKey, uint64 inKeyHash, int inNumContactPoints);
 		uint32					ToHandle(const MKeyValue *inKeyValue) const;
 		const MKeyValue *		FromHandle(uint32 inHandle) const;
 
 		/// Find / create entry for BodyPair -> CachedBodyPair
-		const BPKeyValue *		Find(const BodyPair &inKey, size_t inKeyHash) const;
-		BPKeyValue *			Create(ContactAllocator &ioContactAllocator, const BodyPair &inKey, size_t inKeyHash);
+		const BPKeyValue *		Find(const BodyPair &inKey, uint64 inKeyHash) const;
+		BPKeyValue *			Create(ContactAllocator &ioContactAllocator, const BodyPair &inKey, uint64 inKeyHash);
 		void					GetAllBodyPairsSorted(Array<const BPKeyValue *> &outAll) const;
 		void					GetAllManifoldsSorted(const CachedBodyPair &inBodyPair, Array<const MKeyValue *> &outAll) const;
 		void					GetAllCCDManifoldsSorted(Array<const MKeyValue *> &outAll) const;
@@ -442,7 +442,7 @@ private:
 		Vec3					mWorldSpaceNormal;
 		Body *					mBody1;
 		Body *					mBody2;
-		size_t					mSortKey;
+		uint64					mSortKey;
 		float					mCombinedFriction;
 		float					mCombinedRestitution;
 		WorldContactPoints		mContactPoints;

+ 3 - 0
Jolt/Physics/StateRecorderImpl.h

@@ -33,6 +33,9 @@ public:
 	/// Compare this state with a reference state and ensure they are the same
 	bool				IsEqual(StateRecorderImpl &inReference);
 
+	/// Convert the binary data to a string
+	string				GetData() const												{ return mStream.str(); }
+
 private:
 	stringstream		mStream;
 };

+ 2 - 2
Jolt/Physics/Vehicle/VehicleCollisionTester.h

@@ -44,7 +44,7 @@ public:
 	/// @param inObjectLayer Object layer to test collision with
 	/// @param inUp World space up vector, used to avoid colliding with vertical walls.
 	/// @param inMaxSlopeAngle Max angle (rad) that is considered for colliding wheels. This is to avoid colliding with vertical walls.
-								VehicleCollisionTesterRay(ObjectLayer inObjectLayer, Vec3Arg inUp = Vec3::sAxisY(), float inMaxSlopeAngle = DegreesToRadians(80.0f)) : mObjectLayer(inObjectLayer), mUp(inUp), mCosMaxSlopeAngle(cos(inMaxSlopeAngle)) { }
+								VehicleCollisionTesterRay(ObjectLayer inObjectLayer, Vec3Arg inUp = Vec3::sAxisY(), float inMaxSlopeAngle = DegreesToRadians(80.0f)) : mObjectLayer(inObjectLayer), mUp(inUp), mCosMaxSlopeAngle(Cos(inMaxSlopeAngle)) { }
 
 	// See: VehicleCollisionTester
 	virtual bool				Collide(PhysicsSystem &inPhysicsSystem, uint inWheelIndex, Vec3Arg inOrigin, Vec3Arg inDirection, float inSuspensionMaxLength, const BodyID &inVehicleBodyID, Body *&outBody, SubShapeID &outSubShapeID, Vec3 &outContactPosition, Vec3 &outContactNormal, float &outSuspensionLength) const override;
@@ -66,7 +66,7 @@ public:
 	/// @param inUp World space up vector, used to avoid colliding with vertical walls.
 	/// @param inRadius Radius of sphere
 	/// @param inMaxSlopeAngle Max angle (rad) that is considered for colliding wheels. This is to avoid colliding with vertical walls.
-								VehicleCollisionTesterCastSphere(ObjectLayer inObjectLayer, float inRadius, Vec3Arg inUp = Vec3::sAxisY(), float inMaxSlopeAngle = DegreesToRadians(80.0f)) : mObjectLayer(inObjectLayer), mRadius(inRadius), mUp(inUp), mCosMaxSlopeAngle(cos(inMaxSlopeAngle)) { }
+								VehicleCollisionTesterCastSphere(ObjectLayer inObjectLayer, float inRadius, Vec3Arg inUp = Vec3::sAxisY(), float inMaxSlopeAngle = DegreesToRadians(80.0f)) : mObjectLayer(inObjectLayer), mRadius(inRadius), mUp(inUp), mCosMaxSlopeAngle(Cos(inMaxSlopeAngle)) { }
 
 	// See: VehicleCollisionTester
 	virtual bool				Collide(PhysicsSystem &inPhysicsSystem, uint inWheelIndex, Vec3Arg inOrigin, Vec3Arg inDirection, float inSuspensionMaxLength, const BodyID &inVehicleBodyID, Body *&outBody, SubShapeID &outSubShapeID, Vec3 &outContactPosition, Vec3 &outContactNormal, float &outSuspensionLength) const override;

+ 1 - 1
Jolt/Physics/Vehicle/VehicleConstraint.h

@@ -53,7 +53,7 @@ public:
 	virtual EConstraintSubType	GetSubType() const override					{ return EConstraintSubType::Vehicle; }
 
 	/// Defines the maximum pitch/roll angle (rad), can be used to avoid the car from getting upside down. The vehicle up direction will stay within a cone centered around the up axis with half top angle mMaxPitchRollAngle, set to pi to turn off.
-	void						SetMaxPitchRollAngle(float inMaxPitchRollAngle) { mCosMaxPitchRollAngle = cos(inMaxPitchRollAngle); }
+	void						SetMaxPitchRollAngle(float inMaxPitchRollAngle) { mCosMaxPitchRollAngle = Cos(inMaxPitchRollAngle); }
 	
 	/// Set the interface that tests collision between wheel and ground
 	void						SetVehicleCollisionTester(const VehicleCollisionTester *inTester) { mVehicleCollisionTester = inTester; }

+ 1 - 1
Jolt/Physics/Vehicle/WheeledVehicleController.cpp

@@ -109,7 +109,7 @@ void WheelWV::Update(float inDeltaTime, const VehicleConstraint &inConstraint)
 
 		// Calculate lateral friction based on slip angle
 		float relative_velocity_len = relative_velocity.Length();
-		float lateral_slip_angle = relative_velocity_len < 1.0e-3f? 0.0f : RadiansToDegrees(acos(min(abs(relative_longitudinal_velocity) / relative_velocity_len, 1.0f)));
+		float lateral_slip_angle = relative_velocity_len < 1.0e-3f? 0.0f : RadiansToDegrees(ACos(min(abs(relative_longitudinal_velocity) / relative_velocity_len, 1.0f)));
 		float lateral_slip_friction = settings->mLateralFriction.GetValue(lateral_slip_angle);
 
 		// Tire friction

+ 10 - 10
Jolt/Renderer/DebugRenderer.cpp

@@ -486,8 +486,8 @@ void DebugRenderer::Initialize()
 				for (int i = 0; i <= num_parts; ++i)
 				{
 					float angle = 0.5f * JPH_PI * (float(q) + float(i) / num_parts);
-					float s = sin(angle);
-					float c = cos(angle);
+					float s = Sin(angle);
+					float c = Cos(angle);
 					Float3 vt(s, 1.0f, c);
 					Float3 vb(s, -1.0f, c);
 					Float3 n(s, 0, c);
@@ -528,8 +528,8 @@ void DebugRenderer::Initialize()
 				{
 					// Calculate bottom vertex
 					float angle = 0.5f * JPH_PI * (float(q) + float(i) / num_parts);
-					float s = sin(angle);
-					float c = cos(angle);
+					float s = Sin(angle);
+					float c = Cos(angle);
 					Float3 vb(s, 1.0f, c);
 
 					// Calculate normal
@@ -575,8 +575,8 @@ void DebugRenderer::Initialize()
 				for (int i = 0; i <= num_parts; ++i)
 				{
 					float angle = 0.5f * JPH_PI * (float(q) + float(i) / num_parts);
-					float s = sin(angle);
-					float c = cos(angle);
+					float s = Sin(angle);
+					float c = Cos(angle);
 					Float3 vt(s, 1.0f, c);
 					Float3 vb(s, -1.0f, c);
 					Float3 n(s, 0, c);
@@ -786,7 +786,7 @@ void DebugRenderer::DrawOpenCone(Vec3Arg inTop, Vec3Arg inAxis, Vec3Arg inPerpen
 	JPH_ASSERT(abs(inPerpendicular.Dot(inAxis)) < 1.0e-4f);
 
 	Vec3 axis = Sign(inHalfAngle) * inLength * inAxis;
-	float scale = inLength * tan(abs(inHalfAngle));
+	float scale = inLength * Tan(abs(inHalfAngle));
 	if (scale != 0.0f)
 	{
 		Vec3 perp1 = scale * inPerpendicular;
@@ -815,8 +815,8 @@ void DebugRenderer::DrawSwingLimits(Mat44Arg inMatrix, float inSwingYHalfAngle,
 		int half_num_segments = num_segments / 2;
 
 		// The y and z values of the quaternion are limited to an ellipse, e1 and e2 are the radii of this ellipse
-		float e1 = sin(0.5f * inSwingZHalfAngle);
-		float e2 = sin(0.5f * inSwingYHalfAngle);
+		float e1 = Sin(0.5f * inSwingZHalfAngle);
+		float e2 = Sin(0.5f * inSwingYHalfAngle);
 
 		// Check if the limits will draw something
 		if ((e1 <= 0.0f && e2 <= 0.0f) || (e2 >= 1.0f && e1 >= 1.0f))
@@ -960,7 +960,7 @@ void DebugRenderer::DrawPie(Vec3Arg inCenter, float inRadius, Vec3Arg inNormal,
 		{
 			float angle = float(i) / float(num_parts) * delta_angle;
 
-			Float3 pos = { cos(angle), 0, sin(angle) };
+			Float3 pos = { Cos(angle), 0, Sin(angle) };
 			*vertices++ = { pos, normal, { 0, 0 }, Color::sWhite };
 		}
 

+ 1 - 1
PerformanceTest/ConvexVsMeshScene.h

@@ -38,7 +38,7 @@ public:
 		for (int x = 0; x <= n; ++x)
 			for (int z = 0; z <= n; ++z)
 			{
-				float height = sin(float(x) * 50.0f / n) * cos(float(z) * 50.0f / n);
+				float height = Sin(float(x) * 50.0f / n) * Cos(float(z) * 50.0f / n);
 				vertices[z * (n + 1) + x] = Float3(cell_size * x, max_height * height, cell_size * z);
 			}
 

+ 55 - 4
PerformanceTest/PerformanceTest.cpp

@@ -10,6 +10,7 @@
 #include <Jolt/Physics/PhysicsSettings.h>
 #include <Jolt/Physics/PhysicsSystem.h>
 #include <Jolt/Physics/Collision/NarrowPhaseStats.h>
+#include <Jolt/Physics/StateRecorderImpl.h>
 #ifdef JPH_DEBUG_RENDERER
 	#include <Jolt/Renderer/DebugRendererRecorder.h>
 	#include <Jolt/Core/StreamWrapper.h>
@@ -63,6 +64,8 @@ int main(int argc, char** argv)
 	bool enable_debug_renderer = false;
 #endif // JPH_DEBUG_RENDERER
 	bool enable_per_frame_recording = false;
+	bool record_state = false;
+	bool validate_state = false;
 	unique_ptr<PerformanceTestScene> scene;
 	for (int argidx = 1; argidx < argc; ++argidx)
 	{
@@ -122,6 +125,14 @@ int main(int argc, char** argv)
 		{
 			enable_per_frame_recording = true;
 		}
+		else if (strcmp(arg, "-rs") == 0)
+		{
+			record_state = true;
+		}
+		else if (strcmp(arg, "-vs") == 0)
+		{
+			validate_state = true;
+		}
 		else if (strcmp(arg, "-h") == 0)
 		{
 			// Print usage
@@ -133,7 +144,9 @@ int main(int argc, char** argv)
 				 << "-p: Write out profiles" << endl
 				 << "-r: Record debug renderer output for JoltViewer" << endl
 				 << "-f: Record per frame timings" << endl
-				 << "-no_sleep: Disable sleeping" << endl;
+				 << "-no_sleep: Disable sleeping" << endl
+				 << "-rs: Record state" << endl
+				 << "-vs: Validate state" << endl;
 			return 0;
 		}
 	}
@@ -241,6 +254,13 @@ int main(int argc, char** argv)
 				per_frame_file << "Frame, Time (ms)" << endl;
 			}
 
+			ofstream record_state_file;
+			ifstream validate_state_file;
+			if (record_state)
+				record_state_file.open(("state_" + ToLower(motion_quality_str) + ".bin").c_str(), ofstream::out | ofstream::binary | ofstream::trunc);
+			else if (validate_state)
+				validate_state_file.open(("state_" + ToLower(motion_quality_str) + ".bin").c_str(), ifstream::in | ifstream::binary);
+
 			chrono::nanoseconds total_duration(0);
 
 			// Step the world for a fixed amount of iterations
@@ -280,25 +300,56 @@ int main(int argc, char** argv)
 				{
 					JPH_PROFILE_DUMP(tag + "_it" + ConvertToString(iterations));
 				}
+
+				if (record_state)
+				{
+					// Record state
+					StateRecorderImpl recorder;
+					physics_system.SaveState(recorder);
+
+					// Write to file
+					string data = recorder.GetData();
+					size_t size = data.size();
+					record_state_file.write((char *)&size, sizeof(size));
+					record_state_file.write(data.data(), size);
+				}
+				else if (validate_state)
+				{
+					// Read state
+					size_t size = 0;
+					validate_state_file.read((char *)&size, sizeof(size));
+					string data;
+					data.resize(size);
+					validate_state_file.read(data.data(), size);
+
+					// Copy to validator
+					StateRecorderImpl validator;
+					validator.WriteBytes(data.data(), size);
+
+					// Validate state
+					validator.SetValidating(true);
+					physics_system.RestoreState(validator);
+				}
 			}
 
 			// Calculate hash of all positions and rotations of the bodies
-			size_t hash = 0;
+			uint64 hash = HashBytes(nullptr, 0); // Ensure we start with the proper seed
 			BodyInterface &bi = physics_system.GetBodyInterfaceNoLock();
 			BodyIDVector body_ids;
 			physics_system.GetBodies(body_ids);
 			for (BodyID id : body_ids)
 			{
 				Vec3 pos = bi.GetPosition(id);
+				hash = HashBytes(&pos, 3 * sizeof(float), hash);
 				Quat rot = bi.GetRotation(id);
-				hash_combine(hash, pos.GetX(), pos.GetY(), pos.GetZ(), rot.GetX(), rot.GetY(), rot.GetZ(), rot.GetW());
+				hash = HashBytes(&rot, sizeof(Quat), hash);
 			}
 
 			// Stop test scene
 			scene->StopTest(physics_system);
 
 			// Trace stat line
-			cout << motion_quality_str << ", " << num_threads + 1 << ", " << double(max_iterations) / (1.0e-9 * total_duration.count()) << ", " << hash << endl;
+			cout << motion_quality_str << ", " << num_threads + 1 << ", " << double(max_iterations) / (1.0e-9 * total_duration.count()) << ", 0x" << hex << hash << dec << endl;
 		}
 	}
 

+ 4 - 4
Samples/Tests/Character/CharacterBaseTest.cpp

@@ -213,11 +213,11 @@ void CharacterBaseTest::PrePhysicsUpdate(const PreUpdateParams &inParams)
 
 	// Animate bodies
 	if (!mRotatingBody.IsInvalid())
-		mBodyInterface->MoveKinematic(mRotatingBody, cRotatingPosition, Quat::sRotation(Vec3::sAxisY(), JPH_PI * sin(mTime)), inParams.mDeltaTime);
+		mBodyInterface->MoveKinematic(mRotatingBody, cRotatingPosition, Quat::sRotation(Vec3::sAxisY(), JPH_PI * Sin(mTime)), inParams.mDeltaTime);
 	if (!mHorizontallyMovingBody.IsInvalid())
-		mBodyInterface->MoveKinematic(mHorizontallyMovingBody, cHorizontallyMovingPosition + Vec3(3.0f * sin(mTime), 0, 0), cHorizontallyMovingOrientation, inParams.mDeltaTime);
+		mBodyInterface->MoveKinematic(mHorizontallyMovingBody, cHorizontallyMovingPosition + Vec3(3.0f * Sin(mTime), 0, 0), cHorizontallyMovingOrientation, inParams.mDeltaTime);
 	if (!mVerticallyMovingBody.IsInvalid())
-		mBodyInterface->MoveKinematic(mVerticallyMovingBody, cVerticallyMovingPosition + Vec3(0, 1.75f * sin(mTime), 0), cVerticallyMovingOrientation, inParams.mDeltaTime);
+		mBodyInterface->MoveKinematic(mVerticallyMovingBody, cVerticallyMovingPosition + Vec3(0, 1.75f * Sin(mTime), 0), cVerticallyMovingOrientation, inParams.mDeltaTime);
 
 	// Reset ramp blocks
 	mRampBlocksTimeLeft -= inParams.mDeltaTime;
@@ -252,7 +252,7 @@ void CharacterBaseTest::GetInitialCamera(CameraState& ioState) const
 Mat44 CharacterBaseTest::GetCameraPivot(float inCameraHeading, float inCameraPitch) const 
 {
 	// Pivot is center of character + distance behind based on the heading and pitch of the camera
-	Vec3 fwd = Vec3(cos(inCameraPitch) * cos(inCameraHeading), sin(inCameraPitch), cos(inCameraPitch) * sin(inCameraHeading));
+	Vec3 fwd = Vec3(Cos(inCameraPitch) * Cos(inCameraHeading), Sin(inCameraPitch), Cos(inCameraPitch) * Sin(inCameraHeading));
 	return Mat44::sTranslation(GetCharacterPosition() + Vec3(0, cCharacterHeightStanding + cCharacterRadiusStanding, 0) - 5.0f * fwd);
 }
 

+ 2 - 2
Samples/Tests/Constraints/PathConstraintTest.cpp

@@ -29,7 +29,7 @@ void PathConstraintTest::Initialize()
 		Vec3 normal(0, 1, 0);
 		Array<Vec3> positions;
 		for (float a = -0.1f * JPH_PI; a < 4.0f * JPH_PI; a += 0.1f * JPH_PI)
-			positions.push_back(Vec3(5.0f * cos(a), -a, 5.0f * sin(a)));
+			positions.push_back(Vec3(5.0f * Cos(a), -a, 5.0f * Sin(a)));
 		for (int i = 1; i < int(positions.size() - 1); ++i)
 		{
 			Vec3 tangent = 0.5f * (positions[i + 1] - positions[i - 1]);
@@ -64,7 +64,7 @@ void PathConstraintTest::Initialize()
 		for (int i = -1; i < 11; ++i)
 		{
 			float a = 2.0f * JPH_PI * i / 10.0f;
-			positions.push_back(Vec3(5.0f * cos(a), 0.0f, 5.0f * sin(a)));
+			positions.push_back(Vec3(5.0f * Cos(a), 0.0f, 5.0f * Sin(a)));
 		}
 		for (int i = 1; i < int(positions.size() - 1); ++i)
 		{

+ 2 - 2
Samples/Tests/ConvexCollision/ConvexHullTest.cpp

@@ -426,7 +426,7 @@ void ConvexHullTest::Initialize()
 		Mat44 rot = Mat44::sRotationZ(0.25f * JPH_PI);
 		for (float r = 0.0f; r < 2.0f; r += 0.1f)
 			for (float phi = 0.0f; phi <= 2.0f * JPH_PI; phi += 2.0f * JPH_PI / 20.0f)
-				p.push_back(rot * Vec3(r * cos(phi), r * sin(phi), 0));
+				p.push_back(rot * Vec3(r * Cos(phi), r * Sin(phi), 0));
 		mPoints.push_back(move(p));
 	}
 
@@ -435,7 +435,7 @@ void ConvexHullTest::Initialize()
 		Points p;
 		for (float phi = 0.0f; phi <= 2.0f * JPH_PI; phi += 2.0f * JPH_PI / 40.0f)
 		{
-			Vec3 pos(2.0f * cos(phi), 0, 2.0f * sin(phi));
+			Vec3 pos(2.0f * Cos(phi), 0, 2.0f * Sin(phi));
 			p.push_back(pos);
 			p.push_back(pos + Vec3(0, 2.0e-3f * (2.0f + pos.GetX()) / 4.0f, 0));
 		}

+ 1 - 1
Samples/Tests/ConvexCollision/EPATest.cpp

@@ -70,7 +70,7 @@ bool EPATest::CollideBoxSphere(Mat44Arg inMatrix, const AABox &inBox, const Sphe
 		// Check angle between v1 and v2
 		float dot = v1.Dot(v2);
 		float len = v1.Length() * v2.Length();
-		float angle = RadiansToDegrees(acos(Clamp(dot / len, -1.0f, 1.0f)));
+		float angle = RadiansToDegrees(ACos(Clamp(dot / len, -1.0f, 1.0f)));
 		JPH_ASSERT(angle < 0.1f);
 		Trace("Angle = %.9g", (double)angle);
 

+ 1 - 1
Samples/Tests/ConvexCollision/InteractivePairsTest.cpp

@@ -47,7 +47,7 @@ void InteractivePairsTest::PrePhysicsUpdate(const PreUpdateParams &inParams)
 	float z = 0.0f;
 
 	const float r1 = 0.25f * JPH_PI;
-	const float r2 = atan(1.0f / sqrt(2.0f)); // When rotating cube by 45 degrees the one axis becomes sqrt(2) long while the other stays at length 1
+	const float r2 = ATan(1.0f / sqrt(2.0f)); // When rotating cube by 45 degrees the one axis becomes sqrt(2) long while the other stays at length 1
 
 	for (int i = 0; i < 2; ++i)
 	{

+ 1 - 1
Samples/Tests/General/ChangeMotionTypeTest.cpp

@@ -45,7 +45,7 @@ void ChangeMotionTypeTest::PrePhysicsUpdate(const PreUpdateParams &inParams)
 
 	// Provide kinematic body a target
 	if (motion_type == EMotionType::Kinematic)
-		mBody->MoveKinematic(Vec3(sin(mTime), 10, 0), Quat::sRotation(Vec3::sAxisX(), cos(mTime)), inParams.mDeltaTime);
+		mBody->MoveKinematic(Vec3(Sin(mTime), 10, 0), Quat::sRotation(Vec3::sAxisX(), Cos(mTime)), inParams.mDeltaTime);
 }
 
 void ChangeMotionTypeTest::SaveState(StateRecorder &inStream) const

+ 1 - 1
Samples/Tests/General/SensorTest.cpp

@@ -91,7 +91,7 @@ void SensorTest::PrePhysicsUpdate(const PreUpdateParams &inParams)
 	mTime += inParams.mDeltaTime;
 
 	// Move kinematic body
-	Vec3 kinematic_pos = Vec3(-20.0f * cos(mTime), 10, 0);
+	Vec3 kinematic_pos = Vec3(-20.0f * Cos(mTime), 10, 0);
 	mBodyInterface->MoveKinematic(mKinematicBodyID, kinematic_pos, Quat::sIdentity(), inParams.mDeltaTime);
 
 	// Draw if body is in sensor

+ 2 - 2
Samples/Tests/Shapes/ConvexHullShapeTest.cpp

@@ -55,8 +55,8 @@ void ConvexHullShapeTest::Initialize()
 	Array<Vec3> tapered_cylinder;
 	for (float theta = 0.0f; theta <= 2.0f * JPH_PI; theta += JPH_PI / 128.0f)
 	{
-		tapered_cylinder.push_back(4.0f * Vec3(-0.1f, sin(theta), cos(theta)));
-		tapered_cylinder.push_back(4.5f * Vec3(0.1f, sin(theta), cos(theta)));
+		tapered_cylinder.push_back(4.0f * Vec3(-0.1f, Sin(theta), Cos(theta)));
+		tapered_cylinder.push_back(4.5f * Vec3(0.1f, Sin(theta), Cos(theta)));
 	}
 
 	Body &body_tapered_cylinder = *mBodyInterface->CreateBody(BodyCreationSettings(new ConvexHullShapeSettings(tapered_cylinder), Vec3(60, 10, 0), Quat::sIdentity(), EMotionType::Dynamic, Layers::MOVING));

+ 1 - 1
Samples/Tests/Shapes/RotatedTranslatedShapeTest.cpp

@@ -23,7 +23,7 @@ void RotatedTranslatedShapeTest::Initialize()
 	Array<Vec3> points;
 	points.push_back(Vec3(0, 2.5f, 0));
 	for (float a = 0; a < DegreesToRadians(360); a += DegreesToRadians(36))
-		points.push_back(Vec3(sin(a), -2.5f, cos(a)));
+		points.push_back(Vec3(Sin(a), -2.5f, Cos(a)));
 	Ref<ConvexHullShapeSettings> convex_hull = new ConvexHullShapeSettings(points);
 
 	// Offset and rotate so that the cone is upside down on its point

+ 3 - 3
Samples/Tests/Vehicle/TankTest.cpp

@@ -244,13 +244,13 @@ void TankTest::PrePhysicsUpdate(const PreUpdateParams &inParams)
 	// Orient the turret towards the hit position
 	Mat44 turret_to_world = mTankBody->GetCenterOfMassTransform() * mTurretHinge->GetConstraintToBody1Matrix();
 	Vec3 hit_pos_in_turret = turret_to_world.InversedRotationTranslation() * hit_pos;
-	float heading = atan2(hit_pos_in_turret.GetZ(), hit_pos_in_turret.GetY());
+	float heading = ATan2(hit_pos_in_turret.GetZ(), hit_pos_in_turret.GetY());
 	mTurretHinge->SetTargetAngle(heading);
 
 	// Orient barrel towards the hit position
 	Mat44 barrel_to_world = mTurretBody->GetCenterOfMassTransform() * mBarrelHinge->GetConstraintToBody1Matrix();
 	Vec3 hit_pos_in_barrel = barrel_to_world.InversedRotationTranslation() * hit_pos;
-	float pitch = atan2(hit_pos_in_barrel.GetZ(), hit_pos_in_barrel.GetY());
+	float pitch = ATan2(hit_pos_in_barrel.GetZ(), hit_pos_in_barrel.GetY());
 	mBarrelHinge->SetTargetAngle(pitch);
 
 	// Update reload time
@@ -296,6 +296,6 @@ void TankTest::GetInitialCamera(CameraState &ioState) const
 Mat44 TankTest::GetCameraPivot(float inCameraHeading, float inCameraPitch) const 
 {
 	// Pivot is center of tank + a distance away from the tank based on the heading and pitch of the camera
-	Vec3 fwd = Vec3(cos(inCameraPitch) * cos(inCameraHeading), sin(inCameraPitch), cos(inCameraPitch) * sin(inCameraHeading));
+	Vec3 fwd = Vec3(Cos(inCameraPitch) * Cos(inCameraHeading), Sin(inCameraPitch), Cos(inCameraPitch) * Sin(inCameraHeading));
 	return Mat44::sTranslation(mTankBody->GetPosition() - 10.0f * fwd);
 }

+ 1 - 1
Samples/Utils/ShapeCreator.cpp

@@ -22,7 +22,7 @@ ShapeRefC CreateTorusMesh(float inTorusRadius, float inTubeRadius, uint inTorusS
 		{
 			// Create vertices
 			float tube_angle = float(tube_segment) * 2.0f * JPH_PI / inTubeSegments;
-			Vec3 pos = rotation * Vec3(inTorusRadius + inTubeRadius * sin(tube_angle), inTubeRadius * cos(tube_angle), 0);
+			Vec3 pos = rotation * Vec3(inTorusRadius + inTubeRadius * Sin(tube_angle), inTubeRadius * Cos(tube_angle), 0);
 			Float3 v;
 			pos.StoreFloat3(&v);
 			mesh.mTriangleVertices.push_back(v);

+ 3 - 3
TestFramework/Application/Application.cpp

@@ -232,8 +232,8 @@ void Application::Run()
 
 void Application::GetCameraLocalHeadingAndPitch(float &outHeading, float &outPitch)
 {
-	outHeading = atan2(mLocalCamera.mForward.GetZ(), mLocalCamera.mForward.GetX());
-	outPitch = atan2(mLocalCamera.mForward.GetY(), Vec3(mLocalCamera.mForward.GetX(), 0, mLocalCamera.mForward.GetZ()).Length());
+	outHeading = ATan2(mLocalCamera.mForward.GetZ(), mLocalCamera.mForward.GetX());
+	outPitch = ATan2(mLocalCamera.mForward.GetY(), Vec3(mLocalCamera.mForward.GetX(), 0, mLocalCamera.mForward.GetZ()).Length());
 }
 
 void Application::ConvertCameraLocalToWorld(float inCameraHeading, float inCameraPitch)
@@ -284,7 +284,7 @@ void Application::UpdateCamera(float inDeltaTime)
 	GetCameraLocalHeadingAndPitch(heading, pitch);
 	heading += DegreesToRadians(mMouse->GetDX() * 0.5f);
 	pitch = Clamp(pitch - DegreesToRadians(mMouse->GetDY() * 0.5f), -0.49f * JPH_PI, 0.49f * JPH_PI);
-	mLocalCamera.mForward = Vec3(cos(pitch) * cos(heading), sin(pitch), cos(pitch) * sin(heading));
+	mLocalCamera.mForward = Vec3(Cos(pitch) * Cos(heading), Sin(pitch), Cos(pitch) * Sin(heading));
 
 	// Convert to world space
 	ConvertCameraLocalToWorld(heading, pitch);

+ 1 - 1
TestFramework/Image/ZoomImage.cpp

@@ -138,7 +138,7 @@ private:
 		if (abs(x) < 1.0e-5f)
 			return 1.0f;
 
-		return sin(x) / x;		
+		return Sin(x) / x;		
 	}
 
 };

+ 1 - 1
TestFramework/Renderer/Renderer.cpp

@@ -486,7 +486,7 @@ void Renderer::BeginFrame(const CameraState &inCamera, float inWorldScale)
 	// Camera properties
 	float camera_fovy = inCamera.mFOVY;
 	float camera_aspect = static_cast<float>(GetWindowWidth()) / GetWindowHeight();
-	float camera_fovx = 2.0f * atan(camera_aspect * tan(0.5f * camera_fovy));
+	float camera_fovx = 2.0f * ATan(camera_aspect * Tan(0.5f * camera_fovy));
 	float camera_near = 0.01f * inWorldScale;
 	float camera_far = inCamera.mFarPlane * inWorldScale;
 

+ 1 - 1
UnitTests/Geometry/ConvexHullBuilderTest.cpp

@@ -78,7 +78,7 @@ TEST_SUITE("ConvexHullBuilderTest")
 				{
 					float f_r = 2.0f * r;
 					float f_phi = 2.0f * JPH_PI * phi / 10;
-					positions.push_back(Vec3(f_r * cos(f_phi), f_r * sin(f_phi), 0));
+					positions.push_back(Vec3(f_r * Cos(f_phi), f_r * Sin(f_phi), 0));
 				}
 
 			ConvexHullBuilder builder(positions);

+ 1 - 1
UnitTests/Geometry/EPATests.cpp

@@ -19,7 +19,7 @@ TEST_SUITE("EPATests")
 	{
 		float dot = inV1.Dot(inV2);
 		float len = inV1.Length() * inV2.Length();
-		return RadiansToDegrees(acos(Clamp(dot / len, -1.0f, 1.0f)));
+		return RadiansToDegrees(ACos(Clamp(dot / len, -1.0f, 1.0f)));
 	}
 
 	/// Test box versus sphere and compare analytical solution with that of the EPA algorithm

+ 31 - 0
UnitTests/Math/Vec4Tests.cpp

@@ -507,4 +507,35 @@ TEST_SUITE("Vec4Tests")
 									}
 								}
 	}
+
+	TEST_CASE("TestVec4SinCos")
+	{
+		double ms = 0.0, mc = 0.0;
+
+		for (float x = -100.0f * JPH_PI; x < 100.0f * JPH_PI; x += 1.0e-3f)
+		{
+			// Create a vector with intermediate values
+			Vec4 xv = Vec4::sReplicate(x) + Vec4(0.0e-4f, 2.5e-4f, 5.0e-4f, 7.5e-4f);
+
+			// Calculate sin and cos
+			Vec4 vs, vc;
+			xv.SinCos(vs, vc);
+
+			for (int i = 0; i < 4; ++i)
+			{
+				// Check accuracy of sin
+				double s1 = sin((double)xv[i]), s2 = (double)vs[i];
+				double ds = abs(s2 - s1);
+				ms = max(ms, ds);
+
+				// Check accuracy of cos
+				double c1 = cos((double)xv[i]), c2 = (double)vc[i];
+				double dc = abs(c2 - c1);
+				mc = max(mc, dc);
+			}
+		}
+
+		CHECK(ms < 1.0e-7f);
+		CHECK(mc < 1.0e-7f);
+	}
 }