Forráskód Böngészése

Support for Windows 32-bit platform (#162)

* Cast up SIZE_T -> UINT64 explicitly when going from CPU -> GPU descriptor handle
* Change JPH_CPU_X64 -> JPH_CPU_X86 platform
* Added JPH_CPU_ADDRESS_BITS to indicate if we're 32/64 bit
* Disable warning 4826.
* Handle TickCounter for JPH_CPU_X86
* Implement ctz and clz ops for JPH_CPU_X86
* Handle Body padding for JPH_CPU_X86
* Handle SubShape size difference for 32-bit
* Added CMake batch file for 32 bit
* Temp allocator needs to return 16-byte aligned memory; malloc only aligns to 8 bytes on Win32
* Workaround for what I think is a compiler bug: when the Vec/Matrix classes are not explicitly aligned to 16 bytes, the compiler passes the address of a Vec3 and then tries to read it as a value.
* The base class CharacterBaseSettings did not have a virtual destructor. When newing CharacterVirtualSettings it would go through aligned_malloc(..., 16) to ensure that the SIMD members are aligned, but when freeing we were freeing a CharacterBaseSettings, so we'd go through the regular free function (instead of aligned_free), causing memory corruption.
* GitHub workflow now builds a 32-bit debug build by default to ensure that it stays working

Co-authored-by: Jorrit Rouwe
Joshie 3 éve
szülő
commit
4fe61fc24d

+ 21 - 0
.github/workflows/build.yml

@@ -76,6 +76,27 @@ jobs:
       working-directory: ${{github.workspace}}/Build/VS2022_CL/${{matrix.build_type}}
       run: ./UnitTests.exe
 
+  msvc_cl_32_bit:
+    runs-on: windows-latest
+    name: Visual Studio CL 32-bit
+    strategy:
+        fail-fast: false
+        matrix:
+            build_type: [Debug]
+
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v2
+    - name: Add msbuild to PATH
+      uses: microsoft/setup-msbuild@v1.0.2
+    - name: Configure CMake
+      run: cmake -B ${{github.workspace}}/Build/VS2022_CL_32_BIT -G "Visual Studio 17 2022" -A Win32 -DUSE_SSE4_1=OFF -DUSE_SSE4_2=OFF -DUSE_AVX=OFF -DUSE_AVX2=OFF -DUSE_LZCNT=OFF -DUSE_TZCNT=OFF -DUSE_F16C=OFF -DUSE_FMADD=OFF Build
+    - name: Build
+      run: msbuild Build\VS2022_CL_32_BIT\JoltPhysics.sln /property:Configuration=${{matrix.build_type}}
+    - name: Test
+      working-directory: ${{github.workspace}}/Build/VS2022_CL_32_BIT/${{matrix.build_type}}
+      run: ./UnitTests.exe
+
   macos:
     runs-on: macos-latest
     name: MacOS

+ 1 - 1
Build/CMakeLists.txt

@@ -53,7 +53,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUAL
 	set(CMAKE_CXX_FLAGS_RELEASECOVERAGE "-fprofile-instr-generate -fcoverage-mapping")
 
 	# Set linker flags
-	set(CMAKE_EXE_LINKER_FLAGS "/machine:x64 /SUBSYSTEM:WINDOWS /ignore:4221 /DEBUG:FASTLINK")
+	set(CMAKE_EXE_LINKER_FLAGS "/SUBSYSTEM:WINDOWS /ignore:4221 /DEBUG:FASTLINK")
 	if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast") # Clang doesn't use fast math because it cannot be turned off inside a single compilation unit
 		if (USE_AVX2)

+ 3 - 0
Build/cmake_vs2022_cl_32bit.bat

@@ -0,0 +1,3 @@
+@echo off
+cmake -S . -B VS2022_CL_32BIT -G "Visual Studio 17 2022" -A Win32 -DUSE_SSE4_1=OFF -DUSE_SSE4_2=OFF -DUSE_AVX=OFF -DUSE_AVX2=OFF -DUSE_LZCNT=OFF -DUSE_TZCNT=OFF -DUSE_F16C=OFF -DUSE_FMADD=OFF
+echo Open VS2022_CL_32BIT\JoltPhysics.sln to build the project.

+ 12 - 5
Jolt/Core/Core.h

@@ -42,9 +42,14 @@
 #endif
 
 // Detect CPU architecture
-#if defined(__x86_64__) || defined(_M_X64)
+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
 	// X86 CPU architecture
-	#define JPH_CPU_X64
+	#define JPH_CPU_X86
+	#if defined(__x86_64__) || defined(_M_X64)
+		#define JPH_CPU_ADDRESS_BITS 64
+	#else
+		#define JPH_CPU_ADDRESS_BITS 32
+	#endif
 	#define JPH_USE_SSE
 
 	// Detect enabled instruction sets
@@ -84,6 +89,7 @@
 	// ARM64 CPU architecture
 	#define JPH_CPU_ARM64
 	#define JPH_USE_NEON
+	#define JPH_CPU_ADDRESS_BITS 64
 #else
 	#error Unsupported CPU architecture
 #endif
@@ -158,7 +164,8 @@
 	JPH_MSVC_SUPPRESS_WARNING(5045) /* Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified */ \
 	JPH_MSVC_SUPPRESS_WARNING(4583) /* 'X': destructor is not implicitly called */				\
 	JPH_MSVC_SUPPRESS_WARNING(4582) /* 'X': constructor is not implicitly called */				\
-	JPH_MSVC_SUPPRESS_WARNING(5219) /* implicit conversion from 'X' to 'Y', possible loss of data  */
+	JPH_MSVC_SUPPRESS_WARNING(5219) /* implicit conversion from 'X' to 'Y', possible loss of data  */ \
+	JPH_MSVC_SUPPRESS_WARNING(4826) /* Conversion from 'X *' to 'JPH::uint64' is sign-extended. This may cause unexpected runtime behavior. (32-bit) */
 
 // OS-specific includes
 #if defined(JPH_PLATFORM_WINDOWS)
@@ -174,7 +181,7 @@
 	#include <limits.h>
 	#include <string.h>
 
-	#if defined(JPH_CPU_X64)
+	#if defined(JPH_CPU_X86)
 		#define JPH_BREAKPOINT		__asm volatile ("int $0x3")
 	#elif defined(JPH_CPU_ARM64)
 		#define JPH_BREAKPOINT		__builtin_trap()
@@ -240,7 +247,7 @@ static_assert(sizeof(uint8) == 1, "Invalid size of uint8");
 static_assert(sizeof(uint16) == 2, "Invalid size of uint16");
 static_assert(sizeof(uint32) == 4, "Invalid size of uint32");
 static_assert(sizeof(uint64) == 8, "Invalid size of uint64");
-static_assert(sizeof(void *) == 8, "Invalid size of pointer");
+static_assert(sizeof(void *) == (JPH_CPU_ADDRESS_BITS == 64? 8 : 4), "Invalid size of pointer" );
 
 // Define inline macro
 #if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC)

+ 3 - 2
Jolt/Core/TempAllocator.h

@@ -4,6 +4,7 @@
 #pragma once
 
 #include <Jolt/Core/NonCopyable.h>
+#include <Jolt/Core/Memory.h>
 
 JPH_NAMESPACE_BEGIN
 
@@ -95,13 +96,13 @@ public:
 	// See: TempAllocator
 	virtual void *					Allocate(uint inSize) override
 	{
-		return malloc(inSize);
+		return AlignedAlloc(inSize, 16);
 	}
 
 	// See: TempAllocator
 	virtual void					Free(void *inAddress, uint inSize) override
 	{
-		free(inAddress);
+		AlignedFree(inAddress);
 	}
 };
 

+ 1 - 1
Jolt/Core/TickCounter.cpp

@@ -65,7 +65,7 @@ static const uint64 sProcessorTicksPerSecond = []() {
 			string line;
 			getline(ifs, line);
 		
-		#if defined(JPH_CPU_X64)
+		#if defined(JPH_CPU_X86)
 			const char *cpu_str = "cpu MHz";
 		#elif defined(JPH_CPU_ARM64)
 			const char *cpu_str = "BogoMIPS";

+ 2 - 2
Jolt/Core/TickCounter.h

@@ -6,7 +6,7 @@
 // Include for __rdtsc
 #if defined(JPH_PLATFORM_WINDOWS)
 	#include <intrin.h> 
-#elif defined(JPH_CPU_X64) && defined(JPH_COMPILER_GCC)
+#elif defined(JPH_CPU_X86) && defined(JPH_COMPILER_GCC)
 	#include <x86intrin.h>
 #endif
 
@@ -24,7 +24,7 @@ JPH_INLINE uint64 GetProcessorTickCount()
 {
 #if defined(JPH_PLATFORM_BLUE)
 	return JPH_PLATFORM_BLUE_GET_TICKS();
-#elif defined(JPH_CPU_X64)
+#elif defined(JPH_CPU_X86)
 	return __rdtsc();
 #elif defined(JPH_CPU_ARM64)
 	uint64 val;

+ 1 - 1
Jolt/Math/Mat44.h

@@ -8,7 +8,7 @@
 JPH_NAMESPACE_BEGIN
 
 /// Holds a 4x4 matrix of floats, but supports also operations on the 3x3 upper left part of the matrix.
-class [[nodiscard]] Mat44
+class [[nodiscard]] alignas(16) Mat44
 {
 public:
 	// Underlying column type

+ 2 - 2
Jolt/Math/Math.h

@@ -93,7 +93,7 @@ inline bool IsAligned(T inV, uint64 inAlignment)
 /// Compute number of trailing zero bits (how many low bits are zero)
 inline uint CountTrailingZeros(uint32 inValue)
 {
-#if defined(JPH_CPU_X64)
+#if defined(JPH_CPU_X86)
 	#if defined(JPH_USE_TZCNT)
 		return _tzcnt_u32(inValue);
 	#elif defined(JPH_COMPILER_MSVC)
@@ -117,7 +117,7 @@ inline uint CountTrailingZeros(uint32 inValue)
 /// Compute the number of leading zero bits (how many high bits are zero)
 inline uint CountLeadingZeros(uint32 inValue)
 {
-#if defined(JPH_CPU_X64)
+#if defined(JPH_CPU_X86)
 	#if defined(JPH_USE_LZCNT)
 		return _lzcnt_u32(inValue);
 	#elif defined(JPH_COMPILER_MSVC)

+ 1 - 1
Jolt/Math/Quat.h

@@ -28,7 +28,7 @@ JPH_NAMESPACE_BEGIN
 /// it easy to extract the rotation axis of the quaternion:
 ///
 /// q = [cos(angle / 2), sin(angle / 2) * rotation_axis]
-class [[nodiscard]] Quat
+class [[nodiscard]] alignas(16) Quat
 {
 public:
 	///@name Constructors

+ 1 - 1
Jolt/Math/UVec4.h

@@ -7,7 +7,7 @@
 
 JPH_NAMESPACE_BEGIN
 
-class [[nodiscard]] UVec4
+class [[nodiscard]] alignas(16) UVec4
 {
 public:
 	// Underlying vector type

+ 1 - 1
Jolt/Math/Vec3.h

@@ -11,7 +11,7 @@ JPH_NAMESPACE_BEGIN
 
 /// 3 component vector (stored as 4 vectors). 
 /// Note that we keep the 4th component the same as the 3rd component to avoid divisions by zero when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED defined
-class [[nodiscard]] Vec3
+class [[nodiscard]] alignas(16) Vec3
 {
 public:
 	// Underlying vector type

+ 1 - 1
Jolt/Math/Vec4.h

@@ -9,7 +9,7 @@
 
 JPH_NAMESPACE_BEGIN
 
-class [[nodiscard]] Vec4
+class [[nodiscard]] alignas(16) Vec4
 {
 public:
 	// Underlying vector type

+ 6 - 1
Jolt/Physics/Body/Body.h

@@ -306,7 +306,12 @@ private:
 	EMotionType				mMotionType;													///< Type of motion (static, dynamic or kinematic)
 	atomic<uint8>			mFlags = 0;														///< See EFlags for possible flags
 	
-	// 121 bytes up to here
+	// 121 bytes up to here (64-bit mode)
+
+#if JPH_CPU_ADDRESS_BITS == 32
+	// Padding for 32 bit mode
+	char					mPadding[19];
+#endif
 };
 
 static_assert(sizeof(Body) == 128, "Body should be 128 bytes");

+ 3 - 0
Jolt/Physics/Character/CharacterBase.h

@@ -19,6 +19,9 @@ class StateRecorder;
 class CharacterBaseSettings : public RefTarget<CharacterBaseSettings>
 {
 public:
+	/// Virtual destructor
+	virtual								~CharacterBaseSettings() = default;
+
 	/// Maximum angle of slope that character can still walk on (radians).
 	float								mMaxSlopeAngle = DegreesToRadians(50.0f);
 

+ 1 - 1
Jolt/Physics/Collision/Shape/CompoundShape.h

@@ -226,7 +226,7 @@ public:
 		// 3 padding bytes left
 	};
 
-	static_assert(sizeof(SubShape) == 40, "Compiler added unexpected padding");
+	static_assert(sizeof(SubShape) == (JPH_CPU_ADDRESS_BITS == 64? 40 : 36), "Compiler added unexpected padding");
 
 	using SubShapes = vector<SubShape>;
 

+ 1 - 1
README.md

@@ -77,7 +77,7 @@ For more information see the [Architecture and API documentation](https://jrouwe
 
 ## Supported Platforms
 
-* Windows (VS2019, VS2022) x64 (Desktop/UWP)
+* Windows (VS2019, VS2022) x64/x86 (Desktop/UWP)
 * Linux (tested on Ubuntu 20.04) x64/ARM64
 * Android (tested on Android 10) x64/ARM64
 * Platform Blue (a popular game console) x64

+ 1 - 1
TestFramework/Renderer/DescriptorHeap.h

@@ -60,7 +60,7 @@ public:
 	D3D12_GPU_DESCRIPTOR_HANDLE			ConvertToGPUHandle(D3D12_CPU_DESCRIPTOR_HANDLE inHandle)
 	{
 		JPH_ASSERT(mGPUOffset != -1);
-		return { inHandle.ptr + mGPUOffset };
+		return { UINT64(inHandle.ptr) + mGPUOffset };
 	}
 
 	/// Access to the underlying DirectX structure