Ver Fonte

SSE2 version now produces the same simulation as SSE4.1 and higher (#621)

This means the Windows 32 vs 64-bit versions produce the same deterministic results
Jorrit Rouwe há 2 anos atrás
pai
commit
c52a6cedf8

+ 21 - 0
.github/workflows/determinism_check.yml

@@ -55,6 +55,27 @@ jobs:
       working-directory: ${{github.workspace}}/Build/VS2022_CL/Distribution
       run: ./PerformanceTest -q=LinearCast -t=2 -s=Ragdoll "-validate_hash=$env:RAGDOLL_HASH"
 
+  msvc_cl_32:
+    runs-on: windows-latest
+    name: Visual Studio CL 32-bit Determinism Check
+
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v3
+    - name: Add msbuild to PATH
+      uses: microsoft/setup-msbuild@v1.3
+    - name: Configure CMake
+      working-directory: ${{github.workspace}}/Build
+      run: ./cmake_vs2022_cl_32bit.bat -DCROSS_PLATFORM_DETERMINISTIC=ON -DTARGET_VIEWER=OFF -DTARGET_SAMPLES=OFF -DTARGET_HELLO_WORLD=OFF -DTARGET_UNIT_TESTS=OFF
+    - name: Build
+      run: msbuild Build\VS2022_CL_32BIT\JoltPhysics.sln /property:Configuration=Distribution
+    - name: Test ConvexVsMesh
+      working-directory: ${{github.workspace}}/Build/VS2022_CL_32BIT/Distribution
+      run: ./PerformanceTest -q=LinearCast -t=2 -s=ConvexVsMesh "-validate_hash=$env:CONVEX_VS_MESH_HASH"
+    - name: Test Ragdoll
+      working-directory: ${{github.workspace}}/Build/VS2022_CL_32BIT/Distribution
+      run: ./PerformanceTest -q=LinearCast -t=2 -s=Ragdoll "-validate_hash=$env:RAGDOLL_HASH"
+
   macos:
     runs-on: macos-latest
     name: macOS Determinism Check

+ 1 - 1
Build/cmake_vs2022_cl_32bit.bat

@@ -1,3 +1,3 @@
 @echo off
-cmake -S . -B VS2022_CL_32BIT -G "Visual Studio 17 2022" -A Win32 -DUSE_SSE4_1=OFF -DUSE_SSE4_2=OFF -DUSE_AVX=OFF -DUSE_AVX2=OFF -DUSE_AVX512=OFF -DUSE_LZCNT=OFF -DUSE_TZCNT=OFF -DUSE_F16C=OFF -DUSE_FMADD=OFF
+cmake -S . -B VS2022_CL_32BIT -G "Visual Studio 17 2022" -A Win32 -DUSE_SSE4_1=OFF -DUSE_SSE4_2=OFF -DUSE_AVX=OFF -DUSE_AVX2=OFF -DUSE_AVX512=OFF -DUSE_LZCNT=OFF -DUSE_TZCNT=OFF -DUSE_F16C=OFF -DUSE_FMADD=OFF %*
 echo Open VS2022_CL_32BIT\JoltPhysics.sln to build the project.

+ 2 - 1
Docs/Architecture.md

@@ -385,11 +385,12 @@ If you want cross platform determinism then please turn on the CROSS_PLATFORM_DE
 Some caveats:
 
 * The same source code must be used to compile the library on all platforms.
-* Applications compiled in 32-bit mode do not produce the same results as applications compiled in 64-bit mode. Compile your application in the same mode for all platforms.
+* The source code must be compiled with the same defines, e.g. you can't have one platform using JPH_DOUBLE_PRECISION and another not.
 
 It is quite difficult to verify cross platform determinism, so this feature is less tested than other features. With every build, the following architectures are verified to produce the same results:
 
 * Windows MSVC x86 64-bit with AVX2
+* Windows MSVC x86 32-bit with SSE2
 * macOS clang x86 64-bit with AVX
 * Linux clang x86 64-bit with AVX2
 * Linux clang ARM 64-bit with NEON 

+ 3 - 1
Jolt/Core/Core.h

@@ -293,6 +293,7 @@
 	JPH_MSVC_SUPPRESS_WARNING(4826) /* Conversion from 'X *' to 'JPH::uint64' is sign-extended. This may cause unexpected runtime behavior. (32-bit) */ \
 	JPH_MSVC_SUPPRESS_WARNING(5264) /* 'X': 'const' variable is not used */						\
 	JPH_MSVC_SUPPRESS_WARNING(4251) /* class 'X' needs to have DLL-interface to be used by clients of class 'Y' */ \
+	JPH_MSVC_SUPPRESS_WARNING(4738) /* storing 32-bit float result in memory, possible loss of performance */ \
 	JPH_MSVC2019_SUPPRESS_WARNING(5246) /* the initialization of a subobject should be wrapped in braces */
 
 // OS-specific includes
@@ -340,7 +341,8 @@
 	JPH_MSVC_SUPPRESS_WARNING(4820)																\
 	JPH_MSVC_SUPPRESS_WARNING(4514)																\
 	JPH_MSVC_SUPPRESS_WARNING(5262)																\
-	JPH_MSVC_SUPPRESS_WARNING(5264)
+	JPH_MSVC_SUPPRESS_WARNING(5264)																\
+	JPH_MSVC_SUPPRESS_WARNING(4738)
 
 #define JPH_SUPPRESS_WARNINGS_STD_END															\
 	JPH_SUPPRESS_WARNING_POP

+ 8 - 13
Jolt/Math/Vec4.inl

@@ -614,10 +614,8 @@ Vec4 Vec4::DotV(Vec4Arg inV2) const
     float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
     return vdupq_n_f32(vaddvq_f32(mul));
 #else
-	float dot = 0.0f;
-	for (int i = 0; i < 4; i++)
-		dot += mF32[i] * inV2.mF32[i];
-	return Vec4::sReplicate(dot);
+	// Brackets placed so that the order is consistent with the vectorized version
+	return Vec4::sReplicate((mF32[0] * inV2.mF32[0] + mF32[1] * inV2.mF32[1]) + (mF32[2] * inV2.mF32[2] + mF32[3] * inV2.mF32[3]));
 #endif
 }
 
@@ -629,10 +627,8 @@ float Vec4::Dot(Vec4Arg inV2) const
     float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
     return vaddvq_f32(mul);
 #else
-	float dot = 0.0f;
-	for (int i = 0; i < 4; i++)
-		dot += mF32[i] * inV2.mF32[i];
-	return dot;
+	// Brackets placed so that the order is consistent with the vectorized version
+	return (mF32[0] * inV2.mF32[0] + mF32[1] * inV2.mF32[1]) + (mF32[2] * inV2.mF32[2] + mF32[3] * inV2.mF32[3]);
 #endif
 }
 
@@ -644,10 +640,8 @@ float Vec4::LengthSq() const
     float32x4_t mul = vmulq_f32(mValue, mValue);
     return vaddvq_f32(mul);
 #else
-	float len_sq = 0.0f;
-	for (int i = 0; i < 4; i++)
-		len_sq += mF32[i] * mF32[i];
-	return len_sq;
+	// Brackets placed so that the order is consistent with the vectorized version
+	return (mF32[0] * mF32[0] + mF32[1] * mF32[1]) + (mF32[2] * mF32[2] + mF32[3] * mF32[3]);
 #endif
 }
 
@@ -660,7 +654,8 @@ float Vec4::Length() const
     float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
     return vget_lane_f32(vsqrt_f32(sum), 0);
 #else
-	return sqrt(LengthSq());
+	// Brackets placed so that the order is consistent with the vectorized version
+	return sqrt((mF32[0] * mF32[0] + mF32[1] * mF32[1]) + (mF32[2] * mF32[2] + mF32[3] * mF32[3]));
 #endif
 }