Browse Source

RISC-V support (#1400)

Jorrit Rouwe 7 months ago
parent
commit
b7bf913e68

+ 29 - 0
.github/workflows/determinism_check.yml

@@ -8,6 +8,7 @@ env:
   UBUNTU_CLANG_VERSION: clang++-15
   UBUNTU_GCC_VERSION: g++-12
   UBUNTU_GCC_AARCH64_VERSION: aarch64-linux-gnu-g++-12
+  UBUNTU_GCC_RISCV_VERSION: riscv64-linux-gnu-g++-12
 
 on:
   push:
@@ -235,6 +236,34 @@ jobs:
       working-directory: ${{github.workspace}}/Build/Linux_Distribution
       run: qemu-aarch64 -L /usr/aarch64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Pyramid -validate_hash=${PYRAMID_HASH}
 
+  riscv_gcc:
+    runs-on: ubuntu-latest
+    name: RISC-V GCC Determinism Check
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Update index
+      run: sudo apt-get update
+    - name: Install Cross Compiler
+      run: sudo apt-get install g++-12-riscv64-linux-gnu gcc-12-multilib g++-12-multilib qemu-user -y
+    - name: Configure CMake
+      working-directory: ${{github.workspace}}/Build
+      run: ./cmake_linux_clang_gcc.sh Distribution ${{env.UBUNTU_GCC_RISCV_VERSION}} -DCROSS_COMPILE_ARM=ON -DCROSS_PLATFORM_DETERMINISTIC=ON -DCROSS_COMPILE_ARM_TARGET="" -DTARGET_VIEWER=OFF -DTARGET_SAMPLES=OFF -DTARGET_HELLO_WORLD=OFF -DTARGET_UNIT_TESTS=ON -DTARGET_PERFORMANCE_TEST=ON
+    - name: Build
+      run: cmake --build ${{github.workspace}}/Build/Linux_Distribution -j $(nproc)
+    - name: Unit Tests
+      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+      run: qemu-riscv64 -L /usr/riscv64-linux-gnu/ ./UnitTests
+    - name: Test ConvexVsMesh
+      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+      run: qemu-riscv64 -L /usr/riscv64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=ConvexVsMesh -validate_hash=${CONVEX_VS_MESH_HASH}
+    - name: Test Ragdoll
+      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+      run: qemu-riscv64 -L /usr/riscv64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Ragdoll -validate_hash=${RAGDOLL_HASH}
+    - name: Test Pyramid
+      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+      run: qemu-riscv64 -L /usr/riscv64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Pyramid -validate_hash=${PYRAMID_HASH}
+
   emscripten:
     runs-on: ubuntu-latest
     name: Emscripten Determinism Check

+ 2 - 1
Build/CMakeLists.txt

@@ -272,12 +272,13 @@ function(SET_INTERPROCEDURAL_OPTIMIZATION)
 	if (INTERPROCEDURAL_OPTIMIZATION
 		AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM64")
 		AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM")
-		AND NOT (CROSS_COMPILE_ARM AND CROSS_COMPILE_ARM_TARGET MATCHES "arm-.*")
+		AND (NOT CROSS_COMPILE_ARM OR ("${CROSS_COMPILE_ARM_TARGET}" STREQUAL "aarch64-linux-gnu"))
 		AND NOT (MINGW AND BUILD_SHARED_LIBS))
 		include(CheckIPOSupported)
 		check_ipo_supported(RESULT IS_IPO_SUPPORTED OUTPUT IPO_CHECK_OUTPUT)
 
 		if (IS_IPO_SUPPORTED)
+			message("Interprocedural optimizations are turned on")
 			set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON PARENT_SCOPE)
 			set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_DISTRIBUTION ON PARENT_SCOPE)
 		else()

+ 1 - 0
Docs/Architecture.md

@@ -632,6 +632,7 @@ It is quite difficult to verify cross platform determinism, so this feature is l
 * Linux clang ARM 32-bit
 * Linux gcc x86 64-bit with AVX2
 * Linux gcc ARM 64-bit with NEON
+* Linux gcc RISC-V 64-bit
 * WASM emscripten running in nodejs
 
 The most important things to look out for in your own application:

+ 7 - 1
Jolt/ConfigurationString.h

@@ -14,8 +14,14 @@ inline const char *GetConfigurationString()
 		"x86 "
 #elif defined(JPH_CPU_ARM)
 		"ARM "
-#elif defined(JPH_PLATFORM_WASM)
+#elif defined(JPH_CPU_RISCV)
+		"RISC-V "
+#elif defined(JPH_CPU_E2K)
+		"E2K "
+#elif defined(JPH_CPU_WASM)
 		"WASM "
+#else
+	#error Unknown CPU architecture
 #endif
 #if JPH_CPU_ADDRESS_BITS == 64
 		"64-bit "

+ 23 - 3
Jolt/Core/Core.h

@@ -180,6 +180,18 @@
 		#define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries
 		#define JPH_DVECTOR_ALIGNMENT 8
 	#endif
+#elif defined(__riscv)
+	// RISC-V CPU architecture
+	#define JPH_CPU_RISCV
+	#if __riscv_xlen == 64
+		#define JPH_CPU_ADDRESS_BITS 64
+		#define JPH_VECTOR_ALIGNMENT 16
+		#define JPH_DVECTOR_ALIGNMENT 32
+	#else
+		#define JPH_CPU_ADDRESS_BITS 32
+		#define JPH_VECTOR_ALIGNMENT 16
+		#define JPH_DVECTOR_ALIGNMENT 8
+	#endif
 #elif defined(JPH_PLATFORM_WASM)
 	// WebAssembly CPU architecture
 	#define JPH_CPU_WASM
@@ -206,6 +218,13 @@
 	#error Unsupported CPU architecture
 #endif
 
+// CPU helper macros
+#ifdef JPH_CPU_RISCV
+	#define JPH_IF_RISCV(x) x
+#else
+	#define JPH_IF_RISCV(x)
+#endif
+
 // If this define is set, Jolt is compiled as a shared library
 #ifdef JPH_SHARED_LIBRARY
 	#ifdef JPH_BUILD_SHARED_LIBRARY
@@ -320,6 +339,7 @@
 	JPH_GCC_SUPPRESS_WARNING("-Wpedantic")														\
 	JPH_GCC_SUPPRESS_WARNING("-Wunused-parameter")												\
 	JPH_GCC_SUPPRESS_WARNING("-Wmaybe-uninitialized")											\
+	JPH_IF_RISCV(JPH_GCC_SUPPRESS_WARNING("-Wuninitialized"))									\
 																								\
 	JPH_MSVC_SUPPRESS_WARNING(4619) /* #pragma warning: there is no warning number 'XXXX' */	\
 	JPH_MSVC_SUPPRESS_WARNING(4514) /* 'X' : unreferenced inline function has been removed */	\
@@ -358,10 +378,10 @@
 #elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) || defined(JPH_PLATFORM_FREEBSD)
 	#if defined(JPH_CPU_X86)
 		#define JPH_BREAKPOINT	__asm volatile ("int $0x3")
-	#elif defined(JPH_CPU_ARM)
-		#define JPH_BREAKPOINT	__builtin_trap()
-	#elif defined(JPH_CPU_E2K)
+	#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K)
 		#define JPH_BREAKPOINT	__builtin_trap()
+	#else
+		#error Unknown CPU architecture
 	#endif
 #elif defined(JPH_PLATFORM_WASM)
 	#define JPH_BREAKPOINT		do { } while (false) // Not supported

+ 4 - 0
Jolt/Core/FPControlWord.h

@@ -126,6 +126,10 @@ private:
 	uint32		mPrevState;
 };
 
+#elif defined(JPH_CPU_RISCV)
+
+// RISC-V does not raise floating point exceptions as traps; the only way to detect them is to manually read the accrued exception flags in the fcsr register.
+
 #else
 
 #error Unsupported CPU architecture

+ 4 - 0
Jolt/Core/FPException.h

@@ -56,6 +56,10 @@ class FPExceptionDisableInvalid : public FPControlWord<0, FP_IOE> { };
 /// Disable division by zero floating point exceptions
 class FPExceptionDisableDivByZero : public FPControlWord<0, FP_DZE> { };
 
+#elif defined(JPH_CPU_RISCV)
+
+#error "RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled."
+
 #else
 
 #error Unsupported CPU architecture

+ 3 - 1
Jolt/Core/FPFlushDenormals.h

@@ -8,7 +8,7 @@
 
 JPH_NAMESPACE_BEGIN
 
-#if defined(JPH_CPU_WASM)
+#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV)
 
 // Not supported
 class FPFlushDenormals { };
@@ -21,6 +21,8 @@ class FPFlushDenormals : public FPControlWord<_MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_
 
 #elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)
 
+/// Helper class that needs to be put on the stack to enable flushing denormals to zero
+/// This can make floating point operations much faster when working with very small numbers
 class FPFlushDenormals : public FPControlWord<_DN_FLUSH, _MCW_DN> { };
 
 #elif defined(JPH_CPU_ARM)

+ 1 - 3
Jolt/Core/TickCounter.h

@@ -35,9 +35,7 @@ JPH_INLINE uint64 GetProcessorTickCount()
 	uint64 val;
 	asm volatile("mrs %0, cntvct_el0" : "=r" (val));
 	return val;
-#elif defined(JPH_CPU_ARM)
-	return 0; // Not supported
-#elif defined(JPH_CPU_WASM)
+#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_WASM)
 	return 0; // Not supported
 #else
 	#error Undefined

+ 4 - 4
Jolt/Math/Math.h

@@ -120,8 +120,8 @@ inline uint CountTrailingZeros(uint32 inValue)
 			return 32;
 		return __builtin_ctz(inValue);
 	#endif
-#elif defined(JPH_CPU_E2K)
-		return inValue ? __builtin_ctz(inValue) : 32;
+#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV)
+	return inValue ? __builtin_ctz(inValue) : 32;
 #else
 	#error Undefined
 #endif
@@ -150,8 +150,8 @@ inline uint CountLeadingZeros(uint32 inValue)
 	#else
 		return __builtin_clz(inValue);
 	#endif
-#elif defined(JPH_CPU_E2K)
-		return inValue ? __builtin_clz(inValue) : 32;
+#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV)
+	return inValue ? __builtin_clz(inValue) : 32;
 #else
 	#error Undefined
 #endif

+ 1 - 1
README.md

@@ -90,7 +90,7 @@ Why create yet another physics engine? Firstly, it has been a personal learning
 ## Supported platforms
 
 * Windows (Desktop or UWP) x86/x64/ARM32/ARM64
-* Linux (tested on Ubuntu) x64/ARM64
+* Linux (tested on Ubuntu) x86/x64/ARM32/ARM64/RISC-V64
 * FreeBSD
 * Android x86/x64/ARM32/ARM64
 * Platform Blue (a popular game console) x64

+ 1 - 1
UnitTests/Core/FPFlushDenormalsTest.cpp

@@ -6,7 +6,7 @@
 #include <Jolt/Core/FPFlushDenormals.h>
 #include <atomic>
 
-#ifndef JPH_CPU_WASM
+#if !defined(JPH_CPU_WASM) && !defined(JPH_CPU_RISCV)
 
 // Implemented as a global atomic so the compiler can't optimize it to a constant
 extern atomic<float> TestFltMin;

+ 1 - 1
UnitTests/Math/Vec3Tests.cpp

@@ -35,7 +35,7 @@ TEST_SUITE("Vec3Tests")
 		v.SetX(7);
 		v.SetY(8);
 		v.SetZ(9);
-		CHECK(v == Vec3(7, 8, 9));		
+		CHECK(v == Vec3(7, 8, 9));
 
 		// Set all components
 		v.Set(10, 11, 12);