فهرست منبع

Support broader range of CPUs like Intel Core 2 series by using SSE4.1 (#78)

mrezai 3 سال پیش
والد
کامیت
80c0b6075c
5 فایلهای تغییر یافته به همراه 91 افزوده شده و 9 حذف شده
  1. 73 3
      Build/CMakeLists.txt
  2. 4 2
      Build/README.md
  3. 3 0
      Jolt/Core/Core.h
  4. 9 2
      Jolt/Math/Math.h
  5. 2 2
      README.md

+ 73 - 3
Build/CMakeLists.txt

@@ -2,6 +2,15 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
 
 project(JoltPhysics CXX)
 
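+# Select which x86 processor features to use (with every option OFF the build is SSE4.1 compatible). In the Windows Clang and Linux x64 branches, USE_AVX2 also passes -mbmi -mpopcnt -mlzcnt -mf16c -mfma regardless of the individual options.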
+option(USE_SSE4_2 "Enable SSE4.2" ON)
+option(USE_AVX "Enable AVX" ON)
+option(USE_AVX2 "Enable AVX2" ON)
+option(USE_LZCNT "Enable LZCNT" ON)
+option(USE_TZCNT "Enable TZCNT" ON)
+option(USE_F16C "Enable F16C" ON)
+option(USE_FMADD "Enable FMADD" ON)
+
 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
 	set(CMAKE_CONFIGURATION_TYPES "Debug;Release;Distribution")
 elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
@@ -19,7 +28,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
 	set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
 
 	# Set general compiler flags
-	set(CMAKE_CXX_FLAGS "/std:c++17 /Zc:__cplusplus /GR- /Gm- /Wall /WX /EHsc /nologo /diagnostics:classic /FC /arch:AVX2 /fp:except- /Zc:inline /Zi /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE")
+	set(CMAKE_CXX_FLAGS "/std:c++17 /Zc:__cplusplus /GR- /Gm- /Wall /WX /EHsc /nologo /diagnostics:classic /FC /fp:except- /Zc:inline /Zi /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE")
 	
 	# Set compiler flags for various configurations
 	set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /GS /Od /Ob0 /RTC1 /DJPH_PROFILE_ENABLED /DJPH_DEBUG_RENDERER")
@@ -33,12 +42,53 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
 	set(CMAKE_EXE_LINKER_FLAGS "/machine:x64 /SUBSYSTEM:WINDOWS /ignore:4221 /DEBUG:FASTLINK")
 	if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /fp:fast") # Clang doesn't use fast math because it cannot be turned off inside a single compilation unit
+		if (USE_AVX2)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
+		elseif (USE_AVX)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
+		endif()	
+		if (USE_SSE4_2)
+			add_compile_definitions(JPH_USE_SSE4_2)
+		endif()
+		if (USE_LZCNT)
+			add_compile_definitions(JPH_USE_LZCNT)
+		endif()
+		if (USE_TZCNT)
+			add_compile_definitions(JPH_USE_TZCNT)
+		endif()
+		if (USE_F16C)
+			add_compile_definitions(JPH_USE_F16C)
+		endif()
+		if (USE_FMADD)
+			add_compile_definitions(JPH_USE_FMADD)
+		endif()
 		set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DJPH_FLOATING_POINT_EXCEPTIONS_ENABLED") # Clang turns Float2 into a vector sometimes causing floating point exceptions
 		set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /DJPH_FLOATING_POINT_EXCEPTIONS_ENABLED")
 		set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/INCREMENTAL:NO /LTCG:incremental /OPT:ICF /OPT:REF")
 		set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "/LTCG")
 	elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
-		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /showFilenames -mavx2 -mfma -mf16c -mlzcnt -mpopcnt")
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /showFilenames")
+		if (USE_AVX2)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mpopcnt -mlzcnt -mf16c -mfma")
+		elseif (USE_AVX)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mpopcnt")
+		elseif (USE_SSE4_2)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mpopcnt")
+		else()
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
+		endif()
+		if (USE_LZCNT)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mlzcnt")
+		endif()
+		if (USE_TZCNT)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi")
+		endif()
+		if (USE_F16C)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
+		endif()
+		if (USE_FMADD)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
+		endif()
 		set(CMAKE_EXE_LINKER_FLAGS_RELEASEASAN "/SUBSYSTEM:CONSOLE /LIBPATH:${CLANG_LIB_PATH} clang_rt.asan-x86_64.lib -wholearchive:clang_rt.asan-x86_64.lib clang_rt.asan_cxx-x86_64.lib -wholearchive:clang_rt.asan_cxx-x86_64.lib")
 		set(CMAKE_EXE_LINKER_FLAGS_RELEASEUBSAN "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LIBPATH:${CLANG_LIB_PATH}")
 		set(CMAKE_EXE_LINKER_FLAGS_RELEASECOVERAGE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LIBPATH:${CLANG_LIB_PATH}")
@@ -56,7 +106,27 @@ elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
 	# Platform specific compiler flags
 	if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
 		# X64
-		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mfma -mf16c -mlzcnt -mbmi -mpopcnt")
+		if (USE_AVX2)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mpopcnt -mlzcnt -mf16c -mfma")
+		elseif (USE_AVX)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mpopcnt")
+		elseif (USE_SSE4_2)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mpopcnt")
+		else()
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
+		endif()
+		if (USE_LZCNT)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mlzcnt")
+		endif()
+		if (USE_TZCNT)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi")
+		endif()
+		if (USE_F16C)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
+		endif()
+		if (USE_FMADD)
+			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
+		endif()
 	elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
 		# ARM64
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

+ 4 - 2
Build/README.md

@@ -25,11 +25,13 @@ There are a number of user configurable defines that turn on/off certain feature
 - JPH_DEBUG_RENDERER - Adds support to draw lines and triangles, used to be able to debug draw the state of the world.
 - JPH_DISABLE_TEMP_ALLOCATOR - Disables the temporary memory allocator, used mainly to allow ASAN to do its job.
 - JPH_FLOATING_POINT_EXCEPTIONS_ENABLED - Turns on division by zero and invalid floating point exception support in order to detect bugs (Windows only).
+- JPH_USE_SSE4_2 - Enable SSE4.2 CPU instructions (x64 only)
 - JPH_USE_F16C - Enable half float CPU instructions (x64 only)
 - JPH_USE_LZCNT - Enable the lzcnt CPU instruction (x64 only)
+- JPH_USE_TZCNT - Enable the tzcnt CPU instruction (x64 only)
 - JPH_USE_AVX - Enable AVX CPU instructions (x64 only)
 - JPH_USE_AVX2 - Enable AVX2 CPU instructions (x64 only)
-- JPH_USE_FMA - Enable fused multiply add CPU instructions (x64 only)
+- JPH_USE_FMADD - Enable fused multiply add CPU instructions (x64 only)
 
 ## Logging & Asserting
 
@@ -83,4 +85,4 @@ Documentation can be generated through doxygen:
 
 - Install Doxygen (https://www.doxygen.nl/download.html)
 - Install Microsoft HTML Help Workshop (to generate a CHM file)
-- Run: run_doxygen.bat
+- Run: run_doxygen.bat

+ 3 - 0
Jolt/Core/Core.h

@@ -93,6 +93,9 @@
 	#if (defined(__BMI__) || defined(__AVX2__)) && !defined(JPH_USE_TZCNT)
 		#define JPH_USE_TZCNT
 	#endif
+	#if (defined(__SSE4_2__) || defined(__AVX__)) && !defined(JPH_USE_SSE4_2)
+		#define JPH_USE_SSE4_2
+	#endif
 	#if defined(__AVX__) && !defined(JPH_USE_AVX)
 		#define JPH_USE_AVX
 	#endif

+ 9 - 2
Jolt/Math/Math.h

@@ -142,9 +142,16 @@ inline uint CountLeadingZeros(uint32 inValue)
 inline uint CountBits(uint32 inValue)
 {
 #if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC)
-    return __builtin_popcount(inValue);
+	return __builtin_popcount(inValue);
 #elif defined(JPH_COMPILER_MSVC)
-	return _mm_popcnt_u32(inValue);
+	#if defined(JPH_USE_SSE4_2)
+		return _mm_popcnt_u32(inValue);
+	#else
+		inValue = inValue - ((inValue >> 1) & 0x55555555);
+		inValue = (inValue & 0x33333333) + ((inValue >> 2) & 0x33333333);
+		inValue = (inValue + (inValue >> 4)) & 0x0F0F0F0F;
+		return (inValue * 0x01010101) >> 24;
+	#endif
 #else
 	#error Undefined
 #endif

+ 2 - 2
README.md

@@ -82,7 +82,7 @@ For more information see the [Architecture and API documentation](https://jrouwe
 
 ## Required CPU features
 
-* On x86 the minimal requirements are SSE4.2 but the library can be compiled using FP16C, AVX or AVX2.
+* On x86 the minimum requirement is SSE4.1, but the library can also be compiled to use SSE4.2, F16C, AVX or AVX2.
 * On ARM64 the library requires NEON with FP16 support.
 
 ## Compiling
@@ -110,4 +110,4 @@ For build instructions go to the [Build](Build/README.md) section.
 
 ## License
 
-The project is distributed under the [MIT license](LICENSE).
+The project is distributed under the [MIT license](LICENSE).