Browse Source

Added GLM_FORCE_INTRINSICS define

Christophe Riccio 7 years ago
parent
commit
ef9d65e0c6
6 changed files with 57 additions and 21 deletions
  1. 14 0
      CMakeLists.txt
  2. 3 3
      glm/detail/setup.hpp
  3. 24 15
      glm/simd/platform.h
  4. 3 3
      manual.md
  5. 3 0
      readme.md
  6. 10 0
      test/gtc/gtc_color_space.cpp

+ 14 - 0
CMakeLists.txt

@@ -117,6 +117,8 @@ if(GLM_TEST_FORCE_PURE)
 	message(STATUS "GLM: No SIMD instruction set")
 	message(STATUS "GLM: No SIMD instruction set")
 
 
 elseif(GLM_TEST_ENABLE_SIMD_AVX2)
 elseif(GLM_TEST_ENABLE_SIMD_AVX2)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 		add_compile_options(-mavx2)
 		add_compile_options(-mavx2)
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@@ -127,6 +129,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX2)
 	message(STATUS "GLM: AVX2 instruction set")
 	message(STATUS "GLM: AVX2 instruction set")
 
 
 elseif(GLM_TEST_ENABLE_SIMD_AVX)
 elseif(GLM_TEST_ENABLE_SIMD_AVX)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 		add_compile_options(-mavx)
 		add_compile_options(-mavx)
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@@ -137,6 +141,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX)
 	message(STATUS "GLM: AVX instruction set")
 	message(STATUS "GLM: AVX instruction set")
 
 
 elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
 elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 		add_compile_options(-msse4.2)
 		add_compile_options(-msse4.2)
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@@ -147,6 +153,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
 	message(STATUS "GLM: SSE4.2 instruction set")
 	message(STATUS "GLM: SSE4.2 instruction set")
 
 
 elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
 elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 		add_compile_options(-msse4.1)
 		add_compile_options(-msse4.1)
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@@ -157,6 +165,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
 	message(STATUS "GLM: SSE4.1 instruction set")
 	message(STATUS "GLM: SSE4.1 instruction set")
 
 
 elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
 elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 		add_compile_options(-mssse3)
 		add_compile_options(-mssse3)
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@@ -167,6 +177,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
 	message(STATUS "GLM: SSSE3 instruction set")
 	message(STATUS "GLM: SSSE3 instruction set")
 
 
 elseif(GLM_TEST_ENABLE_SIMD_SSE3)
 elseif(GLM_TEST_ENABLE_SIMD_SSE3)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 		add_compile_options(-msse3)
 		add_compile_options(-msse3)
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@@ -177,6 +189,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE3)
 	message(STATUS "GLM: SSE3 instruction set")
 	message(STATUS "GLM: SSE3 instruction set")
 
 
 elseif(GLM_TEST_ENABLE_SIMD_SSE2)
 elseif(GLM_TEST_ENABLE_SIMD_SSE2)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 		add_compile_options(-msse2)
 		add_compile_options(-msse2)
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")

+ 3 - 3
glm/detail/setup.hpp

@@ -315,12 +315,12 @@
 #endif
 #endif
 
 
 //
 //
-#if defined(GLM_FORCE_PURE)
-#	define GLM_HAS_BITSCAN_WINDOWS 0
-#else
+#if defined(GLM_FORCE_INTRINSICS)
 #	define GLM_HAS_BITSCAN_WINDOWS ((GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && (\
 #	define GLM_HAS_BITSCAN_WINDOWS ((GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && (\
 		((GLM_COMPILER & GLM_COMPILER_INTEL)) || \
 		((GLM_COMPILER & GLM_COMPILER_INTEL)) || \
 		((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC14) && (GLM_ARCH & GLM_ARCH_X86_BIT))))
 		((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC14) && (GLM_ARCH & GLM_ARCH_X86_BIT))))
+#else
+#	define GLM_HAS_BITSCAN_WINDOWS 0
 #endif
 #endif
 
 
 ///////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////

+ 24 - 15
glm/simd/platform.h

@@ -217,7 +217,7 @@
 ///////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////
 // Instruction sets
 // Instruction sets
 
 
-// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2
+// User defines: GLM_FORCE_PURE GLM_FORCE_INTRINSICS GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
 
 
 #define GLM_ARCH_MIPS_BIT	(0x10000000)
 #define GLM_ARCH_MIPS_BIT	(0x10000000)
 #define GLM_ARCH_PPC_BIT	(0x20000000)
 #define GLM_ARCH_PPC_BIT	(0x20000000)
@@ -251,39 +251,36 @@
 #define GLM_ARCH_MIPS		(GLM_ARCH_MIPS_BIT)
 #define GLM_ARCH_MIPS		(GLM_ARCH_MIPS_BIT)
 #define GLM_ARCH_PPC		(GLM_ARCH_PPC_BIT)
 #define GLM_ARCH_PPC		(GLM_ARCH_PPC_BIT)
 
 
-#ifdef GLM_FORCE_ARCH_UNKNOWN
+#if defined(GLM_FORCE_ARCH_UNKNOWN) || defined(GLM_FORCE_PURE)
 #	define GLM_ARCH GLM_ARCH_UNKNOWN
 #	define GLM_ARCH GLM_ARCH_UNKNOWN
-#elif defined(GLM_FORCE_PURE) || defined(GLM_FORCE_XYZW_ONLY)
-#	if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
-#		define GLM_ARCH (GLM_ARCH_X86)
-#	elif defined(__arm__ ) || defined(_M_ARM)
-#		define GLM_ARCH (GLM_ARCH_ARM)
-#	elif defined(__powerpc__ ) || defined(_M_PPC)
-#		define GLM_ARCH (GLM_ARCH_PPC)
-#	elif defined(__mips__ )
-#		define GLM_ARCH (GLM_ARCH_MIPS)
-#	else
-#		define GLM_ARCH (GLM_ARCH_UNKNOWN)
-#	endif
 #elif defined(GLM_FORCE_NEON)
 #elif defined(GLM_FORCE_NEON)
 #	define GLM_ARCH (GLM_ARCH_NEON)
 #	define GLM_ARCH (GLM_ARCH_NEON)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_AVX2)
 #elif defined(GLM_FORCE_AVX2)
 #	define GLM_ARCH (GLM_ARCH_AVX2)
 #	define GLM_ARCH (GLM_ARCH_AVX2)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_AVX)
 #elif defined(GLM_FORCE_AVX)
 #	define GLM_ARCH (GLM_ARCH_AVX)
 #	define GLM_ARCH (GLM_ARCH_AVX)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_SSE42)
 #elif defined(GLM_FORCE_SSE42)
 #	define GLM_ARCH (GLM_ARCH_SSE42)
 #	define GLM_ARCH (GLM_ARCH_SSE42)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_SSE41)
 #elif defined(GLM_FORCE_SSE41)
 #	define GLM_ARCH (GLM_ARCH_SSE41)
 #	define GLM_ARCH (GLM_ARCH_SSE41)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_SSSE3)
 #elif defined(GLM_FORCE_SSSE3)
 #	define GLM_ARCH (GLM_ARCH_SSSE3)
 #	define GLM_ARCH (GLM_ARCH_SSSE3)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_SSE3)
 #elif defined(GLM_FORCE_SSE3)
 #	define GLM_ARCH (GLM_ARCH_SSE3)
 #	define GLM_ARCH (GLM_ARCH_SSE3)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_SSE2)
 #elif defined(GLM_FORCE_SSE2)
 #	define GLM_ARCH (GLM_ARCH_SSE2)
 #	define GLM_ARCH (GLM_ARCH_SSE2)
+#	define GLM_FORCE_INTRINSICS
 #elif defined(GLM_FORCE_SSE)
 #elif defined(GLM_FORCE_SSE)
 #	define GLM_ARCH (GLM_ARCH_SSE)
 #	define GLM_ARCH (GLM_ARCH_SSE)
-#else
+#	define GLM_FORCE_INTRINSICS
+#elif defined(GLM_FORCE_INTRINSICS) && !defined(GLM_FORCE_XYZW_ONLY)
 #	if defined(__AVX2__)
 #	if defined(__AVX2__)
 #		define GLM_ARCH (GLM_ARCH_AVX2)
 #		define GLM_ARCH (GLM_ARCH_AVX2)
 #	elif defined(__AVX__)
 #	elif defined(__AVX__)
@@ -311,6 +308,18 @@
 #	else
 #	else
 #		define GLM_ARCH (GLM_ARCH_UNKNOWN)
 #		define GLM_ARCH (GLM_ARCH_UNKNOWN)
 #	endif
 #	endif
+#else
+#	if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
+#		define GLM_ARCH (GLM_ARCH_X86)
+#	elif defined(__arm__) || defined(_M_ARM)
+#		define GLM_ARCH (GLM_ARCH_ARM)
+#	elif defined(__powerpc__) || defined(_M_PPC)
+#		define GLM_ARCH (GLM_ARCH_PPC)
+#	elif defined(__mips__)
+#		define GLM_ARCH (GLM_ARCH_MIPS)
+#	else
+#		define GLM_ARCH (GLM_ARCH_UNKNOWN)
+#	endif
 #endif
 #endif
 
 
 #if GLM_ARCH & GLM_ARCH_AVX2_BIT
 #if GLM_ARCH & GLM_ARCH_AVX2_BIT

+ 3 - 3
manual.md

@@ -25,7 +25,7 @@
 + [2.8. GLM\_FORCE\_INLINE: Force inline](#section2_8)
 + [2.8. GLM\_FORCE\_INLINE: Force inline](#section2_8)
 + [2.9. GLM\_FORCE\_ALIGNED\_GENTYPES: Force GLM to enable aligned types](#section2_9)
 + [2.9. GLM\_FORCE\_ALIGNED\_GENTYPES: Force GLM to enable aligned types](#section2_9)
 + [2.10. GLM\_FORCE\_DEFAULT\_ALIGNED\_GENTYPES: Force GLM to use aligned types by default](#section2_10)
 + [2.10. GLM\_FORCE\_DEFAULT\_ALIGNED\_GENTYPES: Force GLM to use aligned types by default](#section2_10)
-+ [2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations](#section2_11)
++ [2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations](#section2_11)
 + [2.12. GLM\_FORCE\_PRECISION\_**: Default precision](#section2_12)
 + [2.12. GLM\_FORCE\_PRECISION\_**: Default precision](#section2_12)
 + [2.13. GLM\_FORCE\_SINGLE\_ONLY: Removed explicit 64-bits floating point types](#section2_13)
 + [2.13. GLM\_FORCE\_SINGLE\_ONLY: Removed explicit 64-bits floating point types](#section2_13)
 + [2.14. GLM\_FORCE\_SWIZZLE: Enable swizzle operators](#section2_14)
 + [2.14. GLM\_FORCE\_SWIZZLE: Enable swizzle operators](#section2_14)
@@ -456,10 +456,10 @@ void foo()
 
 
 *Note: GLM SIMD optimizations require the use of aligned types*
 *Note: GLM SIMD optimizations require the use of aligned types*
 
 
-### <a name="section2_11"></a> 2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations
+### <a name="section2_11"></a> 2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations
 
 
 GLM provides some SIMD optimizations based on [compiler intrinsics](https://msdn.microsoft.com/en-us/library/26td21ds.aspx).
 GLM provides some SIMD optimizations based on [compiler intrinsics](https://msdn.microsoft.com/en-us/library/26td21ds.aspx).
-These optimizations will be automatically thanks to compiler arguments.
+These optimizations will be enabled automatically thanks to compiler arguments when `GLM_FORCE_INTRINSICS` is defined before including GLM files.
 For example, if a program is compiled with Visual Studio using `/arch:AVX`, GLM will detect this argument and generate code using AVX instructions automatically when available.
 For example, if a program is compiled with Visual Studio using `/arch:AVX`, GLM will detect this argument and generate code using AVX instructions automatically when available.
 
 
 It’s possible to avoid the instruction set detection by forcing the use of a specific instruction set with one of the following defines:
 It’s possible to avoid the instruction set detection by forcing the use of a specific instruction set with one of the following defines:

+ 3 - 0
readme.md

@@ -53,6 +53,9 @@ glm::mat4 camera(float Translate, glm::vec2 const& Rotate)
 ## Release notes
 ## Release notes
 
 
 ### [GLM 0.9.9.4](https://github.com/g-truc/glm/tree/master) - 2018-1X-XX
 ### [GLM 0.9.9.4](https://github.com/g-truc/glm/tree/master) - 2018-1X-XX
+#### Improvements:
+- Added GLM_FORCE_INTRINSICS to enable SIMD instruction code path. It is disabled by default, which allows constexpr support.
+
 #### Fixes:
 #### Fixes:
 - Fixed in mat4x3 conversion #829
 - Fixed in mat4x3 conversion #829
 
 

+ 10 - 0
test/gtc/gtc_color_space.cpp

@@ -36,6 +36,16 @@ namespace srgb
 			Error += glm::all(glm::epsilonEqual(ColorSourceRGBA, ColorRGB, 0.00001f)) ? 0 : 1;
 			Error += glm::all(glm::epsilonEqual(ColorSourceRGBA, ColorRGB, 0.00001f)) ? 0 : 1;
 		}
 		}
 
 
+		glm::vec4 const ColorSourceGNI = glm::vec4(107, 107, 104, 131) / glm::vec4(255);
+
+		{
+			glm::vec4 const ColorGNA = glm::convertSRGBToLinear(ColorSourceGNI) * glm::vec4(255);
+			glm::vec4 const ColorGNE = glm::convertLinearToSRGB(ColorSourceGNI) * glm::vec4(255);
+			glm::vec4 const ColorSRGB = glm::convertLinearToSRGB(ColorSourceGNI);
+			glm::vec4 const ColorRGB = glm::convertSRGBToLinear(ColorSRGB);
+			Error += glm::all(glm::epsilonEqual(ColorSourceGNI, ColorRGB, 0.00001f)) ? 0 : 1;
+		}
+
 		return Error;
 		return Error;
 	}
 	}
 }//namespace srgb
 }//namespace srgb