Browse Source

Added NEON, MIPS and PowerPC detection

Christophe Riccio 9 years ago
parent
commit
1208eb63f7
2 changed files with 60 additions and 42 deletions
  1. 57 42
      glm/detail/setup.hpp
  2. 3 0
      readme.md

+ 57 - 42
glm/detail/setup.hpp

@@ -68,74 +68,81 @@
 
 
 // User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_SSE4 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX512
 // User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_SSE4 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX512
 
 
-#define GLM_ARCH_PURE		0x0000
-#define GLM_ARCH_ARM		0x0001
-#define GLM_ARCH_X86		0x0002
-#define GLM_ARCH_SSE2		0x0004
-#define GLM_ARCH_SSE3		0x0008
-#define GLM_ARCH_SSE4		0x0010
-#define GLM_ARCH_AVX		0x0020
-#define GLM_ARCH_AVX2		0x0040
-#define GLM_ARCH_AVX512		0x0080 // Skylake set
+#define GLM_ARCH_PURE		0x00000000
+#define GLM_ARCH_X86		0x00000001
+#define GLM_ARCH_SSE2		0x00000002
+#define GLM_ARCH_SSE3		0x00000004
+#define GLM_ARCH_SSE4		0x00000008
+#define GLM_ARCH_AVX		0x00000010
+#define GLM_ARCH_AVX2		0x00000020
+#define GLM_ARCH_AVX512		0x00000040 // Skylake subset
+#define GLM_ARCH_ARM		0x00000100
+#define GLM_ARCH_NEON		0x00000200
+#define GLM_ARCH_MIPS		0x00010000
+#define GLM_ARCH_PPC		0x01000000
 
 
 #if defined(GLM_FORCE_PURE)
 #if defined(GLM_FORCE_PURE)
 #	define GLM_ARCH GLM_ARCH_PURE
 #	define GLM_ARCH GLM_ARCH_PURE
+#elif defined(GLM_FORCE_MIPS)
+#	define GLM_ARCH (GLM_ARCH_MIPS)
+#elif defined(GLM_FORCE_PPC)
+#	define GLM_ARCH (GLM_ARCH_PPC)
+#elif defined(GLM_FORCE_NEON)
+#	define GLM_ARCH (GLM_ARCH_ARM | GLM_ARCH_NEON)
 #elif defined(GLM_FORCE_AVX512)
 #elif defined(GLM_FORCE_AVX512)
-#	define GLM_ARCH (GLM_ARCH_AVX512 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#	define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX512 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #elif defined(GLM_FORCE_AVX2)
 #elif defined(GLM_FORCE_AVX2)
-#	define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#	define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #elif defined(GLM_FORCE_AVX)
 #elif defined(GLM_FORCE_AVX)
-#	define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#	define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #elif defined(GLM_FORCE_SSE4)
 #elif defined(GLM_FORCE_SSE4)
-#	define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#	define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #elif defined(GLM_FORCE_SSE3)
 #elif defined(GLM_FORCE_SSE3)
-#	define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#	define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #elif defined(GLM_FORCE_SSE2)
 #elif defined(GLM_FORCE_SSE2)
-#	define GLM_ARCH (GLM_ARCH_SSE2)
+#	define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
 #elif (GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_GCC)) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_LINUX))
 #elif (GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_GCC)) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_LINUX))
 //	This is the Skylake subset of the AVX-512 instruction set
 //	This is the Skylake subset of the AVX-512 instruction set
 #	if defined(__AVX512BW__) && defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512VL__) && defined(__AVX512DQ__)
 #	if defined(__AVX512BW__) && defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512VL__) && defined(__AVX512DQ__)
-#		define GLM_ARCH (GLM_ARCH_AVX512 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX512 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #	elif defined(__AVX2__)
 #	elif defined(__AVX2__)
-#		define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #	elif defined(__AVX__)
 #	elif defined(__AVX__)
-#		define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #	elif defined(__SSE3__)
 #	elif defined(__SSE3__)
-#		define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #	elif defined(__SSE2__)
 #	elif defined(__SSE2__)
-#		define GLM_ARCH (GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
+#	elif defined(__i386__) || defined(__x86_64__)
+#		define GLM_ARCH (GLM_ARCH_X86)
+#	elif defined(__ARM_NEON)
+#		define GLM_ARCH (GLM_ARCH_ARM | GLM_ARCH_NEON)
+#	elif defined(__arm__ )
+#		define GLM_ARCH (GLM_ARCH_ARM)
+#	elif defined(__mips__ )
+#		define GLM_ARCH (GLM_ARCH_MIPS)
+#	elif defined(__powerpc__ )
+#		define GLM_ARCH (GLM_ARCH_PPC)
 #	else
 #	else
-#		define GLM_ARCH GLM_ARCH_PURE
+#		define GLM_ARCH (GLM_ARCH_PURE)
 #	endif
 #	endif
 #elif (GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))
 #elif (GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))
-#	if defined(_M_ARM_FP)
+#	if defined(_M_ARM)
 #		define GLM_ARCH (GLM_ARCH_ARM)
 #		define GLM_ARCH (GLM_ARCH_ARM)
 #	elif defined(__AVX2__)
 #	elif defined(__AVX2__)
-#		define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #	elif defined(__AVX__)
 #	elif defined(__AVX__)
-#		define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
 #	elif defined(_M_X64)
 #	elif defined(_M_X64)
-#		define GLM_ARCH (GLM_ARCH_SSE2)
+#		define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
 #	elif defined(_M_IX86_FP)
 #	elif defined(_M_IX86_FP)
 #		if _M_IX86_FP >= 2
 #		if _M_IX86_FP >= 2
-#			define GLM_ARCH (GLM_ARCH_SSE2)
+#			define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
 #		else
 #		else
 #			define GLM_ARCH (GLM_ARCH_PURE)
 #			define GLM_ARCH (GLM_ARCH_PURE)
 #		endif
 #		endif
-#	else
-#		define GLM_ARCH (GLM_ARCH_PURE)
-#	endif
-#elif (GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))
-#	if defined(__AVX2__)
-#		define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
-#	elif defined(__AVX__)
-#		define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
-#	elif defined(__SSE4_1__ )
-#		define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
-#	elif defined(__SSE3__)
-#		define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
-#	elif defined(__SSE2__)
-#		define GLM_ARCH (GLM_ARCH_SSE2)
+#	elif defined(_M_PPC)
+#		define GLM_ARCH (GLM_ARCH_PPC)
 #	else
 #	else
 #		define GLM_ARCH (GLM_ARCH_PURE)
 #		define GLM_ARCH (GLM_ARCH_PURE)
 #	endif
 #	endif
@@ -180,6 +187,16 @@
 #		pragma message("GLM: SSE3 instruction set")
 #		pragma message("GLM: SSE3 instruction set")
 #	elif(GLM_ARCH & GLM_ARCH_SSE2)
 #	elif(GLM_ARCH & GLM_ARCH_SSE2)
 #		pragma message("GLM: SSE2 instruction set")
 #		pragma message("GLM: SSE2 instruction set")
+#	elif(GLM_ARCH & GLM_ARCH_X86)
+#		pragma message("GLM: x86 instruction set")
+#	elif(GLM_ARCH & GLM_ARCH_NEON)
+#		pragma message("GLM: NEON instruction set")
+#	elif(GLM_ARCH & GLM_ARCH_ARM)
+#		pragma message("GLM: ARM instruction set")
+#	elif(GLM_ARCH & GLM_ARCH_MIPS)
+#		pragma message("GLM: MIPS instruction set")
+#	elif(GLM_ARCH & GLM_ARCH_PPC)
+#		pragma message("GLM: PowerPC architecture")
 #	endif//GLM_ARCH
 #	endif//GLM_ARCH
 #endif//GLM_MESSAGE
 #endif//GLM_MESSAGE
 
 
@@ -265,8 +282,6 @@
 #		else
 #		else
 #			if __cplusplus >= 201402L
 #			if __cplusplus >= 201402L
 #				define GLM_LANG GLM_LANG_CXX14
 #				define GLM_LANG GLM_LANG_CXX14
-//#			elif GLM_COMPILER >= GLM_COMPILER_VC2015
-//#				define GLM_LANG GLM_LANG_CXX1Y
 #			elif __cplusplus >= 201103L
 #			elif __cplusplus >= 201103L
 #				define GLM_LANG GLM_LANG_CXX11
 #				define GLM_LANG GLM_LANG_CXX11
 #			elif GLM_COMPILER >= GLM_COMPILER_VC2010
 #			elif GLM_COMPILER >= GLM_COMPILER_VC2010

+ 3 - 0
readme.md

@@ -71,6 +71,9 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate)
 - Improved GLM_FORCE_EXPLICIT_CTOR coverage #481
 - Improved GLM_FORCE_EXPLICIT_CTOR coverage #481
 - Improved OpenMP support detection for Clang, GCC, ICC and VC
 - Improved OpenMP support detection for Clang, GCC, ICC and VC
 - Added constexpr for *vec*, *mat*, *quat* and *dual_quat* types #493
 - Added constexpr for *vec*, *mat*, *quat* and *dual_quat* types #493
+- Added NEON instruction set detection
+- Added MIPS CPU detection
+- Added PowerPC CPU detection
 - Use Cuda built-in function for abs function implementation with Cuda compiler
 - Use Cuda built-in function for abs function implementation with Cuda compiler
 
 
 ##### Fixes:
 ##### Fixes: