Browse Source

Fixed SIMD code path selection

Christophe Riccio 9 years ago
parent
commit
9f00ba86cb

+ 2 - 2
glm/detail/func_common_simd.inl

@@ -1,7 +1,7 @@
 /// @ref core
 /// @file glm/detail/func_common_simd.inl
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
 #include "../simd/common.h"
 
@@ -135,4 +135,4 @@ namespace detail
 }//namespace detail
 }//namespace glm
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

+ 9 - 0
glm/detail/func_exponential_simd.inl

@@ -0,0 +1,9 @@
+/// @ref core
+/// @file glm/detail/func_exponential_simd.inl
+
+namespace glm{
+namespace detail
+{
+// Placeholder: nothing is defined in this file yet; the namespace body is empty.
+}//namespace detail
+}//namespace glm

+ 3 - 0
glm/detail/func_geometric_simd.inl

@@ -1,5 +1,7 @@
 #include "../simd/geometric.h"
 
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+
 namespace glm{
 namespace detail
 {
@@ -15,3 +17,4 @@ namespace detail
 }//namespace detail
 }//namespace glm
 
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

+ 9 - 0
glm/detail/func_packing_simd.inl

@@ -0,0 +1,9 @@
+/// @ref core
+/// @file glm/detail/func_packing_simd.inl
+
+namespace glm{
+namespace detail
+{
+// Placeholder: nothing is defined in this file yet; the namespace body is empty.
+}//namespace detail
+}//namespace glm

+ 9 - 0
glm/detail/func_vector_relational_simd.inl

@@ -0,0 +1,9 @@
+/// @ref core
+/// @file glm/detail/func_vector_relational_simd.inl
+
+namespace glm{
+namespace detail
+{
+// Placeholder: nothing is defined in this file yet; the namespace body is empty.
+}//namespace detail
+}//namespace glm

+ 28 - 14
glm/detail/setup.hpp

@@ -68,20 +68,34 @@
 
 // User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
 
-#define GLM_ARCH_PURE		0x00000000
-#define GLM_ARCH_X86		0x00000001
-#define GLM_ARCH_SSE2		0x00000002 | GLM_ARCH_X86
-#define GLM_ARCH_SSE3		0x00000004 | GLM_ARCH_SSE2
-#define GLM_ARCH_SSSE3		0x00000008 | GLM_ARCH_SSE3
-#define GLM_ARCH_SSE41		0x00000010 | GLM_ARCH_SSSE3
-#define GLM_ARCH_SSE42		0x00000020 | GLM_ARCH_SSE41
-#define GLM_ARCH_AVX		0x00000040 | GLM_ARCH_SSE42
-#define GLM_ARCH_AVX2		0x00000080 | GLM_ARCH_AVX
-#define GLM_ARCH_AVX512		0x00000100 | GLM_ARCH_AVX2 // Skylake subset
-#define GLM_ARCH_ARM		0x00000100
-#define GLM_ARCH_NEON		0x00000200 | GLM_ARCH_ARM
-#define GLM_ARCH_MIPS		0x00010000
-#define GLM_ARCH_PPC		0x01000000
+#define GLM_ARCH_X86_FLAG		0x00000001 // *_FLAG macros: the bit belonging to one architecture level; these are what `GLM_ARCH & ...` tests should use
+#define GLM_ARCH_SSE2_FLAG		0x00000002
+#define GLM_ARCH_SSE3_FLAG		0x00000004
+#define GLM_ARCH_SSSE3_FLAG		0x00000008
+#define GLM_ARCH_SSE41_FLAG		0x00000010
+#define GLM_ARCH_SSE42_FLAG		0x00000020
+#define GLM_ARCH_AVX_FLAG		0x00000040
+#define GLM_ARCH_AVX2_FLAG		0x00000080
+#define GLM_ARCH_AVX512_FLAG	0x00000100 // Skylake subset
+#define GLM_ARCH_ARM_FLAG		0x00000100 // NOTE(review): same value as GLM_ARCH_AVX512_FLAG, so `GLM_ARCH & GLM_ARCH_AVX512_FLAG` is non-zero when GLM_ARCH is GLM_ARCH_ARM or GLM_ARCH_NEON; confirm and consider a distinct bit
+#define GLM_ARCH_NEON_FLAG		0x00000200
+#define GLM_ARCH_MIPS_FLAG		0x00010000
+#define GLM_ARCH_PPC_FLAG		0x01000000
+// Composite masks: each level ORs its own flag with the mask of the level below it, so the value carries every inherited bit. The parentheses keep each expansion a single operand inside larger expressions.
+#define GLM_ARCH_PURE		(0x00000000)
+#define GLM_ARCH_X86		(GLM_ARCH_X86_FLAG)
+#define GLM_ARCH_SSE2		(GLM_ARCH_SSE2_FLAG | GLM_ARCH_X86)
+#define GLM_ARCH_SSE3		(GLM_ARCH_SSE3_FLAG | GLM_ARCH_SSE2)
+#define GLM_ARCH_SSSE3		(GLM_ARCH_SSSE3_FLAG | GLM_ARCH_SSE3)
+#define GLM_ARCH_SSE41		(GLM_ARCH_SSE41_FLAG | GLM_ARCH_SSSE3)
+#define GLM_ARCH_SSE42		(GLM_ARCH_SSE42_FLAG | GLM_ARCH_SSE41)
+#define GLM_ARCH_AVX		(GLM_ARCH_AVX_FLAG | GLM_ARCH_SSE42)
+#define GLM_ARCH_AVX2		(GLM_ARCH_AVX2_FLAG | GLM_ARCH_AVX)
+#define GLM_ARCH_AVX512		(GLM_ARCH_AVX512_FLAG | GLM_ARCH_AVX2) // Skylake subset
+#define GLM_ARCH_ARM		(GLM_ARCH_ARM_FLAG)
+#define GLM_ARCH_NEON		(GLM_ARCH_NEON_FLAG | GLM_ARCH_ARM)
+#define GLM_ARCH_MIPS		(GLM_ARCH_MIPS_FLAG)
+#define GLM_ARCH_PPC		(GLM_ARCH_PPC_FLAG)
 
 #if defined(GLM_FORCE_PURE)
 #	define GLM_ARCH GLM_ARCH_PURE

+ 2 - 2
glm/detail/type_vec4.hpp

@@ -49,7 +49,7 @@ namespace detail
 		};
 #	endif
 
-#	if (GLM_ARCH & GLM_ARCH_AVX)
+#	if (GLM_ARCH & GLM_ARCH_AVX_FLAG)
 		template <>
 		struct simd_data<double>
 		{
@@ -57,7 +57,7 @@ namespace detail
 		};
 #	endif
 
-#	if (GLM_ARCH & GLM_ARCH_AVX2)
+#	if (GLM_ARCH & GLM_ARCH_AVX2_FLAG)
 		template <>
 		struct simd_data<int64>
 		{

+ 10 - 10
glm/detail/type_vec4_simd.inl

@@ -1,7 +1,7 @@
 /// @ref core
 /// @file glm/detail/type_vec4_simd.inl
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
 namespace glm{
 namespace detail
@@ -72,7 +72,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 	template <typename T, precision P>
 	struct compute_vec4_and<T, P, true, 64>
 	{
@@ -96,7 +96,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 	template <typename T, precision P>
 	struct compute_vec4_or<T, P, true, 64>
 	{
@@ -120,7 +120,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 	template <typename T, precision P>
 	struct compute_vec4_xor<T, P, true, 64>
 	{
@@ -144,7 +144,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 	template <typename T, precision P>
 	struct compute_vec4_shift_left<T, P, true, 64>
 	{
@@ -168,7 +168,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 	template <typename T, precision P>
 	struct compute_vec4_shift_right<T, P, true, 64>
 	{
@@ -192,7 +192,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 	template <typename T, precision P>
 	struct compute_vec4_bitwise_not<T, P, true, 64>
 	{
@@ -248,7 +248,7 @@ namespace detail
 		data(_mm_set1_ps(s))
 	{}
 
-#	if GLM_ARCH & GLM_ARCH_AVX
+#	if GLM_ARCH & GLM_ARCH_AVX_FLAG
 	template <>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, lowp>::tvec4(double s) :
 		data(_mm256_set1_pd(s))
@@ -280,7 +280,7 @@ namespace detail
 		data(_mm_set1_epi32(s))
 	{}
 
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 	template <>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, lowp>::tvec4(int64 s) :
 		data(_mm256_set1_epi64x(s))
@@ -350,4 +350,4 @@ namespace detail
 */
 }//namespace glm
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

+ 5 - 5
glm/simd/common.h

@@ -3,12 +3,12 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
 //mad
 GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 		return _mm_fmadd_ss(a, b, c);
 #	else
 		return _mm_add_ss(_mm_mul_ss(a, b), c);
@@ -18,7 +18,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
 //mad
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_mad(__m128 a, __m128 b, __m128 c)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX2
+#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
 		return _mm_fmadd_ps(a, b, c);
 #	else
 		return _mm_add_ps(_mm_mul_ps(a, b), c);
@@ -33,7 +33,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_abs(__m128 x)
 
 GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x)
 {
-#	if GLM_ARCH & GLM_ARCH_SSSE3
+#	if GLM_ARCH & GLM_ARCH_SSSE3_FLAG
 		return _mm_sign_epi32(x, x);
 #	else
 		__m128i const sgn0 = _mm_srai_epi32(x, 31);
@@ -202,4 +202,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x)
 	return Mul3;
 }
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

+ 5 - 5
glm/simd/geometric.h

@@ -5,13 +5,13 @@
 
 #include "common.h"
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX
+#	if GLM_ARCH & GLM_ARCH_AVX_FLAG
 		return _mm_dp_ps(v1, v2, 0xff);
-#	elif GLM_ARCH & GLM_ARCH_SSE3
+#	elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
 		__m128 const Mul0 = _mm_mul_ps(v1, v2);
 		__m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0);
 		__m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0);
@@ -28,9 +28,9 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v1_dot(__m128 v1, __m128 v2)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX
+#	if GLM_ARCH & GLM_ARCH_AVX_FLAG
 		return _mm_dp_ps(v1, v2, 0xff);
-#	elif GLM_ARCH & GLM_ARCH_SSE3
+#	elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
 		__m128 const mul0 = _mm_mul_ps(v1, v2);
 		__m128 const had0 = _mm_hadd_ps(mul0, mul0);
 		__m128 const had1 = _mm_hadd_ps(had0, had0);

+ 2 - 2
glm/simd/integer.h

@@ -3,7 +3,7 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
 GLM_FUNC_QUALIFIER __m128i glm_i128_interleave(__m128i x)
 {
@@ -112,4 +112,4 @@ GLM_FUNC_QUALIFIER __m128i glm_i128_interleave2(__m128i x, __m128i y)
 	return Reg1;
 }
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

+ 2 - 2
glm/simd/matrix.h

@@ -5,7 +5,7 @@
 
 #include "geometric.h"
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
 static const __m128 GLM_VAR_USED _m128_rad_ps = _mm_set_ps1(3.141592653589793238462643383279f / 180.f);
 static const __m128 GLM_VAR_USED _m128_deg_ps = _mm_set_ps1(180.f / 3.141592653589793238462643383279f);
@@ -1029,4 +1029,4 @@ GLM_FUNC_QUALIFIER void glm_f32m4_outer(__m128 const & c, __m128 const & r, __m1
 	out[3] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(3, 3, 3, 3)));
 }
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

+ 2 - 2
glm/simd/packing.h

@@ -3,6 +3,6 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

+ 2 - 2
glm/simd/trigonometric.h

@@ -3,7 +3,7 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
 

+ 2 - 2
glm/simd/vector_relational.h

@@ -3,6 +3,6 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2
+#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2
+#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG