
Renamed instruction set flags

Christophe Riccio · 9 years ago
commit 7fe2f5fe65
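
This commit is a mechanical rename: every GLM_ARCH_*_FLAG macro becomes GLM_ARCH_*_BIT, both where the masks are defined in glm/simd/platform.h and in every `#if GLM_ARCH & ...` test that consumes them. As a minimal sketch (not part of the diff; it assumes user code that keys dispatch off GLM_ARCH), a caller updates like so:

    // Before: dispatch on the old *_FLAG spelling.
    #if GLM_ARCH & GLM_ARCH_SSE2_FLAG
    	__m128 const Sum = _mm_add_ps(a, b);
    #endif

    // After: the identical bit test, new *_BIT spelling.
    #if GLM_ARCH & GLM_ARCH_SSE2_BIT
    	__m128 const Sum = _mm_add_ps(a, b);
    #endif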

+ 2 - 2
glm/detail/func_common_simd.inl

@@ -1,7 +1,7 @@
 /// @ref core
 /// @file glm/detail/func_common_simd.inl
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
 #include "../simd/common.h"
 
@@ -135,4 +135,4 @@ namespace detail
 }//namespace detail
 }//namespace glm
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 2 - 2
glm/detail/func_geometric_simd.inl

@@ -1,6 +1,6 @@
 #include "../simd/geometric.h"
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
 namespace glm{
 namespace detail
@@ -17,4 +17,4 @@ namespace detail
 }//namespace detail
 }//namespace glm
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 1 - 1
glm/detail/func_matrix_simd.inl

@@ -8,7 +8,7 @@
 namespace glm{
 namespace detail
 {
-#	if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#	if GLM_ARCH & GLM_ARCH_SSE2_BIT
 		template <precision P>
 		struct compute_inverse<tmat4x4, float, P>
 		{

+ 3 - 3
glm/detail/type_vec4.hpp

@@ -29,7 +29,7 @@ namespace detail
 		typedef T type[4];
 	};
 
-#	if (GLM_ARCH & GLM_ARCH_SSE2_FLAG)
+#	if (GLM_ARCH & GLM_ARCH_SSE2_BIT)
 		template <>
 		struct simd_data<float>
 		{
@@ -49,7 +49,7 @@ namespace detail
 		};
 #	endif
 
-#	if (GLM_ARCH & GLM_ARCH_AVX_FLAG)
+#	if (GLM_ARCH & GLM_ARCH_AVX_BIT)
 		template <>
 		struct simd_data<double>
 		{
@@ -57,7 +57,7 @@ namespace detail
 		};
 #	endif
 
-#	if (GLM_ARCH & GLM_ARCH_AVX2_FLAG)
+#	if (GLM_ARCH & GLM_ARCH_AVX2_BIT)
 		template <>
 		struct simd_data<int64>
 		{

+ 10 - 10
glm/detail/type_vec4_simd.inl

@@ -1,7 +1,7 @@
 /// @ref core
 /// @file glm/detail/type_tvec4_simd.inl
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
 namespace glm{
 namespace detail
@@ -72,7 +72,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
 	struct compute_vec4_and<T, P, true, 64>
 	{
@@ -96,7 +96,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
 	struct compute_vec4_or<T, P, true, 64>
 	{
@@ -120,7 +120,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
 	struct compute_vec4_xor<T, P, true, 64>
 	{
@@ -144,7 +144,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
 	struct compute_vec4_shift_left<T, P, true, 64>
 	{
@@ -168,7 +168,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
 	struct compute_vec4_shift_right<T, P, true, 64>
 	{
@@ -192,7 +192,7 @@ namespace detail
 		}
 	};
 
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
 	struct compute_vec4_bitwise_not<T, P, true, 64>
 	{
@@ -248,7 +248,7 @@ namespace detail
 		data(_mm_set1_ps(s))
 	{}
 
-#	if GLM_ARCH & GLM_ARCH_AVX_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template <>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, lowp>::tvec4(double s) :
 		data(_mm256_set1_pd(s))
@@ -280,7 +280,7 @@ namespace detail
 		data(_mm_set1_epi32(s))
 	{}
 
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, lowp>::tvec4(int64 s) :
 		data(_mm256_set1_epi64x(s))
@@ -350,4 +350,4 @@ namespace detail
 */
 }//namespace glm
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 1 - 1
glm/gtx/simd_mat4.hpp

@@ -17,7 +17,7 @@
 
 #if(GLM_ARCH != GLM_ARCH_PURE)
 
-#if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
+#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
 #	include "../detail/intrinsic_matrix.hpp"
 #	include "../gtx/simd_vec4.hpp"
 #else

+ 1 - 1
glm/gtx/simd_quat.hpp

@@ -19,7 +19,7 @@
 
 #if(GLM_ARCH != GLM_ARCH_PURE)
 
-#if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
+#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
 #   include "../gtx/simd_mat4.hpp"
 #else
 #	error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics"

+ 1 - 1
glm/gtx/simd_quat.inl

@@ -122,7 +122,7 @@ GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const &
     __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
     __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
 
-#   if(GLM_ARCH & GLM_ARCH_SSE41_FLAG)
+#   if(GLM_ARCH & GLM_ARCH_SSE41_BIT)
     __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f), 0xff);
     __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f), 0xff);
     __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f), 0xff);

+ 1 - 1
glm/gtx/simd_vec4.hpp

@@ -17,7 +17,7 @@
 
 #if(GLM_ARCH != GLM_ARCH_PURE)
 
-#if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
+#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
 #	include "../detail/intrinsic_common.hpp"
 #	include "../detail/intrinsic_geometric.hpp"
 #	include "../detail/intrinsic_integer.hpp"

+ 5 - 5
glm/simd/common.h

@@ -5,12 +5,12 @@
 
 #include "platform.h"
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
 //mad
 GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 		return _mm_fmadd_ss(a, b, c);
 #	else
 		return _mm_add_ss(_mm_mul_ss(a, b), c);
@@ -20,7 +20,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
 //mad
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_mad(__m128 a, __m128 b, __m128 c)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 		return _mm_fmadd_ps(a, b, c);
 #	else
 		return _mm_add_ps(_mm_mul_ps(a, b), c);
@@ -35,7 +35,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_abs(__m128 x)
 
 GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x)
 {
-#	if GLM_ARCH & GLM_ARCH_SSSE3_FLAG
+#	if GLM_ARCH & GLM_ARCH_SSSE3_BIT
 		return _mm_sign_epi32(x, x);
 #	else
 		__m128i const sgn0 = _mm_srai_epi32(x, 31);
@@ -204,4 +204,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x)
 	return Mul3;
 }
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
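
For context, a usage sketch (hypothetical caller, not part of the commit): glm_f32v4_mad computes a*b + c per float lane, through _mm_fmadd_ps when the AVX2 path is compiled in and through a mul/add pair otherwise:

    // Hypothetical caller of the helper above; values chosen for illustration.
    __m128 const x = _mm_set1_ps(2.0f);
    __m128 const y = _mm_set1_ps(3.0f);
    __m128 const z = _mm_set1_ps(1.0f);
    __m128 const r = glm_f32v4_mad(x, y, z); // every lane holds 2*3 + 1 = 7.0f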

+ 41 - 42
glm/simd/geometric.h

@@ -5,17 +5,17 @@
 
 #include "common.h"
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
 		return _mm_dp_ps(v1, v2, 0xff);
-#	elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
-		__m128 const Mul0 = _mm_mul_ps(v1, v2);
-		__m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0);
-		__m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0);
-		return Hadd1;
+#	elif GLM_ARCH & GLM_ARCH_SSE3_BIT
+		__m128 const mul0 = _mm_mul_ps(v1, v2);
+		__m128 const hadd0 = _mm_hadd_ps(mul0, mul0);
+		__m128 const hadd1 = _mm_hadd_ps(hadd0, hadd0);
+		return hadd1;
 #	else
 		__m128 const mul0 = _mm_mul_ps(v1, v2);
 		__m128 const swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
@@ -28,9 +28,9 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v1_dot(__m128 v1, __m128 v2)
 {
-#	if GLM_ARCH & GLM_ARCH_AVX_FLAG
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
 		return _mm_dp_ps(v1, v2, 0xff);
-#	elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
+#	elif GLM_ARCH & GLM_ARCH_SSE3_BIT
 		__m128 const mul0 = _mm_mul_ps(v1, v2);
 		__m128 const had0 = _mm_hadd_ps(mul0, mul0);
 		__m128 const had1 = _mm_hadd_ps(had0, had0);
@@ -54,69 +54,68 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_len(__m128 x)
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_dst(__m128 p0, __m128 p1)
 {
-	__m128 sub0 = _mm_sub_ps(p0, p1);
-	__m128 len0 = glm_f32v4_len(sub0);
+	__m128 const sub0 = _mm_sub_ps(p0, p1);
+	__m128 const len0 = glm_f32v4_len(sub0);
 	return len0;
 }
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_xpd(__m128 v1, __m128 v2)
 {
-	__m128 swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
-	__m128 swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
-	__m128 swp2 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
-	__m128 swp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
-	__m128 mul0 = _mm_mul_ps(swp0, swp3);
-	__m128 mul1 = _mm_mul_ps(swp1, swp2);
-	__m128 sub0 = _mm_sub_ps(mul0, mul1);
+	__m128 const swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
+	__m128 const swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
+	__m128 const swp2 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
+	__m128 const swp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
+	__m128 const mul0 = _mm_mul_ps(swp0, swp3);
+	__m128 const mul1 = _mm_mul_ps(swp1, swp2);
+	__m128 const sub0 = _mm_sub_ps(mul0, mul1);
 	return sub0;
 }
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_nrm(__m128 v)
 {
-	__m128 dot0 = glm_f32v4_dot(v, v);
-	__m128 isr0 = _mm_rsqrt_ps(dot0);
-	__m128 mul0 = _mm_mul_ps(v, isr0);
+	__m128 const dot0 = glm_f32v4_dot(v, v);
+	__m128 const isr0 = _mm_rsqrt_ps(dot0);
+	__m128 const mul0 = _mm_mul_ps(v, isr0);
 	return mul0;
 }
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_ffd(__m128 N, __m128 I, __m128 Nref)
 {
-	__m128 dot0 = glm_f32v4_dot(Nref, I);
-	__m128 sgn0 = glm_f32v4_sgn(dot0);
-	__m128 mul0 = _mm_mul_ps(sgn0, _mm_set1_ps(-1.0f));
-	__m128 mul1 = _mm_mul_ps(N, mul0);
+	__m128 const dot0 = glm_f32v4_dot(Nref, I);
+	__m128 const sgn0 = glm_f32v4_sgn(dot0);
+	__m128 const mul0 = _mm_mul_ps(sgn0, _mm_set1_ps(-1.0f));
+	__m128 const mul1 = _mm_mul_ps(N, mul0);
 	return mul1;
 }
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfe(__m128 I, __m128 N)
 {
-	__m128 dot0 = glm_f32v4_dot(N, I);
-	__m128 mul0 = _mm_mul_ps(N, dot0);
-	__m128 mul1 = _mm_mul_ps(mul0, _mm_set1_ps(2.0f));
-	__m128 sub0 = _mm_sub_ps(I, mul1);
+	__m128 const dot0 = glm_f32v4_dot(N, I);
+	__m128 const mul0 = _mm_mul_ps(N, dot0);
+	__m128 const mul1 = _mm_mul_ps(mul0, _mm_set1_ps(2.0f));
+	__m128 const sub0 = _mm_sub_ps(I, mul1);
 	return sub0;
 }
 
 GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfa(__m128 I, __m128 N, __m128 eta)
 {
-	__m128 dot0 = glm_f32v4_dot(N, I);
-	__m128 mul0 = _mm_mul_ps(eta, eta);
-	__m128 mul1 = _mm_mul_ps(dot0, dot0);
-	__m128 sub0 = _mm_sub_ps(_mm_set1_ps(1.0f), mul0);
-	__m128 sub1 = _mm_sub_ps(_mm_set1_ps(1.0f), mul1);
-	__m128 mul2 = _mm_mul_ps(sub0, sub1);
+	__m128 const dot0 = glm_f32v4_dot(N, I);
+	__m128 const mul0 = _mm_mul_ps(eta, eta);
+	__m128 const mul1 = _mm_mul_ps(dot0, dot0);
+	__m128 const sub0 = _mm_sub_ps(_mm_set1_ps(1.0f), mul0);
+	__m128 const sub1 = _mm_sub_ps(_mm_set1_ps(1.0f), mul1);
+	__m128 const mul2 = _mm_mul_ps(sub0, sub1);
 
 	if(_mm_movemask_ps(_mm_cmplt_ss(mul2, _mm_set1_ps(0.0f))) == 0)
 		return _mm_set1_ps(0.0f);
 
-	__m128 sqt0 = _mm_sqrt_ps(mul2);
-	__m128 mul3 = _mm_mul_ps(eta, dot0);
-	__m128 add0 = _mm_add_ps(mul3, sqt0);
-	__m128 mul4 = _mm_mul_ps(add0, N);
-	__m128 mul5 = _mm_mul_ps(eta, I);
-	__m128 sub2 = _mm_sub_ps(mul5, mul4);
+	__m128 const sqt0 = _mm_sqrt_ps(mul2);
+	__m128 const mad0 = glm_f32v4_mad(eta, dot0, sqt0);
+	__m128 const mul4 = _mm_mul_ps(mad0, N);
+	__m128 const mul5 = _mm_mul_ps(eta, I);
+	__m128 const sub2 = _mm_sub_ps(mul5, mul4);
 
 	return sub2;
 }
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
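
Likewise a usage sketch for the dot helper (hypothetical values, not part of the commit): glm_f32v4_dot broadcasts the four-component dot product to every lane, whichever instruction-set branch gets compiled:

    // _mm_set_ps lists components from highest (w) down to lowest (x).
    __m128 const v1 = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
    __m128 const v2 = _mm_set_ps(8.0f, 7.0f, 6.0f, 5.0f);
    __m128 const d  = glm_f32v4_dot(v1, v2); // each lane: 1*5 + 2*6 + 3*7 + 4*8 = 70.0f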

+ 2 - 2
glm/simd/integer.h

@@ -3,7 +3,7 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
 GLM_FUNC_QUALIFIER __m128i glm_i128_interleave(__m128i x)
 {
@@ -112,4 +112,4 @@ GLM_FUNC_QUALIFIER __m128i glm_i128_interleave2(__m128i x, __m128i y)
 	return Reg1;
 }
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 2 - 2
glm/simd/matrix.h

@@ -5,7 +5,7 @@
 
 #include "geometric.h"
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
 static const __m128 GLM_VAR_USED _m128_rad_ps = _mm_set_ps1(3.141592653589793238462643383279f / 180.f);
 static const __m128 GLM_VAR_USED _m128_deg_ps = _mm_set_ps1(180.f / 3.141592653589793238462643383279f);
@@ -1029,4 +1029,4 @@ GLM_FUNC_QUALIFIER void glm_f32m4_outer(__m128 const & c, __m128 const & r, __m1
 	out[3] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(3, 3, 3, 3)));
 }
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 2 - 2
glm/simd/packing.h

@@ -3,6 +3,6 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 33 - 33
glm/simd/platform.h

@@ -268,34 +268,34 @@
 
 // User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
 
-#define GLM_ARCH_X86_FLAG		0x00000001
-#define GLM_ARCH_SSE2_FLAG		0x00000002
-#define GLM_ARCH_SSE3_FLAG		0x00000004
-#define GLM_ARCH_SSSE3_FLAG		0x00000008
-#define GLM_ARCH_SSE41_FLAG		0x00000010
-#define GLM_ARCH_SSE42_FLAG		0x00000020
-#define GLM_ARCH_AVX_FLAG		0x00000040
-#define GLM_ARCH_AVX2_FLAG		0x00000080
-#define GLM_ARCH_AVX512_FLAG	0x00000100 // Skylake subset
-#define GLM_ARCH_ARM_FLAG		0x00000100
-#define GLM_ARCH_NEON_FLAG		0x00000200
-#define GLM_ARCH_MIPS_FLAG		0x00010000
-#define GLM_ARCH_PPC_FLAG		0x01000000
+#define GLM_ARCH_X86_BIT		0x00000001
+#define GLM_ARCH_SSE2_BIT		0x00000002
+#define GLM_ARCH_SSE3_BIT		0x00000004
+#define GLM_ARCH_SSSE3_BIT		0x00000008
+#define GLM_ARCH_SSE41_BIT		0x00000010
+#define GLM_ARCH_SSE42_BIT		0x00000020
+#define GLM_ARCH_AVX_BIT		0x00000040
+#define GLM_ARCH_AVX2_BIT		0x00000080
+#define GLM_ARCH_AVX512_BIT		0x00000100 // Skylake subset
+#define GLM_ARCH_ARM_BIT		0x00000100
+#define GLM_ARCH_NEON_BIT		0x00000200
+#define GLM_ARCH_MIPS_BIT		0x00010000
+#define GLM_ARCH_PPC_BIT		0x01000000
 
 #define GLM_ARCH_PURE		(0x00000000)
-#define GLM_ARCH_X86		(GLM_ARCH_X86_FLAG)
-#define GLM_ARCH_SSE2		(GLM_ARCH_SSE2_FLAG | GLM_ARCH_X86)
-#define GLM_ARCH_SSE3		(GLM_ARCH_SSE3_FLAG | GLM_ARCH_SSE2)
-#define GLM_ARCH_SSSE3		(GLM_ARCH_SSSE3_FLAG | GLM_ARCH_SSE3)
-#define GLM_ARCH_SSE41		(GLM_ARCH_SSE41_FLAG | GLM_ARCH_SSSE3)
-#define GLM_ARCH_SSE42		(GLM_ARCH_SSE42_FLAG | GLM_ARCH_SSE41)
-#define GLM_ARCH_AVX		(GLM_ARCH_AVX_FLAG | GLM_ARCH_SSE42)
-#define GLM_ARCH_AVX2		(GLM_ARCH_AVX2_FLAG | GLM_ARCH_AVX)
-#define GLM_ARCH_AVX512		(GLM_ARCH_AVX512_FLAG | GLM_ARCH_AVX2) // Skylake subset
-#define GLM_ARCH_ARM		(GLM_ARCH_ARM_FLAG)
-#define GLM_ARCH_NEON		(GLM_ARCH_NEON_FLAG | GLM_ARCH_ARM)
-#define GLM_ARCH_MIPS		(GLM_ARCH_MIPS_FLAG)
-#define GLM_ARCH_PPC		(GLM_ARCH_PPC_FLAG)
+#define GLM_ARCH_X86		(GLM_ARCH_X86_BIT)
+#define GLM_ARCH_SSE2		(GLM_ARCH_SSE2_BIT | GLM_ARCH_X86)
+#define GLM_ARCH_SSE3		(GLM_ARCH_SSE3_BIT | GLM_ARCH_SSE2)
+#define GLM_ARCH_SSSE3		(GLM_ARCH_SSSE3_BIT | GLM_ARCH_SSE3)
+#define GLM_ARCH_SSE41		(GLM_ARCH_SSE41_BIT | GLM_ARCH_SSSE3)
+#define GLM_ARCH_SSE42		(GLM_ARCH_SSE42_BIT | GLM_ARCH_SSE41)
+#define GLM_ARCH_AVX		(GLM_ARCH_AVX_BIT | GLM_ARCH_SSE42)
+#define GLM_ARCH_AVX2		(GLM_ARCH_AVX2_BIT | GLM_ARCH_AVX)
+#define GLM_ARCH_AVX512		(GLM_ARCH_AVX512_BIT | GLM_ARCH_AVX2) // Skylake subset
+#define GLM_ARCH_ARM		(GLM_ARCH_ARM_BIT)
+#define GLM_ARCH_NEON		(GLM_ARCH_NEON_BIT | GLM_ARCH_ARM)
+#define GLM_ARCH_MIPS		(GLM_ARCH_MIPS_BIT)
+#define GLM_ARCH_PPC		(GLM_ARCH_PPC_BIT)
 
 #if defined(GLM_FORCE_PURE)
 #	define GLM_ARCH GLM_ARCH_PURE
@@ -383,18 +383,18 @@
 #	include <intrin.h>
 #endif
 
-#if GLM_ARCH & GLM_ARCH_AVX2_FLAG
+#if GLM_ARCH & GLM_ARCH_AVX2_BIT
 #	include <immintrin.h>
-#elif GLM_ARCH & GLM_ARCH_AVX_FLAG
+#elif GLM_ARCH & GLM_ARCH_AVX_BIT
 #	include <immintrin.h>
-#elif GLM_ARCH & GLM_ARCH_SSE42_FLAG
+#elif GLM_ARCH & GLM_ARCH_SSE42_BIT
 #	include <nmmintrin.h>
-#elif GLM_ARCH & GLM_ARCH_SSE41_FLAG
+#elif GLM_ARCH & GLM_ARCH_SSE41_BIT
 #	include <smmintrin.h>
-#elif GLM_ARCH & GLM_ARCH_SSSE3_FLAG
+#elif GLM_ARCH & GLM_ARCH_SSSE3_BIT
 #	include <tmmintrin.h>
-#elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
+#elif GLM_ARCH & GLM_ARCH_SSE3_BIT
 #	include <pmmintrin.h>
-#elif GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#elif GLM_ARCH & GLM_ARCH_SSE2_BIT
 #	include <emmintrin.h>
 #endif//GLM_ARCH
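
Each composed GLM_ARCH_* level above is the union of its own bit and every tier it implies, so a single bitwise AND answers "is at least this feature level enabled". A minimal sketch, assuming only the macros defined in the hunk above:

    #if GLM_ARCH & GLM_ARCH_SSE2_BIT
    	// reached at every x86 SIMD level from SSE2 up through AVX512
    #endif
    #if GLM_ARCH & GLM_ARCH_AVX2_BIT
    	// reached only when AVX2 or a higher tier is selected
    #endif

Note that in this revision GLM_ARCH_AVX512_BIT and GLM_ARCH_ARM_BIT share the value 0x00000100, so the x86 and ARM bit families cannot be distinguished if tested within the same build.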

+ 2 - 2
glm/simd/trigonometric.h

@@ -3,7 +3,7 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
 

+ 2 - 2
glm/simd/vector_relational.h

@@ -3,6 +3,6 @@
 
 #pragma once
 
-#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
-#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 7 - 7
test/core/core_setup_message.cpp

@@ -176,23 +176,23 @@ int test_instruction_set()
 
 	if(GLM_ARCH == GLM_ARCH_PURE)
 		std::printf("GLM_ARCH_PURE ");
-	if(GLM_ARCH & GLM_ARCH_ARM_FLAG)
+	if(GLM_ARCH & GLM_ARCH_ARM_BIT)
 		std::printf("ARM ");
-	if(GLM_ARCH & GLM_ARCH_NEON_FLAG)
+	if(GLM_ARCH & GLM_ARCH_NEON_BIT)
 		std::printf("NEON ");
 	if(GLM_ARCH & GLM_ARCH_AVX2)
 		std::printf("AVX2 ");
 	if(GLM_ARCH & GLM_ARCH_AVX)
 		std::printf("AVX ");
-	if(GLM_ARCH & GLM_ARCH_SSE42_FLAG)
+	if(GLM_ARCH & GLM_ARCH_SSE42_BIT)
 		std::printf("SSE4.2 ");
-	if(GLM_ARCH & GLM_ARCH_SSE41_FLAG)
+	if(GLM_ARCH & GLM_ARCH_SSE41_BIT)
 		std::printf("SSE4.1 ");
-	if(GLM_ARCH & GLM_ARCH_SSSE3_FLAG)
+	if(GLM_ARCH & GLM_ARCH_SSSE3_BIT)
 		std::printf("SSSE3 ");
-	if(GLM_ARCH & GLM_ARCH_SSE3_FLAG)
+	if(GLM_ARCH & GLM_ARCH_SSE3_BIT)
 		std::printf("SSE3 ");
-	if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
+	if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
 		std::printf("SSE2 ");
 
 	std::printf("\n");

+ 4 - 4
test/gtc/gtc_bitfield.cpp

@@ -505,7 +505,7 @@ namespace bitfieldInterleave
 				assert(A == C);
 				assert(A == D);
 
-#				if GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#				if GLM_ARCH & GLM_ARCH_SSE2_BIT
 					glm::uint64 E = sseBitfieldInterleave(x, y);
 					glm::uint64 F = sseUnalignedBitfieldInterleave(x, y);
 					assert(A == E);
@@ -515,7 +515,7 @@
 					glm::uint64 Result[2];
 					_mm_storeu_si128((__m128i*)Result, G);
 					assert(A == Result[0]);
-#				endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
+#				endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
 			}
 		}
 
@@ -629,7 +629,7 @@ namespace bitfieldInterleave
 			std::printf("glm::detail::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
 		}
 
-#		if(GLM_ARCH & GLM_ARCH_SSE2_FLAG && !(GLM_COMPILER & GLM_COMPILER_GCC))
+#		if(GLM_ARCH & GLM_ARCH_SSE2_BIT && !(GLM_COMPILER & GLM_COMPILER_GCC))
 		{
 			// SIMD
 			std::vector<__m128i> SimdData;
@@ -648,7 +648,7 @@ namespace bitfieldInterleave
 
 			std::printf("_mm_bit_interleave_si128 Time %d clocks\n", static_cast<unsigned int>(Time));
 		}
-#		endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
 
 		return 0;
 	}