9 years ago · 1381a95938
--- a/glm/detail/func_geometric_simd.inl
+++ b/glm/detail/func_geometric_simd.inl
@@ -9,10 +9,7 @@ namespace detail
 
				 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const& x, tvec4<float, P> const& y)
			
 
				 		{
			
 
				 			__m128 const dot0 = glm_dot_ss(x.data, y.data);
			
 
				-
			
 
				-			float Result = 0;
			
 
				-			_mm_store_ss(&Result, dot0);
			
 
				-			return Result;
			
 
				+			return _mm_cvtss_f32(dot0);
			
 
				 		}
			
 
				 	};
			
 
				 }//namespace detail
			
--- a/glm/simd/common.h
+++ b/glm/simd/common.h
@@ -3,26 +3,6 @@
 
				 
			
 
				 #pragma once
			
 
				 
			
 
				-#if(GLM_COMPILER & GLM_COMPILER_VC)
			
 
				-#pragma warning(push)
			
 
				-#pragma warning(disable : 4510 4512 4610)
			
 
				-#endif
			
 
				-
			
 
				-	union ieee754_QNAN
			
 
				-	{
			
 
				-		const float f;
			
 
				-		struct i
			
 
				-		{
			
 
				-			const unsigned int mantissa:23, exp:8, sign:1;
			
 
				-		};
			
 
				-
			
 
				-		ieee754_QNAN() : f(0.0)/*, mantissa(0x7FFFFF), exp(0xFF), sign(0x0)*/ {}
			
 
				-	};
			
 
				-
			
 
				-#if(GLM_COMPILER & GLM_COMPILER_VC)
			
 
				-#pragma warning(pop)
			
 
				-#endif
			
 
				-
			
 
				 static const __m128 GLM_VAR_USED glm_zero = _mm_setzero_ps();
			
 
				 static const __m128 GLM_VAR_USED glm_one = _mm_set_ps1(1.0f);
			
 
				 static const __m128 GLM_VAR_USED glm_half = _mm_set_ps1(0.5f);
			
@@ -30,15 +10,13 @@ static const __m128 GLM_VAR_USED glm_minus_one = _mm_set_ps1(-1.0f);
 
				 static const __m128 GLM_VAR_USED glm_two = _mm_set_ps1(2.0f);
			
 
				 static const __m128 GLM_VAR_USED glm_three = _mm_set_ps1(3.0f);
			
 
				 
			
 
				-static const ieee754_QNAN glm_abs_mask;
			
 
				-static const __m128 GLM_VAR_USED glm_abs4_mask = _mm_set_ps1(glm_abs_mask.f);
			
 
				 static const __m128 GLM_VAR_USED glm_epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(0x80000000)));
			
 
				 static const __m128 GLM_VAR_USED glm_ps_2pow23 = _mm_set_ps1(8388608.0f);
			
 
				 static const __m128 GLM_VAR_USED glm_ps_1 = _mm_set_ps1(1.0f);
			
 
				 
			
 
				 GLM_FUNC_QUALIFIER __m128 glm_abs_ps(__m128 x)
			
 
				 {
			
 
				-	return _mm_and_ps(glm_abs4_mask, x);
			
 
				+	return _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)));
			
 
				 }
			
 
				 
			
 
				 //sign
			
--- a/glm/simd/geometric.h
+++ b/glm/simd/geometric.h
@@ -9,6 +9,11 @@ GLM_FUNC_QUALIFIER __m128 glm_dot_ps(__m128 v1, __m128 v2)
 
				 {
			
 
				 #	if GLM_ARCH & GLM_ARCH_AVX
			
 
				 		return _mm_dp_ps(v1, v2, 0xff);
			
 
				+#	elif GLM_ARCH & GLM_ARCH_SSE3
			
 
				+		__m128 const Mul0 = _mm_mul_ps(v1, v2);
			
 
				+		__m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0);
			
 
				+		__m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0);
			
 
				+		return Hadd1;
			
 
				 #	else
			
 
				 		__m128 const mul0 = _mm_mul_ps(v1, v2);
			
 
				 		__m128 const swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
			
@@ -21,12 +26,21 @@ GLM_FUNC_QUALIFIER __m128 glm_dot_ps(__m128 v1, __m128 v2)
 
				 
			
 
				 GLM_FUNC_QUALIFIER __m128 glm_dot_ss(__m128 v1, __m128 v2)
			
 
				 {
			
 
				-	__m128 const mul0 = _mm_mul_ps(v1, v2);
			
 
				-	__m128 const mov0 = _mm_movehl_ps(mul0, mul0);
			
 
				-	__m128 const add0 = _mm_add_ps(mov0, mul0);
			
 
				-	__m128 const swp1 = _mm_shuffle_ps(add0, add0, 1);
			
 
				-	__m128 const add1 = _mm_add_ss(add0, swp1);
			
 
				-	return add1;
			
 
				+#	if GLM_ARCH & GLM_ARCH_AVX
			
 
				+		return _mm_dp_ps(v1, v2, 0xff);
			
 
				+#	elif GLM_ARCH & GLM_ARCH_SSE3
			
 
				+		__m128 const Mul0 = _mm_mul_ps(v1, v2);
			
 
				+		__m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0);
			
 
				+		__m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0);
			
 
				+		return Hadd1;
			
 
				+#	else
			
 
				+		__m128 const mul0 = _mm_mul_ps(v1, v2);
			
 
				+		__m128 const mov0 = _mm_movehl_ps(mul0, mul0);
			
 
				+		__m128 const add0 = _mm_add_ps(mov0, mul0);
			
 
				+		__m128 const swp1 = _mm_shuffle_ps(add0, add0, 1);
			
 
				+		__m128 const add1 = _mm_add_ss(add0, swp1);
			
 
				+		return add1;
			
 
				+#	endif
			
 
				 }
			
 
				 
			
 
				 GLM_FUNC_QUALIFIER __m128 glm_len_ps(__m128 x)