Browse Source

Updated fast versions

Christophe Riccio 15 years ago
parent
commit
f2a8d5a976
2 changed files with 30 additions and 8 deletions
  1. 12 4
      glm/gtx/simd_vec4.hpp
  2. 18 4
      glm/gtx/simd_vec4.inl

+ 12 - 4
glm/gtx/simd_vec4.hpp

@@ -379,6 +379,12 @@ namespace glm
 		detail::fvec4SIMD simdNormalize(
 			detail::fvec4SIMD const & x);
 
+		//! Returns a vector in the same direction as x but with length of 1.
+		//! Less accurate but much faster than simdNormalize.
+		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
+		detail::fvec4SIMD simdFastNormalize(
+			detail::fvec4SIMD const & x);
+
 		//! If dot(Nref, I) < 0.0, return N, otherwise, return -N.
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
 		detail::fvec4SIMD simdFaceforward(
@@ -407,12 +413,14 @@ namespace glm
 		detail::fvec4SIMD simdSqrt(
 			detail::fvec4SIMD const & x);
 
-		//! Returns the positive square root of x with the nicest quality but very slow
+		//! Returns the positive square root of x with the nicest quality but very slow.
+		//! Slightly more accurate but much slower than simdSqrt.
 		//! (From GLM_GTX_simd_vec4 extension, exponential function)
 		detail::fvec4SIMD simdNiceSqrt(
 			detail::fvec4SIMD const & x);
 
-		//! Returns the positive square root of x but less accurate than simdSqrt but much faster.
+		//! Returns the positive square root of x.
+		//! Less accurate but much faster than simdSqrt.
 		//! (From GLM_GTX_simd_vec4 extension, exponential function)
 		detail::fvec4SIMD simdFastSqrt(
 			detail::fvec4SIMD const & x);
@@ -422,8 +430,8 @@ namespace glm
 		detail::fvec4SIMD simdInversesqrt(
 			detail::fvec4SIMD const & x);
 
-		//! Returns the reciprocal of the positive square root of x, 
-		//! faster than simdInversesqrt but less accurate.
+		//! Returns the reciprocal of the positive square root of x.
+		//! Faster than simdInversesqrt but less accurate.
 		//! (From GLM_GTX_simd_vec4 extension, exponential function)
 		detail::fvec4SIMD simdFastInversesqrt(
 			detail::fvec4SIMD const & x);

+ 18 - 4
glm/gtx/simd_vec4.inl

@@ -585,7 +585,7 @@ namespace glm
 			detail::fvec4SIMD const & y
 		)
 		{
-			return detail::sse_dot_ss(x.Data, y.Data);
+			return detail::sse_dot_ps(x.Data, y.Data);
 		}
 
 		inline detail::fvec4SIMD simdCross
@@ -602,7 +602,21 @@ namespace glm
 			detail::fvec4SIMD const & x
 		)
 		{
-			return detail::sse_nrm_ps(x.Data);
+			__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
+			__m128 isr0 = simdInversesqrt(dot0).Data;
+			__m128 mul0 = _mm_mul_ps(x.Data, isr0);
+			return mul0;
+		}
+
+		inline detail::fvec4SIMD simdFastNormalize
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
+			__m128 isr0 = simdFastInversesqrt(dot0).Data;
+			__m128 mul0 = _mm_mul_ps(x.Data, isr0);
+			return mul0;
 		}
 
 		inline detail::fvec4SIMD simdFaceforward
@@ -636,7 +650,7 @@ namespace glm
 
 		inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x)
 		{
-			return _mm_mul_ps(simdInversesqrt(x.Data), x.Data);
+			return _mm_mul_ps(simdInversesqrt(x.Data).Data, x.Data);
 		}
 
 		inline detail::fvec4SIMD simdNiceSqrt(detail::fvec4SIMD const & x)
@@ -646,7 +660,7 @@ namespace glm
 
 		inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x)
 		{
-			return _mm_mul_ps(simdFastInversesqrt(x.Data), x.Data);
+			return _mm_mul_ps(simdFastInversesqrt(x.Data).Data, x.Data);
 		}
 
 		// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration