15 years ago · d77694f8ac
--- a/glm/core/intrinsic_common.inl
+++ b/glm/core/intrinsic_common.inl
@@ -258,7 +258,7 @@ inline __m128 sse_inf_ps(__m128 x)
 
				 }
			
 
				 
			
 
				 // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
			
 
				-// By Elan Ruskin, 
			
 
				+// By Elan Ruskin, http://assemblyrequired.crashworks.org/
			
 
				 inline __m128 sse_sqrt_wip_ss(__m128 const & x)
			
 
				 {
			
 
				 	__m128 recip = _mm_rsqrt_ss(x);  // "estimate" opcode
			
--- a/glm/gtx/simd_vec4.hpp
+++ b/glm/gtx/simd_vec4.hpp
@@ -401,6 +401,28 @@ namespace glm
 
				 			detail::fvec4SIMD const & I,
			
 
				 			detail::fvec4SIMD const & N,
			
 
				 			float const & eta);
			
 
				+
			
 
				+		//! Returns the positive square root of each component of x.
			
 
				+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
			
 
				+		detail::fvec4SIMD simdSqrt(
			
 
				+			detail::fvec4SIMD const & x);
			
 
				+
			
 
				+		//! Returns the positive square root of each component of x, with accuracy slightly lower than or equal to simdSqrt but much faster.
			
 
				+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
			
 
				+		detail::fvec4SIMD simdFastSqrt(
			
 
				+			detail::fvec4SIMD const & x);
			
 
				+
			
 
				+		//! Returns the reciprocal of the positive square root of each component of x (rsqrt estimate refined by one Newton-Raphson iteration).
			
 
				+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
			
 
				+		detail::fvec4SIMD simdInversesqrt(
			
 
				+			detail::fvec4SIMD const & x);
			
 
				+
			
 
				+		//! Returns the reciprocal of the positive square root of each component of x,
			
 
				+		//! faster than simdInversesqrt but less accurate (unrefined rsqrt estimate).
			
 
				+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
			
 
				+		detail::fvec4SIMD simdFastInversesqrt(
			
 
				+			detail::fvec4SIMD const & x);
			
 
				+
			
 
				 	}//namespace simd_vec4
			
 
				 	}//namespace gtx
			
 
				 }//namespace glm
			
--- a/glm/gtx/simd_vec4.inl
+++ b/glm/gtx/simd_vec4.inl
@@ -634,6 +634,34 @@ namespace glm
 
				 			return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
			
 
				 		}
			
 
				 
			
 
				+		inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x) // square root of every component of x
			
 
				+		{
			
 
				+			return _mm_sqrt_ps(x.Data); // packed sqrt instruction (not the rsqrt estimate); the __m128 result converts implicitly to fvec4SIMD
			
 
				+		}
			
 
				+
			
 
				+		inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x) // approximate square root of every component of x
			
 
				+		{
			
 
				+			return _mm_mul_ps(x.Data, _mm_rsqrt_ps(x.Data)); // sqrt(x) = x * rsqrt(x), using the hardware estimate; NOTE: x == 0 yields NaN (0 * inf)
			
 
				+		}
			
 
				+
			
 
				+		// SSE packed reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
			
 
				+		// By Elan Ruskin, http://assemblyrequired.crashworks.org/
			
 
				+		inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x)
			
 
				+		{
			
 
				+			GLM_ALIGN(16) static const __m128 three = {3.0f, 3.0f, 3.0f, 3.0f}; // 16-byte aligned consts for fast load
			
 
				+			GLM_ALIGN(16) static const __m128 half = {0.5f, 0.5f, 0.5f, 0.5f};
			
 
				+			// Refine the estimate y0 with one Newton-Raphson step: y1 = 0.5 * y0 * (3 - x * y0 * y0)
			
 
				+			__m128 recip = _mm_rsqrt_ps(x.Data);  // "estimate" opcode, gives y0 for all four components
			
 
				+			__m128 halfrecip = _mm_mul_ps(half, recip);
			
 
				+			__m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
			
 
				+			return _mm_mul_ps(halfrecip, threeminus_xrr);
			
 
				+		}
			
 
				+
			
 
				+		inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x) // approximate 1/sqrt of every component of x
			
 
				+		{
			
 
				+			return _mm_rsqrt_ps(x.Data); // raw rsqrt estimate; unlike simdInversesqrt, no Newton-Raphson refinement is applied
			
 
				+		}
			
 
				+
			
 
				 	}//namespace simd_vec4
			
 
				 	}//namespace gtx
			
 
				 }//namespace glm