9 年之前 · 20cf68679c
--- a/glm/detail/func_exponential.inl
+++ b/glm/detail/func_exponential.inl
@@ -29,6 +29,15 @@ namespace detail
 
				 		}
			
 
				 	};
			
 
				 
			
 
				+	template <template <class, precision> class vecType, typename T, precision P>
			
 
				+	struct compute_sqrt
			
 
				+	{
			
 
				+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
			
 
				+		{
			
 
				+			return detail::functor1<T, T, P, vecType>::call(std::sqrt, x);
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				 	template <template <class, precision> class vecType, typename T, precision P>
			
 
				 	struct compute_inversesqrt
			
 
				 	{
			
@@ -113,7 +122,7 @@ namespace detail
 
				 	GLM_FUNC_QUALIFIER vecType<T, P> sqrt(vecType<T, P> const & x)
			
 
				 	{
			
 
				 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'sqrt' only accept floating-point inputs");
			
 
				-		return detail::functor1<T, T, P, vecType>::call(sqrt, x);
			
 
				+		return detail::compute_sqrt<vecType, T, P>::call(x);
			
 
				 	}
			
 
				 
			
 
				 	// inversesqrt
			
--- a/glm/detail/func_exponential_simd.inl
+++ b/glm/detail/func_exponential_simd.inl
@@ -1,9 +1,35 @@
 
				 /// @ref core
			
 
				 /// @file glm/detail/func_exponential_simd.inl
			
 
				 
			
 
				+#include "../simd/exponential.h"
			
 
				+
			
 
				+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
			
 
				+
			
 
				 namespace glm{
			
 
				 namespace detail
			
 
				 {
			
 
				+	template <precision P>
			
 
				+	struct compute_sqrt<tvec4, float, P>
			
 
				+	{
			
 
				+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
			
 
				+		{
			
 
				+			tvec4<float, P> result(uninitialize);
			
 
				+			result.data = _mm_sqrt_ps(v.data);
			
 
				+			return result;
			
 
				+		}
			
 
				+	};
			
 
				 
			
 
				+	template <>
			
 
				+	struct compute_sqrt<tvec4, float, lowp>
			
 
				+	{
			
 
				+		GLM_FUNC_QUALIFIER static tvec4<float, lowp> call(tvec4<float, lowp> const & v)
			
 
				+		{
			
 
				+			tvec4<float, lowp> result(uninitialize);
			
 
				+			result.data = glm_f32v4_sqrt_lowp(v.data);
			
 
				+			return result;
			
 
				+		}
			
 
				+	};
			
 
				 }//namespace detail
			
 
				 }//namespace glm
			
 
				+
			
 
				+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
			
--- a/glm/detail/func_geometric_simd.inl
+++ b/glm/detail/func_geometric_simd.inl
@@ -1,3 +1,6 @@
 
				+/// @ref core
			
 
				+/// @file glm/detail/func_geometric_simd.inl
			
 
				+
			
 
				 #include "../simd/geometric.h"
			
 
				 
			
 
				 #if GLM_ARCH & GLM_ARCH_SSE2_BIT
			
--- a/glm/simd/common.h
+++ b/glm/simd/common.h
@@ -178,30 +178,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_inf(__m128 x)
 
				 	return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000)));		// exponent is all 1s, fraction is 0
			
 
				 }
			
 
				 
			
 
				-// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
			
 
				-// By Elan Ruskin, http://assemblyrequired.crashworks.org/
			
 
				-GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_wip(__m128 x)
			
 
				-{
			
 
				-	__m128 const Rcp0 = _mm_rsqrt_ss(x);  // "estimate" opcode
			
 
				-	__m128 const Mul0 = _mm_mul_ss(_mm_set1_ps(0.5f), Rcp0);
			
 
				-	__m128 const Mul1 = _mm_mul_ss(Rcp0, Rcp0);
			
 
				-	__m128 const Mul2 = _mm_mul_ss(x, Mul1);
			
 
				-	__m128 const Sub0 = _mm_sub_ss(_mm_set1_ps(3.0f), Mul2);
			
 
				-	__m128 const Mul3 = _mm_mul_ss(Mul0, Sub0);
			
 
				-	return Mul3;
			
 
				-}
			
 
				-
			
 
				-// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
			
 
				-// By Elan Ruskin, http://assemblyrequired.crashworks.org/
			
 
				-GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x)
			
 
				-{
			
 
				-	__m128 const Rcp0 = _mm_rsqrt_ps(x);  // "estimate" opcode
			
 
				-	__m128 const Mul0 = _mm_mul_ps(_mm_set1_ps(0.5f), Rcp0);
			
 
				-	__m128 const Mul1 = _mm_mul_ps(Mul0, Mul0);
			
 
				-	__m128 const Mul2 = _mm_mul_ps(x, Mul1);
			
 
				-	__m128 const Sub0 = _mm_sub_ps(_mm_set1_ps(3.0f), Mul2);
			
 
				-	__m128 const Mul3 = _mm_mul_ps(Mul0, Sub0);
			
 
				-	return Mul3;
			
 
				-}
			
 
				-
			
 
				 #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
			
--- a/glm/simd/exponential.h
+++ b/glm/simd/exponential.h
@@ -3,3 +3,20 @@
 
				 
			
 
				 #pragma once
			
 
				 
			
 
				+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
			
 
				+
			
 
				+// SSE scalar low-precision sqrt computed as x * rsqrt(x) from the rsqrt estimate op, with no Newton-Raphson refinement (NOTE: x == 0 likely yields NaN, since rsqrt(0) is +inf)
			
 
				+// By Elan Ruskin, http://assemblyrequired.crashworks.org/
			
 
				+GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_lowp(__m128 x)
			
 
				+{
			
 
				+	return _mm_mul_ss(_mm_rsqrt_ss(x), x);
			
 
				+}
			
 
				+
			
 
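				+// SSE packed (4 x float) low-precision sqrt computed as x * rsqrt(x) from the rsqrt estimate op, with no Newton-Raphson refinement (NOTE: x == 0 likely yields NaN, since rsqrt(0) is +inf)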
			
 
				+// By Elan Ruskin, http://assemblyrequired.crashworks.org/
			
 
				+GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_lowp(__m128 x)
			
 
				+{
			
 
				+	return _mm_mul_ps(_mm_rsqrt_ps(x), x);
			
 
				+}
			
 
				+
			
 
				+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
			
--- a/test/core/core_func_exponential.cpp
+++ b/test/core/core_func_exponential.cpp
@@ -1,33 +1,5 @@
 
				-///////////////////////////////////////////////////////////////////////////////////
			
 
				-/// OpenGL Mathematics (glm.g-truc.net)
			
 
				-///
			
 
				-/// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)
			
 
				-/// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				-/// of this software and associated documentation files (the "Software"), to deal
			
 
				-/// in the Software without restriction, including without limitation the rights
			
 
				-/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				-/// copies of the Software, and to permit persons to whom the Software is
			
 
				-/// furnished to do so, subject to the following conditions:
			
 
				-/// 
			
 
				-/// The above copyright notice and this permission notice shall be included in
			
 
				-/// all copies or substantial portions of the Software.
			
 
				-/// 
			
 
				-/// Restrictions:
			
 
				-///		By making use of the Software for military purposes, you choose to make
			
 
				-///		a Bunny unhappy.
			
 
				-/// 
			
 
				-/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				-/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				-/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				-/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				-/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				-/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				-/// THE SOFTWARE.
			
 
				-///
			
 
				 /// @file test/core/func_exponential.cpp
			
 
				 /// @date 2011-01-15 / 2011-09-13
			
 
				-/// @author Christophe Riccio
			
 
				-///////////////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				 #include <glm/common.hpp>
			
 
				 #include <glm/exponential.hpp>
			
@@ -103,6 +75,18 @@ int test_sqrt()
 
				 {
			
 
				 	int Error(0);
			
 
				 
			
 
				+#	if GLM_ARCH & GLM_ARCH_SSE2_BIT
			
 
				+	for(float f = 0.1f; f < 30.0f; f += 0.1f)
			
 
				+	{
			
 
				+		float q = _mm_cvtss_f32(_mm_sqrt_ps(_mm_set1_ps(f)));
			
 
				+		float r = _mm_cvtss_f32(glm_f32v4_sqrt_lowp(_mm_set1_ps(f)));
			
 
				+		float s = std::sqrt(f);
			
 
				+		Error += glm::abs(q - s) < 0.01f ? 0 : 1;
			
 
				+		Error += glm::abs(r - s) < 0.01f ? 0 : 1;
			
 
				+		assert(!Error);
			
 
				+	}
			
 
				+#	endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
			
 
				+
			
 
				 	float A = glm::sqrt(10.f);
			
 
				 	glm::vec1 B = glm::sqrt(glm::vec1(10.f));
			
 
				 	glm::vec2 C = glm::sqrt(glm::vec2(10.f));