12 лет назад · c1006718b3
--- a/glm/gtx/simd_quat.hpp
+++ b/glm/gtx/simd_quat.hpp
@@ -41,6 +41,7 @@
 
				 // Dependency:
			
 
				 #include "../glm.hpp"
			
 
				 #include "../gtc/quaternion.hpp"
			
 
				+#include "../gtx/fast_trigonometry.hpp"
			
 
				 
			
 
				 #if(GLM_ARCH != GLM_ARCH_PURE)
			
 
				 
			
@@ -223,7 +224,7 @@ namespace detail
 
				 	/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
			
 
				 	/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
			
 
				 	/// @see gtc_quaternion
			
 
				-	/// @see - slerp(detail::tquat<T> const & x, detail::tquat<T> const & y, T const & a) 
			
 
				+	/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				 	detail::fquatSIMD mix(
			
 
				 		detail::fquatSIMD const & x, 
			
 
				 		detail::fquatSIMD const & y, 
			
@@ -255,6 +256,35 @@ namespace detail
 
				 		detail::fquatSIMD const & y, 
			
 
				 		float const & a);
			
 
				 
			
 
				+
			
 
				+    /// Faster spherical linear interpolation of two unit length quaternions.
			
 
				+    ///
			
 
				+    /// This is the same as mix(), except for two rules:
			
 
				+    ///   1) The two quaternions must be unit length.
			
 
				+    ///   2) The interpolation factor (a) must be in the range [0, 1].
			
 
				+    ///
			
 
				+    /// This will use the equivalent to fastAcos() and fastSin().
			
 
				+    ///
			
 
				+	/// @see gtc_quaternion
			
 
				+	/// @see - mix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				+	detail::fquatSIMD fastMix(
			
 
				+		detail::fquatSIMD const & x, 
			
 
				+		detail::fquatSIMD const & y, 
			
 
				+		float const & a);
			
 
				+
			
 
				+    /// Identical to fastMix() except takes the shortest path.
			
 
				+    ///
			
 
				+    /// The same rules apply here as those in fastMix(). Both quaternions must be unit length and 'a' must be
			
 
				+    /// in the range [0, 1].
			
 
				+    ///
			
 
				+	/// @see - fastMix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				+	/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				+    detail::fquatSIMD fastSlerp(
			
 
				+		detail::fquatSIMD const & x, 
			
 
				+		detail::fquatSIMD const & y, 
			
 
				+		float const & a);
			
 
				+
			
 
				+
			
 
				 	/// Returns the q conjugate. 
			
 
				 	/// 
			
 
				 	/// @see gtc_quaternion
			
@@ -292,6 +322,11 @@ namespace detail
 
				 		float const & z);
			
 
				 
			
 
				 
			
 
				+    // TODO: Move this to somewhere more appropriate. Used with fastMix() and fastSlerp().
			
 
				+    /// Performs the equivalent of glm::fastSin() on each component of the given __m128.
			
 
				+    __m128 fastSin(__m128 x);
			
 
				+
			
 
				+
			
 
				 	/// @}
			
 
				 }//namespace glm
			
 
				 
			
--- a/glm/gtx/simd_quat.inl
+++ b/glm/gtx/simd_quat.inl
@@ -423,9 +423,6 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD mix
 
				 
			
 
				         // Compared to the naive SIMD implementation below, this scalar version is consistently faster. A non-naive SSE-optimized implementation
			
 
				         // will most likely be faster, but that'll need to be left to people much smarter than I.
			
 
				-        //
			
 
				-        // The issue, I think, is loading the __m128 variables with initial data. Can probably be replaced with an SSE-optimized approximation of
			
 
				-        // glm::sin(). Maybe a fastMix() function would be better for that?
			
 
				         
			
 
				         float s0 = glm::sin((1.0f - a) * angle);
			
 
				         float s1 = glm::sin(a * angle);
			
@@ -495,6 +492,73 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
 
				 	}
			
 
				 }
			
 
				 
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
			
 
				+(
			
 
				+	detail::fquatSIMD const & x, 
			
 
				+	detail::fquatSIMD const & y, 
			
 
				+	float const & a
			
 
				+)
			
 
				+{
			
 
				+	float cosTheta = dot(x, y);
			
 
				+
			
 
				+    if (cosTheta > 1.0f - glm::epsilon<float>())
			
 
				+    {
			
 
				+	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        float angle = glm::fastAcos(cosTheta);
			
 
				+
			
 
				+
			
 
				+        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
			
 
				+
			
 
				+        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
			
 
				+        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
			
 
				+        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
			
 
				+        
			
 
				+        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
			
 
				+(
			
 
				+	detail::fquatSIMD const & x, 
			
 
				+	detail::fquatSIMD const & y, 
			
 
				+	float const & a
			
 
				+)
			
 
				+{
			
 
				+	detail::fquatSIMD z = y;
			
 
				+
			
 
				+	float cosTheta = dot(x, y);
			
 
				+	if (cosTheta < 0.0f)
			
 
				+	{
			
 
				+		z        = -y;
			
 
				+		cosTheta = -cosTheta;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	if(cosTheta > 1.0f - epsilon<float>())
			
 
				+	{
			
 
				+		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+        float angle = glm::fastAcos(cosTheta);
			
 
				+
			
 
				+
			
 
				+        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
			
 
				+
			
 
				+        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
			
 
				+        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
			
 
				+        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
			
 
				+        
			
 
				+        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				 GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
			
 
				 (
			
 
				 	detail::fquatSIMD const & q
			
@@ -544,4 +608,22 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
 
				 }
			
 
				 
			
 
				 
			
 
				+GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
			
 
				+{
			
 
				+    static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
			
 
				+    static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
			
 
				+    static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
			
 
				+
			
 
				+    __m128 x3 = _mm_mul_ps(x,  _mm_mul_ps(x, x));
			
 
				+    __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
			
 
				+    __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
			
 
				+
			
 
				+    __m128 y0 = _mm_mul_ps(x3, c0);
			
 
				+    __m128 y1 = _mm_mul_ps(x5, c1);
			
 
				+    __m128 y2 = _mm_mul_ps(x7, c2);
			
 
				+        
			
 
				+    return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
			
 
				+}
			
 
				+
			
 
				+
			
 
				 }//namespace glm