12 years ago · 092ccbe64f
--- a/glm/gtx/simd_quat.hpp
+++ b/glm/gtx/simd_quat.hpp
@@ -0,0 +1,343 @@
 
				+///////////////////////////////////////////////////////////////////////////////////
			
 
				+/// OpenGL Mathematics (glm.g-truc.net)
			
 
				+///
			
 
				+/// Copyright (c) 2005 - 2013 G-Truc Creation (www.g-truc.net)
			
 
				+/// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+/// of this software and associated documentation files (the "Software"), to deal
			
 
				+/// in the Software without restriction, including without limitation the rights
			
 
				+/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+/// copies of the Software, and to permit persons to whom the Software is
			
 
				+/// furnished to do so, subject to the following conditions:
			
 
				+/// 
			
 
				+/// The above copyright notice and this permission notice shall be included in
			
 
				+/// all copies or substantial portions of the Software.
			
 
				+/// 
			
 
				+/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+/// THE SOFTWARE.
			
 
				+///
			
 
				+/// @ref gtx_simd_quat
			
 
				+/// @file glm/gtx/simd_quat.hpp
			
 
				+/// @date 2009-05-07 / 2011-06-07
			
 
				+/// @author Christophe Riccio
			
 
				+///
			
 
				+/// @see core (dependence)
			
 
				+///
			
 
				+/// @defgroup gtx_simd_vec4 GLM_GTX_simd_quat
			
 
				+/// @ingroup gtx
			
 
				+/// 
			
 
				+/// @brief SIMD implementation of quat type.
			
 
				+/// 
			
 
				+/// <glm/gtx/simd_quat.hpp> need to be included to use these functionalities.
			
 
				+///////////////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+#ifndef GLM_GTX_simd_quat
			
 
				+#define GLM_GTX_simd_quat GLM_VERSION
			
 
				+
			
 
				+// Dependency:
			
 
				+#include "../glm.hpp"
			
 
				+#include "../gtc/quaternion.hpp"
			
 
				+#include "../gtx/fast_trigonometry.hpp"
			
 
				+
			
 
				+#if(GLM_ARCH != GLM_ARCH_PURE)
			
 
				+
			
 
				+#if(GLM_ARCH & GLM_ARCH_SSE2)
			
 
				+#	include "../core/intrinsic_common.hpp"
			
 
				+#	include "../core/intrinsic_geometric.hpp"
			
 
				+#   include "../gtx/simd_mat4.hpp"
			
 
				+#else
			
 
				+#	error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics"
			
 
				+#endif
			
 
				+
			
 
				+#if(defined(GLM_MESSAGES) && !defined(glm_ext))
			
 
				+#	pragma message("GLM: GLM_GTX_simd_quat extension included")
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+// Warning silencer for nameless struct/union.
			
 
				+#if (GLM_COMPILER & GLM_COMPILER_VC)
			
 
				+#   pragma warning(push)
			
 
				+#   pragma warning(disable:4201)   // warning C4201: nonstandard extension used : nameless struct/union
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+namespace glm{
			
 
				+namespace detail
			
 
				+{
			
 
				+	/// Quaternion implemented using SIMD SEE intrinsics.
			
 
				+	/// \ingroup gtx_simd_vec4
			
 
				+	GLM_ALIGNED_STRUCT(16) fquatSIMD
			
 
				+	{
			
 
				+		enum ctor{null};
			
 
				+		typedef __m128 value_type;
			
 
				+		typedef std::size_t size_type;
			
 
				+		static size_type value_size();
			
 
				+
			
 
				+		typedef fquatSIMD type;
			
 
				+		typedef tquat<bool, defaultp> bool_type;
			
 
				+
			
 
				+#ifdef GLM_SIMD_ENABLE_XYZW_UNION
			
 
				+        union
			
 
				+        {
			
 
				+		    __m128 Data;
			
 
				+            struct {float x, y, z, w;};
			
 
				+        };
			
 
				+#else
			
 
				+        __m128 Data;
			
 
				+#endif
			
 
				+
			
 
				+		//////////////////////////////////////
			
 
				+		// Implicit basic constructors
			
 
				+
			
 
				+		fquatSIMD();
			
 
				+		fquatSIMD(__m128 const & Data);
			
 
				+		fquatSIMD(fquatSIMD const & q);
			
 
				+
			
 
				+		//////////////////////////////////////
			
 
				+		// Explicit basic constructors
			
 
				+
			
 
				+		explicit fquatSIMD(
			
 
				+			ctor);
			
 
				+		explicit fquatSIMD(
			
 
				+			float const & w, 
			
 
				+			float const & x, 
			
 
				+			float const & y, 
			
 
				+			float const & z);
			
 
				+		explicit fquatSIMD(
			
 
				+			quat const & v);
			
 
				+        explicit fquatSIMD(
			
 
				+			vec3 const & eulerAngles);
			
 
				+		
			
 
				+
			
 
				+		//////////////////////////////////////
			
 
				+		// Unary arithmetic operators
			
 
				+
			
 
				+        fquatSIMD& operator =(fquatSIMD const & q);
			
 
				+        fquatSIMD& operator*=(float const & s);
			
 
				+		fquatSIMD& operator/=(float const & s);
			
 
				+	};
			
 
				+
			
 
				+
			
 
				+    //////////////////////////////////////
			
 
				+    // Arithmetic operators
			
 
				+
			
 
				+	detail::fquatSIMD operator- (
			
 
				+		detail::fquatSIMD const & q);
			
 
				+
			
 
				+	detail::fquatSIMD operator+ ( 
			
 
				+		detail::fquatSIMD const & q, 
			
 
				+		detail::fquatSIMD const & p); 
			
 
				+
			
 
				+	detail::fquatSIMD operator* ( 
			
 
				+		detail::fquatSIMD const & q, 
			
 
				+		detail::fquatSIMD const & p); 
			
 
				+
			
 
				+	detail::fvec4SIMD operator* (
			
 
				+		detail::fquatSIMD const & q, 
			
 
				+		detail::fvec4SIMD const & v);
			
 
				+
			
 
				+	detail::fvec4SIMD operator* (
			
 
				+		detail::fvec4SIMD const & v,
			
 
				+		detail::fquatSIMD const & q);
			
 
				+
			
 
				+	detail::fquatSIMD operator* (
			
 
				+		detail::fquatSIMD const & q, 
			
 
				+		float s);
			
 
				+
			
 
				+	detail::fquatSIMD operator* (
			
 
				+		float s,
			
 
				+		detail::fquatSIMD const & q);
			
 
				+
			
 
				+	detail::fquatSIMD operator/ (
			
 
				+		detail::fquatSIMD const & q, 
			
 
				+		float s);
			
 
				+
			
 
				+}//namespace detail
			
 
				+
			
 
				+	typedef glm::detail::fquatSIMD simdQuat;
			
 
				+
			
 
				+	/// @addtogroup gtx_simd_quat
			
 
				+	/// @{
			
 
				+
			
 
				+    //! Convert a simdQuat to a quat.
			
 
				+	//! (From GLM_GTX_simd_quat extension)
			
 
				+	quat quat_cast(
			
 
				+		detail::fquatSIMD const & x);
			
 
				+
			
 
				+    //! Convert a simdMat4 to a simdQuat.
			
 
				+    //! (From GLM_GTX_simd_quat extension)
			
 
				+    detail::fquatSIMD quatSIMD_cast(
			
 
				+        detail::fmat4x4SIMD const & m);
			
 
				+
			
 
				+    //! Converts a mat4 to a simdQuat.
			
 
				+    //! (From GLM_GTX_simd_quat extension)
			
 
				+    template <typename T, precision P>
			
 
				+    detail::fquatSIMD quatSIMD_cast(
			
 
				+        detail::tmat4x4<T, P> const & m);
			
 
				+
			
 
				+    //! Converts a mat3 to a simdQuat.
			
 
				+    //! (From GLM_GTX_simd_quat extension)
			
 
				+    template <typename T, precision P>
			
 
				+    detail::fquatSIMD quatSIMD_cast(
			
 
				+        detail::tmat3x3<T, P> const & m);
			
 
				+
			
 
				+    //! Convert a simdQuat to a simdMat4
			
 
				+    //! (From GLM_GTX_simd_quat extension)
			
 
				+    detail::fmat4x4SIMD mat4SIMD_cast(
			
 
				+        detail::fquatSIMD const & q);
			
 
				+
			
 
				+    //! Converts a simdQuat to a standard mat4.
			
 
				+    //! (From GLM_GTX_simd_quat extension)
			
 
				+    mat4 mat4_cast(
			
 
				+        detail::fquatSIMD const & q);
			
 
				+
			
 
				+
			
 
				+	/// Returns the length of the quaternion. 
			
 
				+	/// 
			
 
				+	/// @see gtc_quaternion
			
 
				+	float length(
			
 
				+		detail::fquatSIMD const & x);
			
 
				+
			
 
				+	/// Returns the normalized quaternion. 
			
 
				+	/// 
			
 
				+	/// @see gtc_quaternion
			
 
				+	detail::fquatSIMD normalize(
			
 
				+		detail::fquatSIMD const & x);
			
 
				+
			
 
				+    /// Returns dot product of q1 and q2, i.e., q1[0] * q2[0] + q1[1] * q2[1] + ... 
			
 
				+	/// 
			
 
				+	/// @see gtc_quaternion
			
 
				+	float dot(
			
 
				+		detail::fquatSIMD const & q1, 
			
 
				+		detail::fquatSIMD const & q2);
			
 
				+
			
 
				+    /// Spherical linear interpolation of two quaternions.
			
 
				+	/// The interpolation is oriented and the rotation is performed at constant speed.
			
 
				+	/// For short path spherical linear interpolation, use the slerp function.
			
 
				+	/// 
			
 
				+	/// @param x A quaternion
			
 
				+	/// @param y A quaternion
			
 
				+	/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
			
 
				+	/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
			
 
				+	/// @see gtc_quaternion
			
 
				+	/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				+	detail::fquatSIMD mix(
			
 
				+		detail::fquatSIMD const & x, 
			
 
				+		detail::fquatSIMD const & y, 
			
 
				+		float const & a);
			
 
				+
			
 
				+    /// Linear interpolation of two quaternions. 
			
 
				+	/// The interpolation is oriented.
			
 
				+	/// 
			
 
				+	/// @param x A quaternion
			
 
				+	/// @param y A quaternion
			
 
				+	/// @param a Interpolation factor. The interpolation is defined in the range [0, 1].
			
 
				+	/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
			
 
				+	/// @see gtc_quaternion
			
 
				+	detail::fquatSIMD lerp(
			
 
				+		detail::fquatSIMD const & x, 
			
 
				+		detail::fquatSIMD const & y, 
			
 
				+		float const & a);
			
 
				+
			
 
				+	/// Spherical linear interpolation of two quaternions.
			
 
				+	/// The interpolation always take the short path and the rotation is performed at constant speed.
			
 
				+	/// 
			
 
				+	/// @param x A quaternion
			
 
				+	/// @param y A quaternion
			
 
				+	/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
			
 
				+	/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
			
 
				+	/// @see gtc_quaternion
			
 
				+	detail::fquatSIMD slerp(
			
 
				+		detail::fquatSIMD const & x, 
			
 
				+		detail::fquatSIMD const & y, 
			
 
				+		float const & a);
			
 
				+
			
 
				+
			
 
				+    /// Faster spherical linear interpolation of two unit length quaternions.
			
 
				+    ///
			
 
				+    /// This is the same as mix(), except for two rules:
			
 
				+    ///   1) The two quaternions must be unit length.
			
 
				+    ///   2) The interpolation factor (a) must be in the range [0, 1].
			
 
				+    ///
			
 
				+    /// This will use the equivalent to fastAcos() and fastSin().
			
 
				+    ///
			
 
				+	/// @see gtc_quaternion
			
 
				+	/// @see - mix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				+	detail::fquatSIMD fastMix(
			
 
				+		detail::fquatSIMD const & x, 
			
 
				+		detail::fquatSIMD const & y, 
			
 
				+		float const & a);
			
 
				+
			
 
				+    /// Identical to fastMix() except takes the shortest path.
			
 
				+    ///
			
 
				+    /// The same rules apply here as those in fastMix(). Both quaternions must be unit length and 'a' must be
			
 
				+    /// in the range [0, 1].
			
 
				+    ///
			
 
				+	/// @see - fastMix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				+	/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a) 
			
 
				+    detail::fquatSIMD fastSlerp(
			
 
				+		detail::fquatSIMD const & x, 
			
 
				+		detail::fquatSIMD const & y, 
			
 
				+		float const & a);
			
 
				+
			
 
				+
			
 
				+	/// Returns the q conjugate. 
			
 
				+	/// 
			
 
				+	/// @see gtc_quaternion
			
 
				+	detail::fquatSIMD conjugate(
			
 
				+		detail::fquatSIMD const & q);
			
 
				+
			
 
				+	/// Returns the q inverse. 
			
 
				+	/// 
			
 
				+	/// @see gtc_quaternion
			
 
				+	detail::fquatSIMD inverse(
			
 
				+		detail::fquatSIMD const & q);
			
 
				+
			
 
				+    /// Build a quaternion from an angle and a normalized axis.
			
 
				+	///
			
 
				+	/// @param angle Angle expressed in radians if GLM_FORCE_RADIANS is define or degrees otherwise.
			
 
				+	/// @param axis Axis of the quaternion, must be normalized. 
			
 
				+	///
			
 
				+	/// @see gtc_quaternion
			
 
				+	detail::fquatSIMD angleAxisSIMD(
			
 
				+		float const & angle, 
			
 
				+		vec3 const & axis);
			
 
				+
			
 
				+    /// Build a quaternion from an angle and a normalized axis. 
			
 
				+	///
			
 
				+	/// @param angle Angle expressed in radians if GLM_FORCE_RADIANS is define or degrees otherwise.
			
 
				+	/// @param x x component of the x-axis, x, y, z must be a normalized axis
			
 
				+	/// @param y y component of the y-axis, x, y, z must be a normalized axis
			
 
				+	/// @param z z component of the z-axis, x, y, z must be a normalized axis
			
 
				+	///
			
 
				+	/// @see gtc_quaternion
			
 
				+	detail::fquatSIMD angleAxisSIMD(
			
 
				+		float const & angle, 
			
 
				+		float const & x, 
			
 
				+		float const & y, 
			
 
				+		float const & z);
			
 
				+
			
 
				+
			
 
				+    // TODO: Move this to somewhere more appropriate. Used with fastMix() and fastSlerp().
			
 
				+    /// Performs the equivalent of glm::fastSin() on each component of the given __m128.
			
 
				+    __m128 fastSin(__m128 x);
			
 
				+
			
 
				+
			
 
				+	/// @}
			
 
				+}//namespace glm
			
 
				+
			
 
				+#include "simd_quat.inl"
			
 
				+
			
 
				+
			
 
				+#if (GLM_COMPILER & GLM_COMPILER_VC)
			
 
				+#   pragma warning(pop)
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+#endif//(GLM_ARCH != GLM_ARCH_PURE)
			
 
				+
			
 
				+#endif//GLM_GTX_simd_quat
			
--- a/glm/gtx/simd_quat.inl
+++ b/glm/gtx/simd_quat.inl
@@ -0,0 +1,628 @@
 
				+///////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+// OpenGL Mathematics Copyright (c) 2005 - 2013 G-Truc Creation (www.g-truc.net)
			
 
				+///////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+// Created : 2013-04-22
			
 
				+// Updated : 2013-04-22
			
 
				+// Licence : This source is under MIT License
			
 
				+// File    : glm/gtx/simd_quat.inl
			
 
				+///////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+
			
 
				+namespace glm{
			
 
				+namespace detail{
			
 
				+
			
 
				+
			
 
				+//////////////////////////////////////
			
 
				+// Debugging
			
 
				+#if 0
			
 
				+void print(__m128 v)
			
 
				+{
			
 
				+    GLM_ALIGN(16) float result[4];
			
 
				+    _mm_store_ps(result, v);
			
 
				+
			
 
				+    printf("__m128:    %f %f %f %f\n", result[0], result[1], result[2], result[3]);
			
 
				+}
			
 
				+
			
 
				+void print(const fvec4SIMD &v)
			
 
				+{
			
 
				+    printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+//////////////////////////////////////
			
 
				+// Implicit basic constructors
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
			
 
				+#ifdef GLM_SIMD_ENABLE_DEFAULT_INIT
			
 
				+    : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
			
 
				+#endif
			
 
				+{}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :
			
 
				+	Data(Data)
			
 
				+{}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :
			
 
				+	Data(q.Data)
			
 
				+{}
			
 
				+
			
 
				+
			
 
				+//////////////////////////////////////
			
 
				+// Explicit basic constructors
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
			
 
				+	Data(_mm_set_ps(w, z, y, x))
			
 
				+{}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
			
 
				+	Data(_mm_set_ps(q.w, q.z, q.y, q.x))
			
 
				+{}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
			
 
				+{
			
 
				+    vec3 c = glm::cos(eulerAngles * 0.5f);
			
 
				+	vec3 s = glm::sin(eulerAngles * 0.5f);
			
 
				+
			
 
				+    Data = _mm_set_ps(
			
 
				+        (c.x * c.y * c.z) + (s.x * s.y * s.z),
			
 
				+        (c.x * c.y * s.z) - (s.x * s.y * c.z),
			
 
				+        (c.x * s.y * c.z) + (s.x * c.y * s.z),
			
 
				+        (s.x * c.y * c.z) - (c.x * s.y * s.z));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//////////////////////////////////////
			
 
				+// Unary arithmetic operators
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
			
 
				+{
			
 
				+    this->Data = q.Data;
			
 
				+    return *this;
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
			
 
				+{
			
 
				+	this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
			
 
				+	return *this;
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
			
 
				+{
			
 
				+	this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
			
 
				+	return *this;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+// negate operator
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
			
 
				+{
			
 
				+    return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));
			
 
				+}
			
 
				+
			
 
				+// operator+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
			
 
				+{
			
 
				+	return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));
			
 
				+}
			
 
				+
			
 
				+//operator*
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
			
 
				+{
			
 
				+    // SSE2 STATS:
			
 
				+    //    11 shuffle
			
 
				+    //    8  mul
			
 
				+    //    8  add
			
 
				+    
			
 
				+    // SSE4 STATS:
			
 
				+    //    3 shuffle
			
 
				+    //    4 mul
			
 
				+    //    4 dpps
			
 
				+
			
 
				+    __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
			
 
				+    __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
			
 
				+    __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
			
 
				+    __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
			
 
				+
			
 
				+#   if((GLM_ARCH & GLM_ARCH_SSE4))
			
 
				+    __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f), 0xff);
			
 
				+    __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f), 0xff);
			
 
				+    __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f), 0xff);
			
 
				+    __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);
			
 
				+#   else
			
 
				+           mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f));
			
 
				+    __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
			
 
				+           add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));
			
 
				+
			
 
				+           mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f));
			
 
				+    __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
			
 
				+           add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));
			
 
				+
			
 
				+           mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f));
			
 
				+    __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
			
 
				+           add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));
			
 
				+
			
 
				+           mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
			
 
				+    __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
			
 
				+           add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+    // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than
			
 
				+    // the final code below. I'll keep this here for reference - maybe somebody else can do something better...
			
 
				+    //
			
 
				+    //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));
			
 
				+    //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));
			
 
				+    //
			
 
				+    //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));
			
 
				+    
			
 
				+    float x;
			
 
				+    float y;
			
 
				+    float z;
			
 
				+    float w;
			
 
				+
			
 
				+    _mm_store_ss(&x, add0);
			
 
				+    _mm_store_ss(&y, add1);
			
 
				+    _mm_store_ss(&z, add2);
			
 
				+    _mm_store_ss(&w, add3);
			
 
				+
			
 
				+    return detail::fquatSIMD(w, x, y, z);
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
			
 
				+{
			
 
				+    static const __m128 two = _mm_set1_ps(2.0f);
			
 
				+
			
 
				+    __m128 q_wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
			
 
				+    __m128 q_swp0  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
			
 
				+	__m128 q_swp1  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
			
 
				+	__m128 v_swp0  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));
			
 
				+	__m128 v_swp1  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));
			
 
				+	
			
 
				+	__m128 uv      = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
			
 
				+    __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
			
 
				+    __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
			
 
				+    __m128 uuv     = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
			
 
				+
			
 
				+    
			
 
				+    uv  = _mm_mul_ps(uv,  _mm_mul_ps(q_wwww, two));
			
 
				+    uuv = _mm_mul_ps(uuv, two);
			
 
				+
			
 
				+    return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
			
 
				+{
			
 
				+	return inverse(q) * v;
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
			
 
				+{
			
 
				+	return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
			
 
				+{
			
 
				+	return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//operator/
			
 
				+GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
			
 
				+{
			
 
				+	return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+}//namespace detail
			
 
				+
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER quat quat_cast
			
 
				+(
			
 
				+	detail::fquatSIMD const & x
			
 
				+)
			
 
				+{
			
 
				+	GLM_ALIGN(16) quat Result;
			
 
				+	_mm_store_ps(&Result[0], x.Data);
			
 
				+
			
 
				+	return Result;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])
			
 
				+{
			
 
				+    T trace = m0[0] + m1[1] + m2[2] + T(1.0);
			
 
				+    if (trace > T(0))
			
 
				+    {
			
 
				+        T s = T(0.5) / sqrt(trace);
			
 
				+
			
 
				+        return _mm_set_ps(
			
 
				+            static_cast<float>(T(0.25) / s),
			
 
				+            static_cast<float>((m0[1] - m1[0]) * s),
			
 
				+            static_cast<float>((m2[0] - m0[2]) * s),
			
 
				+            static_cast<float>((m1[2] - m2[1]) * s));
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        if (m0[0] > m1[1])
			
 
				+        {
			
 
				+            if (m0[0] > m2[2])
			
 
				+            {
			
 
				+                // X is biggest.
			
 
				+                T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5);
			
 
				+
			
 
				+                return _mm_set_ps(
			
 
				+                    static_cast<float>((m1[2] - m2[1]) * s),
			
 
				+                    static_cast<float>((m2[0] + m0[2]) * s),
			
 
				+                    static_cast<float>((m0[1] + m1[0]) * s),
			
 
				+                    static_cast<float>(T(0.5)          * s));
			
 
				+            }
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            if (m1[1] > m2[2])
			
 
				+            {
			
 
				+                // Y is biggest.
			
 
				+                T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5);
			
 
				+
			
 
				+                return _mm_set_ps(
			
 
				+                    static_cast<float>((m2[0] - m0[2]) * s),
			
 
				+                    static_cast<float>((m1[2] + m2[1]) * s),
			
 
				+                    static_cast<float>(T(0.5)          * s),
			
 
				+                    static_cast<float>((m0[1] + m1[0]) * s));
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        // Z is biggest.
			
 
				+        T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5);
			
 
				+
			
 
				+        return _mm_set_ps(
			
 
				+            static_cast<float>((m0[1] - m1[0]) * s),
			
 
				+            static_cast<float>(T(0.5)          * s),
			
 
				+            static_cast<float>((m1[2] + m2[1]) * s),
			
 
				+            static_cast<float>((m2[0] + m0[2]) * s));
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
			
 
				+(
			
 
				+	detail::fmat4x4SIMD const & m
			
 
				+)
			
 
				+{
			
 
				+    // Scalar implementation for now.
			
 
				+    GLM_ALIGN(16) float m0[4];
			
 
				+    GLM_ALIGN(16) float m1[4];
			
 
				+    GLM_ALIGN(16) float m2[4];
			
 
				+
			
 
				+    _mm_store_ps(m0, m[0].Data);
			
 
				+    _mm_store_ps(m1, m[1].Data);
			
 
				+    _mm_store_ps(m2, m[2].Data);
			
 
				+
			
 
				+    return quatSIMD_cast_impl(m0, m1, m2);
			
 
				+}
			
 
				+
			
 
				+template <typename T, precision P>
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
			
 
				+(
			
 
				+    detail::tmat4x4<T, P> const & m
			
 
				+)
			
 
				+{
			
 
				+    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
			
 
				+}
			
 
				+
			
 
				+template <typename T, precision P>
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
			
 
				+(
			
 
				+    detail::tmat3x3<T, P> const & m
			
 
				+)
			
 
				+{
			
 
				+    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
			
 
				+(
			
 
				+	detail::fquatSIMD const & q
			
 
				+)
			
 
				+{
			
 
				+    detail::fmat4x4SIMD result;
			
 
				+
			
 
				+    __m128 _wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
			
 
				+    __m128 _xyzw  = q.Data;
			
 
				+    __m128 _zxyw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
			
 
				+    __m128 _yzxw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
			
 
				+
			
 
				+    __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
			
 
				+    __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
			
 
				+    __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
			
 
				+    
			
 
				+    __m128 _tmp0  = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
			
 
				+           _tmp0  = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));
			
 
				+
			
 
				+    __m128 _tmp1  = _mm_mul_ps(_yzxw2, _xyzw);
			
 
				+           _tmp1  = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));
			
 
				+
			
 
				+    __m128 _tmp2  = _mm_mul_ps(_zxyw2, _xyzw);
			
 
				+           _tmp2  = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));
			
 
				+
			
 
				+
			
 
				+    // There's probably a better, more politically correct way of doing this...
			
 
				+    result[0].Data = _mm_set_ps(
			
 
				+        0.0f,
			
 
				+        reinterpret_cast<float*>(&_tmp2)[0],
			
 
				+        reinterpret_cast<float*>(&_tmp1)[0],
			
 
				+        reinterpret_cast<float*>(&_tmp0)[0]);
			
 
				+
			
 
				+    result[1].Data = _mm_set_ps(
			
 
				+        0.0f,
			
 
				+        reinterpret_cast<float*>(&_tmp1)[1],
			
 
				+        reinterpret_cast<float*>(&_tmp0)[1],
			
 
				+        reinterpret_cast<float*>(&_tmp2)[1]);
			
 
				+
			
 
				+    result[2].Data = _mm_set_ps(
			
 
				+        0.0f,
			
 
				+        reinterpret_cast<float*>(&_tmp0)[2],
			
 
				+        reinterpret_cast<float*>(&_tmp2)[2],
			
 
				+        reinterpret_cast<float*>(&_tmp1)[2]);
			
 
				+
			
 
				+   result[3].Data = _mm_set_ps(
			
 
				+        1.0f,
			
 
				+        0.0f,
			
 
				+        0.0f,
			
 
				+        0.0f);
			
 
				+
			
 
				+
			
 
				+    return result;
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER mat4 mat4_cast
			
 
				+(
			
 
				+	detail::fquatSIMD const & q
			
 
				+)
			
 
				+{
			
 
				+    return mat4_cast(mat4SIMD_cast(q));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER float length
			
 
				+(
			
 
				+	detail::fquatSIMD const & q
			
 
				+)
			
 
				+{
			
 
				+    return glm::sqrt(dot(q, q));
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD normalize
			
 
				+(
			
 
				+	detail::fquatSIMD const & q
			
 
				+)
			
 
				+{
			
 
				+    return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER float dot
			
 
				+(
			
 
				+	detail::fquatSIMD const & q1,
			
 
				+	detail::fquatSIMD const & q2
			
 
				+)
			
 
				+{
			
 
				+    float result;
			
 
				+    _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));
			
 
				+
			
 
				+    return result;
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD mix
			
 
				+(
			
 
				+	detail::fquatSIMD const & x, 
			
 
				+	detail::fquatSIMD const & y, 
			
 
				+	float const & a
			
 
				+)
			
 
				+{
			
 
				+	float cosTheta = dot(x, y);
			
 
				+
			
 
				+    if (cosTheta > 1.0f - glm::epsilon<float>())
			
 
				+    {
			
 
				+	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        float angle = glm::acos(cosTheta);
			
 
				+        
			
 
				+        
			
 
				+        float s0 = glm::sin((1.0f - a) * angle);
			
 
				+        float s1 = glm::sin(a * angle);
			
 
				+        float d  = 1.0f / glm::sin(angle);
			
 
				+
			
 
				+        return (s0 * x + s1 * y) * d;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
			
 
				+(
			
 
				+	detail::fquatSIMD const & x, 
			
 
				+	detail::fquatSIMD const & y, 
			
 
				+	float const & a
			
 
				+)
			
 
				+{
			
 
				+	// Lerp is only defined in [0, 1]
			
 
				+	assert(a >= 0.0f);
			
 
				+	assert(a <= 1.0f);
			
 
				+
			
 
				+    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
			
 
				+(
			
 
				+	detail::fquatSIMD const & x, 
			
 
				+	detail::fquatSIMD const & y, 
			
 
				+	float const & a
			
 
				+)
			
 
				+{
			
 
				+	detail::fquatSIMD z = y;
			
 
				+
			
 
				+	float cosTheta = dot(x, y);
			
 
				+
			
 
				+	// If cosTheta < 0, the interpolation will take the long way around the sphere. 
			
 
				+	// To fix this, one quat must be negated.
			
 
				+	if (cosTheta < 0.0f)
			
 
				+	{
			
 
				+		z        = -y;
			
 
				+		cosTheta = -cosTheta;
			
 
				+	}
			
 
				+
			
 
				+	// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
			
 
				+	if(cosTheta > 1.0f - epsilon<float>())
			
 
				+	{
			
 
				+		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+        float angle = glm::acos(cosTheta);
			
 
				+
			
 
				+
			
 
				+		float s0 = glm::sin((1.0f - a) * angle);
			
 
				+        float s1 = glm::sin(a * angle);
			
 
				+        float d  = 1.0f / glm::sin(angle);
			
 
				+
			
 
				+        return (s0 * x + s1 * y) * d;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
			
 
				+(
			
 
				+	detail::fquatSIMD const & x, 
			
 
				+	detail::fquatSIMD const & y, 
			
 
				+	float const & a
			
 
				+)
			
 
				+{
			
 
				+	float cosTheta = dot(x, y);
			
 
				+
			
 
				+    if (cosTheta > 1.0f - glm::epsilon<float>())
			
 
				+    {
			
 
				+	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        float angle = glm::fastAcos(cosTheta);
			
 
				+
			
 
				+
			
 
				+        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
			
 
				+
			
 
				+        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
			
 
				+        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
			
 
				+        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
			
 
				+        
			
 
				+        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
			
 
				+(
			
 
				+	detail::fquatSIMD const & x, 
			
 
				+	detail::fquatSIMD const & y, 
			
 
				+	float const & a
			
 
				+)
			
 
				+{
			
 
				+	detail::fquatSIMD z = y;
			
 
				+
			
 
				+	float cosTheta = dot(x, y);
			
 
				+	if (cosTheta < 0.0f)
			
 
				+	{
			
 
				+		z        = -y;
			
 
				+		cosTheta = -cosTheta;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	if(cosTheta > 1.0f - epsilon<float>())
			
 
				+	{
			
 
				+		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+        float angle = glm::fastAcos(cosTheta);
			
 
				+
			
 
				+
			
 
				+        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
			
 
				+
			
 
				+        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
			
 
				+        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
			
 
				+        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
			
 
				+        
			
 
				+        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
			
 
				+(
			
 
				+	detail::fquatSIMD const & q
			
 
				+)
			
 
				+{
			
 
				+	return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD inverse
			
 
				+(
			
 
				+	detail::fquatSIMD const & q
			
 
				+)
			
 
				+{
			
 
				+	return conjugate(q) / dot(q, q);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
			
 
				+(
			
 
				+	float const & angle,
			
 
				+	vec3 const & v
			
 
				+)
			
 
				+{
			
 
				+#ifdef GLM_FORCE_RADIANS
			
 
				+	float a(angle);
			
 
				+#else
			
 
				+	float a(glm::radians(angle));
			
 
				+#endif
			
 
				+	float s = glm::sin(a * 0.5f);
			
 
				+
			
 
				+    return _mm_set_ps(
			
 
				+        glm::cos(a * 0.5f),
			
 
				+        v.z * s,
			
 
				+        v.y * s,
			
 
				+        v.x * s);
			
 
				+}
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
			
 
				+(
			
 
				+	float const & angle, 
			
 
				+	float const & x, 
			
 
				+	float const & y, 
			
 
				+	float const & z
			
 
				+)
			
 
				+{
			
 
				+	return angleAxisSIMD(angle, vec3(x, y, z));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
			
 
				+{
			
 
				+    static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
			
 
				+    static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
			
 
				+    static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
			
 
				+
			
 
				+    __m128 x3 = _mm_mul_ps(x,  _mm_mul_ps(x, x));
			
 
				+    __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
			
 
				+    __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
			
 
				+
			
 
				+    __m128 y0 = _mm_mul_ps(x3, c0);
			
 
				+    __m128 y1 = _mm_mul_ps(x5, c1);
			
 
				+    __m128 y2 = _mm_mul_ps(x7, c2);
			
 
				+        
			
 
				+    return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+}//namespace glm