|
|
@@ -1,620 +0,0 @@
|
|
|
-/// @ref gtx_simd_quat
|
|
|
-/// @file glm/gtx/simd_quat.inl
|
|
|
-
|
|
|
-namespace glm{
|
|
|
-namespace detail{
|
|
|
-
|
|
|
-
|
|
|
-//////////////////////////////////////
|
|
|
-// Debugging
|
|
|
#if 0
/// Debug helper (compiled out): writes the four lanes of a raw SSE register to stdout.
void print(__m128 v)
{
    GLM_ALIGN(16) float lanes[4];
    _mm_store_ps(lanes, v);

    printf("__m128: %f %f %f %f\n", lanes[0], lanes[1], lanes[2], lanes[3]);
}

/// Debug helper (compiled out): writes the x, y, z, w members of an fvec4SIMD to stdout.
void print(const fvec4SIMD &v)
{
    printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
}
#endif
|
|
|
-
|
|
|
-//////////////////////////////////////
|
|
|
-// Implicit basic constructors
|
|
|
-
|
|
|
-# if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT)
|
|
|
- GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
|
|
|
-# ifdef GLM_FORCE_NO_CTOR_INIT
|
|
|
- : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
|
|
|
-# endif
|
|
|
- {}
|
|
|
-# endif
|
|
|
-
|
|
|
-# if !GLM_HAS_DEFAULTED_FUNCTIONS
|
|
|
- GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :
|
|
|
- Data(q.Data)
|
|
|
- {}
|
|
|
-# endif//!GLM_HAS_DEFAULTED_FUNCTIONS
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :
|
|
|
- Data(Data)
|
|
|
-{}
|
|
|
-
|
|
|
-//////////////////////////////////////
|
|
|
-// Explicit basic constructors
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
|
|
|
- Data(_mm_set_ps(w, z, y, x))
|
|
|
-{}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
|
|
|
- Data(_mm_set_ps(q.w, q.z, q.y, q.x))
|
|
|
-{}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
|
|
|
-{
|
|
|
- vec3 c = glm::cos(eulerAngles * 0.5f);
|
|
|
- vec3 s = glm::sin(eulerAngles * 0.5f);
|
|
|
-
|
|
|
- Data = _mm_set_ps(
|
|
|
- (c.x * c.y * c.z) + (s.x * s.y * s.z),
|
|
|
- (c.x * c.y * s.z) - (s.x * s.y * c.z),
|
|
|
- (c.x * s.y * c.z) + (s.x * c.y * s.z),
|
|
|
- (s.x * c.y * c.z) - (c.x * s.y * s.z));
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-//////////////////////////////////////
|
|
|
-// Unary arithmetic operators
|
|
|
-
|
|
|
-#if !GLM_HAS_DEFAULTED_FUNCTIONS
|
|
|
- GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
|
|
|
- {
|
|
|
- this->Data = q.Data;
|
|
|
- return *this;
|
|
|
- }
|
|
|
-#endif//!GLM_HAS_DEFAULTED_FUNCTIONS
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
|
|
|
-{
|
|
|
- this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
|
|
|
- return *this;
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
|
|
|
-{
|
|
|
- this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
|
|
|
- return *this;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-// negate operator
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
|
|
|
-{
|
|
|
- return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));
|
|
|
-}
|
|
|
-
|
|
|
-// operator+
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
|
|
|
-{
|
|
|
- return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));
|
|
|
-}
|
|
|
-
|
|
|
-//operator*
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
|
|
|
-{
|
|
|
- // SSE2 STATS:
|
|
|
- // 11 shuffle
|
|
|
- // 8 mul
|
|
|
- // 8 add
|
|
|
-
|
|
|
- // SSE4 STATS:
|
|
|
- // 3 shuffle
|
|
|
- // 4 mul
|
|
|
- // 4 dpps
|
|
|
-
|
|
|
- __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
|
|
|
- __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
|
|
|
- __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
|
- __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
|
|
|
-
|
|
|
-# if(GLM_ARCH & GLM_ARCH_SSE41_BIT)
|
|
|
- __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);
|
|
|
- __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff);
|
|
|
- __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff);
|
|
|
- __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);
|
|
|
-# else
|
|
|
- mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f));
|
|
|
- __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
|
|
|
- add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));
|
|
|
-
|
|
|
- mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f));
|
|
|
- __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
|
|
|
- add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));
|
|
|
-
|
|
|
- mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f));
|
|
|
- __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
|
|
|
- add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));
|
|
|
-
|
|
|
- mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
|
|
|
- __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
|
|
|
- add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
|
|
|
-#endif
|
|
|
-
|
|
|
-
|
|
|
- // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than
|
|
|
- // the final code below. I'll keep this here for reference - maybe somebody else can do something better...
|
|
|
- //
|
|
|
- //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));
|
|
|
- //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));
|
|
|
- //
|
|
|
- //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));
|
|
|
-
|
|
|
- float x;
|
|
|
- float y;
|
|
|
- float z;
|
|
|
- float w;
|
|
|
-
|
|
|
- _mm_store_ss(&x, add0);
|
|
|
- _mm_store_ss(&y, add1);
|
|
|
- _mm_store_ss(&z, add2);
|
|
|
- _mm_store_ss(&w, add3);
|
|
|
-
|
|
|
- return detail::fquatSIMD(w, x, y, z);
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
|
|
|
-{
|
|
|
- static const __m128 two = _mm_set1_ps(2.0f);
|
|
|
-
|
|
|
- __m128 q_wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
|
|
|
- __m128 q_swp0 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
|
|
|
- __m128 q_swp1 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
|
|
|
- __m128 v_swp0 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));
|
|
|
- __m128 v_swp1 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));
|
|
|
-
|
|
|
- __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
|
|
|
- __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
|
|
|
- __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
|
|
|
- __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
|
|
|
-
|
|
|
-
|
|
|
- uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
|
|
|
- uuv = _mm_mul_ps(uuv, two);
|
|
|
-
|
|
|
- return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
|
|
|
-{
|
|
|
- return glm::inverse(q) * v;
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
|
|
|
-{
|
|
|
- return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
|
|
|
-{
|
|
|
- return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-//operator/
|
|
|
-GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
|
|
|
-{
|
|
|
- return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-}//namespace detail
|
|
|
-
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER quat quat_cast
|
|
|
-(
|
|
|
- detail::fquatSIMD const & x
|
|
|
-)
|
|
|
-{
|
|
|
- GLM_ALIGN(16) quat Result;
|
|
|
- _mm_store_ps(&Result[0], x.Data);
|
|
|
-
|
|
|
- return Result;
|
|
|
-}
|
|
|
-
|
|
|
-template <typename T>
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])
|
|
|
-{
|
|
|
- T trace = m0[0] + m1[1] + m2[2] + T(1.0);
|
|
|
- if (trace > T(0))
|
|
|
- {
|
|
|
- T s = static_cast<T>(0.5) / sqrt(trace);
|
|
|
-
|
|
|
- return _mm_set_ps(
|
|
|
- static_cast<float>(T(0.25) / s),
|
|
|
- static_cast<float>((m0[1] - m1[0]) * s),
|
|
|
- static_cast<float>((m2[0] - m0[2]) * s),
|
|
|
- static_cast<float>((m1[2] - m2[1]) * s));
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- if (m0[0] > m1[1])
|
|
|
- {
|
|
|
- if (m0[0] > m2[2])
|
|
|
- {
|
|
|
- // X is biggest.
|
|
|
- T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5);
|
|
|
-
|
|
|
- return _mm_set_ps(
|
|
|
- static_cast<float>((m1[2] - m2[1]) * s),
|
|
|
- static_cast<float>((m2[0] + m0[2]) * s),
|
|
|
- static_cast<float>((m0[1] + m1[0]) * s),
|
|
|
- static_cast<float>(T(0.5) * s));
|
|
|
- }
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- if (m1[1] > m2[2])
|
|
|
- {
|
|
|
- // Y is biggest.
|
|
|
- T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5);
|
|
|
-
|
|
|
- return _mm_set_ps(
|
|
|
- static_cast<float>((m2[0] - m0[2]) * s),
|
|
|
- static_cast<float>((m1[2] + m2[1]) * s),
|
|
|
- static_cast<float>(T(0.5) * s),
|
|
|
- static_cast<float>((m0[1] + m1[0]) * s));
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Z is biggest.
|
|
|
- T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5);
|
|
|
-
|
|
|
- return _mm_set_ps(
|
|
|
- static_cast<float>((m0[1] - m1[0]) * s),
|
|
|
- static_cast<float>(T(0.5) * s),
|
|
|
- static_cast<float>((m1[2] + m2[1]) * s),
|
|
|
- static_cast<float>((m2[0] + m0[2]) * s));
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
|
|
|
-(
|
|
|
- detail::fmat4x4SIMD const & m
|
|
|
-)
|
|
|
-{
|
|
|
- // Scalar implementation for now.
|
|
|
- GLM_ALIGN(16) float m0[4];
|
|
|
- GLM_ALIGN(16) float m1[4];
|
|
|
- GLM_ALIGN(16) float m2[4];
|
|
|
-
|
|
|
- _mm_store_ps(m0, m[0].Data);
|
|
|
- _mm_store_ps(m1, m[1].Data);
|
|
|
- _mm_store_ps(m2, m[2].Data);
|
|
|
-
|
|
|
- return quatSIMD_cast_impl(m0, m1, m2);
|
|
|
-}
|
|
|
-
|
|
|
-template <typename T, precision P>
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
|
|
|
-(
|
|
|
- tmat4x4<T, P> const & m
|
|
|
-)
|
|
|
-{
|
|
|
- return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
|
|
|
-}
|
|
|
-
|
|
|
-template <typename T, precision P>
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
|
|
|
-(
|
|
|
- tmat3x3<T, P> const & m
|
|
|
-)
|
|
|
-{
|
|
|
- return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
|
|
|
-(
|
|
|
- detail::fquatSIMD const & q
|
|
|
-)
|
|
|
-{
|
|
|
- detail::fmat4x4SIMD result;
|
|
|
-
|
|
|
- __m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
|
|
|
- __m128 _xyzw = q.Data;
|
|
|
- __m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
|
|
|
- __m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
|
|
|
-
|
|
|
- __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
|
|
|
- __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
|
|
|
- __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
|
|
|
-
|
|
|
- __m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
|
|
|
- _tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));
|
|
|
-
|
|
|
- __m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw);
|
|
|
- _tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));
|
|
|
-
|
|
|
- __m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw);
|
|
|
- _tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));
|
|
|
-
|
|
|
-
|
|
|
- // There's probably a better, more politically correct way of doing this...
|
|
|
- result[0].Data = _mm_set_ps(
|
|
|
- 0.0f,
|
|
|
- reinterpret_cast<float*>(&_tmp2)[0],
|
|
|
- reinterpret_cast<float*>(&_tmp1)[0],
|
|
|
- reinterpret_cast<float*>(&_tmp0)[0]);
|
|
|
-
|
|
|
- result[1].Data = _mm_set_ps(
|
|
|
- 0.0f,
|
|
|
- reinterpret_cast<float*>(&_tmp1)[1],
|
|
|
- reinterpret_cast<float*>(&_tmp0)[1],
|
|
|
- reinterpret_cast<float*>(&_tmp2)[1]);
|
|
|
-
|
|
|
- result[2].Data = _mm_set_ps(
|
|
|
- 0.0f,
|
|
|
- reinterpret_cast<float*>(&_tmp0)[2],
|
|
|
- reinterpret_cast<float*>(&_tmp2)[2],
|
|
|
- reinterpret_cast<float*>(&_tmp1)[2]);
|
|
|
-
|
|
|
- result[3].Data = _mm_set_ps(
|
|
|
- 1.0f,
|
|
|
- 0.0f,
|
|
|
- 0.0f,
|
|
|
- 0.0f);
|
|
|
-
|
|
|
-
|
|
|
- return result;
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER mat4 mat4_cast
|
|
|
-(
|
|
|
- detail::fquatSIMD const & q
|
|
|
-)
|
|
|
-{
|
|
|
- return mat4_cast(mat4SIMD_cast(q));
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER float length
|
|
|
-(
|
|
|
- detail::fquatSIMD const & q
|
|
|
-)
|
|
|
-{
|
|
|
- return glm::sqrt(dot(q, q));
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD normalize
|
|
|
-(
|
|
|
- detail::fquatSIMD const & q
|
|
|
-)
|
|
|
-{
|
|
|
- return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER float dot
|
|
|
-(
|
|
|
- detail::fquatSIMD const & q1,
|
|
|
- detail::fquatSIMD const & q2
|
|
|
-)
|
|
|
-{
|
|
|
- float result;
|
|
|
- _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));
|
|
|
-
|
|
|
- return result;
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD mix
|
|
|
-(
|
|
|
- detail::fquatSIMD const & x,
|
|
|
- detail::fquatSIMD const & y,
|
|
|
- float const & a
|
|
|
-)
|
|
|
-{
|
|
|
- float cosTheta = dot(x, y);
|
|
|
-
|
|
|
- if (cosTheta > 1.0f - glm::epsilon<float>())
|
|
|
- {
|
|
|
- return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- float angle = glm::acos(cosTheta);
|
|
|
-
|
|
|
-
|
|
|
- float s0 = glm::sin((1.0f - a) * angle);
|
|
|
- float s1 = glm::sin(a * angle);
|
|
|
- float d = 1.0f / glm::sin(angle);
|
|
|
-
|
|
|
- return (s0 * x + s1 * y) * d;
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
|
|
|
-(
|
|
|
- detail::fquatSIMD const & x,
|
|
|
- detail::fquatSIMD const & y,
|
|
|
- float const & a
|
|
|
-)
|
|
|
-{
|
|
|
- // Lerp is only defined in [0, 1]
|
|
|
- assert(a >= 0.0f);
|
|
|
- assert(a <= 1.0f);
|
|
|
-
|
|
|
- return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
|
|
|
-(
|
|
|
- detail::fquatSIMD const & x,
|
|
|
- detail::fquatSIMD const & y,
|
|
|
- float const & a
|
|
|
-)
|
|
|
-{
|
|
|
- detail::fquatSIMD z = y;
|
|
|
-
|
|
|
- float cosTheta = dot(x, y);
|
|
|
-
|
|
|
- // If cosTheta < 0, the interpolation will take the long way around the sphere.
|
|
|
- // To fix this, one quat must be negated.
|
|
|
- if (cosTheta < 0.0f)
|
|
|
- {
|
|
|
- z = -y;
|
|
|
- cosTheta = -cosTheta;
|
|
|
- }
|
|
|
-
|
|
|
- // Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
|
|
|
- if(cosTheta > 1.0f - epsilon<float>())
|
|
|
- {
|
|
|
- return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- float angle = glm::acos(cosTheta);
|
|
|
-
|
|
|
-
|
|
|
- float s0 = glm::sin((1.0f - a) * angle);
|
|
|
- float s1 = glm::sin(a * angle);
|
|
|
- float d = 1.0f / glm::sin(angle);
|
|
|
-
|
|
|
- return (s0 * x + s1 * y) * d;
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
|
|
|
-(
|
|
|
- detail::fquatSIMD const & x,
|
|
|
- detail::fquatSIMD const & y,
|
|
|
- float const & a
|
|
|
-)
|
|
|
-{
|
|
|
- float cosTheta = dot(x, y);
|
|
|
-
|
|
|
- if (cosTheta > 1.0f - glm::epsilon<float>())
|
|
|
- {
|
|
|
- return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- float angle = glm::fastAcos(cosTheta);
|
|
|
-
|
|
|
-
|
|
|
- __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
|
|
|
-
|
|
|
- __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
|
|
|
- __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
|
|
|
- __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
|
|
|
-
|
|
|
- return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
|
|
|
-(
|
|
|
- detail::fquatSIMD const & x,
|
|
|
- detail::fquatSIMD const & y,
|
|
|
- float const & a
|
|
|
-)
|
|
|
-{
|
|
|
- detail::fquatSIMD z = y;
|
|
|
-
|
|
|
- float cosTheta = dot(x, y);
|
|
|
- if (cosTheta < 0.0f)
|
|
|
- {
|
|
|
- z = -y;
|
|
|
- cosTheta = -cosTheta;
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- if(cosTheta > 1.0f - epsilon<float>())
|
|
|
- {
|
|
|
- return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- float angle = glm::fastAcos(cosTheta);
|
|
|
-
|
|
|
-
|
|
|
- __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
|
|
|
-
|
|
|
- __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
|
|
|
- __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
|
|
|
- __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
|
|
|
-
|
|
|
- return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
|
|
|
-(
|
|
|
- detail::fquatSIMD const & q
|
|
|
-)
|
|
|
-{
|
|
|
- return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD inverse
|
|
|
-(
|
|
|
- detail::fquatSIMD const & q
|
|
|
-)
|
|
|
-{
|
|
|
- return conjugate(q) / dot(q, q);
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
|
|
|
-(
|
|
|
- float const & angle,
|
|
|
- vec3 const & v
|
|
|
-)
|
|
|
-{
|
|
|
- float s = glm::sin(angle * 0.5f);
|
|
|
-
|
|
|
- return _mm_set_ps(
|
|
|
- glm::cos(angle * 0.5f),
|
|
|
- v.z * s,
|
|
|
- v.y * s,
|
|
|
- v.x * s);
|
|
|
-}
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
|
|
|
-(
|
|
|
- float const & angle,
|
|
|
- float const & x,
|
|
|
- float const & y,
|
|
|
- float const & z
|
|
|
-)
|
|
|
-{
|
|
|
- return angleAxisSIMD(angle, vec3(x, y, z));
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
|
|
|
-{
|
|
|
- static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
|
|
|
- static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
|
|
|
- static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
|
|
|
-
|
|
|
- __m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x));
|
|
|
- __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
|
|
|
- __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
|
|
|
-
|
|
|
- __m128 y0 = _mm_mul_ps(x3, c0);
|
|
|
- __m128 y1 = _mm_mul_ps(x5, c1);
|
|
|
- __m128 y2 = _mm_mul_ps(x7, c2);
|
|
|
-
|
|
|
- return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-}//namespace glm
|