|
|
@@ -423,9 +423,6 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD mix
|
|
|
|
|
|
// Compared to the naive SIMD implementation below, this scalar version is consistently faster. A non-naive SSE-optimized implementation
|
|
|
// will most likely be faster, but that'll need to be left to people much smarter than I.
|
|
|
- //
|
|
|
- // The issue, I think, is loading the __m128 variables with initial data. Can probably be replaced with an SSE-optimized approximation of
|
|
|
- // glm::sin(). Maybe a fastMix() function would be better for that?
|
|
|
|
|
|
float s0 = glm::sin((1.0f - a) * angle);
|
|
|
float s1 = glm::sin(a * angle);
|
|
|
@@ -495,6 +492,73 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
|
|
|
+(
|
|
|
+ detail::fquatSIMD const & x,
|
|
|
+ detail::fquatSIMD const & y,
|
|
|
+ float const & a
|
|
|
+)
|
|
|
+{
|
|
|
+ float cosTheta = dot(x, y);
|
|
|
+
|
|
|
+ if (cosTheta > 1.0f - glm::epsilon<float>())
|
|
|
+ {
|
|
|
+ return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ float angle = glm::fastAcos(cosTheta);
|
|
|
+
|
|
|
+
|
|
|
+ __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
|
|
|
+
|
|
|
+ __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
|
|
|
+ __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
|
|
|
+ __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
|
|
|
+
|
|
|
+ return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
|
|
|
+(
|
|
|
+ detail::fquatSIMD const & x,
|
|
|
+ detail::fquatSIMD const & y,
|
|
|
+ float const & a
|
|
|
+)
|
|
|
+{
|
|
|
+ detail::fquatSIMD z = y;
|
|
|
+
|
|
|
+ float cosTheta = dot(x, y);
|
|
|
+ if (cosTheta < 0.0f)
|
|
|
+ {
|
|
|
+ z = -y;
|
|
|
+ cosTheta = -cosTheta;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ if(cosTheta > 1.0f - epsilon<float>())
|
|
|
+ {
|
|
|
+ return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ float angle = glm::fastAcos(cosTheta);
|
|
|
+
|
|
|
+
|
|
|
+ __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
|
|
|
+
|
|
|
+ __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
|
|
|
+ __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
|
|
|
+ __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
|
|
|
+
|
|
|
+ return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
|
|
|
(
|
|
|
detail::fquatSIMD const & q
|
|
|
@@ -544,4 +608,22 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
|
|
|
}
|
|
|
|
|
|
|
|
|
+GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
|
|
|
+{
|
|
|
+ static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
|
|
|
+ static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
|
|
|
+ static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
|
|
|
+
|
|
|
+ __m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x));
|
|
|
+ __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
|
|
|
+ __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
|
|
|
+
|
|
|
+ __m128 y0 = _mm_mul_ps(x3, c0);
|
|
|
+ __m128 y1 = _mm_mul_ps(x5, c1);
|
|
|
+ __m128 y2 = _mm_mul_ps(x7, c2);
|
|
|
+
|
|
|
+ return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
}//namespace glm
|