|
|
@@ -634,6 +634,34 @@ namespace glm
|
|
|
return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
|
|
|
}
|
|
|
|
|
|
+ inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x)
|
|
|
+ {
|
|
|
+ return _mm_sqrt_ps(x.Data);
|
|
|
+ }
|
|
|
+
|
|
|
+ inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x)
|
|
|
+ {
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
|
|
|
+ // By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
|
|
+ inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x)
|
|
|
+ {
|
|
|
+ GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
|
|
|
+ GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
|
|
|
+
|
|
|
+ __m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode
|
|
|
+ __m128 halfrecip = _mm_mul_ps(half, recip);
|
|
|
+ __m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
|
|
|
+ return _mm_mul_ps(halfrecip, threeminus_xrr);
|
|
|
+ }
|
|
|
+
|
|
|
+ inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x)
|
|
|
+ {
|
|
|
+ return _mm_rsqrt_ps(x.Data);
|
|
|
+ }
|
|
|
+
|
|
|
}//namespace simd_vec4
|
|
|
}//namespace gtx
|
|
|
}//namespace glm
|