Browse Source

All the vec4 geometry functions have SIMD optimizations

Christophe Riccio 9 years ago
parent
commit
b5521ca7c2
2 changed files with 24 additions and 10 deletions
  1. 20 2
      glm/detail/func_geometric.inl
  2. 4 8
      glm/detail/func_geometric_simd.inl

+ 20 - 2
glm/detail/func_geometric.inl

@@ -10,6 +10,24 @@
 namespace glm{
 namespace detail
 {
+	template <template <typename, precision> class vecType, typename T, precision P>
+	struct compute_length // primary template; length() forwards to compute_length<vecType, T, P>::call
+	{
+		GLM_FUNC_QUALIFIER static T call(vecType<T, P> const & v)
+		{
+			return sqrt(dot(v, v)); // square root of v dotted with itself
+		}
+	};
+
+	template <template <typename, precision> class vecType, typename T, precision P>
+	struct compute_distance // primary template; distance() forwards to compute_distance<vecType, T, P>::call
+	{
+		GLM_FUNC_QUALIFIER static T call(vecType<T, P> const & p0, vecType<T, P> const & p1)
+		{
+			return length(p1 - p0); // length of the difference vector p1 - p0
+		}
+	};
+
 	template <template <class, precision> class vecType, typename T, precision P>
 	struct compute_dot{};
 
@@ -112,7 +130,7 @@ namespace detail
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'length' accepts only floating-point inputs");
 
-		return sqrt(dot(v, v));
+		return detail::compute_length<vecType, T, P>::call(v);
 	}
 
 	// distance
@@ -127,7 +145,7 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER T distance(vecType<T, P> const & p0, vecType<T, P> const & p1)
 	{
-		return length(p1 - p0);
+		return detail::compute_distance<vecType, T, P>::call(p0, p1); // dispatch through the detail struct instead of computing inline
 	}
 
 	// dot

+ 4 - 8
glm/detail/func_geometric_simd.inl

@@ -10,8 +10,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const& x, tvec4<float, P> const& y)
 		{
-			__m128 const dot0 = glm_f32v1_dot(x.data, y.data);
-			return _mm_cvtss_f32(dot0);
+			return _mm_cvtss_f32(glm_f32v1_dot(x.data, y.data)); // _mm_cvtss_f32 returns the lowest float lane of the __m128 from glm_f32v1_dot; named temporary dropped
 		}
 	};
 
@@ -36,9 +35,8 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
-			__m128 const nrm0 = glm_f32v4_nrm(v.data);
 			tvec4<float, P> result(uninitialize);
-			result.data = nrm0;
+			result.data = glm_f32v4_nrm(v.data); // assign the glm_f32v4_nrm result straight into the vector's SIMD storage, no named temporary
 			return result;
 		}
 	};
@@ -48,9 +46,8 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & N, tvec4<float, P> const & I, tvec4<float, P> const & Nref)
 		{
-			__m128 const ffd0 = glm_f32v4_ffd(N.data. I.data, Nref.data);
 			tvec4<float, P> result(uninitialize);
-			result.data = ffd0;
+			result.data = glm_f32v4_ffd(N.data, I.data, Nref.data); // comma (not '.') after N.data so N, I and Nref are passed as three separate arguments
 			return result;
 		}
 	};
@@ -60,9 +57,8 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & I, tvec4<float, P> const & N)
 		{
-			__m128 const rfe0 = glm_f32v4_rfe(I.data, N.data);
 			tvec4<float, P> result(uninitialize);
-			result.data = rfe0;
+			result.data = glm_f32v4_rfe(I.data, N.data); // assign the glm_f32v4_rfe result straight into the vector's SIMD storage, no named temporary
 			return result;
 		}
 	};