Browse Source

SIMD common functions optimizations

Christophe Riccio 9 years ago
parent
commit
28cb770d14
3 changed files with 47 additions and 15 deletions
  1. 24 12
      glm/detail/func_common.inl
  2. 22 2
      glm/detail/func_common_simd.inl
  3. 1 1
      test/core/core_func_common.cpp

+ 24 - 12
glm/detail/func_common.inl

@@ -283,6 +283,26 @@ namespace detail
 			return min(max(x, minVal), maxVal);
 		}
 	};
+
+	template <typename T, precision P, template <typename, precision> class vecType> // Primary (generic) implementation of component-wise step(edge, x).
+	struct compute_step_vector
+	{
+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & edge, vecType<T, P> const & x)
+		{
+			return mix(vecType<T, P>(1), vecType<T, P>(0), glm::lessThan(x, edge)); // per component: 0 where x < edge, 1 otherwise
+		}
+	};
+
+	template <typename T, precision P, template <typename, precision> class vecType> // Primary (generic) implementation of component-wise smoothstep(edge0, edge1, x).
+	struct compute_smoothstep_vector
+	{
+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & edge0, vecType<T, P> const & edge1, vecType<T, P> const & x)
+		{
+			GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'smoothstep' only accept floating-point inputs");
+			vecType<T, P> const tmp(clamp((x - edge0) / (edge1 - edge0), static_cast<T>(0), static_cast<T>(1))); // t = (x - edge0) / (edge1 - edge0), clamped to [0, 1]
+			return tmp * tmp * (static_cast<T>(3) - static_cast<T>(2) * tmp); // t * t * (3 - 2 * t)
+		}
+	};
 }//namespace detail
 
 	template <typename genFIType>
@@ -561,15 +581,13 @@ namespace detail
 	template <template <typename, precision> class vecType, typename T, precision P>
 	GLM_FUNC_QUALIFIER vecType<T, P> step(T edge, vecType<T, P> const & x)
 	{
-		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'step' only accept floating-point inputs");
-
-		return mix(vecType<T, P>(1), vecType<T, P>(0), glm::lessThan(x, vecType<T, P>(edge)));
+		return detail::compute_step_vector<T, P, vecType>::call(vecType<T, P>(edge), x);
 	}
 
 	template <template <typename, precision> class vecType, typename T, precision P>
 	GLM_FUNC_QUALIFIER vecType<T, P> step(vecType<T, P> const & edge, vecType<T, P> const & x)
 	{
-		return mix(vecType<T, P>(1), vecType<T, P>(0), glm::lessThan(x, edge));
+		return detail::compute_step_vector<T, P, vecType>::call(edge, x);
 	}
 
 	// smoothstep
@@ -585,19 +603,13 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> smoothstep(T edge0, T edge1, vecType<T, P> const & x)
 	{
-		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'smoothstep' only accept floating-point inputs");
-
-		vecType<T, P> const tmp(clamp((x - edge0) / (edge1 - edge0), static_cast<T>(0), static_cast<T>(1)));
-		return tmp * tmp * (static_cast<T>(3) - static_cast<T>(2) * tmp);
+		return detail::compute_smoothstep_vector<T, P, vecType>::call(vecType<T, P>(edge0), vecType<T, P>(edge1), x);
 	}
 
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> smoothstep(vecType<T, P> const & edge0, vecType<T, P> const & edge1, vecType<T, P> const & x)
 	{
-		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'smoothstep' only accept floating-point inputs");
-
-		vecType<T, P> const tmp(clamp((x - edge0) / (edge1 - edge0), static_cast<T>(0), static_cast<T>(1)));
-		return tmp * tmp * (static_cast<T>(3) - static_cast<T>(2) * tmp);
+		return detail::compute_smoothstep_vector<T, P, vecType>::call(edge0, edge1, x);
 	}
 
 #	if GLM_HAS_CXX11_STL

+ 22 - 2
glm/detail/func_common_simd.inl

@@ -203,8 +203,28 @@ namespace detail
 			return Result;
 		}
 	};
-
-
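+/* FIXME: SIMD specialization of compute_step_vector is disabled (commented out); tvec4<float, P> uses the generic compute_step_vector template instead.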
+	template <precision P>
+	struct compute_step_vector<float, P, tvec4>
+	{
+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge, tvec4<float, P> const& x)
+		{
+			tvec4<float, P> result(uninitialize);
+			result.data = glm_f32v4_stp(edge.data, x.data);
+			return result;
+		}
+	};
+*/
+	template <precision P> // Specialization of compute_smoothstep_vector for tvec4<float, P>, operating on the vectors' .data members.
+	struct compute_smoothstep_vector<float, P, tvec4>
+	{
+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge0, tvec4<float, P> const& edge1, tvec4<float, P> const& x)
+		{
+			tvec4<float, P> result(uninitialize); // presumably skips component initialization because .data is overwritten on the next line - TODO confirm
+			result.data = glm_f32v4_ssp(edge0.data, edge1.data, x.data); // glm_f32v4_ssp is defined outside this view; assumed to compute smoothstep on the packed floats - verify
+			return result;
+		}
+	};
 }//namespace detail
 }//namespace glm
 

+ 1 - 1
test/core/core_func_common.cpp

@@ -555,8 +555,8 @@ namespace step_
 
 	entry<float, glm::vec4> TestVec4Scalar [] =
 	{
-		{ 0.0f, glm::vec4(1.0f, 2.0f, 3.0f, 4.0f), glm::vec4(1.0f) },
 		{ 1.0f, glm::vec4(1.0f, 2.0f, 3.0f, 4.0f), glm::vec4(1.0f) },
+		{ 0.0f, glm::vec4(1.0f, 2.0f, 3.0f, 4.0f), glm::vec4(1.0f) },
 		{ 0.0f, glm::vec4(-1.0f, -2.0f, -3.0f, -4.0f), glm::vec4(0.0f) }
 	};