Browse Source

Fixed matrix functions using SIMD code on unaligned types #518

Christophe Riccio 9 years ago
parent
commit
6e9b524be1
3 changed files with 51 additions and 42 deletions
  1. glm/detail/func_matrix.inl (+38 -38)
  2. glm/detail/func_matrix_simd.inl (+6 -4)
  3. test/core/core_func_geometric.cpp (+7 -0)

+ 38 - 38
glm/detail/func_matrix.inl

@@ -7,7 +7,7 @@
 namespace glm{
 namespace detail
 {
-	template <template <typename, precision> class matType, typename T, precision P>
+	template <template <typename, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_matrixCompMult
 	{
 		GLM_FUNC_QUALIFIER static matType<T, P> call(matType<T, P> const& x, matType<T, P> const& y)
@@ -19,11 +19,11 @@ namespace detail
 		}
 	};
 
-	template <template <class, precision> class matType, typename T, precision P>
+	template <template <class, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_transpose{};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat2x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat2x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x2<T, P> call(tmat2x2<T, P> const & m)
 		{
@@ -36,8 +36,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat2x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat2x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x2<T, P> call(tmat2x3<T, P> const & m)
 		{
@@ -52,8 +52,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat2x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat2x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x2<T, P> call(tmat2x4<T, P> const & m)
 		{
@@ -70,8 +70,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat3x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat3x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x3<T, P> call(tmat3x2<T, P> const & m)
 		{
@@ -86,8 +86,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat3x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat3x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x3<T, P> call(tmat3x3<T, P> const & m)
 		{
@@ -107,8 +107,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat3x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat3x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x3<T, P> call(tmat3x4<T, P> const & m)
 		{
@@ -129,8 +129,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat4x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat4x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x4<T, P> call(tmat4x2<T, P> const & m)
 		{
@@ -147,8 +147,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat4x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat4x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x4<T, P> call(tmat4x3<T, P> const & m)
 		{
@@ -169,8 +169,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_transpose<tmat4x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat4x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<T, P> call(tmat4x4<T, P> const & m)
 		{
@@ -198,11 +198,11 @@ namespace detail
 		}
 	};
 
-	template <template <typename, precision> class matType, typename T, precision P>
+	template <template <typename, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_determinant{};
 
-	template <typename T, precision P>
-	struct compute_determinant<tmat2x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_determinant<tmat2x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tmat2x2<T, P> const & m)
 		{
@@ -210,8 +210,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_determinant<tmat3x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_determinant<tmat3x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tmat3x3<T, P> const & m)
 		{
@@ -222,8 +222,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_determinant<tmat4x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_determinant<tmat4x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tmat4x4<T, P> const & m)
 		{
@@ -246,11 +246,11 @@ namespace detail
 		}
 	};
 
-	template <template <typename, precision> class matType, typename T, precision P>
+	template <template <typename, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_inverse{};
 
-	template <typename T, precision P>
-	struct compute_inverse<tmat2x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_inverse<tmat2x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x2<T, P> call(tmat2x2<T, P> const& m)
 		{
@@ -268,8 +268,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_inverse<tmat3x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_inverse<tmat3x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x3<T, P> call(tmat3x3<T, P> const& m)
 		{
@@ -293,8 +293,8 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
-	struct compute_inverse<tmat4x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_inverse<tmat4x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<T, P> call(tmat4x4<T, P> const& m)
 		{
@@ -359,7 +359,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER matType<T, P> matrixCompMult(matType<T, P> const & x, matType<T, P> const & y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'matrixCompMult' only accept floating-point inputs");
-		return detail::compute_matrixCompMult<matType, T, P>::call(x, y);
+		return detail::compute_matrixCompMult<matType, T, P, detail::is_aligned<P>::value>::call(x, y);
 	}
 
 	template<typename T, precision P, template <typename, precision> class vecTypeA, template <typename, precision> class vecTypeB>
@@ -377,21 +377,21 @@ namespace detail
 	GLM_FUNC_QUALIFIER typename matType<T, P>::transpose_type transpose(matType<T, P> const & m)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'transpose' only accept floating-point inputs");
-		return detail::compute_transpose<matType, T, P>::call(m);
+		return detail::compute_transpose<matType, T, P, detail::is_aligned<P>::value>::call(m);
 	}
 
 	template <typename T, precision P, template <typename, precision> class matType>
 	GLM_FUNC_QUALIFIER T determinant(matType<T, P> const & m)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'determinant' only accept floating-point inputs");
-		return detail::compute_determinant<matType, T, P>::call(m);
+		return detail::compute_determinant<matType, T, P, detail::is_aligned<P>::value>::call(m);
 	}
 
 	template <typename T, precision P, template <typename, precision> class matType>
 	GLM_FUNC_QUALIFIER matType<T, P> inverse(matType<T, P> const & m)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'inverse' only accept floating-point inputs");
-		return detail::compute_inverse<matType, T, P>::call(m);
+		return detail::compute_inverse<matType, T, P, detail::is_aligned<P>::value>::call(m);
 	}
 }//namespace glm
 

+ 6 - 4
glm/detail/func_matrix_simd.inl

@@ -11,8 +11,10 @@ namespace glm{
 namespace detail
 {
 	template <precision P>
-	struct compute_matrixCompMult<tmat4x4, float, P>
+	struct compute_matrixCompMult<tmat4x4, float, P, true>
 	{
+		GLM_STATIC_ASSERT(detail::is_aligned<P>::value, "Specialization requires aligned");
+
 		GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const & x, tmat4x4<float, P> const & y)
 		{
 			tmat4x4<float, P> result(uninitialize);
@@ -25,7 +27,7 @@ namespace detail
 	};
 
 	template <precision P>
-	struct compute_transpose<tmat4x4, float, P>
+	struct compute_transpose<tmat4x4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const & m)
 		{
@@ -38,7 +40,7 @@ namespace detail
 	};
 
 	template <precision P>
-	struct compute_determinant<tmat4x4, float, P>
+	struct compute_determinant<tmat4x4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static float call(tmat4x4<float, P> const& m)
 		{
@@ -47,7 +49,7 @@ namespace detail
 	};
 
 	template <precision P>
-	struct compute_inverse<tmat4x4, float, P>
+	struct compute_inverse<tmat4x4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const& m)
 		{

+ 7 - 0
test/core/core_func_geometric.cpp

@@ -87,6 +87,13 @@ namespace normalize
 		glm::vec3 Normalize1 = glm::normalize(glm::vec3(1, 0, 0));
 		glm::vec3 Normalize2 = glm::normalize(glm::vec3(2, 0, 0));
 
+		glm::vec3 Normalize3 = glm::normalize(glm::vec3(-0.6, 0.7, -0.5));
+
+		glm::vec3 ro = glm::vec3(glm::cos(5.f) * 3.f, 2.f, glm::sin(5.f) * 3.f);
+		glm::vec3 w = glm::normalize(glm::vec3(0, -0.2f, 0) - ro);
+		glm::vec3 u = glm::normalize(glm::cross(w, glm::vec3(0, 1, 0)));
+		glm::vec3 v = glm::cross(u, w);
+
 		int Error = 0;
 
 		Error += glm::all(glm::lessThan(glm::abs(Normalize1 - glm::vec3(1, 0, 0)), glm::vec3(std::numeric_limits<float>::epsilon()))) ? 0 : 1;