7 years ago · 007fe0ff65
--- a/test/perf/CMakeLists.txt
+++ b/test/perf/CMakeLists.txt
@@ -1,2 +1,4 @@
 
				 glmCreateTestGTC(perf_matrix_mul)
			
 
				+glmCreateTestGTC(perf_matrix_mul_vector) # NOTE(review): presumably registers the target built from test/perf/perf_matrix_mul_vector.cpp - confirm against the glmCreateTestGTC macro
			
 
				 glmCreateTestGTC(perf_matrix_div)
			
 
				+glmCreateTestGTC(perf_vector_mul_matrix) # NOTE(review): presumably registers the target built from the new test/perf/perf_vector_mul_matrix.cpp - confirm against the glmCreateTestGTC macro
			
--- a/test/perf/perf_matrix_div.cpp
+++ b/test/perf/perf_matrix_div.cpp
@@ -26,7 +26,7 @@ static int launch_mat_div_mat(std::vector<matType>& O, matType const& Transform,
 
				 	O.resize(Samples);
			
 
				 
			
 
				 	for(std::size_t i = 0; i < Samples; ++i)
			
 
				-		I[i] = Scale * static_cast<T>(i);
			
 
				+		I[i] = Scale * static_cast<T>(i) + Scale;
			
 
				 
			
 
				 	std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
			
 
				 	test_mat_div_mat<matType>(Transform, I, O);
			
@@ -56,6 +56,7 @@ static int comp_mat2_div_mat2(std::size_t Samples)
 
				 		packedMatType const A = SISD[i];
			
 
				 		packedMatType const B = SIMD[i];
			
 
				 		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1;
			
 
				+		assert(!Error);
			
 
				 	}
			
 
				 	
			
 
				 	return Error;
			
@@ -82,6 +83,7 @@ static int comp_mat3_div_mat3(std::size_t Samples)
 
				 		packedMatType const A = SISD[i];
			
 
				 		packedMatType const B = SIMD[i];
			
 
				 		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1;
			
 
				+		assert(!Error);
			
 
				 	}
			
 
				 	
			
 
				 	return Error;
			
@@ -95,7 +97,7 @@ static int comp_mat4_div_mat4(std::size_t Samples)
 
				 	int Error = 0;
			
 
				 
			
 
				 	packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
			
 
				-	packedMatType const Scale(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05);
			
 
				+	packedMatType const Scale(0.01, 0.02, 0.05, 0.04, 0.02, 0.08, 0.05, 0.01, 0.08, 0.03, 0.05, 0.06, 0.02, 0.03, 0.07, 0.05);
			
 
				 
			
 
				 	std::vector<packedMatType> SISD;
			
 
				 	printf("- SISD: %d us\n", launch_mat_div_mat<packedMatType>(SISD, Transform, Scale, Samples));
			
@@ -108,6 +110,7 @@ static int comp_mat4_div_mat4(std::size_t Samples)
 
				 		packedMatType const A = SISD[i];
			
 
				 		packedMatType const B = SIMD[i];
			
 
				 		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1;
			
 
				+		assert(!Error);
			
 
				 	}
			
 
				 	
			
 
				 	return Error;
			
--- a/test/perf/perf_matrix_mul_vector.cpp
+++ b/test/perf/perf_matrix_mul_vector.cpp
@@ -1,6 +1,12 @@
 
				 #define GLM_FORCE_INLINE
			
 
				+#include <glm/ext/matrix_float2x2.hpp>
			
 
				+#include <glm/ext/matrix_double2x2.hpp>
			
 
				+#include <glm/ext/matrix_float3x3.hpp>
			
 
				+#include <glm/ext/matrix_double3x3.hpp>
			
 
				 #include <glm/ext/matrix_float4x4.hpp>
			
 
				+#include <glm/ext/matrix_double4x4.hpp>
			
 
				 #include <glm/ext/matrix_transform.hpp>
			
 
				+#include <glm/ext/matrix_relational.hpp>
			
 
				 #include <glm/ext/vector_float4.hpp>
			
 
				 #if GLM_CONFIG_SIMD == GLM_ENABLE
			
 
				 #include <glm/gtc/type_aligned.hpp>
			
@@ -16,137 +22,126 @@ static void test_mat_mul_vec(matType const& M, std::vector<vecType> const& I, st
 
				 }
			
 
				 
			
 
				 template <typename matType, typename vecType>
			
 
				-static int launch_mat_mul_vec(std::size_t Samples)
			
 
				+static int launch_mat_mul_vec(std::vector<vecType>& O, matType const& Transform, vecType const& Scale, std::size_t Samples) // Times one test_mat_mul_vec pass over Samples generated inputs; results are left in O and the elapsed microseconds are returned.
			
 
				 {
			
 
				-	typedef typename vecType::value_type T;
			
 
				-
			
 
				-	static const matType Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
			
 
				-
			
 
				-	{
			
 
				-		std::vector<vecType> I(Samples);
			
 
				-		std::vector<vecType> O(Samples);
			
 
				-
			
 
				-		for(std::size_t i = 0; i < Samples; ++i)
			
 
				-			I[i] = vecType(static_cast<T>(i)) * vecType(0.01, 0.02, 0.03, 0.05);
			
 
				-
			
 
				-		std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
			
 
				-		test_mat_mul_vec<matType, vecType>(Transform, I, O);
			
 
				-		std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
			
 
				-
			
 
				-		return static_cast<int>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-template <typename matType, typename vecType>
			
 
				-static void test_vec_mul_mat(matType const& M, std::vector<vecType> const& I, std::vector<vecType>& O)
			
 
				-{
			
 
				-	for (std::size_t i = 0, n = I.size(); i < n; ++i)
			
 
				-		O[i] = I[i] * M;
			
 
				-}
			
 
				-
			
 
				-template <typename matType, typename vecType>
			
 
				-static int launch_vec_mul_mat(std::size_t Samples)
			
 
				-{
			
 
				-	typedef typename vecType::value_type T;
			
 
				-
			
 
				-	static const matType Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
			
 
				+	typedef typename matType::value_type T; // scalar type the loop index is converted to before scaling
			
 
				 
			
 
				 	std::vector<vecType> I(Samples);
			
 
				-	std::vector<vecType> O(Samples);
			
 
				+	O.resize(Samples); // O is the caller's buffer; it is sized to Samples entries before being handed to test_mat_mul_vec
			
 
				 
			
 
				 	for(std::size_t i = 0; i < Samples; ++i)
			
 
				-		I[i] = vecType(static_cast<T>(i)) * vecType(0.01, 0.02, 0.03, 0.05);
			
 
				+		I[i] = Scale * static_cast<T>(i); // input i is the caller-supplied Scale multiplied by i
			
 
				 
			
 
				 	std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
			
 
				-	test_vec_mul_mat<matType, vecType>(Transform, I, O);
			
 
				+	test_mat_mul_vec<matType, vecType>(Transform, I, O); // the only call between the two clock reads, so the setup above is not timed
			
 
				 	std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
			
 
				 
			
 
				 	return static_cast<int>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
			
 
				 }
			
 
				 
			
 
				-template <typename matType>
			
 
				-static void test_mat_mul_mat(matType const& M, std::vector<matType> const& I, std::vector<matType>& O)
			
 
				+template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
			
 
				+static int comp_mat2_mul_vec2(std::size_t Samples) // Runs the mat2 * vec2 benchmark with the packed and the aligned types, prints both timings, and returns how many samples differ between the two result sets.
			
 
				 {
			
 
				-	for (std::size_t i = 0, n = I.size(); i < n; ++i)
			
 
				-		O[i] = M * I[i];
			
 
				-}
			
 
				+	typedef typename packedMatType::value_type T; // scalar type used for the comparison epsilon below
			
 
				+	
			
 
				+	int Error = 0; // number of mismatching samples found so far
			
 
				 
			
 
				-template <typename matType>
			
 
				-static int launch_mat_mul_mat(std::size_t Samples)
			
 
				-{
			
 
				-	typedef typename matType::value_type T;
			
 
				+	packedMatType const Transform(1, 2, 3, 4); // fixed matrix operand shared by both runs
			
 
				+	packedVecType const Scale(0.01, 0.02); // fixed per-component scale the launcher multiplies by the sample index
			
 
				 
			
 
				-	static const matType Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
			
 
				+	std::vector<packedVecType> SISD; // filled by the packed run
			
 
				+	printf("- SISD: %d us\n", launch_mat_mul_vec<packedMatType, packedVecType>(SISD, Transform, Scale, Samples)); // the printed value is the launcher's microsecond timing
			
 
				 
			
 
				-	std::vector<matType> I(Samples);
			
 
				-	std::vector<matType> O(Samples);
			
 
				+	std::vector<alignedVecType> SIMD; // filled by the aligned run
			
 
				+	printf("- SIMD: %d us\n", launch_mat_mul_vec<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples)); // NOTE(review): Transform and Scale are packed objects bound to aligned parameter types here - confirm the conversion is intended
			
 
				 
			
 
				 	for(std::size_t i = 0; i < Samples; ++i)
			
 
				-		I[i] = matType(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05) * static_cast<T>(i);
			
 
				-
			
 
				-	std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
			
 
				-	test_mat_mul_mat<matType>(Transform, I, O);
			
 
				-	std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
			
 
				-
			
 
				-	return static_cast<int>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
			
 
				+	{
			
 
				+		packedVecType const A = SISD[i];
			
 
				+		packedVecType const B = packedVecType(SIMD[i]); // aligned result converted explicitly to the packed type before comparing
			
 
				+		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1; // a sample counts as one error unless glm::all(glm::equal(A, B, 0.001)) holds
			
 
				+	}
			
 
				+	
			
 
				+	return Error;
			
 
				 }
			
 
				 
			
 
				-template <typename matType>
			
 
				-static void test_mat_div_mat(matType const& M, std::vector<matType> const& I, std::vector<matType>& O)
			
 
				+template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
			
 
				+static int comp_mat3_mul_vec3(std::size_t Samples) // Runs the mat3 * vec3 benchmark with the packed and the aligned types, prints both timings, and returns how many samples differ between the two result sets.
			
 
				 {
			
 
				-	for (std::size_t i = 0, n = I.size(); i < n; ++i)
			
 
				-		O[i] = M / I[i];
			
 
				+	typedef typename packedMatType::value_type T; // scalar type used for the comparison epsilon below
			
 
				+	
			
 
				+	int Error = 0; // number of mismatching samples found so far
			
 
				+	// Fixed operands shared by both runs.
			
 
				+	packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
			
 
				+	packedVecType const Scale(0.01, 0.02, 0.05);
			
 
				+	// Packed run: the printed value is the launcher's microsecond timing.
			
 
				+	std::vector<packedVecType> SISD;
			
 
				+	printf("- SISD: %d us\n", launch_mat_mul_vec<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
			
 
				+	// Aligned run. NOTE(review): Transform and Scale are packed objects bound to aligned parameter types - confirm the conversion is intended.
			
 
				+	std::vector<alignedVecType> SIMD;
			
 
				+	printf("- SIMD: %d us\n", launch_mat_mul_vec<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
			
 
				+	// Compare the two result vectors sample by sample.
			
 
				+	for(std::size_t i = 0; i < Samples; ++i)
			
 
				+	{
			
 
				+		packedVecType const A = SISD[i];
			
 
				+		packedVecType const B = SIMD[i]; // implicit aligned-to-packed conversion; comp_mat2_mul_vec2 spells the same conversion out explicitly
			
 
				+		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1; // a sample counts as one error unless glm::all(glm::equal(A, B, 0.001)) holds
			
 
				+	}
			
 
				+	
			
 
				+	return Error;
			
 
				 }
			
 
				 
			
 
				-template <typename matType>
			
 
				-static int launch_mat_div_mat(std::size_t Samples)
			
 
				+template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
			
 
				+static int comp_mat4_mul_vec4(std::size_t Samples) // Runs the mat4 * vec4 benchmark with the packed and the aligned types, prints both timings, and returns how many samples differ between the two result sets.
			
 
				 {
			
 
				-	typedef typename matType::value_type T;
			
 
				+	typedef typename packedMatType::value_type T; // scalar type used for the comparison epsilon below
			
 
				 	
			
 
				-	static const matType Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
			
 
				+	int Error = 0; // number of mismatching samples found so far
			
 
				 
			
 
				-	std::vector<matType> I(Samples);
			
 
				-	std::vector<matType> O(Samples);
			
 
				+	packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); // fixed matrix operand shared by both runs
			
 
				+	packedVecType const Scale(0.01, 0.02, 0.03, 0.05); // fixed per-component scale the launcher multiplies by the sample index
			
 
				 
			
 
				-	for(std::size_t i = 0; i < Samples; ++i)
			
 
				-		I[i] = matType(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05) * static_cast<T>(i);
			
 
				+	std::vector<packedVecType> SISD; // filled by the packed run
			
 
				+	printf("- SISD: %d us\n", launch_mat_mul_vec<packedMatType, packedVecType>(SISD, Transform, Scale, Samples)); // the printed value is the launcher's microsecond timing
			
 
				 
			
 
				-	std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
			
 
				-	test_mat_div_mat<matType>(Transform, I, O);
			
 
				-	std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
			
 
				+	std::vector<alignedVecType> SIMD; // filled by the aligned run
			
 
				+	printf("- SIMD: %d us\n", launch_mat_mul_vec<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples)); // NOTE(review): Transform and Scale are packed objects bound to aligned parameter types here - confirm the conversion is intended
			
 
				 
			
 
				-	return static_cast<int>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
			
 
				+	for(std::size_t i = 0; i < Samples; ++i) // compare the two result vectors sample by sample
			
 
				+	{
			
 
				+		packedVecType const A = SISD[i];
			
 
				+		packedVecType const B = SIMD[i]; // implicit aligned-to-packed conversion; comp_mat2_mul_vec2 spells the same conversion out explicitly
			
 
				+		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1; // a sample counts as one error unless glm::all(glm::equal(A, B, 0.001)) holds
			
 
				+	}
			
 
				+	
			
 
				+	return Error;
			
 
				 }
			
 
				 
			
 
				 int main()
			
 
				 {
			
 
				-	std::size_t const Samples = 50000;
			
 
				-
			
 
				-	printf("\nmat4 * vec4\n");
			
 
				-	printf("- dmat4 * dvec4 duration %d us\n", launch_mat_mul_vec<glm::dmat4, glm::dvec4>(Samples));
			
 
				-	printf("- dmat4 * dvec4 (SIMD) duration %d us\n", launch_mat_mul_vec<glm::aligned_dmat4, glm::aligned_dvec4>(Samples));
			
 
				-	printf("- mat4 * vec4 duration %d us\n", launch_mat_mul_vec<glm::mat4, glm::vec4>(Samples));
			
 
				-	printf("- mat4 * vec4 (SIMD) duration %d us\n", launch_mat_mul_vec<glm::aligned_mat4, glm::aligned_vec4>(Samples));
			
 
				-
			
 
				-	printf("\nvec4 * mat4\n");
			
 
				-	printf("- dvec4 * dmat4 duration %d us\n", launch_vec_mul_mat<glm::dmat4, glm::dvec4>(Samples));
			
 
				-	printf("- dvec4 * dmat4 (SIMD) duration %d us\n", launch_vec_mul_mat<glm::aligned_dmat4, glm::aligned_dvec4>(Samples));
			
 
				-	printf("- vec4 * mat4 duration %d us\n", launch_vec_mul_mat<glm::mat4, glm::vec4>(Samples));
			
 
				-	printf("- vec4 * mat4 (SIMD) duration %d us\n", launch_vec_mul_mat<glm::aligned_mat4, glm::aligned_vec4>(Samples));
			
 
				-
			
 
				-	printf("\nmat4 * mat4\n");
			
 
				-	printf("- dmat4 * dmat4 duration %d us\n", launch_mat_mul_mat<glm::dmat4>(Samples));
			
 
				-	printf("- dmat4 * dmat4 (SIMD) duration %d us\n", launch_mat_mul_mat<glm::aligned_dmat4>(Samples));
			
 
				-	printf("- mat4 * mat4 duration %d us\n", launch_mat_mul_mat<glm::mat4>(Samples));
			
 
				-	printf("- mat4 * mat4 (SIMD) duration %d us\n", launch_mat_mul_mat<glm::aligned_mat4>(Samples));
			
 
				-
			
 
				-	printf("\nmat4 / mat4\n");
			
 
				-	printf("- dmat4 / dmat4 duration %d us\n", launch_mat_div_mat<glm::dmat4>(Samples));
			
 
				-	printf("- dmat4 / dmat4 (SIMD) duration %d us\n", launch_mat_div_mat<glm::aligned_dmat4>(Samples));
			
 
				-	printf("- mat4 / mat4 duration %d us\n", launch_mat_div_mat<glm::mat4>(Samples));
			
 
				-	printf("- mat4 / mat4 (SIMD) duration %d us\n", launch_mat_div_mat<glm::aligned_mat4>(Samples));
			
 
				+	std::size_t const Samples = 100000; // number of input vectors generated for every comparison below
			
 
				+	
			
 
				+	int Error = 0; // running total of mismatching samples across all comparisons
			
 
				 
			
 
				-	return 0;
			
 
				+	printf("mat2 * vec2:\n"); // each heading is followed by the timings printed inside the comparison it introduces
			
 
				+	Error += comp_mat2_mul_vec2<glm::mat2, glm::vec2, glm::aligned_mat2, glm::aligned_vec2>(Samples);
			
 
				+	
			
 
				+	printf("dmat2 * dvec2:\n");
			
 
				+	Error += comp_mat2_mul_vec2<glm::dmat2, glm::dvec2,glm::aligned_dmat2, glm::aligned_dvec2>(Samples);
			
 
				+
			
 
				+	printf("mat3 * vec3:\n");
			
 
				+	Error += comp_mat3_mul_vec3<glm::mat3, glm::vec3, glm::aligned_mat3, glm::aligned_vec3>(Samples);
			
 
				+	
			
 
				+	printf("dmat3 * dvec3:\n");
			
 
				+	Error += comp_mat3_mul_vec3<glm::dmat3, glm::dvec3, glm::aligned_dmat3, glm::aligned_dvec3>(Samples);
			
 
				+
			
 
				+	printf("mat4 * vec4:\n");
			
 
				+	Error += comp_mat4_mul_vec4<glm::mat4, glm::vec4, glm::aligned_mat4, glm::aligned_vec4>(Samples);
			
 
				+	
			
 
				+	printf("dmat4 * dvec4:\n");
			
 
				+	Error += comp_mat4_mul_vec4<glm::dmat4, glm::dvec4, glm::aligned_dmat4, glm::aligned_dvec4>(Samples);
			
 
				+
			
 
				+	return Error; // NOTE(review): the mismatch total is used directly as the exit status; where exit codes are truncated to 8 bits a total that is a multiple of 256 would read as success - confirm whether clamping is wanted
			
 
				 }
			
 
				 
			
 
				 #else
			
--- a/test/perf/perf_vector_mul_matrix.cpp
+++ b/test/perf/perf_vector_mul_matrix.cpp
@@ -0,0 +1,154 @@
 
				+#define GLM_FORCE_INLINE
			
 
				+#include <glm/ext/matrix_float2x2.hpp>
			
 
				+#include <glm/ext/matrix_double2x2.hpp>
			
 
				+#include <glm/ext/matrix_float3x3.hpp>
			
 
				+#include <glm/ext/matrix_double3x3.hpp>
			
 
				+#include <glm/ext/matrix_float4x4.hpp>
			
 
				+#include <glm/ext/matrix_double4x4.hpp>
			
 
				+#include <glm/ext/matrix_transform.hpp>
			
 
				+#include <glm/ext/matrix_relational.hpp>
			
 
				+#include <glm/ext/vector_float4.hpp>
			
 
				+#if GLM_CONFIG_SIMD == GLM_ENABLE
			
 
				+#include <glm/gtc/type_aligned.hpp>
			
 
				+#include <vector>
			
 
				+#include <chrono>
			
 
				+#include <cstdio>
			
 
				+
			
 
				+template <typename matType, typename vecType> // matType: type of the matrix operand; vecType: element type of both vectors
			
 
				+static void test_vec_mul_mat(matType const& M, std::vector<vecType> const& I, std::vector<vecType>& O) // Stores I[i] * M into O[i] for every index of I.
			
 
				+{
			
 
				+	for (std::size_t i = 0, n = I.size(); i < n; ++i) // O is indexed but never resized here: the caller must size it to at least I.size() first
			
 
				+		O[i] = I[i] * M; // vector on the left-hand side: this is vec * mat, not mat * vec
			
 
				+}
			
 
				+
			
 
				+template <typename matType, typename vecType>
			
 
				+static int launch_vec_mul_mat(std::vector<vecType>& O, matType const& Transform, vecType const& Scale, std::size_t Samples) // Times one test_vec_mul_mat pass over Samples generated inputs; results are left in O and the elapsed microseconds are returned.
			
 
				+{
			
 
				+	typedef typename matType::value_type T; // scalar type the loop index is converted to before scaling
			
 
				+	// I holds the generated inputs; O is the caller's buffer, resized so the multiply loop can index it.
			
 
				+	std::vector<vecType> I(Samples);
			
 
				+	O.resize(Samples);
			
 
				+	// Input i is the caller-supplied Scale multiplied by i.
			
 
				+	for(std::size_t i = 0; i < Samples; ++i)
			
 
				+		I[i] = Scale * static_cast<T>(i);
			
 
				+	// Only the multiply pass sits between the two clock reads; the setup above is not timed.
			
 
				+	std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
			
 
				+	test_vec_mul_mat<matType, vecType>(Transform, I, O);
			
 
				+	std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
			
 
				+	// The microsecond count is narrowed to int for the %d format used by the callers.
			
 
				+	return static_cast<int>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
			
 
				+}
			
 
				+
			
 
				+template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
			
 
				+static int comp_vec2_mul_mat2(std::size_t Samples) // Runs the vec2 * mat2 benchmark with the packed and the aligned types, prints both timings, and returns how many samples differ between the two result sets.
			
 
				+{
			
 
				+	typedef typename packedMatType::value_type T; // scalar type used for the comparison epsilon below
			
 
				+	
			
 
				+	int Error = 0; // number of mismatching samples found so far
			
 
				+	// Fixed operands shared by both runs.
			
 
				+	packedMatType const Transform(1, 2, 3, 4);
			
 
				+	packedVecType const Scale(0.01, 0.02);
			
 
				+	// Packed run: the printed value is the launcher's microsecond timing.
			
 
				+	std::vector<packedVecType> SISD;
			
 
				+	printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
			
 
				+	// Aligned run. NOTE(review): Transform and Scale are packed objects bound to aligned parameter types - confirm the conversion is intended.
			
 
				+	std::vector<alignedVecType> SIMD;
			
 
				+	printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
			
 
				+	// Compare the two result vectors sample by sample.
			
 
				+	for(std::size_t i = 0; i < Samples; ++i)
			
 
				+	{
			
 
				+		packedVecType const A = SISD[i];
			
 
				+		packedVecType const B = packedVecType(SIMD[i]); // aligned result converted explicitly to the packed type before comparing
			
 
				+		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1; // a sample counts as one error unless glm::all(glm::equal(A, B, 0.001)) holds
			
 
				+	}
			
 
				+	
			
 
				+	return Error;
			
 
				+}
			
 
				+
			
 
				+template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
			
 
				+static int comp_vec3_mul_mat3(std::size_t Samples) // Runs the vec3 * mat3 benchmark with the packed and the aligned types, prints both timings, and returns how many samples differ between the two result sets.
			
 
				+{
			
 
				+	typedef typename packedMatType::value_type T; // scalar type used for the comparison epsilon below
			
 
				+	
			
 
				+	int Error = 0; // number of mismatching samples found so far
			
 
				+	// Fixed operands shared by both runs.
			
 
				+	packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
			
 
				+	packedVecType const Scale(0.01, 0.02, 0.05);
			
 
				+	// Packed run: the printed value is the launcher's microsecond timing.
			
 
				+	std::vector<packedVecType> SISD;
			
 
				+	printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
			
 
				+	// Aligned run. NOTE(review): Transform and Scale are packed objects bound to aligned parameter types - confirm the conversion is intended.
			
 
				+	std::vector<alignedVecType> SIMD;
			
 
				+	printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
			
 
				+	// Compare the two result vectors sample by sample.
			
 
				+	for(std::size_t i = 0; i < Samples; ++i)
			
 
				+	{
			
 
				+		packedVecType const A = SISD[i];
			
 
				+		packedVecType const B = SIMD[i]; // implicit aligned-to-packed conversion; comp_vec2_mul_mat2 spells the same conversion out explicitly
			
 
				+		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1; // a sample counts as one error unless glm::all(glm::equal(A, B, 0.001)) holds
			
 
				+	}
			
 
				+	
			
 
				+	return Error;
			
 
				+}
			
 
				+
			
 
				+template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
			
 
				+static int comp_vec4_mul_mat4(std::size_t Samples) // Runs the vec4 * mat4 benchmark with the packed and the aligned types, prints both timings, and returns how many samples differ between the two result sets.
			
 
				+{
			
 
				+	typedef typename packedMatType::value_type T; // scalar type used for the comparison epsilon below
			
 
				+	
			
 
				+	int Error = 0; // number of mismatching samples found so far
			
 
				+	// Fixed operands shared by both runs.
			
 
				+	packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
			
 
				+	packedVecType const Scale(0.01, 0.02, 0.03, 0.05);
			
 
				+	// Packed run: the printed value is the launcher's microsecond timing.
			
 
				+	std::vector<packedVecType> SISD;
			
 
				+	printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
			
 
				+	// Aligned run. NOTE(review): Transform and Scale are packed objects bound to aligned parameter types - confirm the conversion is intended.
			
 
				+	std::vector<alignedVecType> SIMD;
			
 
				+	printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
			
 
				+	// Compare the two result vectors sample by sample.
			
 
				+	for(std::size_t i = 0; i < Samples; ++i)
			
 
				+	{
			
 
				+		packedVecType const A = SISD[i];
			
 
				+		packedVecType const B = SIMD[i]; // implicit aligned-to-packed conversion; comp_vec2_mul_mat2 spells the same conversion out explicitly
			
 
				+		Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1; // a sample counts as one error unless glm::all(glm::equal(A, B, 0.001)) holds
			
 
				+	}
			
 
				+	
			
 
				+	return Error;
			
 
				+}
			
 
				+
			
 
				+int main() // Runs every vec * mat comparison for the float and double types and returns the summed mismatch count.
			
 
				+{
			
 
				+	std::size_t const Samples = 100000; // number of input vectors generated for every comparison below
			
 
				+	
			
 
				+	int Error = 0; // running total of mismatching samples across all comparisons
			
 
				+	// Each heading is followed by the timings printed inside the comparison it introduces.
			
 
				+	printf("vec2 * mat2:\n");
			
 
				+	Error += comp_vec2_mul_mat2<glm::mat2, glm::vec2, glm::aligned_mat2, glm::aligned_vec2>(Samples);
			
 
				+	
			
 
				+	printf("dvec2 * dmat2:\n");
			
 
				+	Error += comp_vec2_mul_mat2<glm::dmat2, glm::dvec2,glm::aligned_dmat2, glm::aligned_dvec2>(Samples);
			
 
				+
			
 
				+	printf("vec3 * mat3:\n");
			
 
				+	Error += comp_vec3_mul_mat3<glm::mat3, glm::vec3, glm::aligned_mat3, glm::aligned_vec3>(Samples);
			
 
				+	
			
 
				+	printf("dvec3 * dmat3:\n");
			
 
				+	Error += comp_vec3_mul_mat3<glm::dmat3, glm::dvec3, glm::aligned_dmat3, glm::aligned_dvec3>(Samples);
			
 
				+
			
 
				+	printf("vec4 * mat4:\n");
			
 
				+	Error += comp_vec4_mul_mat4<glm::mat4, glm::vec4, glm::aligned_mat4, glm::aligned_vec4>(Samples);
			
 
				+	
			
 
				+	printf("dvec4 * dmat4:\n");
			
 
				+	Error += comp_vec4_mul_mat4<glm::dmat4, glm::dvec4, glm::aligned_dmat4, glm::aligned_dvec4>(Samples);
			
 
				+
			
 
				+	return Error; // NOTE(review): the mismatch total is used directly as the exit status; where exit codes are truncated to 8 bits a total that is a multiple of 256 would read as success - confirm whether clamping is wanted
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+int main() // Fallback entry point for the #else branch, i.e. when GLM_CONFIG_SIMD is not GLM_ENABLE: nothing is measured or compared.
			
 
				+{
			
 
				+	return 0; // always reports success in this configuration
			
 
				+}
			
 
				+
			
 
				+#endif