Browse Source

Updated det tests + PSHUFD det impl

Christophe Riccio 15 years ago
parent
commit
4cb7bcd4f1
3 changed files with 139 additions and 50 deletions
  1. 68 0
      glm/core/intrinsic_matrix.inl
  2. 15 0
      glm/gtx/simd_mat4.inl
  3. 56 50
      test/gtx/gtx-simd-mat4.cpp

+ 68 - 0
glm/core/intrinsic_matrix.inl

@@ -408,11 +408,79 @@ inline __m128 sse_slow_det_ps(__m128 const in[4])
 	return Det0;
 }
 
+inline __m128 sse_detd_ps // 4x4 determinant by cofactor expansion; every single-register shuffle goes through _mm_shuffle_epi32 (PSHUFD) instead of _mm_shuffle_ps
+(
+	__m128 const m[4] // the four __m128 vectors of the matrix, indexed m[0]..m[3] exactly as in the scalar reference comments below
+)
+{
+	// Shuffle idiom used throughout: _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x), mask)) -- view the floats as 32-bit integer lanes, permute them, view them as floats again. _MM_SHUFFLE(d, c, b, a) picks source lanes (a, b, c, d) for result lanes (0, 1, 2, 3).
+
+	//T SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
+	//T SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
+	//T SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
+	//T SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
+	//T SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
+	//T SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
+
+	// First 2 columns: the left-hand products of SubFactor00..03 (m[2] lanes 2,1,1,0 times m[3] lanes 3,3,2,3)
+ 	__m128 Swp2A = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[2]), _MM_SHUFFLE(0, 1, 1, 2)));
+ 	__m128 Swp3A = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[3]), _MM_SHUFFLE(3, 2, 3, 3)));
+	__m128 MulA = _mm_mul_ps(Swp2A, Swp3A);
+
+	// Second 2 columns: the right-hand products of SubFactor00..03 (the same two masks with m[2] and m[3] swapped)
+	__m128 Swp2B = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[2]), _MM_SHUFFLE(3, 2, 3, 3)));
+	__m128 Swp3B = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[3]), _MM_SHUFFLE(0, 1, 1, 2)));
+	__m128 MulB = _mm_mul_ps(Swp2B, Swp3B);
+
+	// Columns subtraction: SubE = (SubFactor00, SubFactor01, SubFactor02, SubFactor03)
+	__m128 SubE = _mm_sub_ps(MulA, MulB);
+
+	// Last 2 rows: MulC holds all four products of SubFactor04 and SubFactor05; high half minus low half leaves SubF = (SubFactor04, SubFactor05, unused, unused)
+	__m128 Swp2C = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[2]), _MM_SHUFFLE(0, 0, 1, 2)));
+	__m128 Swp3C = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[3]), _MM_SHUFFLE(1, 2, 0, 0)));
+	__m128 MulC = _mm_mul_ps(Swp2C, Swp3C);
+	__m128 SubF = _mm_sub_ps(_mm_movehl_ps(MulC, MulC), MulC);
+
+	//detail::tvec4<T> DetCof(
+	//	+ (m[1][1] * SubFactor00 - m[1][2] * SubFactor01 + m[1][3] * SubFactor02),
+	//	- (m[1][0] * SubFactor00 - m[1][2] * SubFactor03 + m[1][3] * SubFactor04),
+	//	+ (m[1][0] * SubFactor01 - m[1][1] * SubFactor03 + m[1][3] * SubFactor05),
+	//	- (m[1][0] * SubFactor02 - m[1][1] * SubFactor04 + m[1][2] * SubFactor05));
+
+	__m128 SubFacA = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(SubE), _MM_SHUFFLE(2, 1, 0, 0))); // (SubFactor00, SubFactor00, SubFactor01, SubFactor02)
+	__m128 SwpFacA = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[1]), _MM_SHUFFLE(0, 0, 0, 1))); // (m[1][1], m[1][0], m[1][0], m[1][0])
+	__m128 MulFacA = _mm_mul_ps(SwpFacA, SubFacA); // first term of each DetCof lane
+
+	__m128 SubTmpB = _mm_shuffle_ps(SubE, SubF, _MM_SHUFFLE(0, 0, 3, 1)); // mixes two source registers, so _mm_shuffle_ps is kept here: (SubE[1], SubE[3], SubF[0], SubF[0])
+	__m128 SubFacB = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(SubTmpB), _MM_SHUFFLE(3, 1, 1, 0)));// listed from lane 3 down to lane 0: SubF[0], SubE[3], SubE[3], SubE[1], i.e. lanes 0..3 = (SubFactor01, SubFactor03, SubFactor03, SubFactor04)
+	__m128 SwpFacB = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[1]), _MM_SHUFFLE(1, 1, 2, 2))); // (m[1][2], m[1][2], m[1][1], m[1][1])
+	__m128 MulFacB = _mm_mul_ps(SwpFacB, SubFacB); // second term of each DetCof lane
+
+	__m128 SubRes = _mm_sub_ps(MulFacA, MulFacB); // first term - second term
+
+	__m128 SubTmpC = _mm_shuffle_ps(SubE, SubF, _MM_SHUFFLE(1, 0, 2, 2)); // two-source shuffle again: (SubE[2], SubE[2], SubF[0], SubF[1])
+	__m128 SubFacC = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(SubTmpC), _MM_SHUFFLE(3, 3, 2, 0))); // (SubFactor02, SubFactor04, SubFactor05, SubFactor05)
+	__m128 SwpFacC = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(m[1]), _MM_SHUFFLE(2, 3, 3, 3))); // (m[1][3], m[1][3], m[1][3], m[1][2])
+	__m128 MulFacC = _mm_mul_ps(SwpFacC, SubFacC); // third term of each DetCof lane
+
+	__m128 AddRes = _mm_add_ps(SubRes, MulFacC); // first - second + third
+	__m128 DetCof = _mm_mul_ps(AddRes, _mm_setr_ps( 1.0f,-1.0f, 1.0f,-1.0f)); // apply the alternating cofactor signs (+, -, +, -) to lanes 0..3
+
+	//return m[0][0] * DetCof[0]
+	//	 + m[0][1] * DetCof[1]
+	//	 + m[0][2] * DetCof[2]
+	//	 + m[0][3] * DetCof[3];
+
+	return sse_dot_ps(m[0], DetCof); // dot of m[0] with the cofactors; sse_dot_ps is defined elsewhere, so which lanes of the result carry the determinant depends on it
+}
+
 inline __m128 sse_det_ps
 (
 	__m128 const m[4]
 )
 {
+	// _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(add)
+
 	//T SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
 	//T SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
 	//T SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];

+ 15 - 0
glm/gtx/simd_mat4.inl

@@ -10,6 +10,21 @@
 namespace glm{
 namespace detail
 {
+	inline fmat4x4SIMD::size_type fmat4x4SIMD::value_size() // size of the matrix's value_type
+	{
+		return sizeof(value_type); // in bytes, as reported by sizeof
+	}
+
+	inline fmat4x4SIMD::size_type fmat4x4SIMD::col_size() // column dimension of the matrix type
+	{
+		return 4; // fixed: the type is a 4x4 matrix
+	}
+
+	inline fmat4x4SIMD::size_type fmat4x4SIMD::row_size() // row dimension of the matrix type
+	{
+		return 4; // fixed: the type is a 4x4 matrix
+	}
+
     inline fmat4x4SIMD::fmat4x4SIMD()
     {}
 

+ 56 - 50
test/gtx/gtx-simd-mat4.cpp

@@ -10,98 +10,104 @@
 #define GLM_INSTRUCTION_SET GLM_PLATFORM_SSE3
 #include <glm/glm.hpp>
 #include <glm/gtx/simd_mat4.hpp>
+#include <glm/gtx/random.hpp>
 #include <iostream>
 #include <ctime>
 #include <vector>
+#include <array>
 
-void test_detA()
+std::vector<float> test_detA(std::vector<glm::mat4> const & Data) // times glm::determinant over every matrix in Data and returns the determinants in input order
 {
-	glm::mat4 Identity(
-		glm::vec4(4.0f, 0.7f, 0.1f, 0.01f),
-		glm::vec4(0.5f, 3.0f, 0.6f, 0.02f),
-		glm::vec4(0.2f, 0.4f, 2.0f, 0.03f),
-		glm::vec4(4.0f, 3.0f, 2.0f, 1.00f));
-
-	std::vector<float> Test(10000000);
+	std::vector<float> Test(Data.size()); // one result slot per input matrix
 
 	std::clock_t TimeStart = clock();
 
 	for(std::size_t i = 0; i < Test.size(); ++i)
-		Test[i] = glm::determinant(Identity);
+		Test[i] = glm::determinant(Data[i]); // scalar reference implementation
 
 	std::clock_t TimeEnd = clock();
 	printf("Det A: %d\n", TimeEnd - TimeStart);
+
+	return Test; // returned so the caller can compare results across implementations
 }
 
-void test_detB()
+std::vector<float> test_detB(std::vector<glm::mat4> const & Data) // times glm::detail::sse_slow_det_ps over every matrix in Data and returns lane x of each result
 {
-	glm::simd_mat4 IdentityB(
-		glm::simd_vec4(4.0f, 0.7f, 0.1f, 0.01f),
-		glm::simd_vec4(0.5f, 3.0f, 0.6f, 0.02f),
-		glm::simd_vec4(0.2f, 0.4f, 2.0f, 0.03f),
-		glm::simd_vec4(4.0f, 3.0f, 2.0f, 1.00f));
-
-	std::vector<__m128> Test(10000000);
+	std::vector<float> Test(Data.size()); // one result slot per input matrix
 
 	std::clock_t TimeStart = clock();
 
 	for(std::size_t i = 0; i < Test.size(); ++i)
-		Test[i] = glm::detail::sse_slow_det_ps(&IdentityB.Data[0].Data); 
+	{
+		glm::simd_mat4 m(Data[i]); // the mat4 -> simd_mat4 conversion happens inside the timed loop, so it is part of the measured time
+		Test[i] = glm::simd_vec4(glm::detail::sse_slow_det_ps((__m128 const * const)&m)).x; // NOTE(review): the cast assumes a simd_mat4 object starts with four contiguous __m128 values -- confirm against the class layout
+	}
 
 	std::clock_t TimeEnd = clock();
 	printf("Det B: %d\n", TimeEnd - TimeStart);
+
+	return Test; // returned so the caller can compare results across implementations
 }
 
-void test_detC()
+std::vector<float> test_detC(std::vector<glm::mat4> const & Data) // times glm::detail::sse_det_ps over every matrix in Data and returns lane x of each result
 {
-	glm::simd_mat4 IdentityB(
-		glm::simd_vec4(4.0f, 0.7f, 0.1f, 0.01f),
-		glm::simd_vec4(0.5f, 3.0f, 0.6f, 0.02f),
-		glm::simd_vec4(0.2f, 0.4f, 2.0f, 0.03f),
-		glm::simd_vec4(4.0f, 3.0f, 2.0f, 1.00f));
+	std::vector<float> Test(Data.size()); // one result slot per input matrix
+
+	std::clock_t TimeStart = clock();
+
+	for(std::size_t i = 0; i < Test.size(); ++i)
+	{
+		glm::simd_mat4 m(Data[i]); // the mat4 -> simd_mat4 conversion happens inside the timed loop, so it is part of the measured time
+		Test[i] = glm::simd_vec4(glm::detail::sse_det_ps((__m128 const * const)&m)).x; // NOTE(review): the cast assumes a simd_mat4 object starts with four contiguous __m128 values -- confirm against the class layout
+	}
+
+	std::clock_t TimeEnd = clock();
+	printf("Det C: %d\n", TimeEnd - TimeStart);
 
-	std::vector<__m128> Test(10000000);
+	return Test; // returned so the caller can compare results across implementations
+}
+
+std::vector<float> test_detD(std::vector<glm::mat4> const & Data) // times glm::detail::sse_detd_ps over every matrix in Data and returns lane x of each result
+{
+	std::vector<float> Test(Data.size()); // one result slot per input matrix
 
 	std::clock_t TimeStart = clock();
 
 	for(std::size_t i = 0; i < Test.size(); ++i)
-		Test[i] = glm::detail::sse_det_ps(&IdentityB.Data[0].Data); 
+	{
+		glm::simd_mat4 m(Data[i]); // the mat4 -> simd_mat4 conversion happens inside the timed loop, so it is part of the measured time
+		Test[i] = glm::simd_vec4(glm::detail::sse_detd_ps((__m128 const * const)&m)).x; // NOTE(review): the cast assumes a simd_mat4 object starts with four contiguous __m128 values -- confirm against the class layout
+	}
 
 	std::clock_t TimeEnd = clock();
-	printf("Det C: %d\n", TimeEnd - TimeStart);
+	printf("Det D: %d\n", TimeEnd - TimeStart); // label corrected: this line was reused from the old test_detC, so variant D was reported as "Det C"
+
+	return Test; // returned so the caller can compare results across implementations
 }
 
 int main(int argc, void* argv[])
 {
-	test_detA();
-	test_detB();
-	test_detC();
+	std::vector<glm::mat4> Data(1024 * 1024 * 16); // shared input set: every determinant variant runs over the same matrices
+	for(std::size_t i = 0; i < Data.size(); ++i)
+		Data[i] = glm::mat4( // four vectors produced by glm::compRand4(-2.0f, 2.0f)
+			glm::vec4(glm::compRand4(-2.0f, 2.0f)),
+			glm::vec4(glm::compRand4(-2.0f, 2.0f)),
+			glm::vec4(glm::compRand4(-2.0f, 2.0f)),
+			glm::vec4(glm::compRand4(-2.0f, 2.0f)));
+
+	std::vector<float> TestDetA = test_detA(Data); // scalar glm::determinant
+	std::vector<float> TestDetB = test_detB(Data); // sse_slow_det_ps
+	std::vector<float> TestDetC = test_detC(Data); // sse_det_ps
+	std::vector<float> TestDetD = test_detD(Data); // sse_detd_ps
+
+	for(std::size_t i = 0; i < TestDetA.size(); ++i) // chain A~B, B~C, C~D so every variant is tied to the scalar reference
+		if(glm::abs(TestDetA[i] - TestDetB[i]) > 0.01f || glm::abs(TestDetC[i] - TestDetB[i]) > 0.01f || glm::abs(TestDetC[i] - TestDetD[i]) > 0.01f) // fail when ANY pair disagrees (the previous && only failed when all three pairs differed at once); a tolerance replaces exact float inequality because the implementations may round differently -- NOTE(review): 0.01f is a loose bound, tighten if desired
+			return 1;
 
 	// shuffle test
 	glm::simd_vec4 A(1.0f, 2.0f, 3.0f, 4.0f);
 	glm::simd_vec4 B(5.0f, 6.0f, 7.0f, 8.0f);
 	__m128 C = _mm_shuffle_ps(A.Data, B.Data, _MM_SHUFFLE(1, 0, 1, 0));
 
-	glm::mat4 IdentityA(
-		glm::vec4(4.0f, 0.7f, 0.1f, 0.01f),
-		glm::vec4(0.5f, 3.0f, 0.6f, 0.02f),
-		glm::vec4(0.2f, 0.4f, 2.0f, 0.03f),
-		glm::vec4(4.0f, 3.0f, 2.0f, 1.00f));
-	float DetA = glm::determinant(IdentityA);
-
-	glm::simd_mat4 IdentityB(
-		glm::simd_vec4(4.0f, 0.7f, 0.1f, 0.01f),
-		glm::simd_vec4(0.5f, 3.0f, 0.6f, 0.02f),
-		glm::simd_vec4(0.2f, 0.4f, 2.0f, 0.03f),
-		glm::simd_vec4(4.0f, 3.0f, 2.0f, 1.00f));
-	__m128 DetB = glm::detail::sse_slow_det_ps(&IdentityB.Data[0].Data); 
-	__m128 DetC = glm::detail::sse_det_ps(&IdentityB.Data[0].Data);
-
-	std::vector<float> TestA(100000);
-
-
-	std::vector<__m128> TestB(100000);
-	std::vector<__m128> TestC(100000);
-
 	return 0;
 }