Просмотр исходного кода

Added bit interleave for 3 and 4 integers

Christophe Riccio 13 лет назад
Родитель
Коммит
bd7125c50b
4 изменённых файла: 252 добавлено и 3 удалено
  1. 115 0
      glm/core/intrinsic_integer.inl
  2. 64 0
      glm/gtx/bit.inl
  3. 43 0
      test/gtc/gtc_random.cpp
  4. 30 3
      test/gtx/gtx_bit.cpp

+ 115 - 0
glm/core/intrinsic_integer.inl

@@ -136,5 +136,120 @@ namespace detail
 		return Reg1;
 	}
 
+	inline __m128i _mm_bit_interleave3_si128(__m128i x)
+	{	/* Runs five shift / OR / AND rounds plus a final combine on one SSE2
+		 * register and returns the resulting __m128i.
+		 *
+		 * NOTE(review): this block does not look finished -- confirm before use:
+		 *  - 'y' is read below but is neither a parameter nor a local of this function;
+		 *  - every mask is a 64-bit literal passed to _mm_set1_epi32, which takes a
+		 *    32-bit int, so the full 64-bit pattern cannot reach the register;
+		 *  - the pseudo-code comments mention REG3, yet only Reg1 and Reg2 exist.
+		 * The expected layout of the three inputs inside 'x' is not visible here.
+		 */
+		__m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF);
+		__m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF);
+		__m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F);
+		__m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3);
+		__m128i const Mask0 = _mm_set1_epi32(0x9249249249249249);
+
+		__m128i Reg1;
+		__m128i Reg2;
+
+		// Scalar reference: REG1 = x;
+		// Scalar reference: REG2 = y;
+		Reg1 = _mm_unpacklo_epi64(x, y); // low 64 bits of x, then low 64 bits of y; NOTE(review): 'y' is undeclared here
+
+		// Scalar reference for this round (one statement per operand):
+		//REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF);
+		//REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF);
+		//REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF);
+		Reg2 = _mm_slli_si128(Reg1, 4); // byte shift of the whole 128-bit register: 4 bytes = 32 bits
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask4);
+
+		//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF);
+		//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF);
+		//REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF);
+		Reg2 = _mm_slli_si128(Reg1, 2); // 2 bytes = 16 bits, again across the whole register
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask3);
+
+		//REG1 = ((REG1 <<  8) | REG1) & glm::uint64(0xF00F00F00F00F00F);
+		//REG2 = ((REG2 <<  8) | REG2) & glm::uint64(0xF00F00F00F00F00F);
+		//REG3 = ((REG3 <<  8) | REG3) & glm::uint64(0xF00F00F00F00F00F);
+		Reg2 = _mm_slli_si128(Reg1, 1); // 1 byte = 8 bits; NOTE(review): byte shifts are not per 64-bit lane -- check for cross-lane carry
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask2);
+
+		//REG1 = ((REG1 <<  4) | REG1) & glm::uint64(0x30C30C30C30C30C3);
+		//REG2 = ((REG2 <<  4) | REG2) & glm::uint64(0x30C30C30C30C30C3);
+		//REG3 = ((REG3 <<  4) | REG3) & glm::uint64(0x30C30C30C30C30C3);
+		Reg2 = _mm_slli_epi32(Reg1, 4); // bit shift applied to each 32-bit element separately
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask1);
+
+		//REG1 = ((REG1 <<  2) | REG1) & glm::uint64(0x9249249249249249);
+		//REG2 = ((REG2 <<  2) | REG2) & glm::uint64(0x9249249249249249);
+		//REG3 = ((REG3 <<  2) | REG3) & glm::uint64(0x9249249249249249);
+		Reg2 = _mm_slli_epi32(Reg1, 2); // bit shift applied to each 32-bit element separately
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask0);
+
+		// Scalar reference: return REG1 | (REG2 << 1) | (REG3 << 2);
+		// NOTE(review): only two terms are combined below; there is no "<< 2" term.
+		Reg2 = _mm_slli_epi32(Reg1, 1); // shift every 32-bit element left by one bit
+		Reg2 = _mm_srli_si128(Reg2, 8); // bring the upper 64 bits down onto the lower 64 bits
+		Reg1 = _mm_or_si128(Reg1, Reg2);
+	
+		return Reg1;
+	}
+
+	inline __m128i _mm_bit_interleave4_si128(__m128i x)
+	{	/* Runs five shift / OR / AND rounds plus a final combine on one SSE2
+		 * register and returns the resulting __m128i.
+		 *
+		 * NOTE(review): the body uses exactly the same masks and steps as
+		 * _mm_bit_interleave3_si128, so nothing here is specific to four inputs.
+		 * It also shares that function's open issues -- confirm before use:
+		 *  - 'y' is read below but is neither a parameter nor a local of this function;
+		 *  - every mask is a 64-bit literal passed to _mm_set1_epi32, which takes a
+		 *    32-bit int, so the full 64-bit pattern cannot reach the register;
+		 *  - the pseudo-code comments describe three operands (REG1..REG3), not four.
+		 * The expected layout of the four inputs inside 'x' is not visible here.
+		 */
+		__m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF);
+		__m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF);
+		__m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F);
+		__m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3);
+		__m128i const Mask0 = _mm_set1_epi32(0x9249249249249249);
+
+		__m128i Reg1;
+		__m128i Reg2;
+
+		// Scalar reference: REG1 = x;
+		// Scalar reference: REG2 = y;
+		Reg1 = _mm_unpacklo_epi64(x, y); // low 64 bits of x, then low 64 bits of y; NOTE(review): 'y' is undeclared here
+
+		// Scalar reference for this round (one statement per operand):
+		//REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF);
+		//REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF);
+		//REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF);
+		Reg2 = _mm_slli_si128(Reg1, 4); // byte shift of the whole 128-bit register: 4 bytes = 32 bits
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask4);
+
+		//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF);
+		//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF);
+		//REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF);
+		Reg2 = _mm_slli_si128(Reg1, 2); // 2 bytes = 16 bits, again across the whole register
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask3);
+
+		//REG1 = ((REG1 <<  8) | REG1) & glm::uint64(0xF00F00F00F00F00F);
+		//REG2 = ((REG2 <<  8) | REG2) & glm::uint64(0xF00F00F00F00F00F);
+		//REG3 = ((REG3 <<  8) | REG3) & glm::uint64(0xF00F00F00F00F00F);
+		Reg2 = _mm_slli_si128(Reg1, 1); // 1 byte = 8 bits; NOTE(review): byte shifts are not per 64-bit lane -- check for cross-lane carry
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask2);
+
+		//REG1 = ((REG1 <<  4) | REG1) & glm::uint64(0x30C30C30C30C30C3);
+		//REG2 = ((REG2 <<  4) | REG2) & glm::uint64(0x30C30C30C30C30C3);
+		//REG3 = ((REG3 <<  4) | REG3) & glm::uint64(0x30C30C30C30C30C3);
+		Reg2 = _mm_slli_epi32(Reg1, 4); // bit shift applied to each 32-bit element separately
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask1);
+
+		//REG1 = ((REG1 <<  2) | REG1) & glm::uint64(0x9249249249249249);
+		//REG2 = ((REG2 <<  2) | REG2) & glm::uint64(0x9249249249249249);
+		//REG3 = ((REG3 <<  2) | REG3) & glm::uint64(0x9249249249249249);
+		Reg2 = _mm_slli_epi32(Reg1, 2); // bit shift applied to each 32-bit element separately
+		Reg1 = _mm_or_si128(Reg2, Reg1);
+		Reg1 = _mm_and_si128(Reg1, Mask0);
+
+		// Scalar reference: return REG1 | (REG2 << 1) | (REG3 << 2);
+		// NOTE(review): only two terms are combined below; there is no "<< 2" or "<< 3" term.
+		Reg2 = _mm_slli_epi32(Reg1, 1); // shift every 32-bit element left by one bit
+		Reg2 = _mm_srli_si128(Reg2, 8); // bring the upper 64 bits down onto the lower 64 bits
+		Reg1 = _mm_or_si128(Reg1, Reg2);
+	
+		return Reg1;
+	}
 }//namespace detail
 }//namespace glm

+ 64 - 0
glm/gtx/bit.inl

@@ -671,6 +671,70 @@ namespace glm
 
 			return REG1 | (REG2 << 1);
 		}
+
+		// Interleaves the bits of three unsigned integers into one 64-bit word:
+		// bit i of x is placed at bit 3*i, bit i of y at bit 3*i + 1 and bit i of z
+		// at bit 3*i + 2. The result only has 64 bits, so input bits whose target
+		// position would be 64 or higher are not represented.
+		inline glm::uint64 bitfieldInterleave(glm::uint32 x, glm::uint32 y, glm::uint32 z)
+		{
+			// One mask per round. Each round duplicates the operand at a shifted
+			// position and keeps only the bit groups that sit where they belong,
+			// halving the group size (16, 8, 4, 2, 1 bits) every time.
+			glm::uint64 const Keep16(0xFFFF00000000FFFF);
+			glm::uint64 const Keep8(0x00FF0000FF0000FF);
+			glm::uint64 const Keep4(0xF00F00F00F00F00F);
+			glm::uint64 const Keep2(0x30C30C30C30C30C3);
+			glm::uint64 const Keep1(0x9249249249249249);
+
+			// Spread x so that two zero bits follow each of its bits.
+			glm::uint64 SpreadX(x);
+			SpreadX = (SpreadX | (SpreadX << 32)) & Keep16;
+			SpreadX = (SpreadX | (SpreadX << 16)) & Keep8;
+			SpreadX = (SpreadX | (SpreadX <<  8)) & Keep4;
+			SpreadX = (SpreadX | (SpreadX <<  4)) & Keep2;
+			SpreadX = (SpreadX | (SpreadX <<  2)) & Keep1;
+
+			// Same treatment for y.
+			glm::uint64 SpreadY(y);
+			SpreadY = (SpreadY | (SpreadY << 32)) & Keep16;
+			SpreadY = (SpreadY | (SpreadY << 16)) & Keep8;
+			SpreadY = (SpreadY | (SpreadY <<  8)) & Keep4;
+			SpreadY = (SpreadY | (SpreadY <<  4)) & Keep2;
+			SpreadY = (SpreadY | (SpreadY <<  2)) & Keep1;
+
+			// Same treatment for z.
+			glm::uint64 SpreadZ(z);
+			SpreadZ = (SpreadZ | (SpreadZ << 32)) & Keep16;
+			SpreadZ = (SpreadZ | (SpreadZ << 16)) & Keep8;
+			SpreadZ = (SpreadZ | (SpreadZ <<  8)) & Keep4;
+			SpreadZ = (SpreadZ | (SpreadZ <<  4)) & Keep2;
+			SpreadZ = (SpreadZ | (SpreadZ <<  2)) & Keep1;
+
+			// Slot y and z into the gaps left after each bit of x.
+			return SpreadX | (SpreadY << 1) | (SpreadZ << 2);
+		}
+
+		// Interleaves the bits of four 16-bit integers into one 64-bit word:
+		// bit i of x is placed at bit 4*i, bit i of y at bit 4*i + 1, bit i of z
+		// at bit 4*i + 2 and bit i of w at bit 4*i + 3.
+		inline glm::uint64 bitfieldInterleave(glm::uint16 x, glm::uint16 y, glm::uint16 z, glm::uint16 w)
+		{
+			glm::uint64 REG1(x);
+			glm::uint64 REG2(y);
+			glm::uint64 REG3(z);
+			glm::uint64 REG4(w);
+
+			// Each round splits every contiguous group of n bits into two halves
+			// and moves the upper half (which starts n/2 bits above the lower one)
+			// to where the next group of the mask begins, 2*n bits above the lower
+			// half. The shift is therefore 2*n - n/2: 24, 12, 6 and 3 for groups of
+			// 16, 8, 4 and 2 bits. Shifting by 32, 16, 8 and 4 instead would copy
+			// the lower half into both slots and lose the upper half.
+
+			// 16-bit group -> two bytes at bits 0 and 32.
+			REG1 = ((REG1 << 24) | REG1) & glm::uint64(0x000000FF000000FF);
+			REG2 = ((REG2 << 24) | REG2) & glm::uint64(0x000000FF000000FF);
+			REG3 = ((REG3 << 24) | REG3) & glm::uint64(0x000000FF000000FF);
+			REG4 = ((REG4 << 24) | REG4) & glm::uint64(0x000000FF000000FF);
+
+			// Bytes -> nibbles placed every 16 bits.
+			REG1 = ((REG1 << 12) | REG1) & glm::uint64(0x000F000F000F000F);
+			REG2 = ((REG2 << 12) | REG2) & glm::uint64(0x000F000F000F000F);
+			REG3 = ((REG3 << 12) | REG3) & glm::uint64(0x000F000F000F000F);
+			REG4 = ((REG4 << 12) | REG4) & glm::uint64(0x000F000F000F000F);
+
+			// Nibbles -> bit pairs placed every 8 bits.
+			REG1 = ((REG1 <<  6) | REG1) & glm::uint64(0x0303030303030303);
+			REG2 = ((REG2 <<  6) | REG2) & glm::uint64(0x0303030303030303);
+			REG3 = ((REG3 <<  6) | REG3) & glm::uint64(0x0303030303030303);
+			REG4 = ((REG4 <<  6) | REG4) & glm::uint64(0x0303030303030303);
+
+			// Bit pairs -> single bits placed every 4 bits.
+			REG1 = ((REG1 <<  3) | REG1) & glm::uint64(0x1111111111111111);
+			REG2 = ((REG2 <<  3) | REG2) & glm::uint64(0x1111111111111111);
+			REG3 = ((REG3 <<  3) | REG3) & glm::uint64(0x1111111111111111);
+			REG4 = ((REG4 <<  3) | REG4) & glm::uint64(0x1111111111111111);
+
+			// Slot y, z and w into the three free bits after each bit of x.
+			return REG1 | (REG2 << 1) | (REG3 << 2) | (REG4 << 3);
+		}
 	}//namespace detail
 
 	inline int16 bitfieldInterleave(int8 x, int8 y)

+ 43 - 0
test/gtc/gtc_random.cpp

@@ -11,6 +11,9 @@
 #include <glm/gtc/random.hpp>
 #include <glm/gtc/epsilon.hpp>
 #include <iostream>
+#if(GLM_LANG & GLM_LANG_CXX0X)
+#	include <array>
+#endif
 
 int test_linearRand()
 {
@@ -136,6 +139,46 @@ int test_ballRand()
 	return Error;
 }
 
+#if(GLM_LANG & GLM_LANG_CXX0X)
+// Draws random 8x8 grids of colour indices until one comes up in which each
+// of the 8 colours is used exactly 8 times, then returns the error count
+// (always 0: the function records no failure condition).
+int test_grid()
+{
+	int Error = 0;
+
+	typedef std::array<int, 8> colors;
+	typedef std::array<int, 8 * 8> grid;
+
+	grid Grid;
+	colors Colors;
+
+	while(true)
+	{
+		// The histogram has to start from zero for every candidate grid.
+		// std::array<int, N> is not zero-initialised by default, and counts
+		// left over from a rejected grid would make the "exactly 8" check
+		// below impossible to satisfy.
+		Colors.fill(0);
+
+		// 511 / 64 is below 8, so the index stays within [0, 7] provided
+		// linearRand honours the requested bounds.
+		for(std::size_t i = 0; i < Grid.size(); ++i)
+			Grid[i] = int(glm::linearRand(0.0, 8.0 * 8.0 * 8.0 - 1.0) / 64.0);
+
+		// Count how many cells use each colour.
+		for(std::size_t i = 0; i < Grid.size(); ++i)
+			++Colors[Grid[i]];
+
+		// Accept the grid only when every colour appears exactly 8 times.
+		bool Exit = true;
+		for(std::size_t i = 0; i < Colors.size(); ++i)
+		{
+			if(Colors[i] == 8)
+				continue;
+
+			Exit = false;
+			break;
+		}
+
+		if(Exit == true)
+			break;
+	}
+
+	return Error;
+}
+#endif
+
 int main()
 {
 	int Error = 0;

+ 30 - 3
test/gtx/gtx_bit.cpp

@@ -7,6 +7,8 @@
 // File    : test/gtx/bit.cpp
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
+#include <emmintrin.h>
+
 #include <glm/glm.hpp>
 #include <glm/gtc/type_precision.hpp>
 #include <glm/gtx/bit.hpp>
@@ -19,8 +21,6 @@
 #include <vector>
 #include <ctime>
 
-#include <emmintrin.h>
-
 enum result
 {
 	SUCCESS,
@@ -479,6 +479,17 @@ namespace bitfieldInterleave
 			std::cout << "sseUnalignedBitfieldInterleave Time " << Time << " clocks" << std::endl;
 		}
 
+		{
+			std::clock_t LastTime = std::clock();
+
+			for(std::size_t i = 0; i < Data.size(); ++i)
+				Data[i] = glm::detail::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
+
+			std::clock_t Time = std::clock() - LastTime;
+
+			std::cout << "glm::detail::bitfieldInterleave Time " << Time << " clocks" << std::endl;
+		}
+
 #		if(GLM_ARCH != GLM_ARCH_PURE)
 		{
 			// SIMD
@@ -505,12 +516,28 @@ namespace bitfieldInterleave
 	}
 }
 
+namespace bitfieldInterleave3
+{
+	// Checks the three-operand glm::detail::bitfieldInterleave overload.
+	// Returns the number of failed checks (0 on success).
+	int test()
+	{
+		int Error(0);
+
+		// With every bit of x set and y = z = 0, exactly every third bit of the
+		// 64-bit result (bits 0, 3, 6, ..., 63) must be set.
+		glm::uint64 const Result = glm::detail::bitfieldInterleave(0xFFFFFFFF, 0x00000000, 0x00000000);
+		Error += Result == glm::uint64(0x9249249249249249) ? 0 : 1;
+
+		return Error;
+	}
+}
+
 int main()
 {
-	int Error = 0;
+	// Sum of the failure counts of every test group; it becomes the exit code.
+	int Error(0);
+
+	Error += ::bitfieldInterleave3::test();
 	Error += ::bitfieldInterleave::test();
 	Error += ::extractField::test();
 	Error += ::bitRevert::test();
 
 	return Error;
 }