Browse Source

Completed bit interleaving for 3 and 4 values

Christophe Riccio 13 years ago
parent
commit
1e3cb00fe5
4 changed files with 362 additions and 155 deletions
  1. 0 118
      glm/core/intrinsic_integer.inl
  2. 94 6
      glm/gtx/bit.hpp
  3. 200 28
      glm/gtx/bit.inl
  4. 68 3
      test/gtx/gtx_bit.cpp

+ 0 - 118
glm/core/intrinsic_integer.inl

@@ -135,123 +135,5 @@ namespace detail
 	
 		return Reg1;
 	}
-
-/*
-	inline __m128i _mm_bit_interleave3_si128(__m128i x)
-	{
-		__m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF);
-		__m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF);
-		__m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F);
-		__m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3);
-		__m128i const Mask0 = _mm_set1_epi32(0x9249249249249249);
-
-		__m128i Reg1;
-		__m128i Reg2;
-
-		// REG1 = x;
-		// REG2 = y;
-		Reg1 = _mm_unpacklo_epi64(x, y);
-
-		//REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF);
-		//REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF);
-		//REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF);
-		Reg2 = _mm_slli_si128(Reg1, 4);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask4);
-
-		//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF);
-		//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF);
-		//REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF);
-		Reg2 = _mm_slli_si128(Reg1, 2);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask3);
-
-		//REG1 = ((REG1 <<  8) | REG1) & glm::uint64(0xF00F00F00F00F00F);
-		//REG2 = ((REG2 <<  8) | REG2) & glm::uint64(0xF00F00F00F00F00F);
-		//REG3 = ((REG3 <<  8) | REG3) & glm::uint64(0xF00F00F00F00F00F);
-		Reg2 = _mm_slli_si128(Reg1, 1);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask2);
-
-		//REG1 = ((REG1 <<  4) | REG1) & glm::uint64(0x30C30C30C30C30C3);
-		//REG2 = ((REG2 <<  4) | REG2) & glm::uint64(0x30C30C30C30C30C3);
-		//REG3 = ((REG3 <<  4) | REG3) & glm::uint64(0x30C30C30C30C30C3);
-		Reg2 = _mm_slli_epi32(Reg1, 4);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask1);
-
-		//REG1 = ((REG1 <<  2) | REG1) & glm::uint64(0x9249249249249249);
-		//REG2 = ((REG2 <<  2) | REG2) & glm::uint64(0x9249249249249249);
-		//REG3 = ((REG3 <<  2) | REG3) & glm::uint64(0x9249249249249249);
-		Reg2 = _mm_slli_epi32(Reg1, 2);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask0);
-
-		//return REG1 | (REG2 << 1) | (REG3 << 2);
-		Reg2 = _mm_slli_epi32(Reg1, 1);
-		Reg2 = _mm_srli_si128(Reg2, 8);
-		Reg1 = _mm_or_si128(Reg1, Reg2);
-	
-		return Reg1;
-	}
-
-	inline __m128i _mm_bit_interleave4_si128(__m128i x)
-	{
-		__m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF);
-		__m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF);
-		__m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F);
-		__m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3);
-		__m128i const Mask0 = _mm_set1_epi32(0x9249249249249249);
-
-		__m128i Reg1;
-		__m128i Reg2;
-
-		// REG1 = x;
-		// REG2 = y;
-		Reg1 = _mm_unpacklo_epi64(x, y);
-
-		//REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF);
-		//REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF);
-		//REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF);
-		Reg2 = _mm_slli_si128(Reg1, 4);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask4);
-
-		//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF);
-		//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF);
-		//REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF);
-		Reg2 = _mm_slli_si128(Reg1, 2);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask3);
-
-		//REG1 = ((REG1 <<  8) | REG1) & glm::uint64(0xF00F00F00F00F00F);
-		//REG2 = ((REG2 <<  8) | REG2) & glm::uint64(0xF00F00F00F00F00F);
-		//REG3 = ((REG3 <<  8) | REG3) & glm::uint64(0xF00F00F00F00F00F);
-		Reg2 = _mm_slli_si128(Reg1, 1);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask2);
-
-		//REG1 = ((REG1 <<  4) | REG1) & glm::uint64(0x30C30C30C30C30C3);
-		//REG2 = ((REG2 <<  4) | REG2) & glm::uint64(0x30C30C30C30C30C3);
-		//REG3 = ((REG3 <<  4) | REG3) & glm::uint64(0x30C30C30C30C30C3);
-		Reg2 = _mm_slli_epi32(Reg1, 4);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask1);
-
-		//REG1 = ((REG1 <<  2) | REG1) & glm::uint64(0x9249249249249249);
-		//REG2 = ((REG2 <<  2) | REG2) & glm::uint64(0x9249249249249249);
-		//REG3 = ((REG3 <<  2) | REG3) & glm::uint64(0x9249249249249249);
-		Reg2 = _mm_slli_epi32(Reg1, 2);
-		Reg1 = _mm_or_si128(Reg2, Reg1);
-		Reg1 = _mm_and_si128(Reg1, Mask0);
-
-		//return REG1 | (REG2 << 1) | (REG3 << 2);
-		Reg2 = _mm_slli_epi32(Reg1, 1);
-		Reg2 = _mm_srli_si128(Reg2, 8);
-		Reg1 = _mm_or_si128(Reg1, Reg2);
-	
-		return Reg1;
-	}
-*/
 }//namespace detail
 }//namespace glm

+ 94 - 6
glm/gtx/bit.hpp

@@ -132,30 +132,118 @@ namespace glm
 		int const & FromBit, 
 		int const & ToBit);
 
-	///
+	/// Interleaves the bits of x and y. 
+	/// The first bit is the first bit of x followed by the first bit of y.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
 	/// @see gtx_bit
 	int16 bitfieldInterleave(int8 x, int8 y);
 
-	///
+	/// Interleaves the bits of x and y. 
+	/// The first bit is the first bit of x followed by the first bit of y.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
 	/// @see gtx_bit
 	uint16 bitfieldInterleave(uint8 x, uint8 y);
 
-	///
+	/// Interleaves the bits of x and y. 
+	/// The first bit is the first bit of x followed by the first bit of y.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
 	/// @see gtx_bit
 	int32 bitfieldInterleave(int16 x, int16 y);
 
-	///
+	/// Interleaves the bits of x and y. 
+	/// The first bit is the first bit of x followed by the first bit of y.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
 	/// @see gtx_bit
 	uint32 bitfieldInterleave(uint16 x, uint16 y);
 
-	///
+	/// Interleaves the bits of x and y. 
+	/// The first bit is the first bit of x followed by the first bit of y.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
 	/// @see gtx_bit
 	int64 bitfieldInterleave(int32 x, int32 y);
 
-	///
+	/// Interleaves the bits of x and y. 
+	/// The first bit is the first bit of x followed by the first bit of y.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
 	/// @see gtx_bit
 	uint64 bitfieldInterleave(uint32 x, uint32 y);
 
+	/// Interleaves the bits of x, y and z. 
+	/// The first bit is the first bit of x followed by the first bit of y and the first bit of z.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	int32 bitfieldInterleave(int8 x, int8 y, int8 z);
+
+	/// Interleaves the bits of x, y and z. 
+	/// The first bit is the first bit of x followed by the first bit of y and the first bit of z.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z);
+
+	/// Interleaves the bits of x, y and z. 
+	/// The first bit is the first bit of x followed by the first bit of y and the first bit of z.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	int64 bitfieldInterleave(int16 x, int16 y, int16 z);
+
+	/// Interleaves the bits of x, y and z. 
+	/// The first bit is the first bit of x followed by the first bit of y and the first bit of z.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z);
+
+	/// Interleaves the bits of x, y and z.
+	/// The first bit is the first bit of x followed by the first bit of y and the first bit of z.
+	/// The other bits are interleaved following the previous sequence.
+	///
+	/// NOTE(review): three 32-bit inputs carry 96 bits but the result is only 64 bits wide,
+	/// so not every input bit can be represented -- confirm which input bits are kept.
+	///
+	/// @see gtx_bit
+	int64 bitfieldInterleave(int32 x, int32 y, int32 z);
+
+	/// Interleaves the bits of x, y and z.
+	/// The first bit is the first bit of x followed by the first bit of y and the first bit of z.
+	/// The other bits are interleaved following the previous sequence.
+	///
+	/// NOTE(review): three 32-bit inputs carry 96 bits but the result is only 64 bits wide,
+	/// so not every input bit can be represented -- confirm which input bits are kept.
+	///
+	/// @see gtx_bit
+	uint64 bitfieldInterleave(uint32 x, uint32 y, uint32 z);
+
+	/// Interleaves the bits of x, y, z and w. 
+	/// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	int32 bitfieldInterleave(int8 x, int8 y, int8 z, int8 w);
+
+	/// Interleaves the bits of x, y, z and w. 
+	/// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z, uint8 w);
+
+	/// Interleaves the bits of x, y, z and w. 
+	/// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	int64 bitfieldInterleave(int16 x, int16 y, int16 z, int16 w);
+
+	/// Interleaves the bits of x, y, z and w. 
+	/// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w.
+	/// The other bits are interleaved following the previous sequence.
+	/// 
+	/// @see gtx_bit
+	uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z, uint16 w);
+
 	/// @}
 } //namespace glm
 

+ 200 - 28
glm/gtx/bit.inl

@@ -600,6 +600,16 @@ namespace glm
 
 	namespace detail
 	{
+		// Forward declarations of the generic interleave helpers.
+		// PARAM is the type of every input, RET the type of the packed result.
+		// Only the signatures are declared here: the loop-based generic bodies
+		// that follow are commented out, so presumably every instantiation that
+		// is actually used needs an explicit specialization -- TODO confirm.
+
+		// Two-input interleave.
+		template <typename PARAM, typename RET>
+		RET bitfieldInterleave(PARAM x, PARAM y);
+
+		// Three-input interleave.
+		template <typename PARAM, typename RET>
+		RET bitfieldInterleave(PARAM x, PARAM y, PARAM z);
+
+		// Four-input interleave.
+		template <typename PARAM, typename RET>
+		RET bitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w);
+
+/*
 		template <typename PARAM, typename RET>
 		inline RET bitfieldInterleave(PARAM x, PARAM y)
 		{
@@ -609,6 +619,33 @@ namespace glm
 			return Result;
 		}
 
+		template <typename PARAM, typename RET>
+		inline RET bitfieldInterleave(PARAM x, PARAM y, PARAM z)
+		{
+			RET Result = 0; 
+			for (RET i = 0; i < sizeof(PARAM) * 8; i++)
+			{
+				Result |= ((RET(x) & (RET(1) << i)) << ((i << 1) + 0));
+				Result |= ((RET(y) & (RET(1) << i)) << ((i << 1) + 1));
+				Result |= ((RET(z) & (RET(1) << i)) << ((i << 1) + 2));
+			}
+			return Result;
+		}
+
+		template <typename PARAM, typename RET>
+		inline RET bitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w)
+		{
+			RET Result = 0; 
+			for (int i = 0; i < sizeof(PARAM) * 8; i++)
+			{
+				Result |= ((((RET(x) >> i) & RET(1))) << RET((i << 2) + 0));
+				Result |= ((((RET(y) >> i) & RET(1))) << RET((i << 2) + 1));
+				Result |= ((((RET(z) >> i) & RET(1))) << RET((i << 2) + 2));
+				Result |= ((((RET(w) >> i) & RET(1))) << RET((i << 2) + 3));
+			}
+			return Result;
+		}
+*/
 		template <>
 		inline glm::uint16 bitfieldInterleave(glm::uint8 x, glm::uint8 y)
 		{
@@ -672,6 +709,7 @@ namespace glm
 			return REG1 | (REG2 << 1);
 		}
 
+		template <>
 		inline glm::uint64 bitfieldInterleave(glm::uint32 x, glm::uint32 y, glm::uint32 z)
 		{
 			glm::uint64 REG1(x);
@@ -701,37 +739,33 @@ namespace glm
 			return REG1 | (REG2 << 1) | (REG3 << 2);
 		}
 
+		// Specialization for four 16-bit inputs packed into one 64-bit result.
+		// Each input is spread so that its bit i ends up at bit 4*i (three zero
+		// bits between consecutive input bits); the four spread values are then
+		// offset by 0, 1, 2 and 3 bits and OR-ed together, so result bit 4*i+k
+		// is bit i of the k-th argument.
+		template <>
 		inline glm::uint64 bitfieldInterleave(glm::uint16 x, glm::uint16 y, glm::uint16 z, glm::uint16 w)
 		{
 			glm::uint64 REG1(x);
 			glm::uint64 REG2(y);
 			glm::uint64 REG3(z);
 			glm::uint64 REG4(w);
-/*
-			REG1 = ((REG1 << 64) | REG1) & glm::uint64(0x000000000000FFFF);
-			REG2 = ((REG2 << 64) | REG2) & glm::uint64(0x000000000000FFFF);
-			REG3 = ((REG3 << 64) | REG3) & glm::uint64(0x000000000000FFFF);
-			REG4 = ((REG4 << 64) | REG4) & glm::uint64(0x000000000000FFFF);
-*/
-			REG1 = ((REG1 << 32) | REG1) & glm::uint64(0x000000FF000000FF);
-			REG2 = ((REG2 << 32) | REG2) & glm::uint64(0x000000FF000000FF);
-			REG3 = ((REG3 << 32) | REG3) & glm::uint64(0x000000FF000000FF);
-			REG4 = ((REG4 << 32) | REG4) & glm::uint64(0x000000FF000000FF);
-
-			REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x000F000F000F000F);
-			REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x000F000F000F000F);
-			REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x000F000F000F000F);
-			REG4 = ((REG4 << 16) | REG4) & glm::uint64(0x000F000F000F000F);
-
-			REG1 = ((REG1 <<  8) | REG1) & glm::uint64(0x0303030303030303);
-			REG2 = ((REG2 <<  8) | REG2) & glm::uint64(0x0303030303030303);
-			REG3 = ((REG3 <<  8) | REG3) & glm::uint64(0x0303030303030303);
-			REG4 = ((REG4 <<  8) | REG4) & glm::uint64(0x0303030303030303);
-
-			REG1 = ((REG1 <<  4) | REG1) & glm::uint64(0x1111111111111111);
-			REG2 = ((REG2 <<  4) | REG2) & glm::uint64(0x1111111111111111);
-			REG3 = ((REG3 <<  4) | REG3) & glm::uint64(0x1111111111111111);
-			REG4 = ((REG4 <<  4) | REG4) & glm::uint64(0x1111111111111111);
+
+			// Step 1: keep the low byte in place and move the high byte
+			// (bits 8..15) up by 24 so it lands on bits 32..39 of the mask.
+			REG1 = ((REG1 << 24) | REG1) & glm::uint64(0x000000FF000000FF);
+			REG2 = ((REG2 << 24) | REG2) & glm::uint64(0x000000FF000000FF);
+			REG3 = ((REG3 << 24) | REG3) & glm::uint64(0x000000FF000000FF);
+			REG4 = ((REG4 << 24) | REG4) & glm::uint64(0x000000FF000000FF);
+
+			// Step 2: split every byte into two nibbles placed 16 bits apart
+			// (the upper nibble moves up by 12).
+			REG1 = ((REG1 << 12) | REG1) & glm::uint64(0x000F000F000F000F);
+			REG2 = ((REG2 << 12) | REG2) & glm::uint64(0x000F000F000F000F);
+			REG3 = ((REG3 << 12) | REG3) & glm::uint64(0x000F000F000F000F);
+			REG4 = ((REG4 << 12) | REG4) & glm::uint64(0x000F000F000F000F);
+
+			// Step 3: split every nibble into two bit pairs placed 8 bits apart
+			// (the upper pair moves up by 6).
+			REG1 = ((REG1 <<  6) | REG1) & glm::uint64(0x0303030303030303);
+			REG2 = ((REG2 <<  6) | REG2) & glm::uint64(0x0303030303030303);
+			REG3 = ((REG3 <<  6) | REG3) & glm::uint64(0x0303030303030303);
+			REG4 = ((REG4 <<  6) | REG4) & glm::uint64(0x0303030303030303);
+
+			// Step 4: split every pair into single bits placed 4 bits apart
+			// (the upper bit moves up by 3).
+			REG1 = ((REG1 <<  3) | REG1) & glm::uint64(0x1111111111111111);
+			REG2 = ((REG2 <<  3) | REG2) & glm::uint64(0x1111111111111111);
+			REG3 = ((REG3 <<  3) | REG3) & glm::uint64(0x1111111111111111);
+			REG4 = ((REG4 <<  3) | REG4) & glm::uint64(0x1111111111111111);
 
 			return REG1 | (REG2 << 1) | (REG3 << 2) | (REG4 << 3);
 		}
@@ -753,7 +787,7 @@ namespace glm
 
 		sign_x.i = x;
 		sign_y.i = y;
-		result.u = detail::bitfieldInterleave<int8, int16>(sign_x.u, sign_y.u);
+		result.u = bitfieldInterleave(sign_x.u, sign_y.u);
 
 		return result.i;
 	}
@@ -779,7 +813,7 @@ namespace glm
 
 		sign_x.i = x;
 		sign_y.i = y;
-		result.u = detail::bitfieldInterleave<int16, int32>(sign_x.u, sign_y.u);
+		result.u = bitfieldInterleave(sign_x.u, sign_y.u);
 
 		return result.i;
 	}
@@ -805,7 +839,7 @@ namespace glm
 
 		sign_x.i = x;
 		sign_y.i = y;
-		result.u = detail::bitfieldInterleave<int32, int64>(sign_x.u, sign_y.u);
+		result.u = bitfieldInterleave(sign_x.u, sign_y.u);
 
 		return result.i;
 	}
@@ -814,4 +848,142 @@ namespace glm
 	{
 		return detail::bitfieldInterleave<uint32, uint64>(x, y);
 	}
+
+	/// Signed 8-bit overload: interleaves the bits of x, y and z.
+	/// The inputs are reinterpreted as unsigned through unions, forwarded to
+	/// the unsigned three-argument overload, and the packed 32-bit result is
+	/// reinterpreted back as a signed value.
+	inline int32 bitfieldInterleave(int8 x, int8 y, int8 z)
+	{
+		union bits8
+		{
+			int8 i;
+			uint8 u;
+		};
+
+		union bits32
+		{
+			int32 i;
+			uint32 u;
+		};
+
+		bits8 in_x, in_y, in_z;
+		in_x.i = x;
+		in_y.i = y;
+		in_z.i = z;
+
+		bits32 packed;
+		packed.u = bitfieldInterleave(in_x.u, in_y.u, in_z.u);
+		return packed.i;
+	}
+
+	/// Unsigned 8-bit overload: interleaves the bits of x, y and z by
+	/// delegating to the detail helper instantiated for uint8 inputs and a
+	/// uint32 result.
+	inline uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z)
+	{
+		uint32 const Packed = detail::bitfieldInterleave<uint8, uint32>(x, y, z);
+		return Packed;
+	}
+
+	/// Signed 16-bit overload: interleaves the bits of x, y and z.
+	/// The inputs are reinterpreted as unsigned through unions, forwarded to
+	/// the unsigned three-argument overload, and the packed 64-bit result is
+	/// reinterpreted back as a signed value.
+	inline int64 bitfieldInterleave(int16 x, int16 y, int16 z)
+	{
+		union bits16
+		{
+			int16 i;
+			uint16 u;
+		};
+
+		union bits64
+		{
+			int64 i;
+			uint64 u;
+		};
+
+		bits16 in_x, in_y, in_z;
+		in_x.i = x;
+		in_y.i = y;
+		in_z.i = z;
+
+		bits64 packed;
+		packed.u = bitfieldInterleave(in_x.u, in_y.u, in_z.u);
+		return packed.i;
+	}
+
+	/// Unsigned 16-bit overload: interleaves the bits of x, y and z.
+	/// The explicit template arguments select the uint32 -> uint64 detail
+	/// helper, so the 16-bit inputs are widened to uint32 at the call.
+	inline uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z)
+	{
+		uint64 const Packed = detail::bitfieldInterleave<uint32, uint64>(x, y, z);
+		return Packed;
+	}
+
+	/// Signed 32-bit overload: interleaves the bits of x, y and z.
+	/// The inputs are reinterpreted as unsigned through unions, forwarded to
+	/// the unsigned three-argument overload, and the packed 64-bit result is
+	/// reinterpreted back as a signed value.
+	/// NOTE(review): three 32-bit inputs hold more bits than the 64-bit result
+	/// can carry -- confirm which input bits the unsigned overload keeps.
+	inline int64 bitfieldInterleave(int32 x, int32 y, int32 z)
+	{
+		union bits32
+		{
+			int32 i;
+			uint32 u;
+		};
+
+		union bits64
+		{
+			int64 i;
+			uint64 u;
+		};
+
+		bits32 in_x, in_y, in_z;
+		in_x.i = x;
+		in_y.i = y;
+		in_z.i = z;
+
+		bits64 packed;
+		packed.u = bitfieldInterleave(in_x.u, in_y.u, in_z.u);
+		return packed.i;
+	}
+
+	/// Unsigned 32-bit overload: interleaves the bits of x, y and z by
+	/// delegating to the detail helper instantiated for uint32 inputs and a
+	/// uint64 result.
+	/// NOTE(review): three 32-bit inputs hold more bits than the 64-bit result
+	/// can carry -- confirm which input bits the helper keeps.
+	inline uint64 bitfieldInterleave(uint32 x, uint32 y, uint32 z)
+	{
+		uint64 const Packed = detail::bitfieldInterleave<uint32, uint64>(x, y, z);
+		return Packed;
+	}
+
+	/// Signed 8-bit overload: interleaves the bits of x, y, z and w.
+	/// The inputs are reinterpreted as unsigned through unions, forwarded to
+	/// the unsigned four-argument overload, and the packed 32-bit result is
+	/// reinterpreted back as a signed value.
+	inline int32 bitfieldInterleave(int8 x, int8 y, int8 z, int8 w)
+	{
+		union sign8
+		{
+			int8 i;
+			uint8 u;
+		} sign_x, sign_y, sign_z, sign_w;
+
+		union sign32
+		{
+			int32 i;
+			uint32 u;
+		} result;
+
+		sign_x.i = x;
+		sign_y.i = y;
+		sign_z.i = z;
+		sign_w.i = w;
+		// All four inputs must be forwarded: passing only three selects the
+		// three-argument overload and silently ignores w.
+		result.u = bitfieldInterleave(sign_x.u, sign_y.u, sign_z.u, sign_w.u);
+
+		return result.i;
+	}
+
+	/// Unsigned 8-bit overload: interleaves the bits of x, y, z and w by
+	/// delegating to the four-input detail helper instantiated for uint8
+	/// inputs and a uint32 result.
+	inline uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z, uint8 w)
+	{
+		// w must be forwarded too; the three-argument call would select the
+		// three-input helper and drop the fourth value.
+		return detail::bitfieldInterleave<uint8, uint32>(x, y, z, w);
+	}
+
+	/// Signed 16-bit overload: interleaves the bits of x, y, z and w.
+	/// The inputs are reinterpreted as unsigned through unions, forwarded to
+	/// the unsigned four-argument overload, and the packed 64-bit result is
+	/// reinterpreted back as a signed value.
+	inline int64 bitfieldInterleave(int16 x, int16 y, int16 z, int16 w)
+	{
+		union sign16
+		{
+			int16 i;
+			uint16 u;
+		} sign_x, sign_y, sign_z, sign_w;
+
+		union sign64
+		{
+			int64 i;
+			uint64 u;
+		} result;
+
+		sign_x.i = x;
+		sign_y.i = y;
+		sign_z.i = z;
+		sign_w.i = w;
+		// All four inputs must be forwarded: passing only three selects the
+		// three-argument overload and silently ignores w.
+		result.u = bitfieldInterleave(sign_x.u, sign_y.u, sign_z.u, sign_w.u);
+
+		return result.i;
+	}
+
+	/// Unsigned 16-bit overload: interleaves the bits of x, y, z and w by
+	/// delegating to the four-input detail helper instantiated for uint16
+	/// inputs and a uint64 result.
+	inline uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z, uint16 w)
+	{
+		uint64 const Packed = detail::bitfieldInterleave<uint16, uint64>(x, y, z, w);
+		return Packed;
+	}
+
 }//namespace glm

+ 68 - 3
test/gtx/gtx_bit.cpp

@@ -388,7 +388,7 @@ namespace bitfieldInterleave
 				assert(A == F);
 
 #				if(GLM_ARCH != GLM_ARCH_PURE)
-					__m128i G = _mm_bit_interleave_si128(_mm_set_epi32(0, y, 0, x));
+					__m128i G = glm::detail::_mm_bit_interleave_si128(_mm_set_epi32(0, y, 0, x));
 					glm::uint64 Result[2];
 					_mm_storeu_si128((__m128i*)Result, G);
 					assert(A == Result[0]);
@@ -483,7 +483,7 @@ namespace bitfieldInterleave
 			std::clock_t LastTime = std::clock();
 
 			for(std::size_t i = 0; i < Data.size(); ++i)
-				Data[i] = glm::detail::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
+				Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
 
 			std::clock_t Time = std::clock() - LastTime;
 
@@ -518,11 +518,75 @@ namespace bitfieldInterleave
 
 namespace bitfieldInterleave3
 {
+	// Loop-based reference for the three-input interleave: bit i of x, y and z
+	// is placed at result bit 3*i, 3*i+1 and 3*i+2 respectively. The selected
+	// bit already sits at position i, so shifting it by 2*i + lane gives 3*i + lane.
+	template <typename PARAM, typename RET>
+	inline RET refBitfieldInterleave(PARAM x, PARAM y, PARAM z)
+	{
+		RET const Wide[3] = {RET(x), RET(y), RET(z)};
+		RET Packed = 0;
+		for(RET Bit = 0; Bit < sizeof(PARAM) * 8; ++Bit)
+		{
+			RET const Select = RET(1U) << Bit;
+			for(int Lane = 0; Lane < 3; ++Lane)
+				Packed |= ((Wide[Lane] & Select) << ((Bit << 1) + Lane));
+		}
+		return Packed;
+	}
+
+	// Compares glm::bitfieldInterleave(x, y, z) against refBitfieldInterleave
+	// for a sparse sample of 16-bit inputs (each below 1 << 11, stepping by 27)
+	// and returns the number of mismatches.
+	int test()
+	{
+		glm::uint16 const Limit = 1 << 11;
+		glm::uint16 const Step = 27;
+		int Mismatches(0);
+
+		for(glm::uint16 z = 0; z < Limit; z += Step)
+		for(glm::uint16 y = 0; y < Limit; y += Step)
+		for(glm::uint16 x = 0; x < Limit; x += Step)
+		{
+			glm::uint64 const Expected = refBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z);
+			glm::uint64 const Actual = glm::bitfieldInterleave(x, y, z);
+			if(Expected != Actual)
+				++Mismatches;
+		}
+
+		return Mismatches;
+	}
+}
+
+namespace bitfieldInterleave4
+{
+	// Loop-based reference for the four-input interleave: bit i of x, y, z and
+	// w is placed at result bit 4*i, 4*i+1, 4*i+2 and 4*i+3 respectively.
+	template <typename PARAM, typename RET>
+	inline RET loopBitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w)
+	{
+		RET const Lanes[4] = {x, y, z, w};
+		RET Packed = 0;
+		for(RET Bit = 0; Bit < sizeof(PARAM) * 8; Bit++)
+		{
+			for(int Lane = 0; Lane < 4; ++Lane)
+				Packed |= ((((Lanes[Lane] >> Bit) & 1U)) << ((Bit << 2) + Lane));
+		}
+		return Packed;
+	}
+
+	// Compares glm::bitfieldInterleave(x, y, z, w) against the loop-based
+	// loopBitfieldInterleave for a sparse sample of 16-bit inputs and returns
+	// the number of mismatches.
 	int test()
 	{
 		int Error(0);
 
-		glm::uint64 Result = glm::detail::bitfieldInterleave(0xFFFFFFFF, 0x00000000, 0x00000000);
+		// Exclusive upper bound for each input; values are sampled below 1 << 11.
+		glm::uint16 x_max = 1 << 11;
+		glm::uint16 y_max = 1 << 11;
+		glm::uint16 z_max = 1 << 11;
+		glm::uint16 w_max = 1 << 11;
+
+		// Step by 27 so only a subset of the input combinations is visited.
+		for(glm::uint16 w = 0; w < w_max; w += 27)
+		for(glm::uint16 z = 0; z < z_max; z += 27)
+		for(glm::uint16 y = 0; y < y_max; y += 27)
+		for(glm::uint16 x = 0; x < x_max; x += 27)
+		{
+			glm::uint64 ResultA = loopBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z, w);
+			glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z, w);
+			// Count one error per input combination whose results differ.
+			Error += ResultA == ResultB ? 0 : 1;
+		}
 
 		return Error;
 	}
@@ -533,6 +597,7 @@ int main()
 	int Error(0);
 
 	Error += ::bitfieldInterleave3::test();
+	Error += ::bitfieldInterleave4::test();
 	Error += ::bitfieldInterleave::test();
 	Error += ::extractField::test();
 	Error += ::bitRevert::test();