
Fixed sign with signed integer function on non-x86 architecture

Christophe Riccio 9 years ago
parent commit 72c741d8ea

+ 4 - 11
glm/detail/func_common.inl

@@ -115,7 +115,7 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P, template <class, precision> class vecType, bool isFloat = true, bool isSigned = true>
+	template <typename T, precision P, template <class, precision> class vecType, bool isFloat = true>
 	struct compute_sign
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@@ -124,17 +124,9 @@ namespace detail
 		}
 	};
 
+#	if GLM_ARCH == GLM_ARCH_X86
 	template <typename T, precision P, template <class, precision> class vecType>
-	struct compute_sign<T, P, vecType, false, false>
-	{
-		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
-		{
-			return vecType<T, P>(glm::greaterThan(x , vecType<T, P>(0)));
-		}
-	};
-
-	template <typename T, precision P, template <class, precision> class vecType>
-	struct compute_sign<T, P, vecType, false, true>
+	struct compute_sign<T, P, vecType, false>
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
 		{
@@ -144,6 +136,7 @@ namespace detail
 			return (x >> Shift) | y;
 		}
 	};
+#	endif
 
 	template <typename T, precision P, template <class, precision> class vecType, typename genType, bool isFloat = true>
 	struct compute_mod
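For context: the (x >> Shift) | y expression kept under the new GLM_ARCH_X86 guard is the classic branchless integer sign. Arithmetically shifting a two's-complement value right by (bit width - 1) smears the sign bit into -1 for negatives and 0 otherwise, and OR-ing in a 0/1 flag for strictly positive inputs (computed in the lines elided from this hunk) gives -1, 0 or 1. A minimal scalar sketch of the same idea, independent of GLM's templates (the helper name scalar_sign is made up for illustration; right-shifting a negative value is implementation-defined before C++20 but arithmetic on the mainstream compilers this path targets):

#include <cstdint>
#include <cstdio>

// Standalone illustration only, not GLM code: branchless sign of a 32-bit int.
static std::int32_t scalar_sign(std::int32_t x)
{
	std::int32_t const negative = x >> 31;                           // -1 if x < 0, else 0
	std::int32_t const positive = static_cast<std::int32_t>(
		(0u - static_cast<std::uint32_t>(x)) >> 31);                 // 1 if x > 0, else 0
	return negative | positive;                                      // -1, 0 or 1
}

int main()
{
	std::printf("%d %d %d\n", scalar_sign(-5), scalar_sign(0), scalar_sign(7)); // -1 0 1
	return 0;
}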

+ 71 - 17
glm/detail/type_vec4_simd.inl

@@ -62,60 +62,114 @@ namespace detail
 	};
 
 	template <precision P>
-	struct compute_vec4_and<int, P>
+	struct compute_vec4_and<int32, P>
 	{
-		static tvec4<int, P> call(tvec4<int, P> const& a, tvec4<int, P> const& b)
+		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
 		{
-			tvec4<int, P> Result(uninitialize);
+			tvec4<int32, P> Result(uninitialize);
 			Result.data = _mm_and_si128(a.data, b.data);
 			return Result;
 		}
 	};
 
 	template <precision P>
-	struct compute_vec4_or<int, P>
+	struct compute_vec4_and<uint32, P>
 	{
-		static tvec4<int, P> call(tvec4<int, P> const& a, tvec4<int, P> const& b)
+		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
 		{
-			tvec4<int, P> Result(uninitialize);
+			tvec4<uint32, P> Result(uninitialize);
+			Result.data = _mm_and_si128(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_or<int32, P>
+	{
+		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
+		{
+			tvec4<int32, P> Result(uninitialize);
 			Result.data = _mm_or_si128(a.data, b.data);
 			return Result;
 		}
 	};
 
 	template <precision P>
-	struct compute_vec4_xor<int, P>
+	struct compute_vec4_or<uint32, P>
 	{
-		static tvec4<int, P> call(tvec4<int, P> const& a, tvec4<int, P> const& b)
+		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
 		{
-			tvec4<int, P> Result(uninitialize);
+			tvec4<uint32, P> Result(uninitialize);
+			Result.data = _mm_or_si128(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_xor<int32, P>
+	{
+		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
+		{
+			tvec4<int32, P> Result(uninitialize);
 			Result.data = _mm_xor_si128(a.data, b.data);
 			return Result;
 		}
 	};
-/*
+
 	template <precision P>
-	struct compute_vec4_shift_left<int, P>
+	struct compute_vec4_xor<uint32, P>
 	{
-		static tvec4<int, P> call(tvec4<int, P> const& a, tvec4<int, P> const& b)
+		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
 		{
-			tvec4<int, P> Result(uninitialize);
+			tvec4<uint32, P> Result(uninitialize);
+			Result.data = _mm_xor_si128(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_shift_left<int32, P>
+	{
+		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
+		{
+			tvec4<int32, P> Result(uninitialize);
+			Result.data = _mm_sll_epi32(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_shift_left<uint32, P>
+	{
+		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
+		{
+			tvec4<uint32, P> Result(uninitialize);
 			Result.data = _mm_sll_epi32(a.data, b.data);
 			return Result;
 		}
 	};
 
 	template <precision P>
-	struct compute_vec4_shift_right<int, P>
+	struct compute_vec4_shift_right<int32, P>
+	{
+		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
+		{
+			tvec4<int32, P> Result(uninitialize);
+			Result.data = _mm_srl_epi32(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_shift_right<uint32, P>
 	{
-		static tvec4<int, P> call(tvec4<int, P> const& a, tvec4<int, P> const& b)
+		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
 		{
-			tvec4<int, P> Result(uninitialize);
+			tvec4<uint32, P> Result(uninitialize);
 			Result.data = _mm_srl_epi32(a.data, b.data);
 			return Result;
 		}
 	};
-*/
 }//namespace detail
 
 #	if !GLM_HAS_DEFAULTED_FUNCTIONS
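A note on the pattern above: SSE2's bitwise and shift intrinsics operate on __m128i with no notion of signedness, which is why the new uint32 specializations can reuse exactly the same bodies as the int32 ones. _mm_srl_epi32 in particular is a logical shift and takes a single count from the low 64 bits of its second operand, applied to all four lanes. A small standalone sketch (not part of the commit), assuming an SSE2-capable x86 target:

#include <cstdio>
#include <emmintrin.h> // SSE2 intrinsics used by the specializations above

int main()
{
	__m128i const a = _mm_set_epi32(-8, 8, 3, 1);        // lanes from high to low

	// Bitwise ops (_mm_and_si128, _mm_or_si128, _mm_xor_si128) are
	// signedness-agnostic, so int32 and uint32 vectors can share them.
	__m128i const masked = _mm_and_si128(a, _mm_set1_epi32(0xF));

	// _mm_srl_epi32 shifts every lane right *logically* by the single count
	// held in the low 64 bits of the second operand (here 1).
	__m128i const shifted = _mm_srl_epi32(a, _mm_cvtsi32_si128(1));

	int m[4], s[4];
	_mm_storeu_si128(reinterpret_cast<__m128i*>(m), masked);
	_mm_storeu_si128(reinterpret_cast<__m128i*>(s), shifted);
	std::printf("%d %d %d %d\n", m[0], m[1], m[2], m[3]); // 1 3 8 8
	std::printf("%d %d %d %d\n", s[0], s[1], s[2], s[3]); // 0 1 4 2147483644
	return 0;
}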

+ 1 - 0
readme.md

@@ -80,6 +80,7 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate)
 - Fixed GTX_extended_min_max filename typo #386
 - Fixed intersectRayTriangle to not do any unintentional backface culling
 - Fixed long long warnings when using C++98 on GCC and Clang #482
+- Fixed sign with signed integer function on non-x86 architecture
 
 ##### Deprecation:
 - Removed GLM_FORCE_SIZE_FUNC define

+ 1 - 1
test/core/core_func_common.cpp

@@ -1235,7 +1235,7 @@ namespace ldexp_
 
 int main()
 {
-	int Error(0);
+	int Error = 0;
 
 	Error += sign::test();
 	Error += floor_::test();

+ 40 - 0
test/core/core_type_vec4.cpp

@@ -502,7 +502,47 @@ int test_vec4_simd()
 int main()
 {
 	int Error(0);
+/*
+	{
+		glm::ivec4 const a1(2);
+		glm::ivec4 const b1 = a1 >> 1;
+
+		__m128i const e1 = _mm_set1_epi32(2);
+		__m128i const f1 = _mm_srli_epi32(e1, 1);
+
+		glm::ivec4 const g1 = *reinterpret_cast<glm::ivec4 const* const>(&f1);
+
+		glm::ivec4 const a2(-2);
+		glm::ivec4 const b2 = a2 >> 1;
+
+		__m128i const e2 = _mm_set1_epi32(-1);
+		__m128i const f2 = _mm_srli_epi32(e2, 1);
+
+		glm::ivec4 const g2 = *reinterpret_cast<glm::ivec4 const* const>(&f2);
+
+		printf("GNI\n");
+	}
 
+	{
+		glm::uvec4 const a1(2);
+		glm::uvec4 const b1 = a1 >> 1u;
+
+		__m128i const e1 = _mm_set1_epi32(2);
+		__m128i const f1 = _mm_srli_epi32(e1, 1);
+
+		glm::uvec4 const g1 = *reinterpret_cast<glm::uvec4 const* const>(&f1);
+
+		glm::uvec4 const a2(-1);
+		glm::uvec4 const b2 = a2 >> 1u;
+
+		__m128i const e2 = _mm_set1_epi32(-1);
+		__m128i const f2 = _mm_srli_epi32(e2, 1);
+
+		glm::uvec4 const g2 = *reinterpret_cast<glm::uvec4 const* const>(&f2);
+
+		printf("GNI\n");
+	}
+*/
 	glm::vec4 v;
 	assert(v.length() == 4);
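The commented-out block above looks like a probe of the same signed-shift question: glm::ivec4's operator>> is expected to behave like a scalar arithmetic shift, while _mm_srli_epi32 is always a logical shift, so the two disagree on negative lanes. A minimal standalone comparison of the two behaviours (illustration only, not part of the commit), assuming an SSE2 target:

#include <cstdio>
#include <cstdint>
#include <emmintrin.h>

int main()
{
	// Scalar signed shift: arithmetic on mainstream compilers (sign-extends).
	std::int32_t const scalar = std::int32_t(-2) >> 1;        // -1

	// SSE2 _mm_srli_epi32: always logical, fills the vacated bits with zeros.
	__m128i const v = _mm_srli_epi32(_mm_set1_epi32(-2), 1);
	std::int32_t lanes[4];
	_mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), v);

	std::printf("%d vs %d\n", scalar, lanes[0]);              // -1 vs 2147483647
	return 0;
}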