Browse Source

Added a SIMD optimization for the bitwise inverse operator (operator~). Factored the duplicated int32/uint32 bitwise SIMD specializations into shared templates.

Christophe Riccio 9 years ago
parent
commit
f577611328
3 changed files with 54 additions and 43 deletions
  1. 31 4
      glm/detail/type_vec4.inl
  2. 17 39
      glm/detail/type_vec4_simd.inl
  3. 6 0
      test/core/core_func_common.cpp

+ 31 - 4
glm/detail/type_vec4.inl

@@ -4,6 +4,24 @@
 namespace glm{
 namespace detail
 {
+	template <typename T> // Compile-time trait whose value operator~ forwards as the IsInt32 template argument.
+	struct is_int32 // Primary template: applies to every T that has no explicit specialization.
+	{
+		enum test {value = 0}; // 0 for such types (the uint32 and int32 specializations report ~0 instead)
+	};
+
+	template <> // Explicit specialization of is_int32 for uint32.
+	struct is_int32<uint32> // NOTE(review): the value below, ~0, is -1 once converted to an int template argument, while the SIMD partial specializations in type_vec4_simd.inl are keyed on 'true' (1) — confirm they are ever selected.
+	{
+		enum test {value = ~0}; // all bits set
+	};
+
+	template <> // Explicit specialization of is_int32 for int32.
+	struct is_int32<int32> // NOTE(review): the value below, ~0, is -1 once converted to an int template argument, while the SIMD partial specializations in type_vec4_simd.inl are keyed on 'true' (1) — confirm they are ever selected.
+	{
+		enum test {value = ~0}; // all bits set
+	};
+
 	template <typename T, precision P>
 	struct compute_vec4_add
 	{
@@ -67,7 +85,7 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
+	template <typename T, precision P, int IsInt32>
 	struct compute_vec4_xor
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@@ -76,7 +94,7 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
+	template <typename T, precision P, int IsInt32>
 	struct compute_vec4_shift_left
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@@ -85,7 +103,7 @@ namespace detail
 		}
 	};
 
-	template <typename T, precision P>
+	template <typename T, precision P, int IsInt32>
 	struct compute_vec4_shift_right
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@@ -93,6 +111,15 @@ namespace detail
 			return tvec4<T, P>(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w);
 		}
 	};
+
+	template <typename T, precision P, int IsInt32> // IsInt32 is not used in this body; it only serves as a key for specializations.
+	struct compute_vec4_logical_not // Generic implementation that operator~ on tvec4 delegates to.
+	{
+		static tvec4<T, P> call(tvec4<T, P> const & v)
+		{
+			return tvec4<T, P>(~v.x, ~v.y, ~v.z, ~v.w); // bitwise complement (~) of each component, despite the "logical_not" name
+		}
+	};
 }//namespace detail
 
 	// -- Implicit basic constructors --
@@ -883,7 +910,7 @@ namespace detail
 	template <typename T, precision P> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> operator~(tvec4<T, P> const & v)
 	{
-		return tvec4<T, P>(~v.x, ~v.y, ~v.z, ~v.w);
+		return detail::compute_vec4_logical_not<T, P, detail::is_int32<T>::value>::call(v); // is_int32<T>::value (0 or ~0) picks which compute_vec4_logical_not variant handles v
 	}
 
 	// -- Boolean operators --

+ 17 - 39
glm/detail/type_vec4_simd.inl

@@ -105,68 +105,46 @@ namespace detail
 		}
 	};
 
-	template <precision P>
-	struct compute_vec4_xor<int32, P>
+	template <typename T, precision P> // SIMD variant of compute_vec4_xor; replaces the separate int32 and uint32 specializations.
+	struct compute_vec4_xor<T, P, true> // NOTE(review): matched only when the third argument equals 'true' (1); is_int32 reports ~0 (-1) for int32/uint32 — confirm the argument callers pass actually selects this.
 	{
-		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
-			tvec4<int32, P> Result(uninitialize);
+			tvec4<T, P> Result(uninitialize); // filled in by the single _mm_xor_si128 on the next line, which XORs a.data with b.data
 			Result.data = _mm_xor_si128(a.data, b.data);
 			return Result;
 		}
 	};
 
-	template <precision P>
-	struct compute_vec4_xor<uint32, P>
+	template <typename T, precision P> // SIMD variant of compute_vec4_shift_left; replaces the separate int32 and uint32 specializations.
+	struct compute_vec4_shift_left<T, P, true> // NOTE(review): matched only when the third argument equals 'true' (1); is_int32 reports ~0 (-1) — confirm this is ever selected.
 	{
-		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
-			tvec4<uint32, P> Result(uninitialize);
-			Result.data = _mm_xor_si128(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template <precision P>
-	struct compute_vec4_shift_left<int32, P>
-	{
-		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
-		{
-			tvec4<int32, P> Result(uninitialize);
+			tvec4<T, P> Result(uninitialize); // NOTE(review): _mm_sll_epi32 (next line) shifts all four lanes by one count read from the low 64 bits of b.data, rather than shifting each lane by its own component of b — confirm this is intended.
 			Result.data = _mm_sll_epi32(a.data, b.data);
 			return Result;
 		}
 	};
 
-	template <precision P>
-	struct compute_vec4_shift_left<uint32, P>
+	template <typename T, precision P> // SIMD variant of compute_vec4_shift_right; replaces the separate int32 and uint32 specializations.
+	struct compute_vec4_shift_right<T, P, true> // NOTE(review): matched only when the third argument equals 'true' (1); is_int32 reports ~0 (-1) — confirm this is ever selected.
 	{
-		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
-		{
-			tvec4<uint32, P> Result(uninitialize);
-			Result.data = _mm_sll_epi32(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template <precision P>
-	struct compute_vec4_shift_right<int32, P>
-	{
-		static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
-			tvec4<int32, P> Result(uninitialize);
+			tvec4<T, P> Result(uninitialize); // NOTE(review): _mm_srl_epi32 (next line) is a logical shift (zero fill) using one count from the low 64 bits of b.data; the generic version computes a.x >> b.x per component, which can differ, notably for signed int32 — confirm.
 			Result.data = _mm_srl_epi32(a.data, b.data);
 			return Result;
 		}
 	};
 
-	template <precision P>
-	struct compute_vec4_shift_right<uint32, P>
+	template <typename T, precision P> // SIMD variant of compute_vec4_logical_not.
+	struct compute_vec4_logical_not<T, P, true> // NOTE(review): matched only when the third argument equals 'true' (1), but operator~ passes is_int32<T>::value, which is ~0 (-1) for int32/uint32 — confirm this is ever selected.
 	{
-		static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b)
+		static tvec4<T, P> call(tvec4<T, P> const & v)
 		{
-			tvec4<uint32, P> Result(uninitialize);
-			Result.data = _mm_srl_epi32(a.data, b.data);
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1)); // XOR against an all-ones mask flips every bit of v.data
 			return Result;
 		}
 	};

+ 6 - 0
test/core/core_func_common.cpp

@@ -1237,6 +1237,12 @@ int main()
 {
 	int Error = 0;
 
+	glm::ivec4 const a(1);
+	glm::ivec4 const b = ~a;
+
+	glm::int32 const c(1);
+	glm::int32 const d = ~c;
+
 	Error += sign::test();
 	Error += floor_::test();
 	Error += mod_::test();