瀏覽代碼

Added AVX optimizations and equal tests

Christophe Riccio 9 年之前
父節點
當前提交
52f8ecd973

+ 5 - 0
glm/detail/func_trigonometric.inl

@@ -193,3 +193,8 @@ namespace glm
 		return detail::functor1<T, T, P, vecType>::call(atanh, v);
 		return detail::functor1<T, T, P, vecType>::call(atanh, v);
 	}
 	}
 }//namespace glm
 }//namespace glm
+
+#if GLM_ARCH != GLM_ARCH_PURE && GLM_HAS_UNRESTRICTED_UNIONS
+#	include "func_trigonometric_simd.inl"
+#endif
+

+ 0 - 0
glm/detail/func_trigonometric_simd.inl


+ 72 - 2
glm/detail/type_vec4_simd.inl

@@ -62,6 +62,19 @@ namespace detail
 		}
 		}
 	};
 	};
 
 
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// AVX specialization: adds two double-precision 4-vectors with a single
+	// packed 256-bit add on their `data` members.
+	template <precision P>
+	struct compute_vec4_add<double, P>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Storage is left uninitialized because `data` is overwritten right away.
+			tvec4<double, P> Sum(uninitialize);
+			Sum.data = _mm256_add_pd(a.data, b.data);
+			return Sum;
+		}
+	};
+#	endif
+
 	template <precision P>
 	template <precision P>
 	struct compute_vec4_sub<float, P>
 	struct compute_vec4_sub<float, P>
 	{
 	{
@@ -73,6 +86,19 @@ namespace detail
 		}
 		}
 	};
 	};
 
 
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// AVX specialization: subtracts `b` from `a` for double-precision 4-vectors
+	// with a single packed 256-bit subtract on their `data` members.
+	template <precision P>
+	struct compute_vec4_sub<double, P>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Storage is left uninitialized because `data` is overwritten right away.
+			tvec4<double, P> Difference(uninitialize);
+			Difference.data = _mm256_sub_pd(a.data, b.data);
+			return Difference;
+		}
+	};
+#	endif
+
 	template <precision P>
 	template <precision P>
 	struct compute_vec4_mul<float, P>
 	struct compute_vec4_mul<float, P>
 	{
 	{
@@ -84,6 +110,19 @@ namespace detail
 		}
 		}
 	};
 	};
 
 
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// AVX specialization: multiplies two double-precision 4-vectors component
+	// by component with a single packed 256-bit multiply on their `data` members.
+	template <precision P>
+	struct compute_vec4_mul<double, P>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Storage is left uninitialized because `data` is overwritten right away.
+			tvec4<double, P> Product(uninitialize);
+			Product.data = _mm256_mul_pd(a.data, b.data);
+			return Product;
+		}
+	};
+#	endif
+
 	template <precision P>
 	template <precision P>
 	struct compute_vec4_div<float, P>
 	struct compute_vec4_div<float, P>
 	{
 	{
@@ -95,6 +134,19 @@ namespace detail
 		}
 		}
 	};
 	};
 
 
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// AVX specialization: divides `a` by `b` for double-precision 4-vectors
+	// with a single packed 256-bit divide on their `data` members.
+	template <precision P>
+	struct compute_vec4_div<double, P>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Storage is left uninitialized because `data` is overwritten right away.
+			tvec4<double, P> Quotient(uninitialize);
+			Quotient.data = _mm256_div_pd(a.data, b.data);
+			return Quotient;
+		}
+	};
+#	endif
+
 	template <>
 	template <>
 	struct compute_vec4_div<float, lowp>
 	struct compute_vec4_div<float, lowp>
 	{
 	{
@@ -124,7 +176,7 @@ namespace detail
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
 		{
 			tvec4<T, P> Result(uninitialize);
 			tvec4<T, P> Result(uninitialize);
-			Result.data = _mm_and_si256(a.data, b.data);
+			Result.data = _mm256_and_si256(a.data, b.data);
 			return Result;
 			return Result;
 		}
 		}
 	};
 	};
@@ -148,7 +200,7 @@ namespace detail
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
 		{
 			tvec4<T, P> Result(uninitialize);
 			tvec4<T, P> Result(uninitialize);
-			Result.data = _mm_or_si256(a.data, b.data);
+			Result.data = _mm256_or_si256(a.data, b.data);
 			return Result;
 			return Result;
 		}
 		}
 	};
 	};
@@ -259,6 +311,15 @@ namespace detail
 		}
 		}
 	};
 	};
 
 
+	// SIMD equality test for two 4-component int32 vectors.
+	// Returns true only when every component of v1 equals the matching component of v2.
+	template <precision P>
+	struct compute_vec4_equal<int32, P, true, 32>
+	{
+		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
+		{
+			// _mm_cmpeq_epi32 sets every bit of each lane that compares equal and
+			// _mm_movemask_epi8 gathers the top bit of each of the 16 bytes, so the
+			// mask is 0xFFFF exactly when all four lanes match. Testing "!= 0" would
+			// wrongly report equality as soon as a single component matches.
+			return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) == 0xFFFF;
+		}
+	};
+
 	template <precision P>
 	template <precision P>
 	struct compute_vec4_nequal<float, P, false, 32>
 	struct compute_vec4_nequal<float, P, false, 32>
 	{
 	{
@@ -267,6 +328,15 @@ namespace detail
 			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
 			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
 		}
 		}
 	};
 	};
+
+	// SIMD inequality test for two 4-component int32 vectors.
+	// Returns true when at least one component of v1 differs from the matching component of v2.
+	template <precision P>
+	struct compute_vec4_nequal<int32, P, true, 32>
+	{
+		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
+		{
+			// There is no packed "not equal" integer compare intrinsic in SSE2/AVX
+			// (_mm_cmpneq_epi32 does not exist), so compare for equality and check
+			// that the byte mask is not all ones: any cleared bit means a lane differs.
+			return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0xFFFF;
+		}
+	};
 }//namespace detail
 }//namespace detail
 
 
 #	if !GLM_HAS_DEFAULTED_FUNCTIONS
 #	if !GLM_HAS_DEFAULTED_FUNCTIONS

+ 3 - 2
test/core/core_func_swizzle.cpp

@@ -83,11 +83,12 @@ int test_vec4_swizzle()
 	glm::vec4 B = A.wzyx();
 	glm::vec4 B = A.wzyx();
 	glm::vec4 C = B.wzyx();
 	glm::vec4 C = B.wzyx();
 
 
-	float f = glm::dot(C.wzyx(), C.xyzw());
-
 	Error += A != B ? 0 : 1;
 	Error += A != B ? 0 : 1;
 	Error += A == C ? 0 : 1;
 	Error += A == C ? 0 : 1;
 
 
+	float f = glm::dot(C.wzyx(), C.xyzw());
+	Error += glm::abs(f - 20.f) < 0.01f ? 0 : 1;
+
 	return Error;
 	return Error;
 }
 }
 
 

+ 22 - 0
test/core/core_type_vec4.cpp

@@ -310,6 +310,27 @@ int test_vec4_operators()
 	return Error;
 	return Error;
 }
 }
 
 
+// Checks operator== and operator!= on vec4 and ivec4.
+// Covers both identical vectors and vectors that differ in a single component,
+// so a comparison that only requires one matching component is detected.
+// Returns the number of failed checks (0 on success).
+int test_vec4_equal()
+{
+	int Error = 0;
+
+	{
+		glm::vec4 const A(1, 2, 3, 4);
+		glm::vec4 const B(1, 2, 3, 4);
+		glm::vec4 const C(1, 2, 3, 5); // differs from A in the last component only
+		Error += A == B ? 0 : 1;
+		Error += A != B ? 1 : 0;
+		Error += A == C ? 1 : 0;
+		Error += A != C ? 0 : 1;
+	}
+
+	{
+		glm::ivec4 const A(1, 2, 3, 4);
+		glm::ivec4 const B(1, 2, 3, 4);
+		glm::ivec4 const C(1, 2, 3, 5); // differs from A in the last component only
+		Error += A == B ? 0 : 1;
+		Error += A != B ? 1 : 0;
+		Error += A == C ? 1 : 0;
+		Error += A != C ? 0 : 1;
+	}
+
+	return Error;
+}
+
 int test_vec4_size()
 int test_vec4_size()
 {
 {
 	int Error = 0;
 	int Error = 0;
@@ -557,6 +578,7 @@ int main()
 	Error += test_bvec4_ctor();
 	Error += test_bvec4_ctor();
 	Error += test_vec4_size();
 	Error += test_vec4_size();
 	Error += test_vec4_operators();
 	Error += test_vec4_operators();
+	Error += test_vec4_equal();
 	Error += test_vec4_swizzle_partial();
 	Error += test_vec4_swizzle_partial();
 	Error += test_vec4_simd();
 	Error += test_vec4_simd();
 	Error += test_operator_increment();
 	Error += test_operator_increment();