Browse Source

Added vec4 swizzle operators SIMD optimizations

Christophe Riccio 9 years ago
parent
commit
9ed2a9601b
3 changed files with 47 additions and 14 deletions
  1. 0 14
      glm/detail/type_vec4.hpp
  2. 45 0
      glm/detail/type_vec4_simd.inl
  3. 2 0
      test/core/core_func_common.cpp

+ 0 - 14
glm/detail/type_vec4.hpp

@@ -484,20 +484,6 @@ namespace detail
 
 
 	template <precision P>
 	template <precision P>
 	GLM_FUNC_DECL tvec4<bool, P> operator||(tvec4<bool, P> const & v1, tvec4<bool, P> const & v2);
 	GLM_FUNC_DECL tvec4<bool, P> operator||(tvec4<bool, P> const & v1, tvec4<bool, P> const & v2);
-
-/*
-namespace detail
-{
-	template <precision P, int E0, int E1, int E2, int E3>
-	struct _swizzle_base1<4, float, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<float, 4>
-	{ 
-		GLM_FUNC_QUALIFIER tvec4<float, P> operator ()()  const
-		{
-			return tvec4<float, P>(this->elem(E0), this->elem(E1), this->elem(E2), this->elem(E3));
-		}
-	};
-}//namespace detail
-*/
 }//namespace glm
 }//namespace glm
 
 
 #ifndef GLM_EXTERNAL_TEMPLATE
 #ifndef GLM_EXTERNAL_TEMPLATE

+ 45 - 0
glm/detail/type_vec4_simd.inl

@@ -6,6 +6,51 @@
 namespace glm{
 namespace glm{
 namespace detail
 namespace detail
 {
 {
+#	ifdef GLM_SWIZZLE
+	template <precision P, int E0, int E1, int E2, int E3>
+	struct _swizzle_base1<4, float, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<float, 4>
+	{ 
+		GLM_FUNC_QUALIFIER tvec4<float, P> operator ()()  const
+		{
+			__m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
+
+			tvec4<float, P> Result(uninitialize);
+#			if GLM_ARCH & GLM_ARCH_AVX_BIT
+				Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
+#			else
+				Result.data = _mm_shuffle_ps(data, data, _MM_SHUFFLE(E3, E2, E1, E0));
+#			endif
+			return Result;
+		}
+	};
+
+	template <precision P, int E0, int E1, int E2, int E3>
+	struct _swizzle_base1<4, int32, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<int32, 4>
+	{ 
+		GLM_FUNC_QUALIFIER tvec4<int32, P> operator ()()  const
+		{
+			__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
+
+			tvec4<int32, P> Result(uninitialize);
+			Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
+			return Result;
+		}
+	};
+
+	template <precision P, int E0, int E1, int E2, int E3>
+	struct _swizzle_base1<4, uint32, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<uint32, 4>
+	{ 
+		GLM_FUNC_QUALIFIER tvec4<uint32, P> operator ()()  const
+		{
+			__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
+
+			tvec4<uint32, P> Result(uninitialize);
+			Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
+			return Result;
+		}
+	};
+#	endif
+
 	template <precision P>
 	template <precision P>
 	struct compute_vec4_add<float, P>
 	struct compute_vec4_add<float, P>
 	{
 	{

+ 2 - 0
test/core/core_func_common.cpp

@@ -1243,11 +1243,13 @@ int main()
 	glm::int32 const c(1);
 	glm::int32 const c(1);
 	glm::int32 const d = ~c;
 	glm::int32 const d = ~c;
 
 
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	glm_vec4 const A = _mm_set_ps(4, 3, 2, 1);
 	glm_vec4 const A = _mm_set_ps(4, 3, 2, 1);
 	glm_vec4 const B = glm_vec4_swizzle_xyzw(A);
 	glm_vec4 const B = glm_vec4_swizzle_xyzw(A);
 	glm_vec4 const C = _mm_permute_ps(A, _MM_SHUFFLE(3, 2, 1, 0));
 	glm_vec4 const C = _mm_permute_ps(A, _MM_SHUFFLE(3, 2, 1, 0));
 	glm_vec4 const D = _mm_permute_ps(A, _MM_SHUFFLE(0, 1, 2, 3));
 	glm_vec4 const D = _mm_permute_ps(A, _MM_SHUFFLE(0, 1, 2, 3));
 	glm_vec4 const E = _mm_shuffle_ps(A, A, _MM_SHUFFLE(0, 1, 2, 3));
 	glm_vec4 const E = _mm_shuffle_ps(A, A, _MM_SHUFFLE(0, 1, 2, 3));
+#	endif
 
 
 	Error += sign::test();
 	Error += sign::test();
 	Error += floor_::test();
 	Error += floor_::test();