Branimir Karadžić 9 years ago
parent
commit
30559bbd47
3 changed files with 139 additions and 62 deletions
  1. 33 0
      include/bx/simd256_avx.inl
  2. 87 51
      include/bx/simd256_ref.inl
  3. 19 11
      tests/simd_test.cpp

+ 33 - 0
include/bx/simd256_avx.inl

@@ -37,6 +37,39 @@ namespace bx
 		return result;
 	}
 
+	// Broadcasts the float _a into every lane of the returned 256-bit vector
+	// using _mm256_set1_ps.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_avx_t simd_splat(float _a)
+	{
+		return _mm256_set1_ps(_a);
+	}
+
+	// Broadcasts the 32-bit integer _a into every lane of an integer vector
+	// (_mm256_set1_epi32) and returns those bits viewed as a float vector
+	// (_mm256_castsi256_ps is a cast of the register type, not a numeric
+	// conversion).
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_avx_t simd_isplat(uint32_t _a)
+	{
+		const __m256i splat        = _mm256_set1_epi32(_a);
+		const simd256_avx_t result = _mm256_castsi256_ps(splat);
+
+		return result;
+	}
+
+	// Treats the bits of _a as packed 32-bit integers (_mm256_castps_si256)
+	// and converts each of them to a float with _mm256_cvtepi32_ps.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_avx_t simd_itof(simd256_avx_t _a)
+	{
+		const __m256i  itof        = _mm256_castps_si256(_a);
+		const simd256_avx_t result = _mm256_cvtepi32_ps(itof);
+
+		return result;
+	}
+
+	// Converts each float lane of _a to a 32-bit integer with
+	// _mm256_cvtps_epi32 and returns the integer bits viewed as a float
+	// vector (_mm256_castsi256_ps).
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_avx_t simd_ftoi(simd256_avx_t _a)
+	{
+		const __m256i ftoi         = _mm256_cvtps_epi32(_a);
+		const simd256_avx_t result = _mm256_castsi256_ps(ftoi);
+
+		return result;
+	}
+
 	typedef simd256_avx_t simd256_t;
 
 } // namespace bx

+ 87 - 51
include/bx/simd256_ref.inl

@@ -1,51 +1,87 @@
-/*
- * Copyright 2010-2016 Branimir Karadzic. All rights reserved.
- * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
- */
-
-#ifndef BX_SIMD256_REF_H_HEADER_GUARD
-#define BX_SIMD256_REF_H_HEADER_GUARD
-
-#include "simd_ni.inl"
-
-namespace bx
-{
-	template<>
-	BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr)
-	{
-		const simd256_ref_t::type* ptr = reinterpret_cast<const simd256_ref_t::type*>(_ptr);
-		simd256_ref_t result;
-		result.simd128_0 = simd_ld<simd256_ref_t::type>(&ptr[0]);
-		result.simd128_1 = simd_ld<simd256_ref_t::type>(&ptr[1]);
-		return result;
-	}
-
-	template<>
-	BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t& _a)
-	{
-		simd256_ref_t* result = reinterpret_cast<simd256_ref_t*>(_ptr);
-		simd_st<simd256_ref_t::type>(&result[0], _a.simd128_0);
-		simd_st<simd256_ref_t::type>(&result[1], _a.simd128_1);
-	}
-
-	template<>
-	BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d)
-	{
-		simd256_ref_t result;
-		result.simd128_0 = simd_ld<simd256_ref_t::type>(_x, _y, _z, _w);
-		result.simd128_1 = simd_ld<simd256_ref_t::type>(_a, _b, _c, _d);
-		return result;
-	}
-
-	template<>
-	BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d)
-	{
-		simd256_ref_t result;
-		result.simd128_0 = simd_ild<simd256_ref_t::type>(_x, _y, _z, _w);
-		result.simd128_1 = simd_ild<simd256_ref_t::type>(_a, _b, _c, _d);
-		return result;
-	}
-
-} // namespace bx
-
-#endif // BX_SIMD256_REF_H_HEADER_GUARD
+/*
+ * Copyright 2010-2016 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
+#ifndef BX_SIMD256_REF_H_HEADER_GUARD
+#define BX_SIMD256_REF_H_HEADER_GUARD
+
+#include "simd_ni.inl"
+
+namespace bx
+{
+	// Loads a simd256_ref_t from _ptr as two consecutive simd256_ref_t::type
+	// halves: the first goes to simd128_0, the one right after it to simd128_1.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr)
+	{
+		typedef simd256_ref_t::type half_t;
+		const half_t* halves = reinterpret_cast<const half_t*>(_ptr);
+
+		simd256_ref_t out;
+		out.simd128_0 = simd_ld<half_t>(halves + 0);
+		out.simd128_1 = simd_ld<half_t>(halves + 1);
+		return out;
+	}
+
+	// Stores _a to _ptr as two consecutive simd256_ref_t::type halves,
+	// simd128_0 first and simd128_1 directly after it -- the same layout the
+	// pointer overload of simd_ld reads.
+	template<>
+	BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t& _a)
+	{
+		// Index the destination in units of one half (simd256_ref_t::type),
+		// not of the whole simd256_ref_t; otherwise the second store would be
+		// placed a full vector past _ptr, outside the destination.
+		simd256_ref_t::type* result = reinterpret_cast<simd256_ref_t::type*>(_ptr);
+		simd_st<simd256_ref_t::type>(&result[0], _a.simd128_0);
+		simd_st<simd256_ref_t::type>(&result[1], _a.simd128_1);
+	}
+
+	// Builds a simd256_ref_t from eight floats: (_x, _y, _z, _w) are loaded
+	// into simd128_0 and (_a, _b, _c, _d) into simd128_1.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d)
+	{
+		typedef simd256_ref_t::type half_t;
+
+		simd256_ref_t out;
+		out.simd128_0 = simd_ld<half_t>(_x, _y, _z, _w);
+		out.simd128_1 = simd_ld<half_t>(_a, _b, _c, _d);
+		return out;
+	}
+
+	// Builds a simd256_ref_t from eight 32-bit integers: (_x, _y, _z, _w) are
+	// loaded into simd128_0 and (_a, _b, _c, _d) into simd128_1.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		typedef simd256_ref_t::type half_t;
+
+		simd256_ref_t out;
+		out.simd128_0 = simd_ild<half_t>(_x, _y, _z, _w);
+		out.simd128_1 = simd_ild<half_t>(_a, _b, _c, _d);
+		return out;
+	}
+
+	// Returns a simd256_ref_t whose two halves both hold the
+	// simd256_ref_t::type splat of the float _a.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_ref_t simd_splat(float _a)
+	{
+		typedef simd256_ref_t::type half_t;
+
+		simd256_ref_t out;
+		out.simd128_0 = simd_splat<half_t>(_a);
+		out.simd128_1 = simd_splat<half_t>(_a);
+		return out;
+	}
+
+	// Returns a simd256_ref_t whose two halves both hold the
+	// simd256_ref_t::type integer splat of _a.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_ref_t simd_isplat(uint32_t _a)
+	{
+		typedef simd256_ref_t::type half_t;
+
+		simd256_ref_t out;
+		out.simd128_0 = simd_isplat<half_t>(_a);
+		out.simd128_1 = simd_isplat<half_t>(_a);
+		return out;
+	}
+
+	// Applies simd_itof to each half of _a and returns the two converted
+	// halves as one simd256_ref_t.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_ref_t simd_itof(simd256_ref_t _a)
+	{
+		// _a is a by-value copy, so its halves can be converted in place.
+		_a.simd128_0 = simd_itof(_a.simd128_0);
+		_a.simd128_1 = simd_itof(_a.simd128_1);
+		return _a;
+	}
+
+	// Applies simd_ftoi to each half of _a and returns the two converted
+	// halves as one simd256_ref_t.
+	template<>
+	BX_SIMD_FORCE_INLINE simd256_ref_t simd_ftoi(simd256_ref_t _a)
+	{
+		// _a is a by-value copy, so its halves can be converted in place.
+		_a.simd128_0 = simd_ftoi(_a.simd128_0);
+		_a.simd128_1 = simd_ftoi(_a.simd128_1);
+		return _a;
+	}
+
+} // namespace bx
+
+#endif // BX_SIMD256_REF_H_HEADER_GUARD

+ 19 - 11
tests/simd_test.cpp

@@ -322,20 +322,20 @@ TEST_CASE("simd_load", "")
 		, 0.0f, 1.0f, 2.0f, 3.0f
 		);
 
-//	simd_check_float("ld"
-//		, simd_ld<simd256_t>(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)
-//		, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
-//		);
+	simd_check_float("ld"
+		, simd_ld<simd256_t>(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)
+		, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+		);
 
 	simd_check_int32("ild"
 		, simd_ild(uint32_t(-1), 0, 1, 2)
 		, uint32_t(-1), 0, 1, 2
 		);
 
-//	simd_check_int32("ild"
-//		, simd_ild<simd256_t>(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6)
-//		, uint32_t(-1), 0, 1, 2, 3, 4, 5, 6
-//		);
+	simd_check_int32("ild"
+		, simd_ild<simd256_t>(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6)
+		, uint32_t(-1), 0, 1, 2, 3, 4, 5, 6
+		);
 
 	simd_check_int32("ild"
 		, simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) )
@@ -346,13 +346,21 @@ TEST_CASE("simd_load", "")
 		, 0, 0, 0, 0
 		);
 
-	simd_check_uint32("isplat", simd_isplat(0x80000001)
+	simd_check_uint32("isplat", simd_isplat<simd128_t>(0x80000001)
 		, 0x80000001, 0x80000001, 0x80000001, 0x80000001
 		);
 
-	simd_check_float("isplat", simd_splat(1.0f)
+	simd_check_float("splat", simd_splat<simd128_t>(1.0f)
 		, 1.0f, 1.0f, 1.0f, 1.0f
 		);
+
+	simd_check_uint32("isplat", simd_isplat<simd256_t>(0x80000001)
+		, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001, 0x80000001
+		);
+
+	simd_check_float("splat", simd_splat<simd256_t>(1.0f)
+		, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
+		);
 }
 
 TEST_CASE("simd_arithmetic", "")
@@ -386,7 +394,7 @@ TEST_CASE("simd_sqrt", "")
 		);
 }
 
-TEST_CASE("float4", "")
+TEST_CASE("simd", "")
 {
 	const simd128_t isplat = simd_isplat(0x80000001);
 	simd_check_uint32("sll"