Бранимир Караџић 7 месяцев назад
Родитель
Commit
d4096a8446
3 изменённых файла: 76 добавлений и 0 удалений
  1. 53 0
      include/bx/inline/uint32_t.inl
  2. 8 0
      include/bx/uint32_t.h
  3. 15 0
      tests/uint32_test.cpp

+ 53 - 0
include/bx/inline/uint32_t.inl

@@ -184,6 +184,26 @@ namespace bx
 		return -!!_a;
 	}
 
+	// Fills a 32-bit word with four copies of the byte `_val`
+	// (e.g. 0x55 -> 0x55555555).
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_splat(uint8_t _val)
+	{
+		// Duplicate the byte into the low 16 bits first...
+		const uint32_t half = uint32_or(uint32_sll(_val, 8), _val);
+
+		// ...then duplicate that 16-bit pattern into the upper half.
+		return uint32_or(uint32_sll(half, 16), half);
+	}
+
+	// Fills a 32-bit word with two copies of the 16-bit value `_val`
+	// (e.g. 0x1389 -> 0x13891389).
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_splat(uint16_t _val)
+	{
+		const uint32_t upper = uint32_sll(_val, 16);
+
+		return uint32_or(upper, _val);
+	}
+
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
 	{
 		const uint32_t add    = uint32_add(_a, _b);
@@ -559,6 +579,39 @@ namespace bx
 		return _a * _b;
 	}
 
+	// Fills a 64-bit word with eight copies of the byte `_val`
+	// (e.g. 0x01 -> 0x0101010101010101).
+	template<>
+	inline BX_CONSTEXPR_FUNC uint64_t uint64_splat(uint8_t _val)
+	{
+		// Each step doubles the width of the repeated pattern: 8 -> 16 -> 32 -> 64 bits.
+		const uint64_t pair = uint64_or(uint64_sll(_val, 8), _val);
+		const uint64_t quad = uint64_or(uint64_sll(pair, 16), pair);
+
+		return uint64_or(uint64_sll(quad, 32), quad);
+	}
+
+	// Fills a 64-bit word with four copies of the 16-bit value `_val`
+	// (e.g. 0x1389 -> 0x1389138913891389).
+	template<>
+	inline BX_CONSTEXPR_FUNC uint64_t uint64_splat(uint16_t _val)
+	{
+		// Build the 32-bit pattern, then mirror it into the upper 32 bits.
+		const uint64_t lo32 = uint64_or(uint64_sll(_val, 16), _val);
+
+		return uint64_or(uint64_sll(lo32, 32), lo32);
+	}
+
+	// Fills a 64-bit word with two copies of the 32-bit value `_val`
+	// (e.g. 0x15061389 -> 0x1506138915061389).
+	template<>
+	inline BX_CONSTEXPR_FUNC uint64_t uint64_splat(uint32_t _val)
+	{
+		const uint64_t upper = uint64_sll(_val, 32);
+
+		return uint64_or(upper, _val);
+	}
+
 	inline BX_CONSTEXPR_FUNC uint64_t uint64_cntbits(uint64_t _val)
 	{
 #if BX_COMPILER_GCC || BX_COMPILER_CLANG

+ 8 - 0
include/bx/uint32_t.h

@@ -102,6 +102,10 @@ namespace bx
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_setnz(uint32_t _a);
 
+	/// Repeats `_val` to fill a 32-bit result (e.g. uint8_t 0x55 -> 0x55555555).
+	/// Only the declaration lives here; inline/uint32_t.inl provides explicit
+	/// specializations for `uint8_t` and `uint16_t`.
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_splat(Ty _val);
+
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_satadd(uint32_t _a, uint32_t _b);
 
@@ -235,6 +239,10 @@ namespace bx
 	///
 	BX_CONSTEXPR_FUNC uint64_t uint64_mul(uint64_t _a, uint64_t _b);
 
+	/// Repeats `_val` to fill a 64-bit result (e.g. uint8_t 0x01 -> 0x0101010101010101).
+	/// Only the declaration lives here; inline/uint32_t.inl provides explicit
+	/// specializations for `uint8_t`, `uint16_t` and `uint32_t`.
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint64_t uint64_splat(Ty _val);
+
 	///
 	BX_CONSTEXPR_FUNC uint64_t uint64_cntbits(uint64_t _val);
 

+ 15 - 0
tests/uint32_test.cpp

@@ -34,6 +34,21 @@ TEST_CASE("uint32_part", "[uint32_t]")
 	REQUIRE(UINT32_C(0x09249249) == bx::uint32_part1by2(0x3ff) );
 }
 
+// Verifies that uint32_splat repeats an 8-bit or 16-bit pattern across all 32 bits.
+TEST_CASE("uint32_splat", "[uint32_t]")
+{
+	REQUIRE(UINT32_C(0x01010101) == bx::uint32_splat<uint8_t>(0x01) );
+	REQUIRE(UINT32_C(0x55555555) == bx::uint32_splat<uint8_t>(0x55) );
+	REQUIRE(UINT32_C(0x13891389) == bx::uint32_splat<uint16_t>(0x1389) );
+}
+
+// Verifies that uint64_splat repeats an 8-, 16- or 32-bit pattern across all 64 bits.
+TEST_CASE("uint64_splat", "[uint32_t]")
+{
+	// Expected values are 64-bit wide, so they must be spelled with UINT64_C;
+	// UINT32_C is only specified for constants that fit in uint_least32_t.
+	REQUIRE(UINT64_C(0x0101010101010101) == bx::uint64_splat<uint8_t>(0x01) );
+	REQUIRE(UINT64_C(0x5555555555555555) == bx::uint64_splat<uint8_t>(0x55) );
+	REQUIRE(UINT64_C(0x1389138913891389) == bx::uint64_splat<uint16_t>(0x1389) );
+	REQUIRE(UINT64_C(0x1506138915061389) == bx::uint64_splat<uint32_t>(0x15061389) );
+}
+
 TEST_CASE("uint32_gcd", "[uint32_t]")
 {
 	REQUIRE(1 == bx::uint32_gcd(13, 89) );