Преглед на файлове

Updated count bits, count leading/trailing zeros.

Бранимир Караџић преди 6 години
родител
ревизия
421eaf58a9
променени са 3 файла, в които са добавени 113 реда и са изтрити 23 реда
  1. 77 0
      include/bx/inline/uint32_t.inl
  2. 7 14
      include/bx/uint32_t.h
  3. 29 9
      tests/uint32_test.cpp

+ 77 - 0
include/bx/inline/uint32_t.inl

@@ -307,8 +307,12 @@ namespace bx
 		return result;
 	}
 
+	template<>
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
 	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return __builtin_popcount(_val);
+#else
 		const uint32_t tmp0   = uint32_srl(_val, 1);
 		const uint32_t tmp1   = uint32_and(tmp0, 0x55555555);
 		const uint32_t tmp2   = uint32_sub(_val, tmp1);
@@ -328,10 +332,37 @@ namespace bx
 		const uint32_t result = uint32_and(tmpF, 0x3f);
 
 		return result;
+#endif // BX_COMPILER_*
 	}
 
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint64_t _val)
+	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return __builtin_popcountll(_val);
+#else
+		const uint32_t lo = uint32_t(_val&UINT32_MAX);
+		const uint32_t hi = uint32_t(_val>>32);
+
+		const uint32_t total = uint32_cntbits(lo)
+							 + uint32_cntbits(hi);
+		return total;
+#endif // BX_COMPILER_*
+	}
+
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint8_t  _val) { return uint32_cntbits<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int8_t   _val) { return uint32_cntbits<uint8_t >(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint16_t _val) { return uint32_cntbits<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int16_t  _val) { return uint32_cntbits<uint16_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int32_t  _val) { return uint32_cntbits<uint32_t>(_val); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int64_t  _val) { return uint32_cntbits<uint64_t>(_val); }
+
+	template<>
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
 	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 32 : __builtin_clz(_val);
+#else
 		const uint32_t tmp0   = uint32_srl(_val, 1);
 		const uint32_t tmp1   = uint32_or(tmp0, _val);
 		const uint32_t tmp2   = uint32_srl(tmp1, 2);
@@ -346,18 +377,64 @@ namespace bx
 		const uint32_t result = uint32_cntbits(tmpA);
 
 		return result;
+#endif // BX_COMPILER_*
+	}
+
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint64_t _val)
+	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 64 : __builtin_clzll(_val);
+#else
+		return _val & UINT64_C(0xffffffff00000000)
+			 ? uint32_cntlz(uint32_t(_val>>32) )
+			 : uint32_cntlz(uint32_t(_val) ) + 32
+			 ;
+#endif // BX_COMPILER_*
 	}
 
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint8_t  _val) { return uint32_cntlz<uint32_t>(_val)-24; }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int8_t   _val) { return uint32_cntlz<uint8_t >(_val);    }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint16_t _val) { return uint32_cntlz<uint32_t>(_val)-16; }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int16_t  _val) { return uint32_cntlz<uint16_t>(_val);    }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int32_t  _val) { return uint32_cntlz<uint32_t>(_val);    }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int64_t  _val) { return uint32_cntlz<uint64_t>(_val);    }
+
+	template<>
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
 	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 32 : __builtin_ctz(_val);
+#else
 		const uint32_t tmp0   = uint32_not(_val);
 		const uint32_t tmp1   = uint32_dec(_val);
 		const uint32_t tmp2   = uint32_and(tmp0, tmp1);
 		const uint32_t result = uint32_cntbits(tmp2);
 
 		return result;
+#endif // BX_COMPILER_*
+	}
+
+	template<>
+	inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint64_t _val)
+	{
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
+		return 0 == _val ? 64 : __builtin_ctzll(_val);
+#else
+		return _val & UINT64_C(0xffffffff)
+			? uint32_cnttz(uint32_t(_val) )
+			: uint32_cnttz(uint32_t(_val>>32) ) + 32
+			;
+#endif // BX_COMPILER_*
 	}
 
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint8_t  _val) { return bx::min(8u,  uint32_cnttz<uint32_t>(_val) ); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int8_t   _val) { return              uint32_cnttz<uint8_t >(_val);   }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint16_t _val) { return bx::min(16u, uint32_cnttz<uint32_t>(_val) ); }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int16_t  _val) { return              uint32_cnttz<uint16_t>(_val);   }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int32_t  _val) { return              uint32_cnttz<uint32_t>(_val);   }
+	template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int64_t  _val) { return              uint32_cnttz<uint64_t>(_val);   }
+
 	inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
 	{
 		// shuffle:

+ 7 - 14
include/bx/uint32_t.h

@@ -149,14 +149,18 @@ namespace bx
 
 	/// Count number of bits set.
 	///
-	BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val);
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(Ty _val);
 
 	/// Count number of leading zeros.
 	///
-	BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val);
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(Ty _val);
 
+	/// Count number of trailing zeros.
 	///
-	BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val);
+	template<typename Ty>
+	BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(Ty _val);
 
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a);
@@ -170,17 +174,6 @@ namespace bx
 	///
 	BX_CONSTEXPR_FUNC uint32_t uint32_nextpow2(uint32_t _a);
 
-	/// Count number of bits set.
-	///
-	BX_CONSTEXPR_FUNC uint32_t uint64_cntbits(uint64_t _val);
-
-	/// Count number of leading zeros.
-	///
-	BX_CONSTEXPR_FUNC uint32_t uint64_cntlz(uint64_t _val);
-
-	///
-	BX_CONSTEXPR_FUNC uint32_t uint64_cnttz(uint64_t _val);
-
 	///
 	BX_CONSTEXPR_FUNC uint64_t uint64_li(uint64_t _a);
 

+ 29 - 9
tests/uint32_test.cpp

@@ -30,19 +30,39 @@ TEST_CASE("StrideAlign")
 
 TEST_CASE("uint32_cnt")
 {
-	REQUIRE( 0 == bx::uint32_cnttz(UINT32_C(1) ) );
-
-	REQUIRE(31 == bx::uint32_cntlz(UINT32_C(1) ) );
-
-	REQUIRE( 0 == bx::uint64_cnttz(UINT64_C(1) ) );
-
-	REQUIRE(63 == bx::uint64_cntlz(UINT64_C(1) ) );
-
+	REQUIRE( 0 == bx::uint32_cnttz<uint8_t >(1) );
+	REQUIRE( 7 == bx::uint32_cnttz<uint8_t >(1<<7) );
+	REQUIRE( 8 == bx::uint32_cnttz<uint8_t >(0) );
+	REQUIRE( 0 == bx::uint32_cnttz<uint16_t>(1) );
+	REQUIRE(15 == bx::uint32_cnttz<uint16_t>(1<<15) );
+	REQUIRE(16 == bx::uint32_cnttz<uint16_t>(0) );
+	REQUIRE( 0 == bx::uint32_cnttz<uint32_t>(1) );
+	REQUIRE(32 == bx::uint32_cnttz<uint32_t>(0) );
+	REQUIRE(31 == bx::uint32_cnttz<uint32_t>(1<<31) );
+	REQUIRE( 0 == bx::uint32_cnttz<uint64_t>(1) );
+	REQUIRE(64 == bx::uint32_cnttz<uint64_t>(0) );
+
+	REQUIRE( 7 == bx::uint32_cntlz<uint8_t >(1) );
+	REQUIRE( 8 == bx::uint32_cntlz<uint8_t >(0) );
+	REQUIRE(15 == bx::uint32_cntlz<uint16_t>(1) );
+	REQUIRE(16 == bx::uint32_cntlz<uint16_t>(0) );
+	REQUIRE(31 == bx::uint32_cntlz<uint32_t>(1) );
+	REQUIRE(32 == bx::uint32_cntlz<uint32_t>(0) );
+	REQUIRE(63 == bx::uint32_cntlz<uint64_t>(1) );
+	REQUIRE(64 == bx::uint32_cntlz<uint64_t>(0) );
+
+	REQUIRE( 0 == bx::uint32_cntbits(0) );
 	REQUIRE( 1 == bx::uint32_cntbits(1) );
 
-	REQUIRE(16 == bx::uint32_cntbits(UINT16_MAX) );
+	REQUIRE( 4 == bx::uint32_cntbits<uint8_t>(0x55) );
+	REQUIRE( 8 == bx::uint32_cntbits<uint16_t>(0x5555) );
+	REQUIRE(16 == bx::uint32_cntbits<uint32_t>(0x55555555) );
+	REQUIRE(32 == bx::uint32_cntbits<uint64_t>(0x5555555555555555) );
 
+	REQUIRE( 8 == bx::uint32_cntbits(UINT8_MAX) );
+	REQUIRE(16 == bx::uint32_cntbits(UINT16_MAX) );
 	REQUIRE(32 == bx::uint32_cntbits(UINT32_MAX) );
+	REQUIRE(64 == bx::uint32_cntbits(UINT64_MAX) );
 }
 
 TEST_CASE("uint32_part")