|
@@ -307,8 +307,12 @@ namespace bx
|
|
|
return result;
|
|
return result;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ template<>
|
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
|
|
|
{
|
|
{
|
|
|
|
|
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
|
|
+ return __builtin_popcount(_val);
|
|
|
|
|
+#else
|
|
|
const uint32_t tmp0 = uint32_srl(_val, 1);
|
|
const uint32_t tmp0 = uint32_srl(_val, 1);
|
|
|
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
|
|
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
|
|
|
const uint32_t tmp2 = uint32_sub(_val, tmp1);
|
|
const uint32_t tmp2 = uint32_sub(_val, tmp1);
|
|
@@ -328,10 +332,37 @@ namespace bx
|
|
|
const uint32_t result = uint32_and(tmpF, 0x3f);
|
|
const uint32_t result = uint32_and(tmpF, 0x3f);
|
|
|
|
|
|
|
|
return result;
|
|
return result;
|
|
|
|
|
+#endif // BX_COMPILER_*
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ template<>
|
|
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint64_t _val)
|
|
|
|
|
+ {
|
|
|
|
|
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
|
|
+ return __builtin_popcountll(_val);
|
|
|
|
|
+#else
|
|
|
|
|
+ const uint32_t lo = uint32_t(_val&UINT32_MAX);
|
|
|
|
|
+ const uint32_t hi = uint32_t(_val>>32);
|
|
|
|
|
+
|
|
|
|
|
+ const uint32_t total = uint32_cntbits(lo)
|
|
|
|
|
+ + uint32_cntbits(hi);
|
|
|
|
|
+ return total;
|
|
|
|
|
+#endif // BX_COMPILER_*
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint8_t _val) { return uint32_cntbits<uint32_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int8_t _val) { return uint32_cntbits<uint8_t >(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint16_t _val) { return uint32_cntbits<uint32_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int16_t _val) { return uint32_cntbits<uint16_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int32_t _val) { return uint32_cntbits<uint32_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int64_t _val) { return uint32_cntbits<uint64_t>(_val); }
|
|
|
|
|
+
|
|
|
|
|
+ template<>
|
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
|
|
|
{
|
|
{
|
|
|
|
|
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
|
|
+ return 0 == _val ? 32 : __builtin_clz(_val);
|
|
|
|
|
+#else
|
|
|
const uint32_t tmp0 = uint32_srl(_val, 1);
|
|
const uint32_t tmp0 = uint32_srl(_val, 1);
|
|
|
const uint32_t tmp1 = uint32_or(tmp0, _val);
|
|
const uint32_t tmp1 = uint32_or(tmp0, _val);
|
|
|
const uint32_t tmp2 = uint32_srl(tmp1, 2);
|
|
const uint32_t tmp2 = uint32_srl(tmp1, 2);
|
|
@@ -346,18 +377,64 @@ namespace bx
|
|
|
const uint32_t result = uint32_cntbits(tmpA);
|
|
const uint32_t result = uint32_cntbits(tmpA);
|
|
|
|
|
|
|
|
return result;
|
|
return result;
|
|
|
|
|
+#endif // BX_COMPILER_*
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ template<>
|
|
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint64_t _val)
|
|
|
|
|
+ {
|
|
|
|
|
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
|
|
+ return 0 == _val ? 64 : __builtin_clzll(_val);
|
|
|
|
|
+#else
|
|
|
|
|
+ return _val & UINT64_C(0xffffffff00000000)
|
|
|
|
|
+ ? uint32_cntlz(uint32_t(_val>>32) )
|
|
|
|
|
+ : uint32_cntlz(uint32_t(_val) ) + 32
|
|
|
|
|
+ ;
|
|
|
|
|
+#endif // BX_COMPILER_*
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint8_t _val) { return uint32_cntlz<uint32_t>(_val)-24; }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int8_t _val) { return uint32_cntlz<uint8_t >(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint16_t _val) { return uint32_cntlz<uint32_t>(_val)-16; }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int16_t _val) { return uint32_cntlz<uint16_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int32_t _val) { return uint32_cntlz<uint32_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int64_t _val) { return uint32_cntlz<uint64_t>(_val); }
|
|
|
|
|
+
|
|
|
|
|
+ template<>
|
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
|
|
|
{
|
|
{
|
|
|
|
|
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
|
|
+ return 0 == _val ? 32 : __builtin_ctz(_val);
|
|
|
|
|
+#else
|
|
|
const uint32_t tmp0 = uint32_not(_val);
|
|
const uint32_t tmp0 = uint32_not(_val);
|
|
|
const uint32_t tmp1 = uint32_dec(_val);
|
|
const uint32_t tmp1 = uint32_dec(_val);
|
|
|
const uint32_t tmp2 = uint32_and(tmp0, tmp1);
|
|
const uint32_t tmp2 = uint32_and(tmp0, tmp1);
|
|
|
const uint32_t result = uint32_cntbits(tmp2);
|
|
const uint32_t result = uint32_cntbits(tmp2);
|
|
|
|
|
|
|
|
return result;
|
|
return result;
|
|
|
|
|
+#endif // BX_COMPILER_*
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ template<>
|
|
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint64_t _val)
|
|
|
|
|
+ {
|
|
|
|
|
+#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
|
|
+ return 0 == _val ? 64 : __builtin_ctzll(_val);
|
|
|
|
|
+#else
|
|
|
|
|
+ return _val & UINT64_C(0xffffffff)
|
|
|
|
|
+ ? uint32_cnttz(uint32_t(_val) )
|
|
|
|
|
+ : uint32_cnttz(uint32_t(_val>>32) ) + 32
|
|
|
|
|
+ ;
|
|
|
|
|
+#endif // BX_COMPILER_*
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint8_t _val) { return bx::min(8u, uint32_cnttz<uint32_t>(_val) ); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int8_t _val) { return uint32_cnttz<uint8_t >(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint16_t _val) { return bx::min(16u, uint32_cnttz<uint32_t>(_val) ); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int16_t _val) { return uint32_cnttz<uint16_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int32_t _val) { return uint32_cnttz<uint32_t>(_val); }
|
|
|
|
|
+ template<> inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int64_t _val) { return uint32_cnttz<uint64_t>(_val); }
|
|
|
|
|
+
|
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
|
|
inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
|
|
|
{
|
|
{
|
|
|
// shuffle:
|
|
// shuffle:
|