|
@@ -29,162 +29,162 @@
|
|
|
|
|
|
namespace bx
|
|
|
{
|
|
|
- inline uint32_t uint32_li(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_li(uint32_t _a)
|
|
|
{
|
|
|
return _a;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_dec(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_dec(uint32_t _a)
|
|
|
{
|
|
|
return _a - 1;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_inc(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_inc(uint32_t _a)
|
|
|
{
|
|
|
return _a + 1;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_not(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_not(uint32_t _a)
|
|
|
{
|
|
|
return ~_a;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_neg(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_neg(uint32_t _a)
|
|
|
{
|
|
|
return -(int32_t)_a;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_ext(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_ext(uint32_t _a)
|
|
|
{
|
|
|
return ( (int32_t)_a)>>31;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_and(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_and(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a & _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_andc(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_andc(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a & ~_b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_xor(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_xor(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a ^ _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_xorl(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return !_a != !_b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_or(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_or(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a | _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_orc(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_orc(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a | ~_b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_sll(uint32_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_sll(uint32_t _a, int32_t _sa)
|
|
|
{
|
|
|
return _a << _sa;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_srl(uint32_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_srl(uint32_t _a, int32_t _sa)
|
|
|
{
|
|
|
return _a >> _sa;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_sra(uint32_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_sra(uint32_t _a, int32_t _sa)
|
|
|
{
|
|
|
return ( (int32_t)_a) >> _sa;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_rol(uint32_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_rol(uint32_t _a, int32_t _sa)
|
|
|
{
|
|
|
return ( _a << _sa) | (_a >> (32-_sa) );
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_ror(uint32_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_ror(uint32_t _a, int32_t _sa)
|
|
|
{
|
|
|
return ( _a >> _sa) | (_a << (32-_sa) );
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_add(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_add(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a + _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_iadd(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_iadd(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return int32_t(_a) + int32_t(_b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_sub(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_sub(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a - _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_isub(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_isub(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return int32_t(_a) - int32_t(_b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_mul(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_mul(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a * _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_div(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_div(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
- return (_a / _b);
|
|
|
+ return _a / _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_mod(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_mod(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
- return (_a % _b);
|
|
|
+ return _a % _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return -(_a == _b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return -(_a != _b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cmplt(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return -(_a < _b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cmple(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return -(_a <= _b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return -(_a > _b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cmpge(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return -(_a >= _b);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_setnz(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_setnz(uint32_t _a)
|
|
|
{
|
|
|
return -!!_a;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
const uint32_t add = uint32_add(_a, _b);
|
|
|
const uint32_t lt = uint32_cmplt(add, _a);
|
|
@@ -193,7 +193,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_satsub(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
const uint32_t sub = uint32_sub(_a, _b);
|
|
|
const uint32_t le = uint32_cmple(sub, _a);
|
|
@@ -202,7 +202,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_satmul(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
const uint64_t mul = (uint64_t)_a * (uint64_t)_b;
|
|
|
const uint32_t hi = mul >> 32;
|
|
@@ -212,7 +212,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
const uint32_t mask = uint32_ext(test);
|
|
|
const uint32_t sel_a = uint32_and(_a, mask);
|
|
@@ -222,7 +222,7 @@ namespace bx
|
|
|
return (result);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
const uint32_t sel_a = uint32_and(_a, _mask);
|
|
|
const uint32_t sel_b = uint32_andc(_b, _mask);
|
|
@@ -231,7 +231,7 @@ namespace bx
|
|
|
return (result);
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_imin(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_imin(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
const uint32_t a_sub_b = uint32_sub(_a, _b);
|
|
|
const uint32_t result = uint32_sels(a_sub_b, _a, _b);
|
|
@@ -239,7 +239,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_imax(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_imax(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
const uint32_t b_sub_a = uint32_sub(_b, _a);
|
|
|
const uint32_t result = uint32_sels(b_sub_a, _a, _b);
|
|
@@ -247,27 +247,27 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_min(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_min(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a > _b ? _b : _a;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_min(uint32_t _a, uint32_t _b, uint32_t _c)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_min(uint32_t _a, uint32_t _b, uint32_t _c)
|
|
|
{
|
|
|
return uint32_min(_a, uint32_min(_b, _c) );
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_max(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_max(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a > _b ? _a : _b;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_max(uint32_t _a, uint32_t _b, uint32_t _c)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_max(uint32_t _a, uint32_t _b, uint32_t _c)
|
|
|
{
|
|
|
return uint32_max(_a, uint32_max(_b, _c) );
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_clamp(uint32_t _a, uint32_t _min, uint32_t _max)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_clamp(uint32_t _a, uint32_t _min, uint32_t _max)
|
|
|
{
|
|
|
const uint32_t tmp = uint32_max(_a, _min);
|
|
|
const uint32_t result = uint32_min(tmp, _max);
|
|
@@ -275,7 +275,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_iclamp(uint32_t _a, uint32_t _min, uint32_t _max)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_iclamp(uint32_t _a, uint32_t _min, uint32_t _max)
|
|
|
{
|
|
|
const uint32_t tmp = uint32_imax(_a, _min);
|
|
|
const uint32_t result = uint32_imin(tmp, _max);
|
|
@@ -283,7 +283,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max)
|
|
|
{
|
|
|
const uint32_t inc = uint32_inc(_val);
|
|
|
const uint32_t max_diff = uint32_sub(_max, _val);
|
|
@@ -295,7 +295,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max)
|
|
|
{
|
|
|
const uint32_t dec = uint32_dec(_val);
|
|
|
const uint32_t min_diff = uint32_sub(_min, _val);
|
|
@@ -307,7 +307,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cntbits_ref(uint32_t _val)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
|
|
|
{
|
|
|
const uint32_t tmp0 = uint32_srl(_val, 1);
|
|
|
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
|
|
@@ -330,19 +330,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- /// Count number of bits set.
|
|
|
- inline uint32_t uint32_cntbits(uint32_t _val)
|
|
|
- {
|
|
|
-#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
- return __builtin_popcount(_val);
|
|
|
-#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
|
|
|
- return __popcnt(_val);
|
|
|
-#else
|
|
|
- return uint32_cntbits_ref(_val);
|
|
|
-#endif // BX_COMPILER_
|
|
|
- }
|
|
|
-
|
|
|
- inline uint32_t uint32_cntlz_ref(uint32_t _val)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
|
|
|
{
|
|
|
const uint32_t tmp0 = uint32_srl(_val, 1);
|
|
|
const uint32_t tmp1 = uint32_or(tmp0, _val);
|
|
@@ -360,21 +348,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- /// Count number of leading zeros.
|
|
|
- inline uint32_t uint32_cntlz(uint32_t _val)
|
|
|
- {
|
|
|
-#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
- return __builtin_clz(_val);
|
|
|
-#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
|
|
|
- unsigned long index;
|
|
|
- _BitScanReverse(&index, _val);
|
|
|
- return 31 - index;
|
|
|
-#else
|
|
|
- return uint32_cntlz_ref(_val);
|
|
|
-#endif // BX_COMPILER_
|
|
|
- }
|
|
|
-
|
|
|
- inline uint32_t uint32_cnttz_ref(uint32_t _val)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
|
|
|
{
|
|
|
const uint32_t tmp0 = uint32_not(_val);
|
|
|
const uint32_t tmp1 = uint32_dec(_val);
|
|
@@ -384,25 +358,13 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_cnttz(uint32_t _val)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
|
|
|
{
|
|
|
-#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
- return __builtin_ctz(_val);
|
|
|
-#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
|
|
|
- unsigned long index;
|
|
|
- _BitScanForward(&index, _val);
|
|
|
- return index;
|
|
|
-#else
|
|
|
- return uint32_cnttz_ref(_val);
|
|
|
-#endif // BX_COMPILER_
|
|
|
- }
|
|
|
+ // shuffle:
|
|
|
+ // ---- ---- ---- ---- fedc ba98 7654 3210
|
|
|
+ // to:
|
|
|
+ // -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
|
|
|
|
|
|
- // shuffle:
|
|
|
- // ---- ---- ---- ---- fedc ba98 7654 3210
|
|
|
- // to:
|
|
|
- // -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
|
|
|
- inline uint32_t uint32_part1by1(uint32_t _a)
|
|
|
- {
|
|
|
const uint32_t val = uint32_and(_a, 0xffff);
|
|
|
|
|
|
const uint32_t tmp0 = uint32_sll(val, 8);
|
|
@@ -424,12 +386,13 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- // shuffle:
|
|
|
- // ---- ---- ---- ---- ---- --98 7654 3210
|
|
|
- // to:
|
|
|
- // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
|
|
|
- inline uint32_t uint32_part1by2(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by2(uint32_t _a)
|
|
|
{
|
|
|
+ // shuffle:
|
|
|
+ // ---- ---- ---- ---- ---- --98 7654 3210
|
|
|
+ // to:
|
|
|
+ // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
|
|
|
+
|
|
|
const uint32_t val = uint32_and(_a, 0x3ff);
|
|
|
|
|
|
const uint32_t tmp0 = uint32_sll(val, 16);
|
|
@@ -451,7 +414,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_testpow2(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_testpow2(uint32_t _a)
|
|
|
{
|
|
|
const uint32_t tmp0 = uint32_dec(_a);
|
|
|
const uint32_t tmp1 = uint32_xor(_a, tmp0);
|
|
@@ -461,7 +424,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint32_nextpow2(uint32_t _a)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_nextpow2(uint32_t _a)
|
|
|
{
|
|
|
const uint32_t tmp0 = uint32_dec(_a);
|
|
|
const uint32_t tmp1 = uint32_srl(tmp0, 1);
|
|
@@ -479,151 +442,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- inline uint16_t halfFromFloat(float _a)
|
|
|
- {
|
|
|
- union { uint32_t ui; float flt; } ftou;
|
|
|
- ftou.flt = _a;
|
|
|
-
|
|
|
- const uint32_t one = uint32_li(0x00000001);
|
|
|
- const uint32_t f_s_mask = uint32_li(0x80000000);
|
|
|
- const uint32_t f_e_mask = uint32_li(0x7f800000);
|
|
|
- const uint32_t f_m_mask = uint32_li(0x007fffff);
|
|
|
- const uint32_t f_m_hidden_bit = uint32_li(0x00800000);
|
|
|
- const uint32_t f_m_round_bit = uint32_li(0x00001000);
|
|
|
- const uint32_t f_snan_mask = uint32_li(0x7fc00000);
|
|
|
- const uint32_t f_e_pos = uint32_li(0x00000017);
|
|
|
- const uint32_t h_e_pos = uint32_li(0x0000000a);
|
|
|
- const uint32_t h_e_mask = uint32_li(0x00007c00);
|
|
|
- const uint32_t h_snan_mask = uint32_li(0x00007e00);
|
|
|
- const uint32_t h_e_mask_value = uint32_li(0x0000001f);
|
|
|
- const uint32_t f_h_s_pos_offset = uint32_li(0x00000010);
|
|
|
- const uint32_t f_h_bias_offset = uint32_li(0x00000070);
|
|
|
- const uint32_t f_h_m_pos_offset = uint32_li(0x0000000d);
|
|
|
- const uint32_t h_nan_min = uint32_li(0x00007c01);
|
|
|
- const uint32_t f_h_e_biased_flag = uint32_li(0x0000008f);
|
|
|
- const uint32_t f_s = uint32_and(ftou.ui, f_s_mask);
|
|
|
- const uint32_t f_e = uint32_and(ftou.ui, f_e_mask);
|
|
|
- const uint16_t h_s = (uint16_t)uint32_srl(f_s, f_h_s_pos_offset);
|
|
|
- const uint32_t f_m = uint32_and(ftou.ui, f_m_mask);
|
|
|
- const uint16_t f_e_amount = (uint16_t)uint32_srl(f_e, f_e_pos);
|
|
|
- const uint32_t f_e_half_bias = uint32_sub(f_e_amount, f_h_bias_offset);
|
|
|
- const uint32_t f_snan = uint32_and(ftou.ui, f_snan_mask);
|
|
|
- const uint32_t f_m_round_mask = uint32_and(f_m, f_m_round_bit);
|
|
|
- const uint32_t f_m_round_offset = uint32_sll(f_m_round_mask, one);
|
|
|
- const uint32_t f_m_rounded = uint32_add(f_m, f_m_round_offset);
|
|
|
- const uint32_t f_m_denorm_sa = uint32_sub(one, f_e_half_bias);
|
|
|
- const uint32_t f_m_with_hidden = uint32_or(f_m_rounded, f_m_hidden_bit);
|
|
|
- const uint32_t f_m_denorm = uint32_srl(f_m_with_hidden, f_m_denorm_sa);
|
|
|
- const uint32_t h_m_denorm = uint32_srl(f_m_denorm, f_h_m_pos_offset);
|
|
|
- const uint32_t f_m_rounded_overflow = uint32_and(f_m_rounded, f_m_hidden_bit);
|
|
|
- const uint32_t m_nan = uint32_srl(f_m, f_h_m_pos_offset);
|
|
|
- const uint32_t h_em_nan = uint32_or(h_e_mask, m_nan);
|
|
|
- const uint32_t h_e_norm_overflow_offset = uint32_inc(f_e_half_bias);
|
|
|
- const uint32_t h_e_norm_overflow = uint32_sll(h_e_norm_overflow_offset, h_e_pos);
|
|
|
- const uint32_t h_e_norm = uint32_sll(f_e_half_bias, h_e_pos);
|
|
|
- const uint32_t h_m_norm = uint32_srl(f_m_rounded, f_h_m_pos_offset);
|
|
|
- const uint32_t h_em_norm = uint32_or(h_e_norm, h_m_norm);
|
|
|
- const uint32_t is_h_ndenorm_msb = uint32_sub(f_h_bias_offset, f_e_amount);
|
|
|
- const uint32_t is_f_e_flagged_msb = uint32_sub(f_h_e_biased_flag, f_e_half_bias);
|
|
|
- const uint32_t is_h_denorm_msb = uint32_not(is_h_ndenorm_msb);
|
|
|
- const uint32_t is_f_m_eqz_msb = uint32_dec(f_m);
|
|
|
- const uint32_t is_h_nan_eqz_msb = uint32_dec(m_nan);
|
|
|
- const uint32_t is_f_inf_msb = uint32_and(is_f_e_flagged_msb, is_f_m_eqz_msb);
|
|
|
- const uint32_t is_f_nan_underflow_msb = uint32_and(is_f_e_flagged_msb, is_h_nan_eqz_msb);
|
|
|
- const uint32_t is_e_overflow_msb = uint32_sub(h_e_mask_value, f_e_half_bias);
|
|
|
- const uint32_t is_h_inf_msb = uint32_or(is_e_overflow_msb, is_f_inf_msb);
|
|
|
- const uint32_t is_f_nsnan_msb = uint32_sub(f_snan, f_snan_mask);
|
|
|
- const uint32_t is_m_norm_overflow_msb = uint32_neg(f_m_rounded_overflow);
|
|
|
- const uint32_t is_f_snan_msb = uint32_not(is_f_nsnan_msb);
|
|
|
- const uint32_t h_em_overflow_result = uint32_sels(is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm);
|
|
|
- const uint32_t h_em_nan_result = uint32_sels(is_f_e_flagged_msb, h_em_nan, h_em_overflow_result);
|
|
|
- const uint32_t h_em_nan_underflow_result = uint32_sels(is_f_nan_underflow_msb, h_nan_min, h_em_nan_result);
|
|
|
- const uint32_t h_em_inf_result = uint32_sels(is_h_inf_msb, h_e_mask, h_em_nan_underflow_result);
|
|
|
- const uint32_t h_em_denorm_result = uint32_sels(is_h_denorm_msb, h_m_denorm, h_em_inf_result);
|
|
|
- const uint32_t h_em_snan_result = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result);
|
|
|
- const uint32_t h_result = uint32_or(h_s, h_em_snan_result);
|
|
|
-
|
|
|
- return (uint16_t)(h_result);
|
|
|
- }
|
|
|
-
|
|
|
- inline float halfToFloat(uint16_t _a)
|
|
|
- {
|
|
|
- const uint32_t h_e_mask = uint32_li(0x00007c00);
|
|
|
- const uint32_t h_m_mask = uint32_li(0x000003ff);
|
|
|
- const uint32_t h_s_mask = uint32_li(0x00008000);
|
|
|
- const uint32_t h_f_s_pos_offset = uint32_li(0x00000010);
|
|
|
- const uint32_t h_f_e_pos_offset = uint32_li(0x0000000d);
|
|
|
- const uint32_t h_f_bias_offset = uint32_li(0x0001c000);
|
|
|
- const uint32_t f_e_mask = uint32_li(0x7f800000);
|
|
|
- const uint32_t f_m_mask = uint32_li(0x007fffff);
|
|
|
- const uint32_t h_f_e_denorm_bias = uint32_li(0x0000007e);
|
|
|
- const uint32_t h_f_m_denorm_sa_bias = uint32_li(0x00000008);
|
|
|
- const uint32_t f_e_pos = uint32_li(0x00000017);
|
|
|
- const uint32_t h_e_mask_minus_one = uint32_li(0x00007bff);
|
|
|
- const uint32_t h_e = uint32_and(_a, h_e_mask);
|
|
|
- const uint32_t h_m = uint32_and(_a, h_m_mask);
|
|
|
- const uint32_t h_s = uint32_and(_a, h_s_mask);
|
|
|
- const uint32_t h_e_f_bias = uint32_add(h_e, h_f_bias_offset);
|
|
|
- const uint32_t h_m_nlz = uint32_cntlz(h_m);
|
|
|
- const uint32_t f_s = uint32_sll(h_s, h_f_s_pos_offset);
|
|
|
- const uint32_t f_e = uint32_sll(h_e_f_bias, h_f_e_pos_offset);
|
|
|
- const uint32_t f_m = uint32_sll(h_m, h_f_e_pos_offset);
|
|
|
- const uint32_t f_em = uint32_or(f_e, f_m);
|
|
|
- const uint32_t h_f_m_sa = uint32_sub(h_m_nlz, h_f_m_denorm_sa_bias);
|
|
|
- const uint32_t f_e_denorm_unpacked = uint32_sub(h_f_e_denorm_bias, h_f_m_sa);
|
|
|
- const uint32_t h_f_m = uint32_sll(h_m, h_f_m_sa);
|
|
|
- const uint32_t f_m_denorm = uint32_and(h_f_m, f_m_mask);
|
|
|
- const uint32_t f_e_denorm = uint32_sll(f_e_denorm_unpacked, f_e_pos);
|
|
|
- const uint32_t f_em_denorm = uint32_or(f_e_denorm, f_m_denorm);
|
|
|
- const uint32_t f_em_nan = uint32_or(f_e_mask, f_m);
|
|
|
- const uint32_t is_e_eqz_msb = uint32_dec(h_e);
|
|
|
- const uint32_t is_m_nez_msb = uint32_neg(h_m);
|
|
|
- const uint32_t is_e_flagged_msb = uint32_sub(h_e_mask_minus_one, h_e);
|
|
|
- const uint32_t is_zero_msb = uint32_andc(is_e_eqz_msb, is_m_nez_msb);
|
|
|
- const uint32_t is_inf_msb = uint32_andc(is_e_flagged_msb, is_m_nez_msb);
|
|
|
- const uint32_t is_denorm_msb = uint32_and(is_m_nez_msb, is_e_eqz_msb);
|
|
|
- const uint32_t is_nan_msb = uint32_and(is_e_flagged_msb, is_m_nez_msb);
|
|
|
- const uint32_t is_zero = uint32_ext(is_zero_msb);
|
|
|
- const uint32_t f_zero_result = uint32_andc(f_em, is_zero);
|
|
|
- const uint32_t f_denorm_result = uint32_sels(is_denorm_msb, f_em_denorm, f_zero_result);
|
|
|
- const uint32_t f_inf_result = uint32_sels(is_inf_msb, f_e_mask, f_denorm_result);
|
|
|
- const uint32_t f_nan_result = uint32_sels(is_nan_msb, f_em_nan, f_inf_result);
|
|
|
- const uint32_t f_result = uint32_or(f_s, f_nan_result);
|
|
|
-
|
|
|
- union { uint32_t ui; float flt; } utof;
|
|
|
- utof.ui = f_result;
|
|
|
- return utof.flt;
|
|
|
- }
|
|
|
-
|
|
|
- inline uint16_t uint16_min(uint16_t _a, uint16_t _b)
|
|
|
- {
|
|
|
- return _a > _b ? _b : _a;
|
|
|
- }
|
|
|
-
|
|
|
- inline uint16_t uint16_max(uint16_t _a, uint16_t _b)
|
|
|
- {
|
|
|
- return _a < _b ? _b : _a;
|
|
|
- }
|
|
|
-
|
|
|
- inline int64_t int64_min(int64_t _a, int64_t _b)
|
|
|
- {
|
|
|
- return _a < _b ? _a : _b;
|
|
|
- }
|
|
|
-
|
|
|
- inline int64_t int64_max(int64_t _a, int64_t _b)
|
|
|
- {
|
|
|
- return _a > _b ? _a : _b;
|
|
|
- }
|
|
|
-
|
|
|
- inline int64_t int64_clamp(int64_t _a, int64_t _min, int64_t _max)
|
|
|
- {
|
|
|
- const int64_t min = int64_min(_a, _max);
|
|
|
- const int64_t result = int64_max(_min, min);
|
|
|
-
|
|
|
- return result;
|
|
|
- }
|
|
|
-
|
|
|
- inline uint32_t uint64_cntbits_ref(uint64_t _val)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint64_cntbits(uint64_t _val)
|
|
|
{
|
|
|
const uint32_t lo = uint32_t(_val&UINT32_MAX);
|
|
|
const uint32_t hi = uint32_t(_val>>32);
|
|
@@ -633,19 +452,7 @@ namespace bx
|
|
|
return total;
|
|
|
}
|
|
|
|
|
|
- /// Count number of bits set.
|
|
|
- inline uint32_t uint64_cntbits(uint64_t _val)
|
|
|
- {
|
|
|
-#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
- return __builtin_popcountll(_val);
|
|
|
-#elif BX_COMPILER_MSVC && BX_ARCH_64BIT
|
|
|
- return uint32_t(__popcnt64(_val) );
|
|
|
-#else
|
|
|
- return uint64_cntbits_ref(_val);
|
|
|
-#endif // BX_COMPILER_
|
|
|
- }
|
|
|
-
|
|
|
- inline uint32_t uint64_cntlz_ref(uint64_t _val)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint64_cntlz(uint64_t _val)
|
|
|
{
|
|
|
return _val & UINT64_C(0xffffffff00000000)
|
|
|
? uint32_cntlz(uint32_t(_val>>32) )
|
|
@@ -653,21 +460,7 @@ namespace bx
|
|
|
;
|
|
|
}
|
|
|
|
|
|
- /// Count number of leading zeros.
|
|
|
- inline uint32_t uint64_cntlz(uint64_t _val)
|
|
|
- {
|
|
|
-#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
- return __builtin_clzll(_val);
|
|
|
-#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS && BX_ARCH_64BIT
|
|
|
- unsigned long index;
|
|
|
- _BitScanReverse64(&index, _val);
|
|
|
- return uint32_t(63 - index);
|
|
|
-#else
|
|
|
- return uint64_cntlz_ref(_val);
|
|
|
-#endif // BX_COMPILER_
|
|
|
- }
|
|
|
-
|
|
|
- inline uint32_t uint64_cnttz_ref(uint64_t _val)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint64_cnttz(uint64_t _val)
|
|
|
{
|
|
|
return _val & UINT64_C(0xffffffff)
|
|
|
? uint32_cnttz(uint32_t(_val) )
|
|
@@ -675,65 +468,51 @@ namespace bx
|
|
|
;
|
|
|
}
|
|
|
|
|
|
- inline uint32_t uint64_cnttz(uint64_t _val)
|
|
|
- {
|
|
|
-#if BX_COMPILER_GCC || BX_COMPILER_CLANG
|
|
|
- return __builtin_ctzll(_val);
|
|
|
-#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS && BX_ARCH_64BIT
|
|
|
- unsigned long index;
|
|
|
- _BitScanForward64(&index, _val);
|
|
|
- return uint32_t(index);
|
|
|
-#else
|
|
|
- return uint64_cnttz_ref(_val);
|
|
|
-#endif // BX_COMPILER_
|
|
|
- }
|
|
|
-
|
|
|
- inline uint64_t uint64_sll(uint64_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_sll(uint64_t _a, int32_t _sa)
|
|
|
{
|
|
|
return _a << _sa;
|
|
|
}
|
|
|
|
|
|
- inline uint64_t uint64_srl(uint64_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_srl(uint64_t _a, int32_t _sa)
|
|
|
{
|
|
|
return _a >> _sa;
|
|
|
}
|
|
|
|
|
|
- inline uint64_t uint64_sra(uint64_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_sra(uint64_t _a, int32_t _sa)
|
|
|
{
|
|
|
return ( (int64_t)_a) >> _sa;
|
|
|
}
|
|
|
|
|
|
- inline uint64_t uint64_rol(uint64_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_rol(uint64_t _a, int32_t _sa)
|
|
|
{
|
|
|
- return ( _a << _sa) | (_a >> (32-_sa) );
|
|
|
+ return ( _a << _sa) | (_a >> (64-_sa) );
|
|
|
}
|
|
|
|
|
|
- inline uint64_t uint64_ror(uint64_t _a, int _sa)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_ror(uint64_t _a, int32_t _sa)
|
|
|
{
|
|
|
- return ( _a >> _sa) | (_a << (32-_sa) );
|
|
|
+ return ( _a >> _sa) | (_a << (64-_sa) );
|
|
|
}
|
|
|
|
|
|
- inline uint64_t uint64_add(uint64_t _a, uint64_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_add(uint64_t _a, uint64_t _b)
|
|
|
{
|
|
|
return _a + _b;
|
|
|
}
|
|
|
|
|
|
- inline uint64_t uint64_sub(uint64_t _a, uint64_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_sub(uint64_t _a, uint64_t _b)
|
|
|
{
|
|
|
return _a - _b;
|
|
|
}
|
|
|
|
|
|
- inline uint64_t uint64_mul(uint64_t _a, uint64_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint64_t uint64_mul(uint64_t _a, uint64_t _b)
|
|
|
{
|
|
|
return _a * _b;
|
|
|
}
|
|
|
|
|
|
- /// Greatest common divisor.
|
|
|
- inline uint32_t uint32_gcd(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_gcd(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
do
|
|
|
{
|
|
|
- uint32_t tmp = _a % _b;
|
|
|
+ const uint32_t tmp = uint32_mod(_a, _b);
|
|
|
_a = _b;
|
|
|
_b = tmp;
|
|
|
}
|
|
@@ -742,14 +521,12 @@ namespace bx
|
|
|
return _a;
|
|
|
}
|
|
|
|
|
|
- /// Least common multiple.
|
|
|
- inline uint32_t uint32_lcm(uint32_t _a, uint32_t _b)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t uint32_lcm(uint32_t _a, uint32_t _b)
|
|
|
{
|
|
|
return _a * (_b / uint32_gcd(_a, _b) );
|
|
|
}
|
|
|
|
|
|
- /// Align to arbitrary stride.
|
|
|
- inline uint32_t strideAlign(uint32_t _offset, uint32_t _stride)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t strideAlign(uint32_t _offset, uint32_t _stride)
|
|
|
{
|
|
|
const uint32_t mod = uint32_mod(_offset, _stride);
|
|
|
const uint32_t add = uint32_sub(_stride, mod);
|
|
@@ -760,8 +537,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- /// Align to arbitrary stride and 16-bytes.
|
|
|
- inline uint32_t strideAlign16(uint32_t _offset, uint32_t _stride)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t strideAlign16(uint32_t _offset, uint32_t _stride)
|
|
|
{
|
|
|
const uint32_t align = uint32_lcm(16, _stride);
|
|
|
const uint32_t mod = uint32_mod(_offset, align);
|
|
@@ -773,8 +549,7 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- /// Align to arbitrary stride and 256-bytes.
|
|
|
- inline uint32_t strideAlign256(uint32_t _offset, uint32_t _stride)
|
|
|
+ inline BX_CONSTEXPR_FUNC uint32_t strideAlign256(uint32_t _offset, uint32_t _stride)
|
|
|
{
|
|
|
const uint32_t align = uint32_lcm(256, _stride);
|
|
|
const uint32_t mod = uint32_mod(_offset, align);
|
|
@@ -786,4 +561,120 @@ namespace bx
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
+ inline BX_CONST_FUNC uint16_t halfFromFloat(float _a)
|
|
|
+ {
|
|
|
+ union { uint32_t ui; float flt; } ftou;
|
|
|
+ ftou.flt = _a;
|
|
|
+
|
|
|
+ const uint32_t one = uint32_li(0x00000001);
|
|
|
+ const uint32_t f_s_mask = uint32_li(0x80000000);
|
|
|
+ const uint32_t f_e_mask = uint32_li(0x7f800000);
|
|
|
+ const uint32_t f_m_mask = uint32_li(0x007fffff);
|
|
|
+ const uint32_t f_m_hidden_bit = uint32_li(0x00800000);
|
|
|
+ const uint32_t f_m_round_bit = uint32_li(0x00001000);
|
|
|
+ const uint32_t f_snan_mask = uint32_li(0x7fc00000);
|
|
|
+ const uint32_t f_e_pos = uint32_li(0x00000017);
|
|
|
+ const uint32_t h_e_pos = uint32_li(0x0000000a);
|
|
|
+ const uint32_t h_e_mask = uint32_li(0x00007c00);
|
|
|
+ const uint32_t h_snan_mask = uint32_li(0x00007e00);
|
|
|
+ const uint32_t h_e_mask_value = uint32_li(0x0000001f);
|
|
|
+ const uint32_t f_h_s_pos_offset = uint32_li(0x00000010);
|
|
|
+ const uint32_t f_h_bias_offset = uint32_li(0x00000070);
|
|
|
+ const uint32_t f_h_m_pos_offset = uint32_li(0x0000000d);
|
|
|
+ const uint32_t h_nan_min = uint32_li(0x00007c01);
|
|
|
+ const uint32_t f_h_e_biased_flag = uint32_li(0x0000008f);
|
|
|
+ const uint32_t f_s = uint32_and(ftou.ui, f_s_mask);
|
|
|
+ const uint32_t f_e = uint32_and(ftou.ui, f_e_mask);
|
|
|
+ const uint16_t h_s = (uint16_t)uint32_srl(f_s, f_h_s_pos_offset);
|
|
|
+ const uint32_t f_m = uint32_and(ftou.ui, f_m_mask);
|
|
|
+ const uint16_t f_e_amount = (uint16_t)uint32_srl(f_e, f_e_pos);
|
|
|
+ const uint32_t f_e_half_bias = uint32_sub(f_e_amount, f_h_bias_offset);
|
|
|
+ const uint32_t f_snan = uint32_and(ftou.ui, f_snan_mask);
|
|
|
+ const uint32_t f_m_round_mask = uint32_and(f_m, f_m_round_bit);
|
|
|
+ const uint32_t f_m_round_offset = uint32_sll(f_m_round_mask, one);
|
|
|
+ const uint32_t f_m_rounded = uint32_add(f_m, f_m_round_offset);
|
|
|
+ const uint32_t f_m_denorm_sa = uint32_sub(one, f_e_half_bias);
|
|
|
+ const uint32_t f_m_with_hidden = uint32_or(f_m_rounded, f_m_hidden_bit);
|
|
|
+ const uint32_t f_m_denorm = uint32_srl(f_m_with_hidden, f_m_denorm_sa);
|
|
|
+ const uint32_t h_m_denorm = uint32_srl(f_m_denorm, f_h_m_pos_offset);
|
|
|
+ const uint32_t f_m_rounded_overflow = uint32_and(f_m_rounded, f_m_hidden_bit);
|
|
|
+ const uint32_t m_nan = uint32_srl(f_m, f_h_m_pos_offset);
|
|
|
+ const uint32_t h_em_nan = uint32_or(h_e_mask, m_nan);
|
|
|
+ const uint32_t h_e_norm_overflow_offset = uint32_inc(f_e_half_bias);
|
|
|
+ const uint32_t h_e_norm_overflow = uint32_sll(h_e_norm_overflow_offset, h_e_pos);
|
|
|
+ const uint32_t h_e_norm = uint32_sll(f_e_half_bias, h_e_pos);
|
|
|
+ const uint32_t h_m_norm = uint32_srl(f_m_rounded, f_h_m_pos_offset);
|
|
|
+ const uint32_t h_em_norm = uint32_or(h_e_norm, h_m_norm);
|
|
|
+ const uint32_t is_h_ndenorm_msb = uint32_sub(f_h_bias_offset, f_e_amount);
|
|
|
+ const uint32_t is_f_e_flagged_msb = uint32_sub(f_h_e_biased_flag, f_e_half_bias);
|
|
|
+ const uint32_t is_h_denorm_msb = uint32_not(is_h_ndenorm_msb);
|
|
|
+ const uint32_t is_f_m_eqz_msb = uint32_dec(f_m);
|
|
|
+ const uint32_t is_h_nan_eqz_msb = uint32_dec(m_nan);
|
|
|
+ const uint32_t is_f_inf_msb = uint32_and(is_f_e_flagged_msb, is_f_m_eqz_msb);
|
|
|
+ const uint32_t is_f_nan_underflow_msb = uint32_and(is_f_e_flagged_msb, is_h_nan_eqz_msb);
|
|
|
+ const uint32_t is_e_overflow_msb = uint32_sub(h_e_mask_value, f_e_half_bias);
|
|
|
+ const uint32_t is_h_inf_msb = uint32_or(is_e_overflow_msb, is_f_inf_msb);
|
|
|
+ const uint32_t is_f_nsnan_msb = uint32_sub(f_snan, f_snan_mask);
|
|
|
+ const uint32_t is_m_norm_overflow_msb = uint32_neg(f_m_rounded_overflow);
|
|
|
+ const uint32_t is_f_snan_msb = uint32_not(is_f_nsnan_msb);
|
|
|
+ const uint32_t h_em_overflow_result = uint32_sels(is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm);
|
|
|
+ const uint32_t h_em_nan_result = uint32_sels(is_f_e_flagged_msb, h_em_nan, h_em_overflow_result);
|
|
|
+ const uint32_t h_em_nan_underflow_result = uint32_sels(is_f_nan_underflow_msb, h_nan_min, h_em_nan_result);
|
|
|
+ const uint32_t h_em_inf_result = uint32_sels(is_h_inf_msb, h_e_mask, h_em_nan_underflow_result);
|
|
|
+ const uint32_t h_em_denorm_result = uint32_sels(is_h_denorm_msb, h_m_denorm, h_em_inf_result);
|
|
|
+ const uint32_t h_em_snan_result = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result);
|
|
|
+ const uint32_t h_result = uint32_or(h_s, h_em_snan_result);
|
|
|
+
|
|
|
+ return (uint16_t)(h_result);
|
|
|
+ }
|
|
|
+
|
|
|
+ inline BX_CONST_FUNC float halfToFloat(uint16_t _a)
|
|
|
+ {
|
|
|
+ const uint32_t h_e_mask = uint32_li(0x00007c00);
|
|
|
+ const uint32_t h_m_mask = uint32_li(0x000003ff);
|
|
|
+ const uint32_t h_s_mask = uint32_li(0x00008000);
|
|
|
+ const uint32_t h_f_s_pos_offset = uint32_li(0x00000010);
|
|
|
+ const uint32_t h_f_e_pos_offset = uint32_li(0x0000000d);
|
|
|
+ const uint32_t h_f_bias_offset = uint32_li(0x0001c000);
|
|
|
+ const uint32_t f_e_mask = uint32_li(0x7f800000);
|
|
|
+ const uint32_t f_m_mask = uint32_li(0x007fffff);
|
|
|
+ const uint32_t h_f_e_denorm_bias = uint32_li(0x0000007e);
|
|
|
+ const uint32_t h_f_m_denorm_sa_bias = uint32_li(0x00000008);
|
|
|
+ const uint32_t f_e_pos = uint32_li(0x00000017);
|
|
|
+ const uint32_t h_e_mask_minus_one = uint32_li(0x00007bff);
|
|
|
+ const uint32_t h_e = uint32_and(_a, h_e_mask);
|
|
|
+ const uint32_t h_m = uint32_and(_a, h_m_mask);
|
|
|
+ const uint32_t h_s = uint32_and(_a, h_s_mask);
|
|
|
+ const uint32_t h_e_f_bias = uint32_add(h_e, h_f_bias_offset);
|
|
|
+ const uint32_t h_m_nlz = uint32_cntlz(h_m);
|
|
|
+ const uint32_t f_s = uint32_sll(h_s, h_f_s_pos_offset);
|
|
|
+ const uint32_t f_e = uint32_sll(h_e_f_bias, h_f_e_pos_offset);
|
|
|
+ const uint32_t f_m = uint32_sll(h_m, h_f_e_pos_offset);
|
|
|
+ const uint32_t f_em = uint32_or(f_e, f_m);
|
|
|
+ const uint32_t h_f_m_sa = uint32_sub(h_m_nlz, h_f_m_denorm_sa_bias);
|
|
|
+ const uint32_t f_e_denorm_unpacked = uint32_sub(h_f_e_denorm_bias, h_f_m_sa);
|
|
|
+ const uint32_t h_f_m = uint32_sll(h_m, h_f_m_sa);
|
|
|
+ const uint32_t f_m_denorm = uint32_and(h_f_m, f_m_mask);
|
|
|
+ const uint32_t f_e_denorm = uint32_sll(f_e_denorm_unpacked, f_e_pos);
|
|
|
+ const uint32_t f_em_denorm = uint32_or(f_e_denorm, f_m_denorm);
|
|
|
+ const uint32_t f_em_nan = uint32_or(f_e_mask, f_m);
|
|
|
+ const uint32_t is_e_eqz_msb = uint32_dec(h_e);
|
|
|
+ const uint32_t is_m_nez_msb = uint32_neg(h_m);
|
|
|
+ const uint32_t is_e_flagged_msb = uint32_sub(h_e_mask_minus_one, h_e);
|
|
|
+ const uint32_t is_zero_msb = uint32_andc(is_e_eqz_msb, is_m_nez_msb);
|
|
|
+ const uint32_t is_inf_msb = uint32_andc(is_e_flagged_msb, is_m_nez_msb);
|
|
|
+ const uint32_t is_denorm_msb = uint32_and(is_m_nez_msb, is_e_eqz_msb);
|
|
|
+ const uint32_t is_nan_msb = uint32_and(is_e_flagged_msb, is_m_nez_msb);
|
|
|
+ const uint32_t is_zero = uint32_ext(is_zero_msb);
|
|
|
+ const uint32_t f_zero_result = uint32_andc(f_em, is_zero);
|
|
|
+ const uint32_t f_denorm_result = uint32_sels(is_denorm_msb, f_em_denorm, f_zero_result);
|
|
|
+ const uint32_t f_inf_result = uint32_sels(is_inf_msb, f_e_mask, f_denorm_result);
|
|
|
+ const uint32_t f_nan_result = uint32_sels(is_nan_msb, f_em_nan, f_inf_result);
|
|
|
+ const uint32_t f_result = uint32_or(f_s, f_nan_result);
|
|
|
+
|
|
|
+ union { uint32_t ui; float flt; } utof;
|
|
|
+ utof.ui = f_result;
|
|
|
+ return utof.flt;
|
|
|
+ }
|
|
|
+
|
|
|
} // namespace bx
|