Browse Source

Moved math functions to be constexpr.

Бранимир Караџић 1 year ago
parent
commit
7eace058a2
8 changed files with 336 additions and 318 deletions
  1. 3 0
      include/bx/bx.h
  2. 3 6
      include/bx/constants.h
  3. 5 0
      include/bx/inline/bx.inl
  4. 244 40
      include/bx/inline/math.inl
  5. 2 2
      include/bx/inline/simd128_ref.inl
  6. 26 26
      include/bx/math.h
  7. 0 215
      src/math.cpp
  8. 53 29
      tests/math_test.cpp

+ 3 - 0
include/bx/bx.h

@@ -216,6 +216,9 @@ namespace bx
 	template<typename Ty>
 	constexpr bool isPowerOf2(Ty _a);
 
+	/// Returns true when the call happens during constant evaluation (e.g. while initializing a constexpr variable).
+	constexpr bool isConstantEvaluated();
+
 	/// Returns a value of type `Ty` by reinterpreting the object representation of `FromT`.
 	template <typename Ty, typename FromT>
 	constexpr Ty bitCast(const FromT& _from);

+ 3 - 6
include/bx/constants.h

@@ -42,10 +42,7 @@ namespace bx
 	constexpr float    kInvLogNat2     = 1.4426950408889634073599246810019f;
 
 	/// The natural logarithm of 2. ln(2)
-	constexpr float    kLogNat2Hi      = 0.6931471805599453094172321214582f;
-
-	///
-	constexpr float    kLogNat2Lo      = 1.90821492927058770002e-10f;
+	constexpr float    kLogNat2        = 0.6931471805599453094172321214582f;
 
 	/// The base of natural logarithms. e(1)
 	constexpr float    kE              = 2.7182818284590452353602874713527f;
@@ -83,7 +80,7 @@ namespace bx
 	/// Maximum representable floating-point number.
 	constexpr float    kFloatLargest   = 3.402823466e+38f;
 
-	///
+	/// Floating-point infinity.
 //	constexpr float    kFloatInfinity;
 
 	///
@@ -104,7 +101,7 @@ namespace bx
 	/// Largest representable double-precision floating-point number.
 	constexpr double   kDoubleLargest  = 1.7976931348623158e+308;
 
-	//
+	// Double-precision floating-point infinity.
 //	constexpr double   kDoubleInfinity;
 
 } // namespace bx

+ 5 - 0
include/bx/inline/bx.inl

@@ -147,6 +147,11 @@ namespace bx
 		return _a && !(_a & (_a - 1) );
 	}
 
+	// Reports whether the current call is being evaluated as part of a constant
+	// expression by forwarding to the compiler builtin
+	// `__builtin_is_constant_evaluated`.
+	constexpr bool isConstantEvaluated()
+	{
+		return __builtin_is_constant_evaluated();
+	}
+
 	template <typename Ty, typename FromT>
 	inline constexpr Ty bitCast(const FromT& _from)
 	{

+ 244 - 40
include/bx/inline/math.inl

@@ -158,6 +158,76 @@ namespace bx
 		return _a * _a;
 	}
 
+	// Approximates the cosine of _a (radians).
+	//
+	// The argument is split into a quarter-turn index (`bits`, taken modulo 4) and
+	// a remainder `xx`. Depending on the quarter, either the cosine coefficient set
+	// (with c0 == 1) or the sine coefficient set (with c0 == xx) is evaluated as a
+	// polynomial in xx*xx, and the sign is flipped for quarters 1 and 2.
+	// NOTE(review): the Horner description assumes mad(a, b, c) computes a*b + c.
+	inline BX_CONSTEXPR_FUNC float cos(float _a)
+	{
+		// Number of quarter turns (pi/2) contained in _a, rounded down.
+		const float scaled = _a * 2.0f*kInvPi;
+		const float real   = floor(scaled);
+		// Remainder of _a after removing the whole quarter turns.
+		const float xx     = _a - real * kPiHalf;
+		// Quarter index in [0, 3].
+		const int32_t bits = int32_t(real) & 3;
+
+		constexpr float kSinC2  = -0.16666667163372039794921875f;
+		constexpr float kSinC4  =  8.333347737789154052734375e-3f;
+		constexpr float kSinC6  = -1.9842604524455964565277099609375e-4f;
+		constexpr float kSinC8  =  2.760012648650445044040679931640625e-6f;
+		constexpr float kSinC10 = -2.50293279435709337121807038784027099609375e-8f;
+
+		constexpr float kCosC2  = -0.5f;
+		constexpr float kCosC4  =  4.166664183139801025390625e-2f;
+		constexpr float kCosC6  = -1.388833043165504932403564453125e-3f;
+		constexpr float kCosC8  =  2.47562347794882953166961669921875e-5f;
+		constexpr float kCosC10 = -2.59630184018533327616751194000244140625e-7f;
+
+		// Default to the sine coefficient set (odd quarters); the final product
+		// with c0 == xx turns the even polynomial into an odd one.
+		float c0  = xx;
+		float c2  = kSinC2;
+		float c4  = kSinC4;
+		float c6  = kSinC6;
+		float c8  = kSinC8;
+		float c10 = kSinC10;
+
+		// Even quarters use the cosine coefficient set instead.
+		if (bits == 0
+		||  bits == 2)
+		{
+			c0  = 1.0f;
+			c2  = kCosC2;
+			c4  = kCosC4;
+			c6  = kCosC6;
+			c8  = kCosC8;
+			c10 = kCosC10;
+		}
+
+		const float xsq    = square(xx);
+		const float tmp0   = mad(c10,  xsq, c8 );
+		const float tmp1   = mad(tmp0, xsq, c6 );
+		const float tmp2   = mad(tmp1, xsq, c4 );
+		const float tmp3   = mad(tmp2, xsq, c2 );
+		// Float literal keeps every mad() operand the same type (was the double 1.0).
+		const float tmp4   = mad(tmp3, xsq, 1.0f);
+		const float result = tmp4 * c0;
+
+		// Quarters 1 and 2 produce the negated value.
+		return bits == 1 || bits == 2
+			? -result
+			:  result
+			;
+	}
+
+	// Approximates the arc cosine of _a.
+	//
+	// A polynomial in |_a| built from kAcosC0..kAcosC3 is multiplied by
+	// sqrt(1 - |_a|); negative inputs are mapped to kPi minus that product.
+	// NOTE(review): assumes mad(a, b, c) computes a*b + c.
+	inline BX_CONSTEXPR_FUNC float acos(float _a)
+	{
+		constexpr float kAcosC0 =  1.5707288f;
+		constexpr float kAcosC1 = -0.2121144f;
+		constexpr float kAcosC2 =  0.0742610f;
+		constexpr float kAcosC3 = -0.0187293f;
+
+		const float absa   = abs(_a);
+		const float tmp0   = mad(kAcosC3, absa, kAcosC2);
+		const float tmp1   = mad(tmp0,    absa, kAcosC1);
+		const float tmp2   = mad(tmp1,    absa, kAcosC0);
+		const float tmp3   = tmp2 * sqrt(1.0f - absa);
+		// negate is 1.0f for negative input and 0.0f otherwise, so the next two
+		// lines give tmp3 for _a >= 0 and kPi - tmp3 for _a < 0 without branching.
+		const float negate = float(_a < 0.0f);
+		const float tmp4   = tmp3 - 2.0f*negate*tmp3;
+		const float result = negate*kPi + tmp4;
+
+		return result;
+	}
+
 	inline void sinCosApprox(float& _outSinApprox, float& _outCos, float _a)
 	{
 		const float aa     = _a - floor(_a*kInvPi2)*kPi2;
@@ -172,32 +242,32 @@ namespace bx
 		_outCos = cosA;
 	}
 
-	inline BX_CONST_FUNC float sin(float _a)
+	inline BX_CONSTEXPR_FUNC float sin(float _a)
 	{
 		return cos(_a - kPiHalf);
 	}
 
-	inline BX_CONST_FUNC float sinh(float _a)
+	inline BX_CONSTEXPR_FUNC float sinh(float _a)
 	{
 		return 0.5f*(exp(_a) - exp(-_a) );
 	}
 
-	inline BX_CONST_FUNC float asin(float _a)
+	inline BX_CONSTEXPR_FUNC float asin(float _a)
 	{
 		return kPiHalf - acos(_a);
 	}
 
-	inline BX_CONST_FUNC float cosh(float _a)
+	inline BX_CONSTEXPR_FUNC float cosh(float _a)
 	{
 		return 0.5f*(exp(_a) + exp(-_a) );
 	}
 
-	inline BX_CONST_FUNC float tan(float _a)
+	inline BX_CONSTEXPR_FUNC float tan(float _a)
 	{
 		return sin(_a) / cos(_a);
 	}
 
-	inline BX_CONST_FUNC float tanh(float _a)
+	inline BX_CONSTEXPR_FUNC float tanh(float _a)
 	{
 		const float tmp0   = exp(2.0f*_a);
 		const float tmp1   = tmp0 - 1.0f;
@@ -207,34 +277,57 @@ namespace bx
 		return result;
 	}
 
-	inline BX_CONST_FUNC float atan(float _a)
+	inline BX_CONSTEXPR_FUNC float atan(float _a)
 	{
 		return atan2(_a, 1.0f);
 	}
 
-	inline BX_CONST_FUNC float pow(float _a, float _b)
+	inline BX_CONSTEXPR_FUNC float atan2(float _y, float _x)
 	{
-		if (abs(_b) < kFloatSmallest)
-		{
-			return 1.0f;
-		}
+		const float ax     = abs(_x);
+		const float ay     = abs(_y);
+		const float maxaxy = max(ax, ay);
+		const float minaxy = min(ax, ay);
 
-		if (abs(_a) < kFloatSmallest)
+		if (maxaxy == 0.0f)
 		{
-			return 0.0f;
+			return _y < 0.0f ? -0.0f : 0.0f;
 		}
 
-		return copySign(exp(_b * log(abs(_a) ) ), _a);
-	}
+		constexpr float kAtan2C0 = -0.013480470f;
+		constexpr float kAtan2C1 =  0.057477314f;
+		constexpr float kAtan2C2 = -0.121239071f;
+		constexpr float kAtan2C3 =  0.195635925f;
+		constexpr float kAtan2C4 = -0.332994597f;
+		constexpr float kAtan2C5 =  0.999995630f;
+
+		const float mxy    = minaxy / maxaxy;
+		const float mxysq  = square(mxy);
+		const float tmp0   = mad(kAtan2C0, mxysq, kAtan2C1);
+		const float tmp1   = mad(tmp0,     mxysq, kAtan2C2);
+		const float tmp2   = mad(tmp1,     mxysq, kAtan2C3);
+		const float tmp3   = mad(tmp2,     mxysq, kAtan2C4);
+		const float tmp4   = mad(tmp3,     mxysq, kAtan2C5);
+		const float tmp5   = tmp4 * mxy;
+		const float tmp6   = ay > ax   ? kPiHalf - tmp5 : tmp5;
+		const float tmp7   = _x < 0.0f ? kPi     - tmp6 : tmp6;
+		const float result = _y < 0.0f ? -tmp7 : tmp7;
 
-	inline BX_CONST_FUNC float exp2(float _a)
-	{
-		return pow(2.0f, _a);
+		return result;
 	}
 
-	inline BX_CONST_FUNC float log2(float _a)
+	inline BX_CONSTEXPR_FUNC float frexp(float _a, int32_t* _outExp)
 	{
-		return log(_a) * kInvLogNat2;
+		const uint32_t ftob     = floatToBits(_a);
+		const uint32_t masked0  = uint32_and(ftob, kFloatExponentMask);
+		const uint32_t exp0     = uint32_srl(masked0, kFloatExponentBitShift);
+		const uint32_t masked1  = uint32_and(ftob,   kFloatSignMask | kFloatMantissaMask);
+		const uint32_t bits     = uint32_or(masked1, UINT32_C(0x3f000000) );
+		const float    result   = bitsToFloat(bits);
+
+		*_outExp = int32_t(exp0 - 0x7e);
+
+		return result;
 	}
 
 	inline BX_CONSTEXPR_FUNC float ldexp(float _a, int32_t _b)
@@ -251,6 +344,106 @@ namespace bx
 		return result;
 	}
 
+	// Approximates e raised to the _a power.
+	//
+	// Inputs with magnitude at or below kNearZero return _a + 1.0f directly.
+	// Otherwise _a is reduced by kk multiples of ln(2) (kk = round(_a*kInvLogNat2) ),
+	// a polynomial in the squared remainder is evaluated, and the result is
+	// rescaled with ldexp(..., kk).
+	inline BX_CONSTEXPR_FUNC float exp(float _a)
+	{
+		if (abs(_a) <= kNearZero)
+		{
+			return _a + 1.0f;
+		}
+
+		constexpr float kExpC0  =  1.66666666666666019037e-01f;
+		constexpr float kExpC1  = -2.77777777770155933842e-03f;
+		constexpr float kExpC2  =  6.61375632143793436117e-05f;
+		constexpr float kExpC3  = -1.65339022054652515390e-06f;
+		constexpr float kExpC4  =  4.13813679705723846039e-08f;
+		// Low-order companion of kLogNat2; the same value is declared locally in log().
+		constexpr float kLogNat2Lo = 1.90821492927058770002e-10f;
+
+		// kk: multiple of ln(2) to remove; hi/lo: remainder split into the part
+		// computed with kLogNat2 and the correction computed with kLogNat2Lo.
+		const float kk     = round(_a*kInvLogNat2);
+		const float hi     = _a - kk*kLogNat2;
+		const float lo     =      kk*kLogNat2Lo;
+		const float hml    = hi - lo;
+		const float hmlsq  = square(hml);
+		const float tmp0   = mad(kExpC4, hmlsq, kExpC3);
+		const float tmp1   = mad(tmp0,   hmlsq, kExpC2);
+		const float tmp2   = mad(tmp1,   hmlsq, kExpC1);
+		const float tmp3   = mad(tmp2,   hmlsq, kExpC0);
+		const float tmp4   = hml - hmlsq * tmp3;
+		const float tmp5   = hml*tmp4/(2.0f-tmp4);
+		const float tmp6   = 1.0f - ( (lo - tmp5) - hi);
+		// Scale by 2^kk to undo the range reduction.
+		const float result = ldexp(tmp6, int32_t(kk) );
+
+		return result;
+	}
+
+	// Approximates the natural logarithm of _a.
+	//
+	// frexp() splits _a into a fraction and a power-of-two exponent. Fractions
+	// below sqrt(2)/2 are doubled (and the exponent decremented), a polynomial in
+	// s = f/(2+f), with f = fraction - 1, approximates the logarithm of the
+	// fraction, and exponent*ln(2) is added back as separate kLogNat2 and
+	// kLogNat2Lo products.
+	inline BX_CONSTEXPR_FUNC float log(float _a)
+	{
+		constexpr float kLogC0     = 6.666666666666735130e-01f;
+		constexpr float kLogC1     = 3.999999999940941908e-01f;
+		constexpr float kLogC2     = 2.857142874366239149e-01f;
+		constexpr float kLogC3     = 2.222219843214978396e-01f;
+		constexpr float kLogC4     = 1.818357216161805012e-01f;
+		constexpr float kLogC5     = 1.531383769920937332e-01f;
+		constexpr float kLogC6     = 1.479819860511658591e-01f;
+		constexpr float kLogNat2Lo = 1.90821492927058770002e-10f;
+
+		int32_t exponent = 0;
+		float   fraction = frexp(_a, &exponent);
+
+		// Move small fractions up by one binary order of magnitude.
+		if (fraction < kSqrt2*0.5f)
+		{
+			fraction *= 2.0f;
+			exponent -= 1;
+		}
+
+		const float f        = fraction - 1.0f;
+		const float scale    = float(exponent);
+		const float hi       = scale*kLogNat2;
+		const float lo       = scale*kLogNat2Lo;
+		const float s        = f / (2.0f + f);
+		const float sSq      = square(s);
+		const float sQuad    = square(sSq);
+
+		// Even-indexed coefficients (C6, C4, C2, C0) in sQuad, scaled by sSq.
+		const float evenPoly = mad(mad(mad(kLogC6, sQuad, kLogC4), sQuad, kLogC2), sQuad, kLogC0);
+		const float evenTerm = sSq*evenPoly;
+
+		// Odd-indexed coefficients (C5, C3, C1) in sQuad, scaled by sQuad.
+		const float oddPoly  = mad(mad(kLogC5, sQuad, kLogC3), sQuad, kLogC1);
+		const float oddTerm  = sQuad*oddPoly;
+
+		const float poly     = evenTerm + oddTerm;
+		const float halfFSq  = 0.5f*square(f);
+
+		return hi - ( (halfFSq - (s*(halfFSq+poly) + lo) ) - f);
+	}
+
+	// Computes _a raised to the _b power as exp(_b * log(|_a|) ).
+	//
+	// An exponent with magnitude below kFloatSmallest yields 1.0f; after that
+	// check, a base with magnitude below kFloatSmallest yields 0.0f. The final
+	// value is passed through copySign with _a, independent of _b.
+	inline BX_CONSTEXPR_FUNC float pow(float _a, float _b)
+	{
+		// Checked first, so pow(0.0f, 0.0f) returns 1.0f.
+		if (abs(_b) < kFloatSmallest)
+		{
+			return 1.0f;
+		}
+
+		if (abs(_a) < kFloatSmallest)
+		{
+			return 0.0f;
+		}
+
+		return copySign(exp(_b * log(abs(_a) ) ), _a);
+	}
+
+	// Returns 2 raised to the _a power by delegating to pow(2.0f, _a).
+	inline BX_CONSTEXPR_FUNC float exp2(float _a)
+	{
+		return pow(2.0f, _a);
+	}
+
+	// Returns the base 2 logarithm of _a, computed as log(_a) scaled by kInvLogNat2.
+	inline BX_CONSTEXPR_FUNC float log2(float _a)
+	{
+		return log(_a) * kInvLogNat2;
+	}
+
 	template<>
 	inline BX_CONSTEXPR_FUNC uint8_t countBits(uint32_t _val)
 	{
@@ -437,7 +630,7 @@ namespace bx
 		return Ty(1)<<log2;
 	}
 
-	inline BX_CONST_FUNC float rsqrtRef(float _a)
+	inline BX_CONSTEXPR_FUNC float rsqrtRef(float _a)
 	{
 		if (_a < kFloatSmallest)
 		{
@@ -461,13 +654,13 @@ namespace bx
 		const simd128_t rsqrta = simd_rsqrt_ni(aa);
 #endif // BX_SIMD_NEON
 
-		float result;
+		float result = 0.0f;
 		simd_stx(&result, rsqrta);
 
 		return result;
 	}
 
-	inline BX_CONST_FUNC float sqrtRef(float _a)
+	inline BX_CONSTEXPR_FUNC float sqrtRef(float _a)
 	{
 		if (_a < 0.0f)
 		{
@@ -491,24 +684,34 @@ namespace bx
 		const simd128_t aa   = simd_splat(_a);
 		const simd128_t sqrt = simd_sqrt(aa);
 
-		float result;
+		float result = 0.0f;
 		simd_stx(&result, sqrt);
 
 		return result;
 	}
 
-	inline BX_CONST_FUNC float rsqrt(float _a)
+	// Returns the reciprocal square root of _a.
+	//
+	// With BX_SIMD_SUPPORTED the SIMD implementation is used, except during
+	// constant evaluation where the reference implementation is called instead
+	// (presumably because the SIMD path is not usable in constant expressions).
+	// Without BX_SIMD_SUPPORTED the reference implementation is always used.
+	inline BX_CONSTEXPR_FUNC float rsqrt(float _a)
 	{
 #if BX_SIMD_SUPPORTED
+		if (isConstantEvaluated() )
+		{
+			return rsqrtRef(_a);
+		}
+
 		return rsqrtSimd(_a);
 #else
 		return rsqrtRef(_a);
 #endif // BX_SIMD_SUPPORTED
 	}
 
-	inline BX_CONST_FUNC float sqrt(float _a)
+	inline BX_CONSTEXPR_FUNC float sqrt(float _a)
 	{
 #if BX_SIMD_SUPPORTED
+		if (isConstantEvaluated() )
+		{
+			return sqrtRef(_a);
+		}
+
 		return sqrtSimd(_a);
 #else
 		return sqrtRef(_a);
@@ -628,7 +831,7 @@ namespace bx
 		return square(_a)*(3.0f - 2.0f*_a);
 	}
 
-	inline BX_CONST_FUNC float invSmoothStep(float _a)
+	inline BX_CONSTEXPR_FUNC float invSmoothStep(float _a)
 	{
 		return 0.5f - sin(asin(1.0f - 2.0f * _a) / 3.0f);
 	}
@@ -906,18 +1109,18 @@ namespace bx
 		};
 	}
 
-	inline BX_CONST_FUNC float length(const Vec3 _a)
+	inline BX_CONSTEXPR_FUNC float length(const Vec3 _a)
 	{
 		return sqrt(dot(_a, _a) );
 	}
 
-	inline BX_CONST_FUNC float distanceSq(const Vec3 _a, const Vec3 _b)
+	inline BX_CONSTEXPR_FUNC float distanceSq(const Vec3 _a, const Vec3 _b)
 	{
 		const Vec3 ba = sub(_b, _a);
 		return dot(ba, ba);
 	}
 
-	inline BX_CONST_FUNC float distance(const Vec3 _a, const Vec3 _b)
+	inline BX_CONSTEXPR_FUNC float distance(const Vec3 _a, const Vec3 _b)
 	{
 		return length(sub(_b, _a) );
 	}
@@ -942,7 +1145,7 @@ namespace bx
 		};
 	}
 
-	inline BX_CONST_FUNC Vec3 normalize(const Vec3 _a)
+	inline BX_CONSTEXPR_FUNC Vec3 normalize(const Vec3 _a)
 	{
 		const float len   = length(_a);
 		const Vec3 result = divSafe(_a, len);
@@ -1035,9 +1238,8 @@ namespace bx
 		_outB = cross(_n, _outT);
 	}
 
-	inline BX_CONST_FUNC Vec3 fromLatLong(float _u, float _v)
+	inline BX_CONSTEXPR_FUNC Vec3 fromLatLong(float _u, float _v)
 	{
-		Vec3 result(InitNone);
 		const float phi   = _u * kPi2;
 		const float theta = _v * kPi;
 
@@ -1046,10 +1248,12 @@ namespace bx
 		const float ct = cos(theta);
 		const float cp = cos(phi);
 
-		result.x = -st*sp;
-		result.y =  ct;
-		result.z = -st*cp;
-		return result;
+		return
+		{
+			-st*sp,
+			 ct,
+			-st*cp,
+		};
 	}
 
 	inline void toLatLong(float* _outU, float* _outV, const Vec3 _dir)
@@ -1583,7 +1787,7 @@ namespace bx
 			;
 	}
 
-	inline BX_CONST_FUNC float toLinear(float _a)
+	inline BX_CONSTEXPR_FUNC float toLinear(float _a)
 	{
 		const float lo     = _a / 12.92f;
 		const float hi     = pow( (_a + 0.055f) / 1.055f, 2.4f);
@@ -1591,7 +1795,7 @@ namespace bx
 		return result;
 	}
 
-	inline BX_CONST_FUNC float toGamma(float _a)
+	inline BX_CONSTEXPR_FUNC float toGamma(float _a)
 	{
 		const float lo     = _a * 12.92f;
 		const float hi     = pow(abs(_a), 1.0f/2.4f) * 1.055f - 0.055f;

+ 2 - 2
include/bx/inline/simd128_ref.inl

@@ -9,8 +9,8 @@
 
 namespace bx
 {
-	BX_CONST_FUNC float sqrt(float);
-	BX_CONST_FUNC float rsqrt(float);
+	BX_CONSTEXPR_FUNC float sqrt(float);
+	BX_CONSTEXPR_FUNC float rsqrt(float);
 
 #define ELEMx 0
 #define ELEMy 1

+ 26 - 26
include/bx/math.h

@@ -212,47 +212,47 @@ namespace bx
 
 	/// Returns the sine of the argument _a.
 	///
-	BX_CONST_FUNC float sin(float _a);
+	BX_CONSTEXPR_FUNC float sin(float _a);
 
 	/// Returns hyperbolic sine of the argument _a.
 	///
-	BX_CONST_FUNC float sinh(float _a);
+	BX_CONSTEXPR_FUNC float sinh(float _a);
 
 	/// Returns radian angle between -pi/2 and +pi/2 whose sine is _a.
 	///
-	BX_CONST_FUNC float asin(float _a);
+	BX_CONSTEXPR_FUNC float asin(float _a);
 
 	/// Returns the cosine of the argument _a.
 	///
-	BX_CONST_FUNC float cos(float _a);
+	BX_CONSTEXPR_FUNC float cos(float _a);
 
 	/// Returns hyperbolic cosine of the argument _a.
 	///
-	BX_CONST_FUNC float cosh(float _a);
+	BX_CONSTEXPR_FUNC float cosh(float _a);
 
 	/// Returns radian angle between 0 and pi whose cosine is _a.
 	///
-	BX_CONST_FUNC float acos(float _a);
+	BX_CONSTEXPR_FUNC float acos(float _a);
 
 	/// Returns the circular tangent of the radian argument _a.
 	///
-	BX_CONST_FUNC float tan(float _a);
+	BX_CONSTEXPR_FUNC float tan(float _a);
 
 	/// Returns hyperbolic tangent of the argument _a.
 	///
-	BX_CONST_FUNC float tanh(float _a);
+	BX_CONSTEXPR_FUNC float tanh(float _a);
 
 	/// Returns radian angle between -pi/2 and +pi/2 whose tangent is _a.
 	///
-	BX_CONST_FUNC float atan(float _a);
+	BX_CONSTEXPR_FUNC float atan(float _a);
 
 	/// Returns the inverse tangent of _y/_x.
 	///
-	BX_CONST_FUNC float atan2(float _y, float _x);
+	BX_CONSTEXPR_FUNC float atan2(float _y, float _x);
 
 	/// Computes _a raised to the _b power.
 	///
-	BX_CONST_FUNC float pow(float _a, float _b);
+	BX_CONSTEXPR_FUNC float pow(float _a, float _b);
 
 	/// Returns the result of multiplying _a by 2 raised to the power of the exponent `_a * (2^_b)`.
 	///
@@ -261,23 +261,23 @@ namespace bx
 	/// Decomposes the given floating point value _a into a normalized fraction and
 	/// an integral power of two. Returns the fraction and stores the exponent in _outExp.
 	///
-	float frexp(float _a, int32_t* _outExp);
+	BX_CONSTEXPR_FUNC float frexp(float _a, int32_t* _outExp);
 
 	/// Returns e (2.71828...) raised to the _a power.
 	///
-	BX_CONST_FUNC float exp(float _a);
+	BX_CONSTEXPR_FUNC float exp(float _a);
 
 	/// Returns 2 raised to the _a power.
 	///
-	BX_CONST_FUNC float exp2(float _a);
+	BX_CONSTEXPR_FUNC float exp2(float _a);
 
 	/// Returns the base e (2.71828...) logarithm of _a.
 	///
-	BX_CONST_FUNC float log(float _a);
+	BX_CONSTEXPR_FUNC float log(float _a);
 
 	/// Returns the base 2 logarithm of _a.
 	///
-	BX_CONST_FUNC float log2(float _a);
+	BX_CONSTEXPR_FUNC float log2(float _a);
 
 	/// Count number of bits set.
 	///
@@ -321,11 +321,11 @@ namespace bx
 
 	/// Returns the square root of _a.
 	///
-	BX_CONST_FUNC float sqrt(float _a);
+	BX_CONSTEXPR_FUNC float sqrt(float _a);
 
 	/// Returns reciprocal square root of _a.
 	///
-	BX_CONST_FUNC float rsqrt(float _a);
+	BX_CONSTEXPR_FUNC float rsqrt(float _a);
 
 	/// Returns the nearest integer not greater in magnitude than _a.
 	///
@@ -403,7 +403,7 @@ namespace bx
 	BX_CONSTEXPR_FUNC float smoothStep(float _a);
 
 	///
-	BX_CONST_FUNC float invSmoothStep(float _a);
+	BX_CONSTEXPR_FUNC float invSmoothStep(float _a);
 
 	///
 	BX_CONSTEXPR_FUNC float bias(float _time, float _bias);
@@ -486,13 +486,13 @@ namespace bx
 	BX_CONSTEXPR_FUNC Vec3 cross(const Vec3 _a, const Vec3 _b);
 
 	///
-	BX_CONST_FUNC float length(const Vec3 _a);
+	BX_CONSTEXPR_FUNC float length(const Vec3 _a);
 
 	///
-	BX_CONST_FUNC float distanceSq(const Vec3 _a, const Vec3 _b);
+	BX_CONSTEXPR_FUNC float distanceSq(const Vec3 _a, const Vec3 _b);
 
 	///
-	BX_CONST_FUNC float distance(const Vec3 _a, const Vec3 _b);
+	BX_CONSTEXPR_FUNC float distance(const Vec3 _a, const Vec3 _b);
 
 	///
 	BX_CONSTEXPR_FUNC Vec3 lerp(const Vec3 _a, const Vec3 _b, float _t);
@@ -501,7 +501,7 @@ namespace bx
 	BX_CONSTEXPR_FUNC Vec3 lerp(const Vec3 _a, const Vec3 _b, const Vec3 _t);
 
 	///
-	BX_CONST_FUNC Vec3 normalize(const Vec3 _a);
+	BX_CONSTEXPR_FUNC Vec3 normalize(const Vec3 _a);
 
 	///
 	BX_CONSTEXPR_FUNC Vec3 min(const Vec3 _a, const Vec3 _b);
@@ -527,7 +527,7 @@ namespace bx
 	void calcTangentFrame(Vec3& _outT, Vec3& _outB, const Vec3 _n, float _angle);
 
 	///
-	BX_CONST_FUNC Vec3 fromLatLong(float _u, float _v);
+	BX_CONSTEXPR_FUNC Vec3 fromLatLong(float _u, float _v);
 
 	///
 	void toLatLong(float* _outU, float* _outV, const Vec3 _dir);
@@ -811,10 +811,10 @@ namespace bx
 	void hsvToRgb(float _rgb[3], const float _hsv[3]);
 
 	///
-	BX_CONST_FUNC float toLinear(float _a);
+	BX_CONSTEXPR_FUNC float toLinear(float _a);
 
 	///
-	BX_CONST_FUNC float toGamma(float _a);
+	BX_CONSTEXPR_FUNC float toGamma(float _a);
 
 } // namespace bx
 

+ 0 - 215
src/math.cpp

@@ -10,221 +10,6 @@
 
 namespace bx
 {
-	namespace
-	{
-		constexpr float kSinC2  = -0.16666667163372039794921875f;
-		constexpr float kSinC4  =  8.333347737789154052734375e-3f;
-		constexpr float kSinC6  = -1.9842604524455964565277099609375e-4f;
-		constexpr float kSinC8  =  2.760012648650445044040679931640625e-6f;
-		constexpr float kSinC10 = -2.50293279435709337121807038784027099609375e-8f;
-
-		constexpr float kCosC2  = -0.5f;
-		constexpr float kCosC4  =  4.166664183139801025390625e-2f;
-		constexpr float kCosC6  = -1.388833043165504932403564453125e-3f;
-		constexpr float kCosC8  =  2.47562347794882953166961669921875e-5f;
-		constexpr float kCosC10 = -2.59630184018533327616751194000244140625e-7f;
-
-	} // namespace
-
-	BX_CONST_FUNC float cos(float _a)
-	{
-		const float scaled = _a * 2.0f*kInvPi;
-		const float real   = floor(scaled);
-		const float xx     = _a - real * kPiHalf;
-		const int32_t bits = int32_t(real) & 3;
-
-		float c0, c2, c4, c6, c8, c10;
-
-		if (bits == 0
-		||  bits == 2)
-		{
-			c0  = 1.0f;
-			c2  = kCosC2;
-			c4  = kCosC4;
-			c6  = kCosC6;
-			c8  = kCosC8;
-			c10 = kCosC10;
-		}
-		else
-		{
-			c0  = xx;
-			c2  = kSinC2;
-			c4  = kSinC4;
-			c6  = kSinC6;
-			c8  = kSinC8;
-			c10 = kSinC10;
-		}
-
-		const float xsq    = square(xx);
-		const float tmp0   = mad(c10,  xsq, c8 );
-		const float tmp1   = mad(tmp0, xsq, c6 );
-		const float tmp2   = mad(tmp1, xsq, c4 );
-		const float tmp3   = mad(tmp2, xsq, c2 );
-		const float tmp4   = mad(tmp3, xsq, 1.0);
-		const float result = tmp4 * c0;
-
-		return bits == 1 || bits == 2
-			? -result
-			:  result
-			;
-	}
-
-	namespace
-	{
-		constexpr float kAcosC0 =  1.5707288f;
-		constexpr float kAcosC1 = -0.2121144f;
-		constexpr float kAcosC2 =  0.0742610f;
-		constexpr float kAcosC3 = -0.0187293f;
-
-	} // namespace
-
-	BX_CONST_FUNC float acos(float _a)
-	{
-		const float absa   = abs(_a);
-		const float tmp0   = mad(kAcosC3, absa, kAcosC2);
-		const float tmp1   = mad(tmp0,    absa, kAcosC1);
-		const float tmp2   = mad(tmp1,    absa, kAcosC0);
-		const float tmp3   = tmp2 * sqrt(1.0f - absa);
-		const float negate = float(_a < 0.0f);
-		const float tmp4   = tmp3 - 2.0f*negate*tmp3;
-		const float result = negate*kPi + tmp4;
-
-		return result;
-	}
-
-	namespace
-	{
-		constexpr float kAtan2C0 = -0.013480470f;
-		constexpr float kAtan2C1 =  0.057477314f;
-		constexpr float kAtan2C2 = -0.121239071f;
-		constexpr float kAtan2C3 =  0.195635925f;
-		constexpr float kAtan2C4 = -0.332994597f;
-		constexpr float kAtan2C5 =  0.999995630f;
-
-	} // namespace
-
-	BX_CONST_FUNC float atan2(float _y, float _x)
-	{
-		const float ax     = abs(_x);
-		const float ay     = abs(_y);
-		const float maxaxy = max(ax, ay);
-		const float minaxy = min(ax, ay);
-
-		if (maxaxy == 0.0f)
-		{
-			return _y < 0.0f ? -0.0f : 0.0f;
-		}
-
-		const float mxy    = minaxy / maxaxy;
-		const float mxysq  = square(mxy);
-		const float tmp0   = mad(kAtan2C0, mxysq, kAtan2C1);
-		const float tmp1   = mad(tmp0,     mxysq, kAtan2C2);
-		const float tmp2   = mad(tmp1,     mxysq, kAtan2C3);
-		const float tmp3   = mad(tmp2,     mxysq, kAtan2C4);
-		const float tmp4   = mad(tmp3,     mxysq, kAtan2C5);
-		const float tmp5   = tmp4 * mxy;
-		const float tmp6   = ay > ax   ? kPiHalf - tmp5 : tmp5;
-		const float tmp7   = _x < 0.0f ? kPi     - tmp6 : tmp6;
-		const float result = _y < 0.0f ? -tmp7 : tmp7;
-
-		return result;
-	}
-
-	float frexp(float _a, int32_t* _outExp)
-	{
-		const uint32_t ftob     = floatToBits(_a);
-		const uint32_t masked0  = uint32_and(ftob, kFloatExponentMask);
-		const uint32_t exp0     = uint32_srl(masked0, kFloatExponentBitShift);
-		const uint32_t masked1  = uint32_and(ftob,   kFloatSignMask | kFloatMantissaMask);
-		const uint32_t bits     = uint32_or(masked1, UINT32_C(0x3f000000) );
-		const float    result   = bitsToFloat(bits);
-
-		*_outExp = int32_t(exp0 - 0x7e);
-
-		return result;
-	}
-
-	namespace
-	{
-		constexpr float kExpC0  =  1.66666666666666019037e-01f;
-		constexpr float kExpC1  = -2.77777777770155933842e-03f;
-		constexpr float kExpC2  =  6.61375632143793436117e-05f;
-		constexpr float kExpC3  = -1.65339022054652515390e-06f;
-		constexpr float kExpC4  =  4.13813679705723846039e-08f;
-
-	} // namespace
-
-	BX_CONST_FUNC float exp(float _a)
-	{
-		if (abs(_a) <= kNearZero)
-		{
-			return _a + 1.0f;
-		}
-
-		const float kk     = round(_a*kInvLogNat2);
-		const float hi     = _a - kk*kLogNat2Hi;
-		const float lo     =      kk*kLogNat2Lo;
-		const float hml    = hi - lo;
-		const float hmlsq  = square(hml);
-		const float tmp0   = mad(kExpC4, hmlsq, kExpC3);
-		const float tmp1   = mad(tmp0,   hmlsq, kExpC2);
-		const float tmp2   = mad(tmp1,   hmlsq, kExpC1);
-		const float tmp3   = mad(tmp2,   hmlsq, kExpC0);
-		const float tmp4   = hml - hmlsq * tmp3;
-		const float tmp5   = hml*tmp4/(2.0f-tmp4);
-		const float tmp6   = 1.0f - ( (lo - tmp5) - hi);
-		const float result = ldexp(tmp6, int32_t(kk) );
-
-		return result;
-	}
-
-	namespace
-	{
-		constexpr float kLogC0 = 6.666666666666735130e-01f;
-		constexpr float kLogC1 = 3.999999999940941908e-01f;
-		constexpr float kLogC2 = 2.857142874366239149e-01f;
-		constexpr float kLogC3 = 2.222219843214978396e-01f;
-		constexpr float kLogC4 = 1.818357216161805012e-01f;
-		constexpr float kLogC5 = 1.531383769920937332e-01f;
-		constexpr float kLogC6 = 1.479819860511658591e-01f;
-
-	} // namespace
-
-	BX_CONST_FUNC float log(float _a)
-	{
-		int32_t exp;
-		float ff = frexp(_a, &exp);
-
-		if (ff < kSqrt2*0.5f)
-		{
-			ff *= 2.0f;
-			--exp;
-		}
-
-		ff -= 1.0f;
-		const float kk     = float(exp);
-		const float hi     = kk*kLogNat2Hi;
-		const float lo     = kk*kLogNat2Lo;
-		const float ss     = ff / (2.0f + ff);
-		const float s2     = square(ss);
-		const float s4     = square(s2);
-
-		const float tmp0   = mad(kLogC6, s4, kLogC4);
-		const float tmp1   = mad(tmp0,   s4, kLogC2);
-		const float tmp2   = mad(tmp1,   s4, kLogC0);
-		const float t1     = s2*tmp2;
-
-		const float tmp3   = mad(kLogC5, s4, kLogC3);
-		const float tmp4   = mad(tmp3,   s4, kLogC1);
-		const float t2     = s4*tmp4;
-
-		const float t12    = t1 + t2;
-		const float hfsq   = 0.5f*square(ff);
-		const float result = hi - ( (hfsq - (ss*(hfsq+t12) + lo) ) - ff);
-
-		return result;
-	}
-
 	void mtxLookAt(float* _result, const Vec3& _eye, const Vec3& _at, const Vec3& _up, Handedness::Enum _handedness)
 	{
 		const Vec3 view = normalize(

+ 53 - 29
tests/math_test.cpp

@@ -33,9 +33,16 @@ TEST_CASE("isFinite, isInfinite, isNan", "[math]")
 	}
 }
 
-static bool testLog2(float _a)
+TEST_CASE("log", "[math][libm]")
 {
-	return bx::log2(_a) == bx::log(_a) * (1.0f / bx::log(2.0f) );
+	STATIC_REQUIRE(0.0f == bx::log(1.0f) );
+	STATIC_REQUIRE(1.0f == bx::log(bx::kE) );
+	STATIC_REQUIRE(bx::kLogNat2 == bx::log(2.0f) );
+}
+
+static void testLog2(float _a)
+{
+	REQUIRE(bx::log2(_a) == bx::log(_a) * (1.0f / bx::log(2.0f) ) );
 }
 
 TEST_CASE("log2", "[math][libm]")
@@ -43,15 +50,15 @@ TEST_CASE("log2", "[math][libm]")
 	testLog2(0.0f);
 	testLog2(256.0f);
 
-	REQUIRE(0.0f == bx::log2(1.0f) );
-	REQUIRE(1.0f == bx::log2(2.0f) );
-	REQUIRE(2.0f == bx::log2(4.0f) );
-	REQUIRE(3.0f == bx::log2(8.0f) );
-	REQUIRE(4.0f == bx::log2(16.0f) );
-	REQUIRE(5.0f == bx::log2(32.0f) );
-	REQUIRE(6.0f == bx::log2(64.0f) );
-	REQUIRE(7.0f == bx::log2(128.0f) );
-	REQUIRE(8.0f == bx::log2(256.0f) );
+	STATIC_REQUIRE(0.0f == bx::log2(1.0f) );
+	STATIC_REQUIRE(1.0f == bx::log2(2.0f) );
+	STATIC_REQUIRE(2.0f == bx::log2(4.0f) );
+	STATIC_REQUIRE(3.0f == bx::log2(8.0f) );
+	STATIC_REQUIRE(4.0f == bx::log2(16.0f) );
+	STATIC_REQUIRE(5.0f == bx::log2(32.0f) );
+	STATIC_REQUIRE(6.0f == bx::log2(64.0f) );
+	STATIC_REQUIRE(7.0f == bx::log2(128.0f) );
+	STATIC_REQUIRE(8.0f == bx::log2(256.0f) );
 }
 
 TEST_CASE("ceilLog2", "[math]")
@@ -324,7 +331,7 @@ TEST_CASE("rsqrt", "[math][libm]")
 	bx::Error err;
 
 	// rsqrtRef
-	REQUIRE(bx::isInfinite(bx::rsqrtRef(0.0f) ) );
+	STATIC_REQUIRE(bx::isInfinite(bx::rsqrtRef(0.0f) ) );
 
 	for (float xx = bx::kNearZero; xx < 100.0f; xx += 0.1f)
 	{
@@ -361,7 +368,8 @@ TEST_CASE("sqrt", "[math][libm]")
 	bx::Error err;
 
 	// sqrtRef
-	REQUIRE(bx::isNan(bx::sqrtRef(-1.0f) ) );
+	STATIC_REQUIRE(bx::isNan(bx::sqrtRef(-1.0f) ) );
+
 	REQUIRE(bx::isEqual(bx::sqrtRef(0.0f), ::sqrtf(0.0f), 0.0f) );
 	REQUIRE(bx::isEqual(bx::sqrtRef(1.0f), ::sqrtf(1.0f), 0.0f) );
 
@@ -434,7 +442,7 @@ TEST_CASE("mod", "[math][libm]")
 typedef float (*MathFloatFn)(float);
 
 template<MathFloatFn BxT, MathFloatFn CrtT>
-void testMathFunc1Float(float _value)
+static void testMathFunc1Float(float _value)
 {
 	REQUIRE(CrtT(_value) == BxT(_value) );
 }
@@ -519,21 +527,21 @@ TEST_CASE("exp", "[math][libm]")
 
 TEST_CASE("pow", "[math][libm]")
 {
-	REQUIRE(1.0f == bx::pow(0.0f, 0.0f) );
-	REQUIRE(1.0f == bx::pow(1.0f, 0.0f) );
-	REQUIRE(1.0f == bx::pow(3.0f, 0.0f) );
-	REQUIRE(1.0f == bx::pow(8.0f, 0.0f) );
-	REQUIRE(1.0f == bx::pow(9.0f, 0.0f) );
-	REQUIRE(0.0f == bx::pow(0.0f, 2.0f) );
-
-	REQUIRE(   4.0f == bx::pow( 2.0f,  2.0f) );
-	REQUIRE(  -4.0f == bx::pow(-2.0f,  2.0f) );
-	REQUIRE(  0.25f == bx::pow( 2.0f, -2.0f) );
-	REQUIRE( -0.25f == bx::pow(-2.0f, -2.0f) );
-	REQUIRE(   8.0f == bx::pow( 2.0f,  3.0f) );
-	REQUIRE(  -8.0f == bx::pow(-2.0f,  3.0f) );
-	REQUIRE( 0.125f == bx::pow( 2.0f, -3.0f) );
-	REQUIRE(-0.125f == bx::pow(-2.0f, -3.0f) );
+	STATIC_REQUIRE(1.0f == bx::pow(0.0f, 0.0f) );
+	STATIC_REQUIRE(1.0f == bx::pow(1.0f, 0.0f) );
+	STATIC_REQUIRE(1.0f == bx::pow(3.0f, 0.0f) );
+	STATIC_REQUIRE(1.0f == bx::pow(8.0f, 0.0f) );
+	STATIC_REQUIRE(1.0f == bx::pow(9.0f, 0.0f) );
+	STATIC_REQUIRE(0.0f == bx::pow(0.0f, 2.0f) );
+
+	STATIC_REQUIRE(   4.0f == bx::pow( 2.0f,  2.0f) );
+	STATIC_REQUIRE(  -4.0f == bx::pow(-2.0f,  2.0f) );
+	STATIC_REQUIRE(  0.25f == bx::pow( 2.0f, -2.0f) );
+	STATIC_REQUIRE( -0.25f == bx::pow(-2.0f, -2.0f) );
+	STATIC_REQUIRE(   8.0f == bx::pow( 2.0f,  3.0f) );
+	STATIC_REQUIRE(  -8.0f == bx::pow(-2.0f,  3.0f) );
+	STATIC_REQUIRE( 0.125f == bx::pow( 2.0f, -3.0f) );
+	STATIC_REQUIRE(-0.125f == bx::pow(-2.0f, -3.0f) );
 
 	bx::WriterI* writer = bx::getNullOut();
 	bx::Error err;
@@ -548,6 +556,9 @@ TEST_CASE("pow", "[math][libm]")
 
 TEST_CASE("asin", "[math][libm]")
 {
+	STATIC_REQUIRE(bx::isEqual(       0.0f, bx::asin(0.0f), 0.0001f) );
+	STATIC_REQUIRE(bx::isEqual(bx::kPiHalf, bx::asin(1.0f), 0.0001f) );
+
 	bx::WriterI* writer = bx::getNullOut();
 	bx::Error err;
 
@@ -561,6 +572,11 @@ TEST_CASE("asin", "[math][libm]")
 
 TEST_CASE("sin", "[math][libm]")
 {
+	STATIC_REQUIRE(bx::isEqual( 0.0f, bx::sin(0.0f            ), 0.0000001f) );
+	STATIC_REQUIRE(bx::isEqual( 1.0f, bx::sin(bx::kPiHalf     ), 0.0000001f) );
+	STATIC_REQUIRE(bx::isEqual( 0.0f, bx::sin(bx::kPi         ), 0.0000001f) );
+	STATIC_REQUIRE(bx::isEqual(-1.0f, bx::sin(bx::kPiHalf*3.0f), 0.0000001f) );
+
 	bx::WriterI* writer = bx::getNullOut();
 	bx::Error err;
 
@@ -624,6 +640,9 @@ TEST_CASE("sinh", "[math][libm]")
 
 TEST_CASE("acos", "[math][libm]")
 {
+	STATIC_REQUIRE(bx::isEqual(bx::kPiHalf, bx::acos(0.0f), 0.0001f) );
+	STATIC_REQUIRE(bx::isEqual(       0.0f, bx::acos(1.0f), 0.0001f) );
+
 	bx::WriterI* writer = bx::getNullOut();
 	bx::Error err;
 
@@ -637,6 +656,11 @@ TEST_CASE("acos", "[math][libm]")
 
 TEST_CASE("cos", "[math][libm]")
 {
+	STATIC_REQUIRE(bx::isEqual( 1.0f, bx::cos(0.0f            ), 0.0000001f) );
+	STATIC_REQUIRE(bx::isEqual( 0.0f, bx::cos(bx::kPiHalf     ), 0.0000001f) );
+	STATIC_REQUIRE(bx::isEqual(-1.0f, bx::cos(bx::kPi         ), 0.0000001f) );
+	STATIC_REQUIRE(bx::isEqual( 0.0f, bx::cos(bx::kPiHalf*3.0f), 0.0000001f) );
+
 	bx::WriterI* writer = bx::getNullOut();
 	bx::Error err;