فهرست منبع

Implemented vectorized versions of ASin, ACos, ATan, ATan2 (#188)

* Removed locations where we clamped values before sending to ACos, this is now done internally
* This should make all trigonometric functions platform independent
Jorrit Rouwe 3 سال پیش
والد
کامیت
9cbee369a6

+ 7 - 0
Jolt/Core/Core.h

@@ -287,6 +287,13 @@ static_assert(sizeof(void *) == (JPH_CPU_ADDRESS_BITS == 64? 8 : 4), "Invalid si
 	#define JPH_IF_DEBUG(...)
 #endif
 
+// Shorthand for #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED / #endif
+#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
+	#define JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(...)	__VA_ARGS__
+#else
+	#define JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(...)
+#endif
+
 // Macro to indicate that a parameter / variable is unused
 #define JPH_UNUSED(x)			(void)x
 

+ 13 - 11
Jolt/Math/Trigonometry.h

@@ -7,7 +7,7 @@ JPH_NAMESPACE_BEGIN
 
 // Note that this file exists because std::sin etc. are not platform independent and will lead to non-deterministic simulation
 
-/// Sine of x
+/// Sine of x (input in radians)
 JPH_INLINE float Sin(float inX)
 {
 	Vec4 s, c;
@@ -15,7 +15,7 @@ JPH_INLINE float Sin(float inX)
 	return s.GetX();
 }
 
-/// Cosine of x
+/// Cosine of x (input in radians)
 JPH_INLINE float Cos(float inX)
 {
 	Vec4 s, c;
@@ -23,34 +23,36 @@ JPH_INLINE float Cos(float inX)
 	return c.GetX();
 }
 
-/// Tangent of x
+/// Tangent of x (input in radians)
 JPH_INLINE float Tan(float inX)
 {
 	return Vec4::sReplicate(inX).Tan().GetX();
 }
 
-/// Arc sine of x
+/// Arc sine of x (returns value in the range [-PI / 2, PI / 2])
+/// Note that all input values will be clamped to the range [-1, 1] and this function will not return NaNs like std::asin
 JPH_INLINE float ASin(float inX)
 {
-	return asin(inX);
+	return Vec4::sReplicate(inX).ASin().GetX();
 }
 
-/// Arc cosine of x
+/// Arc cosine of x (returns value in the range [0, PI])
+/// Note that all input values will be clamped to the range [-1, 1] and this function will not return NaNs like std::acos
 JPH_INLINE float ACos(float inX)
 {
-	return acos(inX);
+	return Vec4::sReplicate(inX).ACos().GetX();
 }
 
-/// Arc tangent of x
+/// Arc tangent of x (returns value in the range [-PI / 2, PI / 2])
 JPH_INLINE float ATan(float inX)
 {
-	return atan(inX);
+	return Vec4::sReplicate(inX).ATan().GetX();
 }
 
-/// Arc tangent of y / x
+/// Arc tangent of y / x using the signs of the arguments to determine the correct quadrant (returns value in the range [-PI, PI])
 JPH_INLINE float ATan2(float inY, float inX)
 {
-	return atan2(inY, inX);
+	return Vec4::sATan2(Vec4::sReplicate(inY), Vec4::sReplicate(inX)).GetX();
 }
 
 JPH_NAMESPACE_END

+ 16 - 2
Jolt/Math/Vec4.h

@@ -234,12 +234,26 @@ public:
 	/// Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
 	JPH_INLINE Vec4				GetSign() const;
 
-	/// Calcluate the sine and cosine for each element of this vector
+	/// Calcluate the sine and cosine for each element of this vector (input in radians)
 	inline void					SinCos(Vec4 &outSin, Vec4 &outCos) const;
 
-	/// Calcluate the tangent for each element of this vector
+	/// Calcluate the tangent for each element of this vector (input in radians)
 	inline Vec4					Tan() const;
 
+	/// Calculate the arc sine for each element of this vector (returns value in the range [-PI / 2, PI / 2])
+	/// Note that all input values will be clamped to the range [-1, 1] and this function will not return NaNs like std::asin
+	inline Vec4					ASin() const;
+
+	/// Calculate the arc cosine for each element of this vector (returns value in the range [0, PI])
+	/// Note that all input values will be clamped to the range [-1, 1] and this function will not return NaNs like std::acos
+	inline Vec4					ACos() const;
+
+	/// Calculate the arc tangent for each element of this vector (returns value in the range [-PI / 2, PI / 2])
+	inline Vec4					ATan() const;
+
+	/// Calculate the arc tangent of y / x using the signs of the arguments to determine the correct quadrant (returns value in the range [-PI, PI])
+	inline static Vec4			sATan2(Vec4Arg inY, Vec4Arg inX);
+
 	/// To String
 	friend ostream &			operator << (ostream &inStream, Vec4Arg inV)
 	{

+ 109 - 1
Jolt/Math/Vec4.inl

@@ -804,10 +804,118 @@ Vec4 Vec4::Tan() const
 
 	// For the 2nd and 4th quadrant we need to invert the value
 	UVec4 bit1 = quadrant.LogicalShiftLeft<31>();
-	tan = Vec4::sSelect(tan, Vec4::sReplicate(-1.0f) / tan, bit1);
+	tan = Vec4::sSelect(tan, Vec4::sReplicate(-1.0f) / (tan JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(+ Vec4::sReplicate(FLT_MIN))), bit1); // Add small epsilon to prevent div by zero, works because tan is always positive
 
 	// Put the sign back
 	return Vec4::sXor(tan, tan_sign.ReinterpretAsFloat());
 }
 
+Vec4 Vec4::ASin() const
+{
+	// Implementation based on asinf.c from the cephes library
+	// Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
+
+	// Make argument positive
+	UVec4 asin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
+	Vec4 a = Vec4::sXor(*this, asin_sign.ReinterpretAsFloat());
+
+	// ASin is not defined outside the range [-1, 1] but it often happens that a value is slightly above 1 so we just clamp here
+	a = Vec4::sMin(a, Vec4::sReplicate(1.0f));
+
+	// When |x| <= 0.5 we use the asin approximation as is
+	Vec4 z1 = a * a;
+	Vec4 x1 = a;
+
+	// When |x| > 0.5 we use the identity asin(x) = PI / 2 - 2 * asin(sqrt((1 - x) / 2))
+	Vec4 z2 = 0.5f * (Vec4::sReplicate(1.0f) - a);
+	Vec4 x2 = z2.Sqrt();
+
+	// Select which of the two situations we have
+	UVec4 greater = Vec4::sGreater(a, Vec4::sReplicate(0.5f));
+	Vec4 z = Vec4::sSelect(z1, z2, greater);
+	Vec4 x = Vec4::sSelect(x1, x2, greater);
+
+	// Polynomial approximation of asin
+	z = ((((4.2163199048e-2f * z + Vec4::sReplicate(2.4181311049e-2f)) * z + Vec4::sReplicate(4.5470025998e-2f)) * z + Vec4::sReplicate(7.4953002686e-2f)) * z + Vec4::sReplicate(1.6666752422e-1f)) * z * x + x;
+
+	// If |x| > 0.5 we need to apply the remainder of the identity above
+	z = Vec4::sSelect(z, Vec4::sReplicate(0.5f * JPH_PI) - (z + z), greater);
+
+	// Put the sign back
+	return Vec4::sXor(z, asin_sign.ReinterpretAsFloat());
+}
+
+Vec4 Vec4::ACos() const
+{
+	// Not the most accurate, but simple
+	return Vec4::sReplicate(0.5f * JPH_PI) - ASin();
+}
+
+Vec4 Vec4::ATan() const
+{
+	// Implementation based on atanf.c from the cephes library
+	// Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
+
+	// Make argument positive
+	UVec4 atan_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
+	Vec4 x = Vec4::sXor(*this, atan_sign.ReinterpretAsFloat());
+	Vec4 y = Vec4::sZero();
+
+	// If x > Tan(PI / 8)
+	UVec4 greater1 = Vec4::sGreater(x, Vec4::sReplicate(0.4142135623730950f)); 
+	Vec4 x1 = (x - Vec4::sReplicate(1.0f)) / (x + Vec4::sReplicate(1.0f));
+
+	// If x > Tan(3 * PI / 8)
+	UVec4 greater2 = Vec4::sGreater(x, Vec4::sReplicate(2.414213562373095f)); 
+	Vec4 x2 = Vec4::sReplicate(-1.0f) / (x JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(+ Vec4::sReplicate(FLT_MIN))); // Add small epsilon to prevent div by zero, works because x is always positive
+
+	// Apply first if
+	x = Vec4::sSelect(x, x1, greater1);
+	y = Vec4::sSelect(y, Vec4::sReplicate(0.25f * JPH_PI), greater1);
+
+	// Apply second if
+	x = Vec4::sSelect(x, x2, greater2);
+	y = Vec4::sSelect(y, Vec4::sReplicate(0.5f * JPH_PI), greater2);
+
+	// Polynomial approximation
+	Vec4 z = x * x;
+	y += (((8.05374449538e-2f * z - Vec4::sReplicate(1.38776856032e-1f)) * z + Vec4::sReplicate(1.99777106478e-1f)) * z - Vec4::sReplicate(3.33329491539e-1f)) * z * x + x;
+
+	// Put the sign back
+	return Vec4::sXor(y, atan_sign.ReinterpretAsFloat());
+}
+
+Vec4 Vec4::sATan2(Vec4Arg inY, Vec4Arg inX)
+{
+	UVec4 sign_mask = UVec4::sReplicate(0x80000000U);
+
+	// Determine absolute value and sign of y
+	UVec4 y_sign = UVec4::sAnd(inY.ReinterpretAsInt(), sign_mask);
+	Vec4 y_abs = Vec4::sXor(inY, y_sign.ReinterpretAsFloat());
+
+	// Determine absolute value and sign of x
+	UVec4 x_sign = UVec4::sAnd(inX.ReinterpretAsInt(), sign_mask);
+	Vec4 x_abs = Vec4::sXor(inX, x_sign.ReinterpretAsFloat());
+
+	// Always divide smallest / largest to avoid dividing by zero
+	UVec4 x_is_numerator = Vec4::sLess(x_abs, y_abs);
+	Vec4 numerator = Vec4::sSelect(y_abs, x_abs, x_is_numerator);
+	Vec4 denominator = Vec4::sSelect(x_abs, y_abs, x_is_numerator);
+	Vec4 atan = (numerator / denominator).ATan();
+
+	// If we calculated x / y instead of y / x the result is PI / 2 - result (note that this is true because we know the result is positive because the input was positive)
+	atan = Vec4::sSelect(atan, Vec4::sReplicate(0.5f * JPH_PI) - atan, x_is_numerator);
+
+	// Now we need to map to the correct quadrant
+	// x_sign	y_sign	result
+	// +1		+1		atan
+	// -1		+1		-atan + PI
+	// -1		-1		atan - PI
+	// +1		-1		-atan
+	// This can be written as: x_sign * y_sign * (atan - (x_sign < 0? PI : 0))
+	atan -= Vec4::sAnd(x_sign.ArithmeticShiftRight<31>().ReinterpretAsFloat(), Vec4::sReplicate(JPH_PI));
+	atan = Vec4::sXor(atan, UVec4::sXor(x_sign, y_sign).ReinterpretAsFloat());
+	return atan;
+}
+
 JPH_NAMESPACE_END

+ 1 - 1
Jolt/Physics/Vehicle/WheeledVehicleController.cpp

@@ -109,7 +109,7 @@ void WheelWV::Update(float inDeltaTime, const VehicleConstraint &inConstraint)
 
 		// Calculate lateral friction based on slip angle
 		float relative_velocity_len = relative_velocity.Length();
-		float lateral_slip_angle = relative_velocity_len < 1.0e-3f? 0.0f : RadiansToDegrees(ACos(min(abs(relative_longitudinal_velocity) / relative_velocity_len, 1.0f)));
+		float lateral_slip_angle = relative_velocity_len < 1.0e-3f? 0.0f : RadiansToDegrees(ACos(abs(relative_longitudinal_velocity) / relative_velocity_len));
 		float lateral_slip_friction = settings->mLateralFriction.GetValue(lateral_slip_angle);
 
 		// Tire friction

+ 1 - 1
Samples/Tests/ConvexCollision/EPATest.cpp

@@ -70,7 +70,7 @@ bool EPATest::CollideBoxSphere(Mat44Arg inMatrix, const AABox &inBox, const Sphe
 		// Check angle between v1 and v2
 		float dot = v1.Dot(v2);
 		float len = v1.Length() * v2.Length();
-		float angle = RadiansToDegrees(ACos(Clamp(dot / len, -1.0f, 1.0f)));
+		float angle = RadiansToDegrees(ACos(dot / len));
 		JPH_ASSERT(angle < 0.1f);
 		Trace("Angle = %.9g", (double)angle);
 

+ 1 - 1
UnitTests/Geometry/EPATests.cpp

@@ -19,7 +19,7 @@ TEST_SUITE("EPATests")
 	{
 		float dot = inV1.Dot(inV2);
 		float len = inV1.Length() * inV2.Length();
-		return RadiansToDegrees(ACos(Clamp(dot / len, -1.0f, 1.0f)));
+		return RadiansToDegrees(ACos(dot / len));
 	}
 
 	/// Test box versus sphere and compare analytical solution with that of the EPA algorithm

+ 148 - 1
UnitTests/Math/Vec4Tests.cpp

@@ -510,6 +510,12 @@ TEST_SUITE("Vec4Tests")
 
 	TEST_CASE("TestVec4SinCos")
 	{
+		// Check edge cases
+		Vec4 vs, vc;
+		Vec4(0, 0.5f * JPH_PI, JPH_PI, -0.5f * JPH_PI).SinCos(vs, vc);
+		CHECK(vs.IsClose(Vec4(0, 1, 0, -1), 1.0e-7f));
+		CHECK(vc.IsClose(Vec4(1, 0, -1, 0), 1.0e-7f));
+
 		double ms = 0.0, mc = 0.0;
 
 		for (float x = -100.0f * JPH_PI; x < 100.0f * JPH_PI; x += 1.0e-3f)
@@ -518,7 +524,6 @@ TEST_SUITE("Vec4Tests")
 			Vec4 xv = Vec4::sReplicate(x) + Vec4(0.0e-4f, 2.5e-4f, 5.0e-4f, 7.5e-4f);
 
 			// Calculate sin and cos
-			Vec4 vs, vc;
 			xv.SinCos(vs, vc);
 
 			for (int i = 0; i < 4; ++i)
@@ -541,6 +546,11 @@ TEST_SUITE("Vec4Tests")
 
 	TEST_CASE("TestVec4Tan")
 	{
+		// Check edge cases
+		CHECK(Vec4::sReplicate(0.0f).Tan() == Vec4::sZero());
+		CHECK(Vec4::sReplicate(0.5f * JPH_PI - 1.0e-6f).Tan().GetX() > 1.0e6f);
+		CHECK(Vec4::sReplicate(0.5f * JPH_PI + 1.0e-6f).Tan().GetX() < -1.0e6f);
+
 		double mt = 0.0;
 
 		for (float x = -100.0f * JPH_PI; x < 100.0f * JPH_PI; x += 1.0e-3f)
@@ -562,4 +572,141 @@ TEST_SUITE("Vec4Tests")
 
 		CHECK(mt < 1.5e-7f);
 	}
+
+	TEST_CASE("TestVec4ASin")
+	{
+		// Check edge cases
+		CHECK(Vec4::sReplicate(0.0f).ASin() == Vec4::sZero());
+		CHECK(Vec4::sReplicate(1.0f).ASin() == Vec4::sReplicate(0.5f * JPH_PI));
+		CHECK(Vec4::sReplicate(-1.0f).ASin() == Vec4::sReplicate(-0.5f * JPH_PI));
+
+		double ma = 0.0;
+
+		for (float x = -1.0f; x <= 1.0f; x += 1.0e-3f)
+		{
+			// Create a vector with intermediate values
+			Vec4 xv = Vec4::sMin(Vec4::sReplicate(x) + Vec4(0.0e-4f, 2.5e-4f, 5.0e-4f, 7.5e-4f), Vec4::sReplicate(1.0f));
+
+			// Calculate asin
+			Vec4 va = xv.ASin();
+
+			for (int i = 0; i < 4; ++i)
+			{
+				// Check accuracy of asin
+				double a1 = asin((double)xv[i]), a2 = (double)va[i];
+				double da = abs(a2 - a1);
+				ma = max(ma, da);
+			}
+		}
+
+		CHECK(ma < 2.0e-7f);
+
+		// Check that inputs are clamped as promised
+		CHECK(Vec4::sReplicate(-1.1f).ASin() == Vec4::sReplicate(-0.5f * JPH_PI));
+		CHECK(Vec4::sReplicate(1.1f).ASin() == Vec4::sReplicate(0.5f * JPH_PI));
+	}
+
+	TEST_CASE("TestVec4ACos")
+	{
+		// Check edge cases
+		CHECK(Vec4::sReplicate(0.0f).ACos() == Vec4::sReplicate(0.5f * JPH_PI));
+		CHECK(Vec4::sReplicate(1.0f).ACos() == Vec4::sZero());
+		CHECK(Vec4::sReplicate(-1.0f).ACos() == Vec4::sReplicate(JPH_PI));
+
+		double ma = 0.0;
+
+		for (float x = -1.0f; x <= 1.0f; x += 1.0e-3f)
+		{
+			// Create a vector with intermediate values
+			Vec4 xv = Vec4::sMin(Vec4::sReplicate(x) + Vec4(0.0e-4f, 2.5e-4f, 5.0e-4f, 7.5e-4f), Vec4::sReplicate(1.0f));
+
+			// Calculate acos
+			Vec4 va = xv.ACos();
+
+			for (int i = 0; i < 4; ++i)
+			{
+				// Check accuracy of acos
+				double a1 = acos((double)xv[i]), a2 = (double)va[i];
+				double da = abs(a2 - a1);
+				ma = max(ma, da);
+			}
+		}
+
+		CHECK(ma < 3.5e-7f);
+
+		// Check that inputs are clamped as promised
+		CHECK(Vec4::sReplicate(-1.1f).ACos() == Vec4::sReplicate(JPH_PI));
+		CHECK(Vec4::sReplicate(1.1f).ACos() == Vec4::sZero());
+	}
+
+	TEST_CASE("TestVec4ATan")
+	{
+		// Check edge cases
+		CHECK(Vec4::sReplicate(0.0f).ATan() == Vec4::sZero());
+		CHECK(Vec4::sReplicate(FLT_MAX).ATan() == Vec4::sReplicate(0.5f * JPH_PI));
+		CHECK(Vec4::sReplicate(-FLT_MAX).ATan() == Vec4::sReplicate(-0.5f * JPH_PI));
+
+		double ma = 0.0;
+
+		for (float x = -100.0f; x < 100.0f; x += 1.0e-3f)
+		{
+			// Create a vector with intermediate values
+			Vec4 xv = Vec4::sReplicate(x) + Vec4(0.0e-4f, 2.5e-4f, 5.0e-4f, 7.5e-4f);
+
+			// Calculate atan
+			Vec4 va = xv.ATan();
+
+			for (int i = 0; i < 4; ++i)
+			{
+				// Check accuracy of atan
+				double a1 = atan((double)xv[i]), a2 = (double)va[i];
+				double da = abs(a2 - a1);
+				ma = max(ma, da);
+			}
+		}
+
+		CHECK(ma < 1.5e-7f);
+	}
+
+	TEST_CASE("TestVec4ATan2")
+	{
+		double ma = 0.0;
+
+		// Test the axis
+		CHECK(Vec4::sATan2(Vec4::sZero(), Vec4::sReplicate(10.0f)) == Vec4::sZero());
+		CHECK(Vec4::sATan2(Vec4::sZero(), Vec4::sReplicate(-10.0f)) == Vec4::sReplicate(JPH_PI));
+		CHECK(Vec4::sATan2(Vec4::sReplicate(10.0f), Vec4::sZero()) == Vec4::sReplicate(0.5f * JPH_PI));
+		CHECK(Vec4::sATan2(Vec4::sReplicate(-10.0f), Vec4::sZero()) == Vec4::sReplicate(-0.5f * JPH_PI));
+
+		// Test the 4 quadrants
+		CHECK(Vec4::sATan2(Vec4::sReplicate(10.0f), Vec4::sReplicate(10.0f)) == Vec4::sReplicate(0.25f * JPH_PI));
+		CHECK(Vec4::sATan2(Vec4::sReplicate(10.0f), Vec4::sReplicate(-10.0f)) == Vec4::sReplicate(0.75f * JPH_PI));
+		CHECK(Vec4::sATan2(Vec4::sReplicate(-10.0f), Vec4::sReplicate(-10.0f)) == Vec4::sReplicate(-0.75f * JPH_PI));
+		CHECK(Vec4::sATan2(Vec4::sReplicate(-10.0f), Vec4::sReplicate(10.0f)) == Vec4::sReplicate(-0.25f * JPH_PI));
+
+		for (float y = -5.0f; y < 5.0f; y += 1.0e-2f)
+		{
+			// Create a vector with intermediate values
+			Vec4 yv = Vec4::sReplicate(y) + Vec4(0.0e-3f, 2.5e-3f, 5.0e-3f, 7.5e-3f);
+
+			for (float x = -5.0f; x < 5.0f; x += 1.0e-2f)
+			{
+				// Create a vector with intermediate values
+				Vec4 xv = Vec4::sReplicate(x) + Vec4(0.0e-3f, 2.5e-3f, 5.0e-3f, 7.5e-3f);
+
+				// Calculate atan
+				Vec4 va = Vec4::sATan2(yv, xv);
+
+				for (int i = 0; i < 4; ++i)
+				{
+					// Check accuracy of atan
+					double a1 = atan2((double)yv[i], (double)xv[i]), a2 = (double)va[i];
+					double da = abs(a2 - a1);
+					ma = max(ma, da);
+				}
+			}
+		}
+
+		CHECK(ma < 3.0e-7f);
+	}
 }