3 years ago · 6793dcf7b4
--- a/Jolt/Math/Vec4.inl
+++ b/Jolt/Math/Vec4.inl
@@ -717,73 +717,60 @@ float Vec4::ReduceMax() const
 
				 
			
 
				 void Vec4::SinCos(Vec4 &outSin, Vec4 &outCos) const
			
 
				 {
			
 
				-	// Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function and vectorizes it
			
 
				-	// Original implementation by Stephen L. Moshier (See: http://www.netlib.org/cephes/)
			
 
				+	// Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function, changes octants to quadrants and vectorizes it
			
 
				+	// Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
			
 
				 
			
 
				-	// Make argument positive and remember sign (highest bit set is negative)
			
 
				+	// Make argument positive and remember sign for sin only since cos is symmetric around x (highest bit of a float is the sign bit)
			
 
				 	UVec4 sin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
			
 
				 	Vec4 x = Vec4::sXor(*this, sin_sign.ReinterpretAsFloat());
			
 
				 
			
 
				-	// Integer part of x / (PI / 4)
			
 
				-	UVec4 int_val = (1.27323954473516f * x).ToInt();
			
 
				-	Vec4 y = int_val.ToFloat();
			
 
				-
			
 
				-	// Integer and fractional part modulo one octant, map zeros to origin
			
 
				-	// if (int_val & 1) int_val++, y += 1;
			
 
				-	UVec4 and_1 = int_val.LogicalShiftLeft<31>().ArithmeticShiftRight<31>();
			
 
				-	int_val += UVec4::sAnd(and_1, UVec4::sReplicate(1));
			
 
				-	y += Vec4::sAnd(and_1.ReinterpretAsFloat(), Vec4::sReplicate(1.0f));
			
 
				-
			
 
				-	// Extended precision modular arithmetic
			
 
				-	x = ((x - y * 0.78515625f) - y * 2.4187564849853515625e-4f) - y * 3.77489497744594108e-8f;
			
 
				-
			
 
				-	// Calculate both results
			
 
				-	Vec4 z = x * x;
			
 
				-	Vec4 y1 = ((2.443315711809948e-5f * z - Vec4::sReplicate(1.388731625493765e-3f)) * z + Vec4::sReplicate(4.166664568298827e-2f)) * z * z - 0.5f * z + Vec4::sReplicate(1.0f);
			
 
				-	Vec4 y2 = ((-1.9515295891e-4f * z + Vec4::sReplicate(8.3321608736e-3f)) * z - Vec4::sReplicate(1.6666654611e-1f)) * z * x + x;
			
 
				-
			
 
				-	// From here we deviate form the original cephes code, we would have to write:
			
 
				-	//
			
 
				-	// j &= 7;
			
 
				-	// 
			
 
				-	// if (j > 3)
			
 
				-	// {
			
 
				-	//		j -= 4;
			
 
				-	//		sin_sign = -sin_sign;
			
 
				-	//		cos_sign = -cos_sign;
			
 
				-	// }
			
 
				-	// 
			
 
				-	// if (j > 1)
			
 
				-	//		cos_sign = -cos_sign;
			
 
				-	//
			
 
				-	// ...
			
 
				-	//
			
 
				-	// if (j == 1 || j == 2) // condition
			
 
				-	//		...
			
 
				+	// x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
			
 
				+	UVec4 quadrant = (0.6366197723675814f * x + Vec4::sReplicate(0.5f)).ToInt();
			
 
				+
			
 
				+	// Make x relative to the closest quadrant.
			
 
				+	// This does x = x - quadrant * PI / 2 using a two step Cody-Waite argument reduction.
			
 
				+	// This improves the accuracy of the result by avoiding loss of significant bits in the subtraction.
			
 
				+	// We start with x = x - quadrant * PI / 2, PI / 2 in hexadecimal notation is 0x3fc90fdb, we remove the lowest 16 bits to
			
 
				+	// get 0x3fc90000 (= 1.5703125) this means we can now multiply with a number of up to 2^16 without losing any bits.
			
 
				+	// This leaves us with: x = (x - quadrant * 1.5703125) - quadrant * (PI / 2 - 1.5703125).
			
 
				+	// PI / 2 - 1.5703125 in hexadecimal is 0x39fdaa22, stripping the lowest 12 bits we get 0x39fda000 (= 0.0004837512969970703125)
			
 
				+	// This leaves uw with: x = ((x - quadrant * 1.5703125) - quadrant * 0.0004837512969970703125) - quadrant * (PI / 2 - 1.5703125 - 0.0004837512969970703125)
			
 
				+	// See: https://stackoverflow.com/questions/42455143/sine-cosine-modular-extended-precision-arithmetic
			
 
				+	// After this we have x in the range [-PI / 4, PI / 4].
			
 
				+	Vec4 float_quadrant = quadrant.ToFloat();
			
 
				+	x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;
			
 
				+
			
 
				+	// Calculate x2 = x^2
			
 
				+	Vec4 x2 = x * x;
			
 
				+
			
 
				+	// Taylor expansion:
			
 
				+	// Cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! + ... = (((x2/8!- 1/6!) * x2 + 1/4!) * x2 - 1/2!) * x2 + 1
			
 
				+	Vec4 taylor_cos = ((2.443315711809948e-5f * x2 - Vec4::sReplicate(1.388731625493765e-3f)) * x2 + Vec4::sReplicate(4.166664568298827e-2f)) * x2 * x2 - 0.5f * x2 + Vec4::sReplicate(1.0f);
			
 
				+	// Sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... = ((-x2/7! + 1/5!) * x2 - 1/3!) * x2 * x + x
			
 
				+	Vec4 taylor_sin = ((-1.9515295891e-4f * x2 + Vec4::sReplicate(8.3321608736e-3f)) * x2 - Vec4::sReplicate(1.6666654611e-1f)) * x2 * x + x;
			
 
				+
			
 
				+	// The lowest 2 bits of quadrant indicate the quadrant that we are in.
			
 
				+	// Let x be the original input value and x' our value that has been mapped to the range [-PI / 4, PI / 4].
			
 
				+	// since cos(x) = sin(x - PI / 2) and since we want to use the Taylor expansion as close as possible to 0,
			
 
				+	// we can alternate between using the Taylor expansion for sin and cos according to the following table:
			
 
				 	// 
			
 
				-	// j		sin_sign	cos_sign	condition
			
 
				-	// 000b     1			1			0
			
 
				-	// 001b     1			1			1
			
 
				-	// 010b     1			-1			1
			
 
				-	// 011b     1			-1			0
			
 
				-	// 100b     -1			-1			0
			
 
				-	// 101b     -1			-1			1
			
 
				-	// 110b     -1			1			1
			
 
				-	// 111b		-1			1			0
			
 
				+	// quadrant	 sin(x)		 cos(x)
			
 
				+	// XXX00b	 sin(x')	 cos(x')
			
 
				+	// XXX01b	 cos(x')	-sin(x')
			
 
				+	// XXX10b	-sin(x')	-cos(x')
			
 
				+	// XXX11b	-cos(x')	 sin(x')
			
 
				 	//
			
 
				-	// So: sin_sign = bit3, cos_sign = bit2 ^ bit3, condition = bit1 ^ bit2
			
 
				-	UVec4 bit1 = int_val.LogicalShiftLeft<31>();
			
 
				-	UVec4 bit2 = UVec4::sAnd(int_val.LogicalShiftLeft<30>(), UVec4::sReplicate(0x80000000U));
			
 
				-	UVec4 bit3 = UVec4::sAnd(int_val.LogicalShiftLeft<29>(), UVec4::sReplicate(0x80000000U));
			
 
				+	// So: sin_sign = bit2, cos_sign = bit1 ^ bit2, bit1 determines if we use sin or cos Taylor expansion
			
 
				+	UVec4 bit1 = quadrant.LogicalShiftLeft<31>();
			
 
				+	UVec4 bit2 = UVec4::sAnd(quadrant.LogicalShiftLeft<30>(), UVec4::sReplicate(0x80000000U));
			
 
				 
			
 
				 	// Select which one of the results is sin and which one is cos
			
 
				-	UVec4 xor_1_2 = UVec4::sXor(bit1, bit2);
			
 
				-	Vec4 s = Vec4::sSelect(y2, y1, xor_1_2);
			
 
				-	Vec4 c = Vec4::sSelect(y1, y2, xor_1_2);
			
 
				+	Vec4 s = Vec4::sSelect(taylor_sin, taylor_cos, bit1);
			
 
				+	Vec4 c = Vec4::sSelect(taylor_cos, taylor_sin, bit1);
			
 
				 
			
 
				 	// Update the signs
			
 
				-	sin_sign = UVec4::sXor(sin_sign, bit3);
			
 
				-	UVec4 cos_sign = UVec4::sXor(bit2, bit3);
			
 
				+	sin_sign = UVec4::sXor(sin_sign, bit2);
			
 
				+	UVec4 cos_sign = UVec4::sXor(bit1, bit2);
			
 
				 
			
 
				 	// Correct the signs
			
 
				 	outSin = Vec4::sXor(s, sin_sign.ReinterpretAsFloat());